Merge branch 'fix/hda' into for-linus
[pandora-kernel.git] / fs / ocfs2 / xattr.c
1 /* -*- mode: c; c-basic-offset: 8; -*-
2  * vim: noexpandtab sw=8 ts=8 sts=0:
3  *
4  * xattr.c
5  *
6  * Copyright (C) 2004, 2008 Oracle.  All rights reserved.
7  *
8  * CREDITS:
 * Lots of the code in this file is copied from linux/fs/ext3/xattr.c.
10  * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
11  *
12  * This program is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU General Public
14  * License version 2 as published by the Free Software Foundation.
15  *
16  * This program is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * General Public License for more details.
20  */
21
22 #include <linux/capability.h>
23 #include <linux/fs.h>
24 #include <linux/types.h>
25 #include <linux/slab.h>
26 #include <linux/highmem.h>
27 #include <linux/pagemap.h>
28 #include <linux/uio.h>
29 #include <linux/sched.h>
30 #include <linux/splice.h>
31 #include <linux/mount.h>
32 #include <linux/writeback.h>
33 #include <linux/falloc.h>
34 #include <linux/sort.h>
35 #include <linux/init.h>
36 #include <linux/module.h>
37 #include <linux/string.h>
38 #include <linux/security.h>
39
40 #define MLOG_MASK_PREFIX ML_XATTR
41 #include <cluster/masklog.h>
42
43 #include "ocfs2.h"
44 #include "alloc.h"
45 #include "blockcheck.h"
46 #include "dlmglue.h"
47 #include "file.h"
48 #include "symlink.h"
49 #include "sysfile.h"
50 #include "inode.h"
51 #include "journal.h"
52 #include "ocfs2_fs.h"
53 #include "suballoc.h"
54 #include "uptodate.h"
55 #include "buffer_head_io.h"
56 #include "super.h"
57 #include "xattr.h"
58 #include "refcounttree.h"
59 #include "acl.h"
60
/*
 * An ocfs2_xattr_value_root immediately followed by a single extent
 * record.  OCFS2_XATTR_ROOT_SIZE is the size of this structure.
 */
struct ocfs2_xattr_def_value_root {
        struct ocfs2_xattr_value_root   xv;
        struct ocfs2_extent_rec         er;
};
65
/*
 * In-memory handle for one xattr bucket: the inode it belongs to and the
 * buffer_heads of the blocks that back it.  Allocated by
 * ocfs2_xattr_bucket_new() and released by ocfs2_xattr_bucket_free().
 */
struct ocfs2_xattr_bucket {
        /* The inode these xattrs are associated with */
        struct inode *bu_inode;

        /* The actual buffers that make up the bucket */
        struct buffer_head *bu_bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET];

        /* How many blocks make up one bucket for this filesystem */
        int bu_blocks;
};
76
/*
 * Resources handed down through the xattr modification paths in this
 * file: the journal handle, the metadata and data allocation contexts,
 * and the dealloc context that removed extents/clusters are queued on.
 */
struct ocfs2_xattr_set_ctxt {
        handle_t *handle;
        struct ocfs2_alloc_context *meta_ac;
        struct ocfs2_alloc_context *data_ac;
        struct ocfs2_cached_dealloc_ctxt dealloc;
};
83
/* Size of a value root plus its one extent record */
#define OCFS2_XATTR_ROOT_SIZE   (sizeof(struct ocfs2_xattr_def_value_root))
/*
 * Values longer than this are accounted as OCFS2_XATTR_ROOT_SIZE rather
 * than their own length (see namevalue_size()).
 */
#define OCFS2_XATTR_INLINE_SIZE 80
/* Bytes subtracted, in addition to the header, from the free-space figures */
#define OCFS2_XATTR_HEADER_GAP  4
/* Minimum inline xattr area, less the xattr header and the header gap */
#define OCFS2_XATTR_FREE_IN_IBODY       (OCFS2_MIN_XATTR_INLINE_SIZE \
                                         - sizeof(struct ocfs2_xattr_header) \
                                         - OCFS2_XATTR_HEADER_GAP)
/*
 * One filesystem block, less the xattr block structure, the xattr header
 * and the header gap.  'ptr' must provide ->i_sb (i.e. an inode).
 */
#define OCFS2_XATTR_FREE_IN_BLOCK(ptr)  ((ptr)->i_sb->s_blocksize \
                                         - sizeof(struct ocfs2_xattr_block) \
                                         - sizeof(struct ocfs2_xattr_header) \
                                         - OCFS2_XATTR_HEADER_GAP)
94
/*
 * Default value root: everything zero except the extent list count,
 * which is 1 (little-endian on disk), matching the single extent record
 * that follows the root in struct ocfs2_xattr_def_value_root.
 * NOTE(review): its users are not visible in this part of the file.
 */
static struct ocfs2_xattr_def_value_root def_xv = {
        .xv.xr_list.l_count = cpu_to_le16(1),
};
98
/* NULL-terminated list of the xattr handlers ocfs2 provides. */
struct xattr_handler *ocfs2_xattr_handlers[] = {
        &ocfs2_xattr_user_handler,
        &ocfs2_xattr_acl_access_handler,
        &ocfs2_xattr_acl_default_handler,
        &ocfs2_xattr_trusted_handler,
        &ocfs2_xattr_security_handler,
        NULL
};
107
/*
 * Handler lookup table indexed by OCFS2_XATTR_INDEX_*.  Slots without an
 * initializer are NULL; ocfs2_xattr_prefix() checks for that.
 */
static struct xattr_handler *ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = {
        [OCFS2_XATTR_INDEX_USER]        = &ocfs2_xattr_user_handler,
        [OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS]
                                        = &ocfs2_xattr_acl_access_handler,
        [OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT]
                                        = &ocfs2_xattr_acl_default_handler,
        [OCFS2_XATTR_INDEX_TRUSTED]     = &ocfs2_xattr_trusted_handler,
        [OCFS2_XATTR_INDEX_SECURITY]    = &ocfs2_xattr_security_handler,
};
117
/*
 * Describes one name/value pair: the name index, the name and its
 * length, and the value and its length.
 */
struct ocfs2_xattr_info {
        int             xi_name_index;
        const char      *xi_name;
        int             xi_name_len;
        const void      *xi_value;
        size_t          xi_value_len;
};
125
/*
 * Search state handed to the xattr find routines declared below
 * (e.g. ocfs2_xattr_block_find()).
 */
struct ocfs2_xattr_search {
        struct buffer_head *inode_bh;
        /*
         * xattr_bh points to the buffer head of the block that holds the
         * extended attributes.  When the extended attributes live in the
         * inode, xattr_bh is equal to inode_bh.
         */
        struct buffer_head *xattr_bh;
        struct ocfs2_xattr_header *header;
        struct ocfs2_xattr_bucket *bucket;
        void *base;
        void *end;
        struct ocfs2_xattr_entry *here;
        int not_found;
};
140
/*
 * Operations on struct ocfs2_xa_loc.  Each kind of storage backing a
 * location supplies its own table; it is reached through loc->xl_ops.
 */
struct ocfs2_xa_loc;
struct ocfs2_xa_loc_operations {
        /*
         * Journal functions
         */
        int (*xlo_journal_access)(handle_t *handle, struct ocfs2_xa_loc *loc,
                                  int type);
        void (*xlo_journal_dirty)(handle_t *handle, struct ocfs2_xa_loc *loc);

        /*
         * Return a pointer to the appropriate buffer in loc->xl_storage
         * at the given offset from loc->xl_header.
         */
        void *(*xlo_offset_pointer)(struct ocfs2_xa_loc *loc, int offset);

        /* Can we reuse the existing entry for the new value? */
        int (*xlo_can_reuse)(struct ocfs2_xa_loc *loc,
                             struct ocfs2_xattr_info *xi);

        /* How much space is needed for the new value? */
        int (*xlo_check_space)(struct ocfs2_xa_loc *loc,
                               struct ocfs2_xattr_info *xi);

        /*
         * Return the offset of the first name+value pair.  This is
         * the start of our downward-filling free space.
         */
        int (*xlo_get_free_start)(struct ocfs2_xa_loc *loc);

        /*
         * Remove the name+value at this location.  Do whatever is
         * appropriate with the remaining name+value pairs.
         */
        void (*xlo_wipe_namevalue)(struct ocfs2_xa_loc *loc);

        /* Fill xl_entry with a new entry */
        void (*xlo_add_entry)(struct ocfs2_xa_loc *loc, u32 name_hash);

        /* Add name+value storage to an entry */
        void (*xlo_add_namevalue)(struct ocfs2_xa_loc *loc, int size);

        /*
         * Initialize the value buf's access and bh fields for this entry.
         * ocfs2_xa_fill_value_buf() will handle the xv pointer.
         */
        void (*xlo_fill_value_buf)(struct ocfs2_xa_loc *loc,
                                   struct ocfs2_xattr_value_buf *vb);
};
190
/*
 * Describes an xattr entry location.  This is an in-memory structure
 * tracking the on-disk structure; it is never written to disk itself.
 */
struct ocfs2_xa_loc {
        /* This xattr belongs to this inode */
        struct inode *xl_inode;

        /* The ocfs2_xattr_header inside the on-disk storage. Not NULL. */
        struct ocfs2_xattr_header *xl_header;

        /* Bytes from xl_header to the end of the storage */
        int xl_size;

        /*
         * The ocfs2_xattr_entry this location describes.  If this is
         * NULL, this location describes the on-disk structure where it
         * would have been.
         */
        struct ocfs2_xattr_entry *xl_entry;

        /*
         * Internal housekeeping
         */

        /* Buffer(s) containing this entry */
        void *xl_storage;

        /* Operations on the storage backing this location */
        const struct ocfs2_xa_loc_operations *xl_ops;
};
222
223 /*
224  * Convenience functions to calculate how much space is needed for a
225  * given name+value pair
226  */
227 static int namevalue_size(int name_len, uint64_t value_len)
228 {
229         if (value_len > OCFS2_XATTR_INLINE_SIZE)
230                 return OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
231         else
232                 return OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(value_len);
233 }
234
235 static int namevalue_size_xi(struct ocfs2_xattr_info *xi)
236 {
237         return namevalue_size(xi->xi_name_len, xi->xi_value_len);
238 }
239
240 static int namevalue_size_xe(struct ocfs2_xattr_entry *xe)
241 {
242         u64 value_len = le64_to_cpu(xe->xe_value_size);
243
244         BUG_ON((value_len > OCFS2_XATTR_INLINE_SIZE) &&
245                ocfs2_xattr_is_local(xe));
246         return namevalue_size(xe->xe_name_len, value_len);
247 }
248
249
/*
 * Forward declarations for static helpers that are used before their
 * definitions later in this file.
 */
static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb,
                                             struct ocfs2_xattr_header *xh,
                                             int index,
                                             int *block_off,
                                             int *new_offset);

static int ocfs2_xattr_block_find(struct inode *inode,
                                  int name_index,
                                  const char *name,
                                  struct ocfs2_xattr_search *xs);
static int ocfs2_xattr_index_block_find(struct inode *inode,
                                        struct buffer_head *root_bh,
                                        int name_index,
                                        const char *name,
                                        struct ocfs2_xattr_search *xs);

static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
                                        struct buffer_head *blk_bh,
                                        char *buffer,
                                        size_t buffer_size);

static int ocfs2_xattr_create_index_block(struct inode *inode,
                                          struct ocfs2_xattr_search *xs,
                                          struct ocfs2_xattr_set_ctxt *ctxt);

static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
                                             struct ocfs2_xattr_info *xi,
                                             struct ocfs2_xattr_search *xs,
                                             struct ocfs2_xattr_set_ctxt *ctxt);

/* Callback type taken by ocfs2_iterate_xattr_index_block() */
typedef int (xattr_tree_rec_func)(struct inode *inode,
                                  struct buffer_head *root_bh,
                                  u64 blkno, u32 cpos, u32 len, void *para);
static int ocfs2_iterate_xattr_index_block(struct inode *inode,
                                           struct buffer_head *root_bh,
                                           xattr_tree_rec_func *rec_func,
                                           void *para);
static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
                                        struct ocfs2_xattr_bucket *bucket,
                                        void *para);
/* Has the same parameter list as xattr_tree_rec_func */
static int ocfs2_rm_xattr_cluster(struct inode *inode,
                                  struct buffer_head *root_bh,
                                  u64 blkno,
                                  u32 cpos,
                                  u32 len,
                                  void *para);

static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle,
                                  u64 src_blk, u64 last_blk, u64 to_blk,
                                  unsigned int start_bucket,
                                  u32 *first_hash);
static int ocfs2_prepare_refcount_xattr(struct inode *inode,
                                        struct ocfs2_dinode *di,
                                        struct ocfs2_xattr_info *xi,
                                        struct ocfs2_xattr_search *xis,
                                        struct ocfs2_xattr_search *xbs,
                                        struct ocfs2_refcount_tree **ref_tree,
                                        int *meta_need,
                                        int *credits);
static int ocfs2_get_xattr_tree_value_root(struct super_block *sb,
                                           struct ocfs2_xattr_bucket *bucket,
                                           int offset,
                                           struct ocfs2_xattr_value_root **xv,
                                           struct buffer_head **bh);
314
315 static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb)
316 {
317         return (1 << osb->s_clustersize_bits) / OCFS2_XATTR_BUCKET_SIZE;
318 }
319
320 static inline u16 ocfs2_blocks_per_xattr_bucket(struct super_block *sb)
321 {
322         return OCFS2_XATTR_BUCKET_SIZE / (1 << sb->s_blocksize_bits);
323 }
324
/*
 * Accessors for a bucket's buffers.  All of them dereference bu_bhs[],
 * so the bucket's buffer_heads must already be populated.
 */
/* Block number of the bucket's first block */
#define bucket_blkno(_b) ((_b)->bu_bhs[0]->b_blocknr)
/* Data of the bucket's _n'th block */
#define bucket_block(_b, _n) ((_b)->bu_bhs[(_n)]->b_data)
/* The start of the bucket's first block, viewed as an ocfs2_xattr_header */
#define bucket_xh(_b) ((struct ocfs2_xattr_header *)bucket_block((_b), 0))
328
329 static struct ocfs2_xattr_bucket *ocfs2_xattr_bucket_new(struct inode *inode)
330 {
331         struct ocfs2_xattr_bucket *bucket;
332         int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
333
334         BUG_ON(blks > OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET);
335
336         bucket = kzalloc(sizeof(struct ocfs2_xattr_bucket), GFP_NOFS);
337         if (bucket) {
338                 bucket->bu_inode = inode;
339                 bucket->bu_blocks = blks;
340         }
341
342         return bucket;
343 }
344
345 static void ocfs2_xattr_bucket_relse(struct ocfs2_xattr_bucket *bucket)
346 {
347         int i;
348
349         for (i = 0; i < bucket->bu_blocks; i++) {
350                 brelse(bucket->bu_bhs[i]);
351                 bucket->bu_bhs[i] = NULL;
352         }
353 }
354
355 static void ocfs2_xattr_bucket_free(struct ocfs2_xattr_bucket *bucket)
356 {
357         if (bucket) {
358                 ocfs2_xattr_bucket_relse(bucket);
359                 bucket->bu_inode = NULL;
360                 kfree(bucket);
361         }
362 }
363
364 /*
365  * A bucket that has never been written to disk doesn't need to be
366  * read.  We just need the buffer_heads.  Don't call this for
367  * buckets that are already on disk.  ocfs2_read_xattr_bucket() initializes
368  * them fully.
369  */
370 static int ocfs2_init_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
371                                    u64 xb_blkno)
372 {
373         int i, rc = 0;
374
375         for (i = 0; i < bucket->bu_blocks; i++) {
376                 bucket->bu_bhs[i] = sb_getblk(bucket->bu_inode->i_sb,
377                                               xb_blkno + i);
378                 if (!bucket->bu_bhs[i]) {
379                         rc = -EIO;
380                         mlog_errno(rc);
381                         break;
382                 }
383
384                 if (!ocfs2_buffer_uptodate(INODE_CACHE(bucket->bu_inode),
385                                            bucket->bu_bhs[i]))
386                         ocfs2_set_new_buffer_uptodate(INODE_CACHE(bucket->bu_inode),
387                                                       bucket->bu_bhs[i]);
388         }
389
390         if (rc)
391                 ocfs2_xattr_bucket_relse(bucket);
392         return rc;
393 }
394
395 /* Read the xattr bucket at xb_blkno */
396 static int ocfs2_read_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
397                                    u64 xb_blkno)
398 {
399         int rc;
400
401         rc = ocfs2_read_blocks(INODE_CACHE(bucket->bu_inode), xb_blkno,
402                                bucket->bu_blocks, bucket->bu_bhs, 0,
403                                NULL);
404         if (!rc) {
405                 spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
406                 rc = ocfs2_validate_meta_ecc_bhs(bucket->bu_inode->i_sb,
407                                                  bucket->bu_bhs,
408                                                  bucket->bu_blocks,
409                                                  &bucket_xh(bucket)->xh_check);
410                 spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
411                 if (rc)
412                         mlog_errno(rc);
413         }
414
415         if (rc)
416                 ocfs2_xattr_bucket_relse(bucket);
417         return rc;
418 }
419
420 static int ocfs2_xattr_bucket_journal_access(handle_t *handle,
421                                              struct ocfs2_xattr_bucket *bucket,
422                                              int type)
423 {
424         int i, rc = 0;
425
426         for (i = 0; i < bucket->bu_blocks; i++) {
427                 rc = ocfs2_journal_access(handle,
428                                           INODE_CACHE(bucket->bu_inode),
429                                           bucket->bu_bhs[i], type);
430                 if (rc) {
431                         mlog_errno(rc);
432                         break;
433                 }
434         }
435
436         return rc;
437 }
438
439 static void ocfs2_xattr_bucket_journal_dirty(handle_t *handle,
440                                              struct ocfs2_xattr_bucket *bucket)
441 {
442         int i;
443
444         spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
445         ocfs2_compute_meta_ecc_bhs(bucket->bu_inode->i_sb,
446                                    bucket->bu_bhs, bucket->bu_blocks,
447                                    &bucket_xh(bucket)->xh_check);
448         spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
449
450         for (i = 0; i < bucket->bu_blocks; i++)
451                 ocfs2_journal_dirty(handle, bucket->bu_bhs[i]);
452 }
453
454 static void ocfs2_xattr_bucket_copy_data(struct ocfs2_xattr_bucket *dest,
455                                          struct ocfs2_xattr_bucket *src)
456 {
457         int i;
458         int blocksize = src->bu_inode->i_sb->s_blocksize;
459
460         BUG_ON(dest->bu_blocks != src->bu_blocks);
461         BUG_ON(dest->bu_inode != src->bu_inode);
462
463         for (i = 0; i < src->bu_blocks; i++) {
464                 memcpy(bucket_block(dest, i), bucket_block(src, i),
465                        blocksize);
466         }
467 }
468
469 static int ocfs2_validate_xattr_block(struct super_block *sb,
470                                       struct buffer_head *bh)
471 {
472         int rc;
473         struct ocfs2_xattr_block *xb =
474                 (struct ocfs2_xattr_block *)bh->b_data;
475
476         mlog(0, "Validating xattr block %llu\n",
477              (unsigned long long)bh->b_blocknr);
478
479         BUG_ON(!buffer_uptodate(bh));
480
481         /*
482          * If the ecc fails, we return the error but otherwise
483          * leave the filesystem running.  We know any error is
484          * local to this block.
485          */
486         rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &xb->xb_check);
487         if (rc)
488                 return rc;
489
490         /*
491          * Errors after here are fatal
492          */
493
494         if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) {
495                 ocfs2_error(sb,
496                             "Extended attribute block #%llu has bad "
497                             "signature %.*s",
498                             (unsigned long long)bh->b_blocknr, 7,
499                             xb->xb_signature);
500                 return -EINVAL;
501         }
502
503         if (le64_to_cpu(xb->xb_blkno) != bh->b_blocknr) {
504                 ocfs2_error(sb,
505                             "Extended attribute block #%llu has an "
506                             "invalid xb_blkno of %llu",
507                             (unsigned long long)bh->b_blocknr,
508                             (unsigned long long)le64_to_cpu(xb->xb_blkno));
509                 return -EINVAL;
510         }
511
512         if (le32_to_cpu(xb->xb_fs_generation) != OCFS2_SB(sb)->fs_generation) {
513                 ocfs2_error(sb,
514                             "Extended attribute block #%llu has an invalid "
515                             "xb_fs_generation of #%u",
516                             (unsigned long long)bh->b_blocknr,
517                             le32_to_cpu(xb->xb_fs_generation));
518                 return -EINVAL;
519         }
520
521         return 0;
522 }
523
524 static int ocfs2_read_xattr_block(struct inode *inode, u64 xb_blkno,
525                                   struct buffer_head **bh)
526 {
527         int rc;
528         struct buffer_head *tmp = *bh;
529
530         rc = ocfs2_read_block(INODE_CACHE(inode), xb_blkno, &tmp,
531                               ocfs2_validate_xattr_block);
532
533         /* If ocfs2_read_block() got us a new bh, pass it up. */
534         if (!rc && !*bh)
535                 *bh = tmp;
536
537         return rc;
538 }
539
540 static inline const char *ocfs2_xattr_prefix(int name_index)
541 {
542         struct xattr_handler *handler = NULL;
543
544         if (name_index > 0 && name_index < OCFS2_XATTR_MAX)
545                 handler = ocfs2_xattr_handler_map[name_index];
546
547         return handler ? handler->prefix : NULL;
548 }
549
550 static u32 ocfs2_xattr_name_hash(struct inode *inode,
551                                  const char *name,
552                                  int name_len)
553 {
554         /* Get hash value of uuid from super block */
555         u32 hash = OCFS2_SB(inode->i_sb)->uuid_hash;
556         int i;
557
558         /* hash extended attribute name */
559         for (i = 0; i < name_len; i++) {
560                 hash = (hash << OCFS2_HASH_SHIFT) ^
561                        (hash >> (8*sizeof(hash) - OCFS2_HASH_SHIFT)) ^
562                        *name++;
563         }
564
565         return hash;
566 }
567
568 static int ocfs2_xattr_entry_real_size(int name_len, size_t value_len)
569 {
570         return namevalue_size(name_len, value_len) +
571                 sizeof(struct ocfs2_xattr_entry);
572 }
573
574 static int ocfs2_xi_entry_usage(struct ocfs2_xattr_info *xi)
575 {
576         return namevalue_size_xi(xi) +
577                 sizeof(struct ocfs2_xattr_entry);
578 }
579
580 static int ocfs2_xe_entry_usage(struct ocfs2_xattr_entry *xe)
581 {
582         return namevalue_size_xe(xe) +
583                 sizeof(struct ocfs2_xattr_entry);
584 }
585
586 int ocfs2_calc_security_init(struct inode *dir,
587                              struct ocfs2_security_xattr_info *si,
588                              int *want_clusters,
589                              int *xattr_credits,
590                              struct ocfs2_alloc_context **xattr_ac)
591 {
592         int ret = 0;
593         struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
594         int s_size = ocfs2_xattr_entry_real_size(strlen(si->name),
595                                                  si->value_len);
596
597         /*
598          * The max space of security xattr taken inline is
599          * 256(name) + 80(value) + 16(entry) = 352 bytes,
600          * So reserve one metadata block for it is ok.
601          */
602         if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE ||
603             s_size > OCFS2_XATTR_FREE_IN_IBODY) {
604                 ret = ocfs2_reserve_new_metadata_blocks(osb, 1, xattr_ac);
605                 if (ret) {
606                         mlog_errno(ret);
607                         return ret;
608                 }
609                 *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
610         }
611
612         /* reserve clusters for xattr value which will be set in B tree*/
613         if (si->value_len > OCFS2_XATTR_INLINE_SIZE) {
614                 int new_clusters = ocfs2_clusters_for_bytes(dir->i_sb,
615                                                             si->value_len);
616
617                 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
618                                                            new_clusters);
619                 *want_clusters += new_clusters;
620         }
621         return ret;
622 }
623
624 int ocfs2_calc_xattr_init(struct inode *dir,
625                           struct buffer_head *dir_bh,
626                           int mode,
627                           struct ocfs2_security_xattr_info *si,
628                           int *want_clusters,
629                           int *xattr_credits,
630                           int *want_meta)
631 {
632         int ret = 0;
633         struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
634         int s_size = 0, a_size = 0, acl_len = 0, new_clusters;
635
636         if (si->enable)
637                 s_size = ocfs2_xattr_entry_real_size(strlen(si->name),
638                                                      si->value_len);
639
640         if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) {
641                 acl_len = ocfs2_xattr_get_nolock(dir, dir_bh,
642                                         OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT,
643                                         "", NULL, 0);
644                 if (acl_len > 0) {
645                         a_size = ocfs2_xattr_entry_real_size(0, acl_len);
646                         if (S_ISDIR(mode))
647                                 a_size <<= 1;
648                 } else if (acl_len != 0 && acl_len != -ENODATA) {
649                         mlog_errno(ret);
650                         return ret;
651                 }
652         }
653
654         if (!(s_size + a_size))
655                 return ret;
656
657         /*
658          * The max space of security xattr taken inline is
659          * 256(name) + 80(value) + 16(entry) = 352 bytes,
660          * The max space of acl xattr taken inline is
661          * 80(value) + 16(entry) * 2(if directory) = 192 bytes,
662          * when blocksize = 512, may reserve one more cluser for
663          * xattr bucket, otherwise reserve one metadata block
664          * for them is ok.
665          * If this is a new directory with inline data,
666          * we choose to reserve the entire inline area for
667          * directory contents and force an external xattr block.
668          */
669         if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE ||
670             (S_ISDIR(mode) && ocfs2_supports_inline_data(osb)) ||
671             (s_size + a_size) > OCFS2_XATTR_FREE_IN_IBODY) {
672                 *want_meta = *want_meta + 1;
673                 *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
674         }
675
676         if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE &&
677             (s_size + a_size) > OCFS2_XATTR_FREE_IN_BLOCK(dir)) {
678                 *want_clusters += 1;
679                 *xattr_credits += ocfs2_blocks_per_xattr_bucket(dir->i_sb);
680         }
681
682         /*
683          * reserve credits and clusters for xattrs which has large value
684          * and have to be set outside
685          */
686         if (si->enable && si->value_len > OCFS2_XATTR_INLINE_SIZE) {
687                 new_clusters = ocfs2_clusters_for_bytes(dir->i_sb,
688                                                         si->value_len);
689                 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
690                                                            new_clusters);
691                 *want_clusters += new_clusters;
692         }
693         if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL &&
694             acl_len > OCFS2_XATTR_INLINE_SIZE) {
695                 /* for directory, it has DEFAULT and ACCESS two types of acls */
696                 new_clusters = (S_ISDIR(mode) ? 2 : 1) *
697                                 ocfs2_clusters_for_bytes(dir->i_sb, acl_len);
698                 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
699                                                            new_clusters);
700                 *want_clusters += new_clusters;
701         }
702
703         return ret;
704 }
705
706 static int ocfs2_xattr_extend_allocation(struct inode *inode,
707                                          u32 clusters_to_add,
708                                          struct ocfs2_xattr_value_buf *vb,
709                                          struct ocfs2_xattr_set_ctxt *ctxt)
710 {
711         int status = 0;
712         handle_t *handle = ctxt->handle;
713         enum ocfs2_alloc_restarted why;
714         u32 prev_clusters, logical_start = le32_to_cpu(vb->vb_xv->xr_clusters);
715         struct ocfs2_extent_tree et;
716
717         mlog(0, "(clusters_to_add for xattr= %u)\n", clusters_to_add);
718
719         ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
720
721         status = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
722                               OCFS2_JOURNAL_ACCESS_WRITE);
723         if (status < 0) {
724                 mlog_errno(status);
725                 goto leave;
726         }
727
728         prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters);
729         status = ocfs2_add_clusters_in_btree(handle,
730                                              &et,
731                                              &logical_start,
732                                              clusters_to_add,
733                                              0,
734                                              ctxt->data_ac,
735                                              ctxt->meta_ac,
736                                              &why);
737         if (status < 0) {
738                 mlog_errno(status);
739                 goto leave;
740         }
741
742         status = ocfs2_journal_dirty(handle, vb->vb_bh);
743         if (status < 0) {
744                 mlog_errno(status);
745                 goto leave;
746         }
747
748         clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) - prev_clusters;
749
750         /*
751          * We should have already allocated enough space before the transaction,
752          * so no need to restart.
753          */
754         BUG_ON(why != RESTART_NONE || clusters_to_add);
755
756 leave:
757
758         return status;
759 }
760
761 static int __ocfs2_remove_xattr_range(struct inode *inode,
762                                       struct ocfs2_xattr_value_buf *vb,
763                                       u32 cpos, u32 phys_cpos, u32 len,
764                                       unsigned int ext_flags,
765                                       struct ocfs2_xattr_set_ctxt *ctxt)
766 {
767         int ret;
768         u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
769         handle_t *handle = ctxt->handle;
770         struct ocfs2_extent_tree et;
771
772         ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
773
774         ret = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
775                             OCFS2_JOURNAL_ACCESS_WRITE);
776         if (ret) {
777                 mlog_errno(ret);
778                 goto out;
779         }
780
781         ret = ocfs2_remove_extent(handle, &et, cpos, len, ctxt->meta_ac,
782                                   &ctxt->dealloc);
783         if (ret) {
784                 mlog_errno(ret);
785                 goto out;
786         }
787
788         le32_add_cpu(&vb->vb_xv->xr_clusters, -len);
789
790         ret = ocfs2_journal_dirty(handle, vb->vb_bh);
791         if (ret) {
792                 mlog_errno(ret);
793                 goto out;
794         }
795
796         if (ext_flags & OCFS2_EXT_REFCOUNTED)
797                 ret = ocfs2_decrease_refcount(inode, handle,
798                                         ocfs2_blocks_to_clusters(inode->i_sb,
799                                                                  phys_blkno),
800                                         len, ctxt->meta_ac, &ctxt->dealloc, 1);
801         else
802                 ret = ocfs2_cache_cluster_dealloc(&ctxt->dealloc,
803                                                   phys_blkno, len);
804         if (ret)
805                 mlog_errno(ret);
806
807 out:
808         return ret;
809 }
810
811 static int ocfs2_xattr_shrink_size(struct inode *inode,
812                                    u32 old_clusters,
813                                    u32 new_clusters,
814                                    struct ocfs2_xattr_value_buf *vb,
815                                    struct ocfs2_xattr_set_ctxt *ctxt)
816 {
817         int ret = 0;
818         unsigned int ext_flags;
819         u32 trunc_len, cpos, phys_cpos, alloc_size;
820         u64 block;
821
822         if (old_clusters <= new_clusters)
823                 return 0;
824
825         cpos = new_clusters;
826         trunc_len = old_clusters - new_clusters;
827         while (trunc_len) {
828                 ret = ocfs2_xattr_get_clusters(inode, cpos, &phys_cpos,
829                                                &alloc_size,
830                                                &vb->vb_xv->xr_list, &ext_flags);
831                 if (ret) {
832                         mlog_errno(ret);
833                         goto out;
834                 }
835
836                 if (alloc_size > trunc_len)
837                         alloc_size = trunc_len;
838
839                 ret = __ocfs2_remove_xattr_range(inode, vb, cpos,
840                                                  phys_cpos, alloc_size,
841                                                  ext_flags, ctxt);
842                 if (ret) {
843                         mlog_errno(ret);
844                         goto out;
845                 }
846
847                 block = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
848                 ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode),
849                                                        block, alloc_size);
850                 cpos += alloc_size;
851                 trunc_len -= alloc_size;
852         }
853
854 out:
855         return ret;
856 }
857
858 static int ocfs2_xattr_value_truncate(struct inode *inode,
859                                       struct ocfs2_xattr_value_buf *vb,
860                                       int len,
861                                       struct ocfs2_xattr_set_ctxt *ctxt)
862 {
863         int ret;
864         u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, len);
865         u32 old_clusters = le32_to_cpu(vb->vb_xv->xr_clusters);
866
867         if (new_clusters == old_clusters)
868                 return 0;
869
870         if (new_clusters > old_clusters)
871                 ret = ocfs2_xattr_extend_allocation(inode,
872                                                     new_clusters - old_clusters,
873                                                     vb, ctxt);
874         else
875                 ret = ocfs2_xattr_shrink_size(inode,
876                                               old_clusters, new_clusters,
877                                               vb, ctxt);
878
879         return ret;
880 }
881
/*
 * Account for, and optionally emit, one "<prefix><name>\0" list entry.
 *
 * @buffer:   start of the caller's list buffer
 * @size:     capacity of @buffer; 0 means "only compute the length"
 * @result:   running number of bytes used so far; always advanced by the
 *            length of this entry (prefix + name + terminating NUL)
 * @prefix:   NUL-terminated namespace prefix
 * @name:     attribute name, @name_len bytes, not necessarily terminated
 *
 * Returns 0 on success (including the size-only case) or -ERANGE when the
 * entry would not fit in @size bytes; nothing is written in that case.
 */
static int ocfs2_xattr_list_entry(char *buffer, size_t size,
				  size_t *result, const char *prefix,
				  const char *name, int name_len)
{
	size_t start = *result;
	int prefix_len = strlen(prefix);
	int total_len = prefix_len + name_len + 1;
	char *dst;

	*result = start + total_len;

	/* Size probe: the caller only wants to know the required length. */
	if (size == 0)
		return 0;

	if (*result > size)
		return -ERANGE;

	dst = buffer + start;
	memcpy(dst, prefix, prefix_len);
	dst += prefix_len;
	memcpy(dst, name, name_len);
	dst[name_len] = '\0';

	return 0;
}
905
906 static int ocfs2_xattr_list_entries(struct inode *inode,
907                                     struct ocfs2_xattr_header *header,
908                                     char *buffer, size_t buffer_size)
909 {
910         size_t result = 0;
911         int i, type, ret;
912         const char *prefix, *name;
913
914         for (i = 0 ; i < le16_to_cpu(header->xh_count); i++) {
915                 struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
916                 type = ocfs2_xattr_get_type(entry);
917                 prefix = ocfs2_xattr_prefix(type);
918
919                 if (prefix) {
920                         name = (const char *)header +
921                                 le16_to_cpu(entry->xe_name_offset);
922
923                         ret = ocfs2_xattr_list_entry(buffer, buffer_size,
924                                                      &result, prefix, name,
925                                                      entry->xe_name_len);
926                         if (ret)
927                                 return ret;
928                 }
929         }
930
931         return result;
932 }
933
934 int ocfs2_has_inline_xattr_value_outside(struct inode *inode,
935                                          struct ocfs2_dinode *di)
936 {
937         struct ocfs2_xattr_header *xh;
938         int i;
939
940         xh = (struct ocfs2_xattr_header *)
941                  ((void *)di + inode->i_sb->s_blocksize -
942                  le16_to_cpu(di->i_xattr_inline_size));
943
944         for (i = 0; i < le16_to_cpu(xh->xh_count); i++)
945                 if (!ocfs2_xattr_is_local(&xh->xh_entries[i]))
946                         return 1;
947
948         return 0;
949 }
950
951 static int ocfs2_xattr_ibody_list(struct inode *inode,
952                                   struct ocfs2_dinode *di,
953                                   char *buffer,
954                                   size_t buffer_size)
955 {
956         struct ocfs2_xattr_header *header = NULL;
957         struct ocfs2_inode_info *oi = OCFS2_I(inode);
958         int ret = 0;
959
960         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL))
961                 return ret;
962
963         header = (struct ocfs2_xattr_header *)
964                  ((void *)di + inode->i_sb->s_blocksize -
965                  le16_to_cpu(di->i_xattr_inline_size));
966
967         ret = ocfs2_xattr_list_entries(inode, header, buffer, buffer_size);
968
969         return ret;
970 }
971
972 static int ocfs2_xattr_block_list(struct inode *inode,
973                                   struct ocfs2_dinode *di,
974                                   char *buffer,
975                                   size_t buffer_size)
976 {
977         struct buffer_head *blk_bh = NULL;
978         struct ocfs2_xattr_block *xb;
979         int ret = 0;
980
981         if (!di->i_xattr_loc)
982                 return ret;
983
984         ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
985                                      &blk_bh);
986         if (ret < 0) {
987                 mlog_errno(ret);
988                 return ret;
989         }
990
991         xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
992         if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
993                 struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header;
994                 ret = ocfs2_xattr_list_entries(inode, header,
995                                                buffer, buffer_size);
996         } else
997                 ret = ocfs2_xattr_tree_list_index_block(inode, blk_bh,
998                                                    buffer, buffer_size);
999
1000         brelse(blk_bh);
1001
1002         return ret;
1003 }
1004
1005 ssize_t ocfs2_listxattr(struct dentry *dentry,
1006                         char *buffer,
1007                         size_t size)
1008 {
1009         int ret = 0, i_ret = 0, b_ret = 0;
1010         struct buffer_head *di_bh = NULL;
1011         struct ocfs2_dinode *di = NULL;
1012         struct ocfs2_inode_info *oi = OCFS2_I(dentry->d_inode);
1013
1014         if (!ocfs2_supports_xattr(OCFS2_SB(dentry->d_sb)))
1015                 return -EOPNOTSUPP;
1016
1017         if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
1018                 return ret;
1019
1020         ret = ocfs2_inode_lock(dentry->d_inode, &di_bh, 0);
1021         if (ret < 0) {
1022                 mlog_errno(ret);
1023                 return ret;
1024         }
1025
1026         di = (struct ocfs2_dinode *)di_bh->b_data;
1027
1028         down_read(&oi->ip_xattr_sem);
1029         i_ret = ocfs2_xattr_ibody_list(dentry->d_inode, di, buffer, size);
1030         if (i_ret < 0)
1031                 b_ret = 0;
1032         else {
1033                 if (buffer) {
1034                         buffer += i_ret;
1035                         size -= i_ret;
1036                 }
1037                 b_ret = ocfs2_xattr_block_list(dentry->d_inode, di,
1038                                                buffer, size);
1039                 if (b_ret < 0)
1040                         i_ret = 0;
1041         }
1042         up_read(&oi->ip_xattr_sem);
1043         ocfs2_inode_unlock(dentry->d_inode, 0);
1044
1045         brelse(di_bh);
1046
1047         return i_ret + b_ret;
1048 }
1049
1050 static int ocfs2_xattr_find_entry(int name_index,
1051                                   const char *name,
1052                                   struct ocfs2_xattr_search *xs)
1053 {
1054         struct ocfs2_xattr_entry *entry;
1055         size_t name_len;
1056         int i, cmp = 1;
1057
1058         if (name == NULL)
1059                 return -EINVAL;
1060
1061         name_len = strlen(name);
1062         entry = xs->here;
1063         for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
1064                 cmp = name_index - ocfs2_xattr_get_type(entry);
1065                 if (!cmp)
1066                         cmp = name_len - entry->xe_name_len;
1067                 if (!cmp)
1068                         cmp = memcmp(name, (xs->base +
1069                                      le16_to_cpu(entry->xe_name_offset)),
1070                                      name_len);
1071                 if (cmp == 0)
1072                         break;
1073                 entry += 1;
1074         }
1075         xs->here = entry;
1076
1077         return cmp ? -ENODATA : 0;
1078 }
1079
1080 static int ocfs2_xattr_get_value_outside(struct inode *inode,
1081                                          struct ocfs2_xattr_value_root *xv,
1082                                          void *buffer,
1083                                          size_t len)
1084 {
1085         u32 cpos, p_cluster, num_clusters, bpc, clusters;
1086         u64 blkno;
1087         int i, ret = 0;
1088         size_t cplen, blocksize;
1089         struct buffer_head *bh = NULL;
1090         struct ocfs2_extent_list *el;
1091
1092         el = &xv->xr_list;
1093         clusters = le32_to_cpu(xv->xr_clusters);
1094         bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
1095         blocksize = inode->i_sb->s_blocksize;
1096
1097         cpos = 0;
1098         while (cpos < clusters) {
1099                 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
1100                                                &num_clusters, el, NULL);
1101                 if (ret) {
1102                         mlog_errno(ret);
1103                         goto out;
1104                 }
1105
1106                 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
1107                 /* Copy ocfs2_xattr_value */
1108                 for (i = 0; i < num_clusters * bpc; i++, blkno++) {
1109                         ret = ocfs2_read_block(INODE_CACHE(inode), blkno,
1110                                                &bh, NULL);
1111                         if (ret) {
1112                                 mlog_errno(ret);
1113                                 goto out;
1114                         }
1115
1116                         cplen = len >= blocksize ? blocksize : len;
1117                         memcpy(buffer, bh->b_data, cplen);
1118                         len -= cplen;
1119                         buffer += cplen;
1120
1121                         brelse(bh);
1122                         bh = NULL;
1123                         if (len == 0)
1124                                 break;
1125                 }
1126                 cpos += num_clusters;
1127         }
1128 out:
1129         return ret;
1130 }
1131
1132 static int ocfs2_xattr_ibody_get(struct inode *inode,
1133                                  int name_index,
1134                                  const char *name,
1135                                  void *buffer,
1136                                  size_t buffer_size,
1137                                  struct ocfs2_xattr_search *xs)
1138 {
1139         struct ocfs2_inode_info *oi = OCFS2_I(inode);
1140         struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1141         struct ocfs2_xattr_value_root *xv;
1142         size_t size;
1143         int ret = 0;
1144
1145         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL))
1146                 return -ENODATA;
1147
1148         xs->end = (void *)di + inode->i_sb->s_blocksize;
1149         xs->header = (struct ocfs2_xattr_header *)
1150                         (xs->end - le16_to_cpu(di->i_xattr_inline_size));
1151         xs->base = (void *)xs->header;
1152         xs->here = xs->header->xh_entries;
1153
1154         ret = ocfs2_xattr_find_entry(name_index, name, xs);
1155         if (ret)
1156                 return ret;
1157         size = le64_to_cpu(xs->here->xe_value_size);
1158         if (buffer) {
1159                 if (size > buffer_size)
1160                         return -ERANGE;
1161                 if (ocfs2_xattr_is_local(xs->here)) {
1162                         memcpy(buffer, (void *)xs->base +
1163                                le16_to_cpu(xs->here->xe_name_offset) +
1164                                OCFS2_XATTR_SIZE(xs->here->xe_name_len), size);
1165                 } else {
1166                         xv = (struct ocfs2_xattr_value_root *)
1167                                 (xs->base + le16_to_cpu(
1168                                  xs->here->xe_name_offset) +
1169                                 OCFS2_XATTR_SIZE(xs->here->xe_name_len));
1170                         ret = ocfs2_xattr_get_value_outside(inode, xv,
1171                                                             buffer, size);
1172                         if (ret < 0) {
1173                                 mlog_errno(ret);
1174                                 return ret;
1175                         }
1176                 }
1177         }
1178
1179         return size;
1180 }
1181
1182 static int ocfs2_xattr_block_get(struct inode *inode,
1183                                  int name_index,
1184                                  const char *name,
1185                                  void *buffer,
1186                                  size_t buffer_size,
1187                                  struct ocfs2_xattr_search *xs)
1188 {
1189         struct ocfs2_xattr_block *xb;
1190         struct ocfs2_xattr_value_root *xv;
1191         size_t size;
1192         int ret = -ENODATA, name_offset, name_len, i;
1193         int uninitialized_var(block_off);
1194
1195         xs->bucket = ocfs2_xattr_bucket_new(inode);
1196         if (!xs->bucket) {
1197                 ret = -ENOMEM;
1198                 mlog_errno(ret);
1199                 goto cleanup;
1200         }
1201
1202         ret = ocfs2_xattr_block_find(inode, name_index, name, xs);
1203         if (ret) {
1204                 mlog_errno(ret);
1205                 goto cleanup;
1206         }
1207
1208         if (xs->not_found) {
1209                 ret = -ENODATA;
1210                 goto cleanup;
1211         }
1212
1213         xb = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
1214         size = le64_to_cpu(xs->here->xe_value_size);
1215         if (buffer) {
1216                 ret = -ERANGE;
1217                 if (size > buffer_size)
1218                         goto cleanup;
1219
1220                 name_offset = le16_to_cpu(xs->here->xe_name_offset);
1221                 name_len = OCFS2_XATTR_SIZE(xs->here->xe_name_len);
1222                 i = xs->here - xs->header->xh_entries;
1223
1224                 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
1225                         ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
1226                                                                 bucket_xh(xs->bucket),
1227                                                                 i,
1228                                                                 &block_off,
1229                                                                 &name_offset);
1230                         xs->base = bucket_block(xs->bucket, block_off);
1231                 }
1232                 if (ocfs2_xattr_is_local(xs->here)) {
1233                         memcpy(buffer, (void *)xs->base +
1234                                name_offset + name_len, size);
1235                 } else {
1236                         xv = (struct ocfs2_xattr_value_root *)
1237                                 (xs->base + name_offset + name_len);
1238                         ret = ocfs2_xattr_get_value_outside(inode, xv,
1239                                                             buffer, size);
1240                         if (ret < 0) {
1241                                 mlog_errno(ret);
1242                                 goto cleanup;
1243                         }
1244                 }
1245         }
1246         ret = size;
1247 cleanup:
1248         ocfs2_xattr_bucket_free(xs->bucket);
1249
1250         brelse(xs->xattr_bh);
1251         xs->xattr_bh = NULL;
1252         return ret;
1253 }
1254
1255 int ocfs2_xattr_get_nolock(struct inode *inode,
1256                            struct buffer_head *di_bh,
1257                            int name_index,
1258                            const char *name,
1259                            void *buffer,
1260                            size_t buffer_size)
1261 {
1262         int ret;
1263         struct ocfs2_dinode *di = NULL;
1264         struct ocfs2_inode_info *oi = OCFS2_I(inode);
1265         struct ocfs2_xattr_search xis = {
1266                 .not_found = -ENODATA,
1267         };
1268         struct ocfs2_xattr_search xbs = {
1269                 .not_found = -ENODATA,
1270         };
1271
1272         if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
1273                 return -EOPNOTSUPP;
1274
1275         if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
1276                 ret = -ENODATA;
1277
1278         xis.inode_bh = xbs.inode_bh = di_bh;
1279         di = (struct ocfs2_dinode *)di_bh->b_data;
1280
1281         down_read(&oi->ip_xattr_sem);
1282         ret = ocfs2_xattr_ibody_get(inode, name_index, name, buffer,
1283                                     buffer_size, &xis);
1284         if (ret == -ENODATA && di->i_xattr_loc)
1285                 ret = ocfs2_xattr_block_get(inode, name_index, name, buffer,
1286                                             buffer_size, &xbs);
1287         up_read(&oi->ip_xattr_sem);
1288
1289         return ret;
1290 }
1291
1292 /* ocfs2_xattr_get()
1293  *
1294  * Copy an extended attribute into the buffer provided.
1295  * Buffer is NULL to compute the size of buffer required.
1296  */
1297 static int ocfs2_xattr_get(struct inode *inode,
1298                            int name_index,
1299                            const char *name,
1300                            void *buffer,
1301                            size_t buffer_size)
1302 {
1303         int ret;
1304         struct buffer_head *di_bh = NULL;
1305
1306         ret = ocfs2_inode_lock(inode, &di_bh, 0);
1307         if (ret < 0) {
1308                 mlog_errno(ret);
1309                 return ret;
1310         }
1311         ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index,
1312                                      name, buffer, buffer_size);
1313
1314         ocfs2_inode_unlock(inode, 0);
1315
1316         brelse(di_bh);
1317
1318         return ret;
1319 }
1320
1321 static int __ocfs2_xattr_set_value_outside(struct inode *inode,
1322                                            handle_t *handle,
1323                                            struct ocfs2_xattr_value_buf *vb,
1324                                            const void *value,
1325                                            int value_len)
1326 {
1327         int ret = 0, i, cp_len;
1328         u16 blocksize = inode->i_sb->s_blocksize;
1329         u32 p_cluster, num_clusters;
1330         u32 cpos = 0, bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
1331         u32 clusters = ocfs2_clusters_for_bytes(inode->i_sb, value_len);
1332         u64 blkno;
1333         struct buffer_head *bh = NULL;
1334         unsigned int ext_flags;
1335         struct ocfs2_xattr_value_root *xv = vb->vb_xv;
1336
1337         BUG_ON(clusters > le32_to_cpu(xv->xr_clusters));
1338
1339         while (cpos < clusters) {
1340                 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
1341                                                &num_clusters, &xv->xr_list,
1342                                                &ext_flags);
1343                 if (ret) {
1344                         mlog_errno(ret);
1345                         goto out;
1346                 }
1347
1348                 BUG_ON(ext_flags & OCFS2_EXT_REFCOUNTED);
1349
1350                 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
1351
1352                 for (i = 0; i < num_clusters * bpc; i++, blkno++) {
1353                         ret = ocfs2_read_block(INODE_CACHE(inode), blkno,
1354                                                &bh, NULL);
1355                         if (ret) {
1356                                 mlog_errno(ret);
1357                                 goto out;
1358                         }
1359
1360                         ret = ocfs2_journal_access(handle,
1361                                                    INODE_CACHE(inode),
1362                                                    bh,
1363                                                    OCFS2_JOURNAL_ACCESS_WRITE);
1364                         if (ret < 0) {
1365                                 mlog_errno(ret);
1366                                 goto out;
1367                         }
1368
1369                         cp_len = value_len > blocksize ? blocksize : value_len;
1370                         memcpy(bh->b_data, value, cp_len);
1371                         value_len -= cp_len;
1372                         value += cp_len;
1373                         if (cp_len < blocksize)
1374                                 memset(bh->b_data + cp_len, 0,
1375                                        blocksize - cp_len);
1376
1377                         ret = ocfs2_journal_dirty(handle, bh);
1378                         if (ret < 0) {
1379                                 mlog_errno(ret);
1380                                 goto out;
1381                         }
1382                         brelse(bh);
1383                         bh = NULL;
1384
1385                         /*
1386                          * XXX: do we need to empty all the following
1387                          * blocks in this cluster?
1388                          */
1389                         if (!value_len)
1390                                 break;
1391                 }
1392                 cpos += num_clusters;
1393         }
1394 out:
1395         brelse(bh);
1396
1397         return ret;
1398 }
1399
1400 static int ocfs2_xa_check_space_helper(int needed_space, int free_start,
1401                                        int num_entries)
1402 {
1403         int free_space;
1404
1405         if (!needed_space)
1406                 return 0;
1407
1408         free_space = free_start -
1409                 sizeof(struct ocfs2_xattr_header) -
1410                 (num_entries * sizeof(struct ocfs2_xattr_entry)) -
1411                 OCFS2_XATTR_HEADER_GAP;
1412         if (free_space < 0)
1413                 return -EIO;
1414         if (free_space < needed_space)
1415                 return -ENOSPC;
1416
1417         return 0;
1418 }
1419
1420 static int ocfs2_xa_journal_access(handle_t *handle, struct ocfs2_xa_loc *loc,
1421                                    int type)
1422 {
1423         return loc->xl_ops->xlo_journal_access(handle, loc, type);
1424 }
1425
1426 static void ocfs2_xa_journal_dirty(handle_t *handle, struct ocfs2_xa_loc *loc)
1427 {
1428         loc->xl_ops->xlo_journal_dirty(handle, loc);
1429 }
1430
1431 /* Give a pointer into the storage for the given offset */
1432 static void *ocfs2_xa_offset_pointer(struct ocfs2_xa_loc *loc, int offset)
1433 {
1434         BUG_ON(offset >= loc->xl_size);
1435         return loc->xl_ops->xlo_offset_pointer(loc, offset);
1436 }
1437
1438 /*
1439  * Wipe the name+value pair and allow the storage to reclaim it.  This
1440  * must be followed by either removal of the entry or a call to
1441  * ocfs2_xa_add_namevalue().
1442  */
1443 static void ocfs2_xa_wipe_namevalue(struct ocfs2_xa_loc *loc)
1444 {
1445         loc->xl_ops->xlo_wipe_namevalue(loc);
1446 }
1447
1448 /*
1449  * Find lowest offset to a name+value pair.  This is the start of our
1450  * downward-growing free space.
1451  */
1452 static int ocfs2_xa_get_free_start(struct ocfs2_xa_loc *loc)
1453 {
1454         return loc->xl_ops->xlo_get_free_start(loc);
1455 }
1456
1457 /* Can we reuse loc->xl_entry for xi? */
1458 static int ocfs2_xa_can_reuse_entry(struct ocfs2_xa_loc *loc,
1459                                     struct ocfs2_xattr_info *xi)
1460 {
1461         return loc->xl_ops->xlo_can_reuse(loc, xi);
1462 }
1463
1464 /* How much free space is needed to set the new value */
1465 static int ocfs2_xa_check_space(struct ocfs2_xa_loc *loc,
1466                                 struct ocfs2_xattr_info *xi)
1467 {
1468         return loc->xl_ops->xlo_check_space(loc, xi);
1469 }
1470
1471 static void ocfs2_xa_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash)
1472 {
1473         loc->xl_ops->xlo_add_entry(loc, name_hash);
1474         loc->xl_entry->xe_name_hash = cpu_to_le32(name_hash);
1475         /*
1476          * We can't leave the new entry's xe_name_offset at zero or
1477          * add_namevalue() will go nuts.  We set it to the size of our
1478          * storage so that it can never be less than any other entry.
1479          */
1480         loc->xl_entry->xe_name_offset = cpu_to_le16(loc->xl_size);
1481 }
1482
1483 static void ocfs2_xa_add_namevalue(struct ocfs2_xa_loc *loc,
1484                                    struct ocfs2_xattr_info *xi)
1485 {
1486         int size = namevalue_size_xi(xi);
1487         int nameval_offset;
1488         char *nameval_buf;
1489
1490         loc->xl_ops->xlo_add_namevalue(loc, size);
1491         loc->xl_entry->xe_value_size = cpu_to_le64(xi->xi_value_len);
1492         loc->xl_entry->xe_name_len = xi->xi_name_len;
1493         ocfs2_xattr_set_type(loc->xl_entry, xi->xi_name_index);
1494         ocfs2_xattr_set_local(loc->xl_entry,
1495                               xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE);
1496
1497         nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset);
1498         nameval_buf = ocfs2_xa_offset_pointer(loc, nameval_offset);
1499         memset(nameval_buf, 0, size);
1500         memcpy(nameval_buf, xi->xi_name, xi->xi_name_len);
1501 }
1502
1503 static void ocfs2_xa_fill_value_buf(struct ocfs2_xa_loc *loc,
1504                                     struct ocfs2_xattr_value_buf *vb)
1505 {
1506         int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset);
1507         int name_size = OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len);
1508
1509         /* Value bufs are for value trees */
1510         BUG_ON(ocfs2_xattr_is_local(loc->xl_entry));
1511         BUG_ON(namevalue_size_xe(loc->xl_entry) !=
1512                (name_size + OCFS2_XATTR_ROOT_SIZE));
1513
1514         loc->xl_ops->xlo_fill_value_buf(loc, vb);
1515         vb->vb_xv =
1516                 (struct ocfs2_xattr_value_root *)ocfs2_xa_offset_pointer(loc,
1517                                                         nameval_offset +
1518                                                         name_size);
1519 }
1520
1521 static int ocfs2_xa_block_journal_access(handle_t *handle,
1522                                          struct ocfs2_xa_loc *loc, int type)
1523 {
1524         struct buffer_head *bh = loc->xl_storage;
1525         ocfs2_journal_access_func access;
1526
1527         if (loc->xl_size == (bh->b_size -
1528                              offsetof(struct ocfs2_xattr_block,
1529                                       xb_attrs.xb_header)))
1530                 access = ocfs2_journal_access_xb;
1531         else
1532                 access = ocfs2_journal_access_di;
1533         return access(handle, INODE_CACHE(loc->xl_inode), bh, type);
1534 }
1535
1536 static void ocfs2_xa_block_journal_dirty(handle_t *handle,
1537                                          struct ocfs2_xa_loc *loc)
1538 {
1539         struct buffer_head *bh = loc->xl_storage;
1540
1541         ocfs2_journal_dirty(handle, bh);
1542 }
1543
1544 static void *ocfs2_xa_block_offset_pointer(struct ocfs2_xa_loc *loc,
1545                                            int offset)
1546 {
1547         return (char *)loc->xl_header + offset;
1548 }
1549
1550 static int ocfs2_xa_block_can_reuse(struct ocfs2_xa_loc *loc,
1551                                     struct ocfs2_xattr_info *xi)
1552 {
1553         /*
1554          * Block storage is strict.  If the sizes aren't exact, we will
1555          * remove the old one and reinsert the new.
1556          */
1557         return namevalue_size_xe(loc->xl_entry) ==
1558                 namevalue_size_xi(xi);
1559 }
1560
1561 static int ocfs2_xa_block_get_free_start(struct ocfs2_xa_loc *loc)
1562 {
1563         struct ocfs2_xattr_header *xh = loc->xl_header;
1564         int i, count = le16_to_cpu(xh->xh_count);
1565         int offset, free_start = loc->xl_size;
1566
1567         for (i = 0; i < count; i++) {
1568                 offset = le16_to_cpu(xh->xh_entries[i].xe_name_offset);
1569                 if (offset < free_start)
1570                         free_start = offset;
1571         }
1572
1573         return free_start;
1574 }
1575
1576 static int ocfs2_xa_block_check_space(struct ocfs2_xa_loc *loc,
1577                                       struct ocfs2_xattr_info *xi)
1578 {
1579         int count = le16_to_cpu(loc->xl_header->xh_count);
1580         int free_start = ocfs2_xa_get_free_start(loc);
1581         int needed_space = ocfs2_xi_entry_usage(xi);
1582
1583         /*
1584          * Block storage will reclaim the original entry before inserting
1585          * the new value, so we only need the difference.  If the new
1586          * entry is smaller than the old one, we don't need anything.
1587          */
1588         if (loc->xl_entry) {
1589                 /* Don't need space if we're reusing! */
1590                 if (ocfs2_xa_can_reuse_entry(loc, xi))
1591                         needed_space = 0;
1592                 else
1593                         needed_space -= ocfs2_xe_entry_usage(loc->xl_entry);
1594         }
1595         if (needed_space < 0)
1596                 needed_space = 0;
1597         return ocfs2_xa_check_space_helper(needed_space, free_start, count);
1598 }
1599
1600 /*
1601  * Block storage for xattrs keeps the name+value pairs compacted.  When
1602  * we remove one, we have to shift any that preceded it towards the end.
1603  */
1604 static void ocfs2_xa_block_wipe_namevalue(struct ocfs2_xa_loc *loc)
1605 {
1606         int i, offset;
1607         int namevalue_offset, first_namevalue_offset, namevalue_size;
1608         struct ocfs2_xattr_entry *entry = loc->xl_entry;
1609         struct ocfs2_xattr_header *xh = loc->xl_header;
1610         int count = le16_to_cpu(xh->xh_count);
1611
1612         namevalue_offset = le16_to_cpu(entry->xe_name_offset);
1613         namevalue_size = namevalue_size_xe(entry);
1614         first_namevalue_offset = ocfs2_xa_get_free_start(loc);
1615
1616         /* Shift the name+value pairs */
1617         memmove((char *)xh + first_namevalue_offset + namevalue_size,
1618                 (char *)xh + first_namevalue_offset,
1619                 namevalue_offset - first_namevalue_offset);
1620         memset((char *)xh + first_namevalue_offset, 0, namevalue_size);
1621
1622         /* Now tell xh->xh_entries about it */
1623         for (i = 0; i < count; i++) {
1624                 offset = le16_to_cpu(xh->xh_entries[i].xe_name_offset);
1625                 if (offset <= namevalue_offset)
1626                         le16_add_cpu(&xh->xh_entries[i].xe_name_offset,
1627                                      namevalue_size);
1628         }
1629
1630         /*
1631          * Note that we don't update xh_free_start or xh_name_value_len
1632          * because they're not used in block-stored xattrs.
1633          */
1634 }
1635
1636 static void ocfs2_xa_block_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash)
1637 {
1638         int count = le16_to_cpu(loc->xl_header->xh_count);
1639         loc->xl_entry = &(loc->xl_header->xh_entries[count]);
1640         le16_add_cpu(&loc->xl_header->xh_count, 1);
1641         memset(loc->xl_entry, 0, sizeof(struct ocfs2_xattr_entry));
1642 }
1643
1644 static void ocfs2_xa_block_add_namevalue(struct ocfs2_xa_loc *loc, int size)
1645 {
1646         int free_start = ocfs2_xa_get_free_start(loc);
1647
1648         loc->xl_entry->xe_name_offset = cpu_to_le16(free_start - size);
1649 }
1650
1651 static void ocfs2_xa_block_fill_value_buf(struct ocfs2_xa_loc *loc,
1652                                           struct ocfs2_xattr_value_buf *vb)
1653 {
1654         struct buffer_head *bh = loc->xl_storage;
1655
1656         if (loc->xl_size == (bh->b_size -
1657                              offsetof(struct ocfs2_xattr_block,
1658                                       xb_attrs.xb_header)))
1659                 vb->vb_access = ocfs2_journal_access_xb;
1660         else
1661                 vb->vb_access = ocfs2_journal_access_di;
1662         vb->vb_bh = bh;
1663 }
1664
1665 /*
1666  * Operations for xattrs stored in blocks.  This includes inline inode
1667  * storage and unindexed ocfs2_xattr_blocks.
1668  */
1669 static const struct ocfs2_xa_loc_operations ocfs2_xa_block_loc_ops = {
1670         .xlo_journal_access     = ocfs2_xa_block_journal_access,
1671         .xlo_journal_dirty      = ocfs2_xa_block_journal_dirty,
1672         .xlo_offset_pointer     = ocfs2_xa_block_offset_pointer,
1673         .xlo_check_space        = ocfs2_xa_block_check_space,
1674         .xlo_can_reuse          = ocfs2_xa_block_can_reuse,
1675         .xlo_get_free_start     = ocfs2_xa_block_get_free_start,
1676         .xlo_wipe_namevalue     = ocfs2_xa_block_wipe_namevalue,
1677         .xlo_add_entry          = ocfs2_xa_block_add_entry,
1678         .xlo_add_namevalue      = ocfs2_xa_block_add_namevalue,
1679         .xlo_fill_value_buf     = ocfs2_xa_block_fill_value_buf,
1680 };
1681
1682 static int ocfs2_xa_bucket_journal_access(handle_t *handle,
1683                                           struct ocfs2_xa_loc *loc, int type)
1684 {
1685         struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
1686
1687         return ocfs2_xattr_bucket_journal_access(handle, bucket, type);
1688 }
1689
1690 static void ocfs2_xa_bucket_journal_dirty(handle_t *handle,
1691                                           struct ocfs2_xa_loc *loc)
1692 {
1693         struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
1694
1695         ocfs2_xattr_bucket_journal_dirty(handle, bucket);
1696 }
1697
1698 static void *ocfs2_xa_bucket_offset_pointer(struct ocfs2_xa_loc *loc,
1699                                             int offset)
1700 {
1701         struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
1702         int block, block_offset;
1703
1704         /* The header is at the front of the bucket */
1705         block = offset >> loc->xl_inode->i_sb->s_blocksize_bits;
1706         block_offset = offset % loc->xl_inode->i_sb->s_blocksize;
1707
1708         return bucket_block(bucket, block) + block_offset;
1709 }
1710
1711 static int ocfs2_xa_bucket_can_reuse(struct ocfs2_xa_loc *loc,
1712                                      struct ocfs2_xattr_info *xi)
1713 {
1714         return namevalue_size_xe(loc->xl_entry) >=
1715                 namevalue_size_xi(xi);
1716 }
1717
1718 static int ocfs2_xa_bucket_get_free_start(struct ocfs2_xa_loc *loc)
1719 {
1720         struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
1721         return le16_to_cpu(bucket_xh(bucket)->xh_free_start);
1722 }
1723
1724 static int ocfs2_bucket_align_free_start(struct super_block *sb,
1725                                          int free_start, int size)
1726 {
1727         /*
1728          * We need to make sure that the name+value pair fits within
1729          * one block.
1730          */
1731         if (((free_start - size) >> sb->s_blocksize_bits) !=
1732             ((free_start - 1) >> sb->s_blocksize_bits))
1733                 free_start -= free_start % sb->s_blocksize;
1734
1735         return free_start;
1736 }
1737
1738 static int ocfs2_xa_bucket_check_space(struct ocfs2_xa_loc *loc,
1739                                        struct ocfs2_xattr_info *xi)
1740 {
1741         int rc;
1742         int count = le16_to_cpu(loc->xl_header->xh_count);
1743         int free_start = ocfs2_xa_get_free_start(loc);
1744         int needed_space = ocfs2_xi_entry_usage(xi);
1745         int size = namevalue_size_xi(xi);
1746         struct super_block *sb = loc->xl_inode->i_sb;
1747
1748         /*
1749          * Bucket storage does not reclaim name+value pairs it cannot
1750          * reuse.  They live as holes until the bucket fills, and then
1751          * the bucket is defragmented.  However, the bucket can reclaim
1752          * the ocfs2_xattr_entry.
1753          */
1754         if (loc->xl_entry) {
1755                 /* Don't need space if we're reusing! */
1756                 if (ocfs2_xa_can_reuse_entry(loc, xi))
1757                         needed_space = 0;
1758                 else
1759                         needed_space -= sizeof(struct ocfs2_xattr_entry);
1760         }
1761         BUG_ON(needed_space < 0);
1762
1763         if (free_start < size) {
1764                 if (needed_space)
1765                         return -ENOSPC;
1766         } else {
1767                 /*
1768                  * First we check if it would fit in the first place.
1769                  * Below, we align the free start to a block.  This may
1770                  * slide us below the minimum gap.  By checking unaligned
1771                  * first, we avoid that error.
1772                  */
1773                 rc = ocfs2_xa_check_space_helper(needed_space, free_start,
1774                                                  count);
1775                 if (rc)
1776                         return rc;
1777                 free_start = ocfs2_bucket_align_free_start(sb, free_start,
1778                                                            size);
1779         }
1780         return ocfs2_xa_check_space_helper(needed_space, free_start, count);
1781 }
1782
1783 static void ocfs2_xa_bucket_wipe_namevalue(struct ocfs2_xa_loc *loc)
1784 {
1785         le16_add_cpu(&loc->xl_header->xh_name_value_len,
1786                      -namevalue_size_xe(loc->xl_entry));
1787 }
1788
1789 static void ocfs2_xa_bucket_add_entry(struct ocfs2_xa_loc *loc, u32 name_hash)
1790 {
1791         struct ocfs2_xattr_header *xh = loc->xl_header;
1792         int count = le16_to_cpu(xh->xh_count);
1793         int low = 0, high = count - 1, tmp;
1794         struct ocfs2_xattr_entry *tmp_xe;
1795
1796         /*
1797          * We keep buckets sorted by name_hash, so we need to find
1798          * our insert place.
1799          */
1800         while (low <= high && count) {
1801                 tmp = (low + high) / 2;
1802                 tmp_xe = &xh->xh_entries[tmp];
1803
1804                 if (name_hash > le32_to_cpu(tmp_xe->xe_name_hash))
1805                         low = tmp + 1;
1806                 else if (name_hash < le32_to_cpu(tmp_xe->xe_name_hash))
1807                         high = tmp - 1;
1808                 else {
1809                         low = tmp;
1810                         break;
1811                 }
1812         }
1813
1814         if (low != count)
1815                 memmove(&xh->xh_entries[low + 1],
1816                         &xh->xh_entries[low],
1817                         ((count - low) * sizeof(struct ocfs2_xattr_entry)));
1818
1819         le16_add_cpu(&xh->xh_count, 1);
1820         loc->xl_entry = &xh->xh_entries[low];
1821         memset(loc->xl_entry, 0, sizeof(struct ocfs2_xattr_entry));
1822 }
1823
1824 static void ocfs2_xa_bucket_add_namevalue(struct ocfs2_xa_loc *loc, int size)
1825 {
1826         int free_start = ocfs2_xa_get_free_start(loc);
1827         struct ocfs2_xattr_header *xh = loc->xl_header;
1828         struct super_block *sb = loc->xl_inode->i_sb;
1829         int nameval_offset;
1830
1831         free_start = ocfs2_bucket_align_free_start(sb, free_start, size);
1832         nameval_offset = free_start - size;
1833         loc->xl_entry->xe_name_offset = cpu_to_le16(nameval_offset);
1834         xh->xh_free_start = cpu_to_le16(nameval_offset);
1835         le16_add_cpu(&xh->xh_name_value_len, size);
1836
1837 }
1838
1839 static void ocfs2_xa_bucket_fill_value_buf(struct ocfs2_xa_loc *loc,
1840                                            struct ocfs2_xattr_value_buf *vb)
1841 {
1842         struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
1843         struct super_block *sb = loc->xl_inode->i_sb;
1844         int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset);
1845         int size = namevalue_size_xe(loc->xl_entry);
1846         int block_offset = nameval_offset >> sb->s_blocksize_bits;
1847
1848         /* Values are not allowed to straddle block boundaries */
1849         BUG_ON(block_offset !=
1850                ((nameval_offset + size - 1) >> sb->s_blocksize_bits));
1851         /* We expect the bucket to be filled in */
1852         BUG_ON(!bucket->bu_bhs[block_offset]);
1853
1854         vb->vb_access = ocfs2_journal_access;
1855         vb->vb_bh = bucket->bu_bhs[block_offset];
1856 }
1857
1858 /* Operations for xattrs stored in buckets. */
1859 static const struct ocfs2_xa_loc_operations ocfs2_xa_bucket_loc_ops = {
1860         .xlo_journal_access     = ocfs2_xa_bucket_journal_access,
1861         .xlo_journal_dirty      = ocfs2_xa_bucket_journal_dirty,
1862         .xlo_offset_pointer     = ocfs2_xa_bucket_offset_pointer,
1863         .xlo_check_space        = ocfs2_xa_bucket_check_space,
1864         .xlo_can_reuse          = ocfs2_xa_bucket_can_reuse,
1865         .xlo_get_free_start     = ocfs2_xa_bucket_get_free_start,
1866         .xlo_wipe_namevalue     = ocfs2_xa_bucket_wipe_namevalue,
1867         .xlo_add_entry          = ocfs2_xa_bucket_add_entry,
1868         .xlo_add_namevalue      = ocfs2_xa_bucket_add_namevalue,
1869         .xlo_fill_value_buf     = ocfs2_xa_bucket_fill_value_buf,
1870 };
1871
1872 static unsigned int ocfs2_xa_value_clusters(struct ocfs2_xa_loc *loc)
1873 {
1874         struct ocfs2_xattr_value_buf vb;
1875
1876         if (ocfs2_xattr_is_local(loc->xl_entry))
1877                 return 0;
1878
1879         ocfs2_xa_fill_value_buf(loc, &vb);
1880         return le32_to_cpu(vb.vb_xv->xr_clusters);
1881 }
1882
1883 static int ocfs2_xa_value_truncate(struct ocfs2_xa_loc *loc, u64 bytes,
1884                                    struct ocfs2_xattr_set_ctxt *ctxt)
1885 {
1886         int trunc_rc, access_rc;
1887         struct ocfs2_xattr_value_buf vb;
1888
1889         ocfs2_xa_fill_value_buf(loc, &vb);
1890         trunc_rc = ocfs2_xattr_value_truncate(loc->xl_inode, &vb, bytes,
1891                                               ctxt);
1892
1893         /*
1894          * The caller of ocfs2_xa_value_truncate() has already called
1895          * ocfs2_xa_journal_access on the loc.  However, The truncate code
1896          * calls ocfs2_extend_trans().  This may commit the previous
1897          * transaction and open a new one.  If this is a bucket, truncate
1898          * could leave only vb->vb_bh set up for journaling.  Meanwhile,
1899          * the caller is expecting to dirty the entire bucket.  So we must
1900          * reset the journal work.  We do this even if truncate has failed,
1901          * as it could have failed after committing the extend.
1902          */
1903         access_rc = ocfs2_xa_journal_access(ctxt->handle, loc,
1904                                             OCFS2_JOURNAL_ACCESS_WRITE);
1905
1906         /* Errors in truncate take precedence */
1907         return trunc_rc ? trunc_rc : access_rc;
1908 }
1909
1910 static void ocfs2_xa_remove_entry(struct ocfs2_xa_loc *loc)
1911 {
1912         int index, count;
1913         struct ocfs2_xattr_header *xh = loc->xl_header;
1914         struct ocfs2_xattr_entry *entry = loc->xl_entry;
1915
1916         ocfs2_xa_wipe_namevalue(loc);
1917         loc->xl_entry = NULL;
1918
1919         le16_add_cpu(&xh->xh_count, -1);
1920         count = le16_to_cpu(xh->xh_count);
1921
1922         /*
1923          * Only zero out the entry if there are more remaining.  This is
1924          * important for an empty bucket, as it keeps track of the
1925          * bucket's hash value.  It doesn't hurt empty block storage.
1926          */
1927         if (count) {
1928                 index = ((char *)entry - (char *)&xh->xh_entries) /
1929                         sizeof(struct ocfs2_xattr_entry);
1930                 memmove(&xh->xh_entries[index], &xh->xh_entries[index + 1],
1931                         (count - index) * sizeof(struct ocfs2_xattr_entry));
1932                 memset(&xh->xh_entries[count], 0,
1933                        sizeof(struct ocfs2_xattr_entry));
1934         }
1935 }
1936
1937 /*
1938  * If we have a problem adjusting the size of an external value during
1939  * ocfs2_xa_prepare_entry() or ocfs2_xa_remove(), we may have an xattr
1940  * in an intermediate state.  For example, the value may be partially
1941  * truncated.
1942  *
1943  * If the value tree hasn't changed, the extend/truncate went nowhere.
1944  * We have nothing to do.  The caller can treat it as a straight error.
1945  *
1946  * If the value tree got partially truncated, we now have a corrupted
1947  * extended attribute.  We're going to wipe its entry and leak the
1948  * clusters.  Better to leak some storage than leave a corrupt entry.
1949  *
1950  * If the value tree grew, it obviously didn't grow enough for the
1951  * new entry.  We're not going to try and reclaim those clusters either.
1952  * If there was already an external value there (orig_clusters != 0),
1953  * the new clusters are attached safely and we can just leave the old
1954  * value in place.  If there was no external value there, we remove
1955  * the entry.
1956  *
1957  * This way, the xattr block we store in the journal will be consistent.
1958  * If the size change broke because of the journal, no changes will hit
1959  * disk anyway.
1960  */
1961 static void ocfs2_xa_cleanup_value_truncate(struct ocfs2_xa_loc *loc,
1962                                             const char *what,
1963                                             unsigned int orig_clusters)
1964 {
1965         unsigned int new_clusters = ocfs2_xa_value_clusters(loc);
1966         char *nameval_buf = ocfs2_xa_offset_pointer(loc,
1967                                 le16_to_cpu(loc->xl_entry->xe_name_offset));
1968
1969         if (new_clusters < orig_clusters) {
1970                 mlog(ML_ERROR,
1971                      "Partial truncate while %s xattr %.*s.  Leaking "
1972                      "%u clusters and removing the entry\n",
1973                      what, loc->xl_entry->xe_name_len, nameval_buf,
1974                      orig_clusters - new_clusters);
1975                 ocfs2_xa_remove_entry(loc);
1976         } else if (!orig_clusters) {
1977                 mlog(ML_ERROR,
1978                      "Unable to allocate an external value for xattr "
1979                      "%.*s safely.  Leaking %u clusters and removing the "
1980                      "entry\n",
1981                      loc->xl_entry->xe_name_len, nameval_buf,
1982                      new_clusters - orig_clusters);
1983                 ocfs2_xa_remove_entry(loc);
1984         } else if (new_clusters > orig_clusters)
1985                 mlog(ML_ERROR,
1986                      "Unable to grow xattr %.*s safely.  %u new clusters "
1987                      "have been added, but the value will not be "
1988                      "modified\n",
1989                      loc->xl_entry->xe_name_len, nameval_buf,
1990                      new_clusters - orig_clusters);
1991 }
1992
1993 static int ocfs2_xa_remove(struct ocfs2_xa_loc *loc,
1994                            struct ocfs2_xattr_set_ctxt *ctxt)
1995 {
1996         int rc = 0;
1997         unsigned int orig_clusters;
1998
1999         if (!ocfs2_xattr_is_local(loc->xl_entry)) {
2000                 orig_clusters = ocfs2_xa_value_clusters(loc);
2001                 rc = ocfs2_xa_value_truncate(loc, 0, ctxt);
2002                 if (rc) {
2003                         mlog_errno(rc);
2004                         /*
2005                          * Since this is remove, we can return 0 if
2006                          * ocfs2_xa_cleanup_value_truncate() is going to
2007                          * wipe the entry anyway.  So we check the
2008                          * cluster count as well.
2009                          */
2010                         if (orig_clusters != ocfs2_xa_value_clusters(loc))
2011                                 rc = 0;
2012                         ocfs2_xa_cleanup_value_truncate(loc, "removing",
2013                                                         orig_clusters);
2014                         if (rc)
2015                                 goto out;
2016                 }
2017         }
2018
2019         ocfs2_xa_remove_entry(loc);
2020
2021 out:
2022         return rc;
2023 }
2024
2025 static void ocfs2_xa_install_value_root(struct ocfs2_xa_loc *loc)
2026 {
2027         int name_size = OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len);
2028         char *nameval_buf;
2029
2030         nameval_buf = ocfs2_xa_offset_pointer(loc,
2031                                 le16_to_cpu(loc->xl_entry->xe_name_offset));
2032         memcpy(nameval_buf + name_size, &def_xv, OCFS2_XATTR_ROOT_SIZE);
2033 }
2034
2035 /*
2036  * Take an existing entry and make it ready for the new value.  This
2037  * won't allocate space, but it may free space.  It should be ready for
2038  * ocfs2_xa_prepare_entry() to finish the work.
2039  */
2040 static int ocfs2_xa_reuse_entry(struct ocfs2_xa_loc *loc,
2041                                 struct ocfs2_xattr_info *xi,
2042                                 struct ocfs2_xattr_set_ctxt *ctxt)
2043 {
2044         int rc = 0;
2045         int name_size = OCFS2_XATTR_SIZE(xi->xi_name_len);
2046         unsigned int orig_clusters;
2047         char *nameval_buf;
2048         int xe_local = ocfs2_xattr_is_local(loc->xl_entry);
2049         int xi_local = xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE;
2050
2051         BUG_ON(OCFS2_XATTR_SIZE(loc->xl_entry->xe_name_len) !=
2052                name_size);
2053
2054         nameval_buf = ocfs2_xa_offset_pointer(loc,
2055                                 le16_to_cpu(loc->xl_entry->xe_name_offset));
2056         if (xe_local) {
2057                 memset(nameval_buf + name_size, 0,
2058                        namevalue_size_xe(loc->xl_entry) - name_size);
2059                 if (!xi_local)
2060                         ocfs2_xa_install_value_root(loc);
2061         } else {
2062                 orig_clusters = ocfs2_xa_value_clusters(loc);
2063                 if (xi_local) {
2064                         rc = ocfs2_xa_value_truncate(loc, 0, ctxt);
2065                         if (rc < 0)
2066                                 mlog_errno(rc);
2067                         else
2068                                 memset(nameval_buf + name_size, 0,
2069                                        namevalue_size_xe(loc->xl_entry) -
2070                                        name_size);
2071                 } else if (le64_to_cpu(loc->xl_entry->xe_value_size) >
2072                            xi->xi_value_len) {
2073                         rc = ocfs2_xa_value_truncate(loc, xi->xi_value_len,
2074                                                      ctxt);
2075                         if (rc < 0)
2076                                 mlog_errno(rc);
2077                 }
2078
2079                 if (rc) {
2080                         ocfs2_xa_cleanup_value_truncate(loc, "reusing",
2081                                                         orig_clusters);
2082                         goto out;
2083                 }
2084         }
2085
2086         loc->xl_entry->xe_value_size = cpu_to_le64(xi->xi_value_len);
2087         ocfs2_xattr_set_local(loc->xl_entry, xi_local);
2088
2089 out:
2090         return rc;
2091 }
2092
2093 /*
2094  * Prepares loc->xl_entry to receive the new xattr.  This includes
2095  * properly setting up the name+value pair region.  If loc->xl_entry
2096  * already exists, it will take care of modifying it appropriately.
2097  *
2098  * Note that this modifies the data.  You did journal_access already,
2099  * right?
2100  */
2101 static int ocfs2_xa_prepare_entry(struct ocfs2_xa_loc *loc,
2102                                   struct ocfs2_xattr_info *xi,
2103                                   u32 name_hash,
2104                                   struct ocfs2_xattr_set_ctxt *ctxt)
2105 {
2106         int rc = 0;
2107         unsigned int orig_clusters;
2108         __le64 orig_value_size = 0;
2109
2110         rc = ocfs2_xa_check_space(loc, xi);
2111         if (rc)
2112                 goto out;
2113
2114         if (loc->xl_entry) {
2115                 if (ocfs2_xa_can_reuse_entry(loc, xi)) {
2116                         orig_value_size = loc->xl_entry->xe_value_size;
2117                         rc = ocfs2_xa_reuse_entry(loc, xi, ctxt);
2118                         if (rc)
2119                                 goto out;
2120                         goto alloc_value;
2121                 }
2122
2123                 if (!ocfs2_xattr_is_local(loc->xl_entry)) {
2124                         orig_clusters = ocfs2_xa_value_clusters(loc);
2125                         rc = ocfs2_xa_value_truncate(loc, 0, ctxt);
2126                         if (rc) {
2127                                 mlog_errno(rc);
2128                                 ocfs2_xa_cleanup_value_truncate(loc,
2129                                                                 "overwriting",
2130                                                                 orig_clusters);
2131                                 goto out;
2132                         }
2133                 }
2134                 ocfs2_xa_wipe_namevalue(loc);
2135         } else
2136                 ocfs2_xa_add_entry(loc, name_hash);
2137
2138         /*
2139          * If we get here, we have a blank entry.  Fill it.  We grow our
2140          * name+value pair back from the end.
2141          */
2142         ocfs2_xa_add_namevalue(loc, xi);
2143         if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE)
2144                 ocfs2_xa_install_value_root(loc);
2145
2146 alloc_value:
2147         if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
2148                 orig_clusters = ocfs2_xa_value_clusters(loc);
2149                 rc = ocfs2_xa_value_truncate(loc, xi->xi_value_len, ctxt);
2150                 if (rc < 0) {
2151                         /*
2152                          * If we tried to grow an existing external value,
2153                          * ocfs2_xa_cleanuP-value_truncate() is going to
2154                          * let it stand.  We have to restore its original
2155                          * value size.
2156                          */
2157                         loc->xl_entry->xe_value_size = orig_value_size;
2158                         ocfs2_xa_cleanup_value_truncate(loc, "growing",
2159                                                         orig_clusters);
2160                         mlog_errno(rc);
2161                 }
2162         }
2163
2164 out:
2165         return rc;
2166 }
2167
2168 /*
2169  * Store the value portion of the name+value pair.  This will skip
2170  * values that are stored externally.  Their tree roots were set up
2171  * by ocfs2_xa_prepare_entry().
2172  */
2173 static int ocfs2_xa_store_value(struct ocfs2_xa_loc *loc,
2174                                 struct ocfs2_xattr_info *xi,
2175                                 struct ocfs2_xattr_set_ctxt *ctxt)
2176 {
2177         int rc = 0;
2178         int nameval_offset = le16_to_cpu(loc->xl_entry->xe_name_offset);
2179         int name_size = OCFS2_XATTR_SIZE(xi->xi_name_len);
2180         char *nameval_buf;
2181         struct ocfs2_xattr_value_buf vb;
2182
2183         nameval_buf = ocfs2_xa_offset_pointer(loc, nameval_offset);
2184         if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
2185                 ocfs2_xa_fill_value_buf(loc, &vb);
2186                 rc = __ocfs2_xattr_set_value_outside(loc->xl_inode,
2187                                                      ctxt->handle, &vb,
2188                                                      xi->xi_value,
2189                                                      xi->xi_value_len);
2190         } else
2191                 memcpy(nameval_buf + name_size, xi->xi_value, xi->xi_value_len);
2192
2193         return rc;
2194 }
2195
2196 static int ocfs2_xa_set(struct ocfs2_xa_loc *loc,
2197                         struct ocfs2_xattr_info *xi,
2198                         struct ocfs2_xattr_set_ctxt *ctxt)
2199 {
2200         int ret;
2201         u32 name_hash = ocfs2_xattr_name_hash(loc->xl_inode, xi->xi_name,
2202                                               xi->xi_name_len);
2203
2204         ret = ocfs2_xa_journal_access(ctxt->handle, loc,
2205                                       OCFS2_JOURNAL_ACCESS_WRITE);
2206         if (ret) {
2207                 mlog_errno(ret);
2208                 goto out;
2209         }
2210
2211         /*
2212          * From here on out, everything is going to modify the buffer a
2213          * little.  Errors are going to leave the xattr header in a
2214          * sane state.  Thus, even with errors we dirty the sucker.
2215          */
2216
2217         /* Don't worry, we are never called with !xi_value and !xl_entry */
2218         if (!xi->xi_value) {
2219                 ret = ocfs2_xa_remove(loc, ctxt);
2220                 goto out_dirty;
2221         }
2222
2223         ret = ocfs2_xa_prepare_entry(loc, xi, name_hash, ctxt);
2224         if (ret) {
2225                 if (ret != -ENOSPC)
2226                         mlog_errno(ret);
2227                 goto out_dirty;
2228         }
2229
2230         ret = ocfs2_xa_store_value(loc, xi, ctxt);
2231         if (ret)
2232                 mlog_errno(ret);
2233
2234 out_dirty:
2235         ocfs2_xa_journal_dirty(ctxt->handle, loc);
2236
2237 out:
2238         return ret;
2239 }
2240
2241 static void ocfs2_init_dinode_xa_loc(struct ocfs2_xa_loc *loc,
2242                                      struct inode *inode,
2243                                      struct buffer_head *bh,
2244                                      struct ocfs2_xattr_entry *entry)
2245 {
2246         struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
2247
2248         BUG_ON(!(OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_XATTR_FL));
2249
2250         loc->xl_inode = inode;
2251         loc->xl_ops = &ocfs2_xa_block_loc_ops;
2252         loc->xl_storage = bh;
2253         loc->xl_entry = entry;
2254         loc->xl_size = le16_to_cpu(di->i_xattr_inline_size);
2255         loc->xl_header =
2256                 (struct ocfs2_xattr_header *)(bh->b_data + bh->b_size -
2257                                               loc->xl_size);
2258 }
2259
2260 static void ocfs2_init_xattr_block_xa_loc(struct ocfs2_xa_loc *loc,
2261                                           struct inode *inode,
2262                                           struct buffer_head *bh,
2263                                           struct ocfs2_xattr_entry *entry)
2264 {
2265         struct ocfs2_xattr_block *xb =
2266                 (struct ocfs2_xattr_block *)bh->b_data;
2267
2268         BUG_ON(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED);
2269
2270         loc->xl_inode = inode;
2271         loc->xl_ops = &ocfs2_xa_block_loc_ops;
2272         loc->xl_storage = bh;
2273         loc->xl_header = &(xb->xb_attrs.xb_header);
2274         loc->xl_entry = entry;
2275         loc->xl_size = bh->b_size - offsetof(struct ocfs2_xattr_block,
2276                                              xb_attrs.xb_header);
2277 }
2278
2279 static void ocfs2_init_xattr_bucket_xa_loc(struct ocfs2_xa_loc *loc,
2280                                            struct ocfs2_xattr_bucket *bucket,
2281                                            struct ocfs2_xattr_entry *entry)
2282 {
2283         loc->xl_inode = bucket->bu_inode;
2284         loc->xl_ops = &ocfs2_xa_bucket_loc_ops;
2285         loc->xl_storage = bucket;
2286         loc->xl_header = bucket_xh(bucket);
2287         loc->xl_entry = entry;
2288         loc->xl_size = OCFS2_XATTR_BUCKET_SIZE;
2289 }
2290
2291 /*
2292  * In xattr remove, if it is stored outside and refcounted, we may have
2293  * the chance to split the refcount tree. So need the allocators.
2294  */
2295 static int ocfs2_lock_xattr_remove_allocators(struct inode *inode,
2296                                         struct ocfs2_xattr_value_root *xv,
2297                                         struct ocfs2_caching_info *ref_ci,
2298                                         struct buffer_head *ref_root_bh,
2299                                         struct ocfs2_alloc_context **meta_ac,
2300                                         int *ref_credits)
2301 {
2302         int ret, meta_add = 0;
2303         u32 p_cluster, num_clusters;
2304         unsigned int ext_flags;
2305
2306         *ref_credits = 0;
2307         ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster,
2308                                        &num_clusters,
2309                                        &xv->xr_list,
2310                                        &ext_flags);
2311         if (ret) {
2312                 mlog_errno(ret);
2313                 goto out;
2314         }
2315
2316         if (!(ext_flags & OCFS2_EXT_REFCOUNTED))
2317                 goto out;
2318
2319         ret = ocfs2_refcounted_xattr_delete_need(inode, ref_ci,
2320                                                  ref_root_bh, xv,
2321                                                  &meta_add, ref_credits);
2322         if (ret) {
2323                 mlog_errno(ret);
2324                 goto out;
2325         }
2326
2327         ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(inode->i_sb),
2328                                                 meta_add, meta_ac);
2329         if (ret)
2330                 mlog_errno(ret);
2331
2332 out:
2333         return ret;
2334 }
2335
2336 static int ocfs2_remove_value_outside(struct inode*inode,
2337                                       struct ocfs2_xattr_value_buf *vb,
2338                                       struct ocfs2_xattr_header *header,
2339                                       struct ocfs2_caching_info *ref_ci,
2340                                       struct buffer_head *ref_root_bh)
2341 {
2342         int ret = 0, i, ref_credits;
2343         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2344         struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, };
2345         void *val;
2346
2347         ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
2348
2349         for (i = 0; i < le16_to_cpu(header->xh_count); i++) {
2350                 struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
2351
2352                 if (ocfs2_xattr_is_local(entry))
2353                         continue;
2354
2355                 val = (void *)header +
2356                         le16_to_cpu(entry->xe_name_offset);
2357                 vb->vb_xv = (struct ocfs2_xattr_value_root *)
2358                         (val + OCFS2_XATTR_SIZE(entry->xe_name_len));
2359
2360                 ret = ocfs2_lock_xattr_remove_allocators(inode, vb->vb_xv,
2361                                                          ref_ci, ref_root_bh,
2362                                                          &ctxt.meta_ac,
2363                                                          &ref_credits);
2364
2365                 ctxt.handle = ocfs2_start_trans(osb, ref_credits +
2366                                         ocfs2_remove_extent_credits(osb->sb));
2367                 if (IS_ERR(ctxt.handle)) {
2368                         ret = PTR_ERR(ctxt.handle);
2369                         mlog_errno(ret);
2370                         break;
2371                 }
2372
2373                 ret = ocfs2_xattr_value_truncate(inode, vb, 0, &ctxt);
2374                 if (ret < 0) {
2375                         mlog_errno(ret);
2376                         break;
2377                 }
2378
2379                 ocfs2_commit_trans(osb, ctxt.handle);
2380                 if (ctxt.meta_ac) {
2381                         ocfs2_free_alloc_context(ctxt.meta_ac);
2382                         ctxt.meta_ac = NULL;
2383                 }
2384         }
2385
2386         if (ctxt.meta_ac)
2387                 ocfs2_free_alloc_context(ctxt.meta_ac);
2388         ocfs2_schedule_truncate_log_flush(osb, 1);
2389         ocfs2_run_deallocs(osb, &ctxt.dealloc);
2390         return ret;
2391 }
2392
2393 static int ocfs2_xattr_ibody_remove(struct inode *inode,
2394                                     struct buffer_head *di_bh,
2395                                     struct ocfs2_caching_info *ref_ci,
2396                                     struct buffer_head *ref_root_bh)
2397 {
2398
2399         struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
2400         struct ocfs2_xattr_header *header;
2401         int ret;
2402         struct ocfs2_xattr_value_buf vb = {
2403                 .vb_bh = di_bh,
2404                 .vb_access = ocfs2_journal_access_di,
2405         };
2406
2407         header = (struct ocfs2_xattr_header *)
2408                  ((void *)di + inode->i_sb->s_blocksize -
2409                  le16_to_cpu(di->i_xattr_inline_size));
2410
2411         ret = ocfs2_remove_value_outside(inode, &vb, header,
2412                                          ref_ci, ref_root_bh);
2413
2414         return ret;
2415 }
2416
/*
 * Refcount tree context bundled into a single pointer so that it can be
 * passed as the opaque argument of an xattr index block iteration.
 */
struct ocfs2_rm_xattr_bucket_para {
	/* Caching info of the refcount tree. */
	struct ocfs2_caching_info *ref_ci;
	/* Buffer head of the refcount tree root block. */
	struct buffer_head *ref_root_bh;
};
2421
2422 static int ocfs2_xattr_block_remove(struct inode *inode,
2423                                     struct buffer_head *blk_bh,
2424                                     struct ocfs2_caching_info *ref_ci,
2425                                     struct buffer_head *ref_root_bh)
2426 {
2427         struct ocfs2_xattr_block *xb;
2428         int ret = 0;
2429         struct ocfs2_xattr_value_buf vb = {
2430                 .vb_bh = blk_bh,
2431                 .vb_access = ocfs2_journal_access_xb,
2432         };
2433         struct ocfs2_rm_xattr_bucket_para args = {
2434                 .ref_ci = ref_ci,
2435                 .ref_root_bh = ref_root_bh,
2436         };
2437
2438         xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
2439         if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
2440                 struct ocfs2_xattr_header *header = &(xb->xb_attrs.xb_header);
2441                 ret = ocfs2_remove_value_outside(inode, &vb, header,
2442                                                  ref_ci, ref_root_bh);
2443         } else
2444                 ret = ocfs2_iterate_xattr_index_block(inode,
2445                                                 blk_bh,
2446                                                 ocfs2_rm_xattr_cluster,
2447                                                 &args);
2448
2449         return ret;
2450 }
2451
2452 static int ocfs2_xattr_free_block(struct inode *inode,
2453                                   u64 block,
2454                                   struct ocfs2_caching_info *ref_ci,
2455                                   struct buffer_head *ref_root_bh)
2456 {
2457         struct inode *xb_alloc_inode;
2458         struct buffer_head *xb_alloc_bh = NULL;
2459         struct buffer_head *blk_bh = NULL;
2460         struct ocfs2_xattr_block *xb;
2461         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2462         handle_t *handle;
2463         int ret = 0;
2464         u64 blk, bg_blkno;
2465         u16 bit;
2466
2467         ret = ocfs2_read_xattr_block(inode, block, &blk_bh);
2468         if (ret < 0) {
2469                 mlog_errno(ret);
2470                 goto out;
2471         }
2472
2473         ret = ocfs2_xattr_block_remove(inode, blk_bh, ref_ci, ref_root_bh);
2474         if (ret < 0) {
2475                 mlog_errno(ret);
2476                 goto out;
2477         }
2478
2479         xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
2480         blk = le64_to_cpu(xb->xb_blkno);
2481         bit = le16_to_cpu(xb->xb_suballoc_bit);
2482         bg_blkno = ocfs2_which_suballoc_group(blk, bit);
2483
2484         xb_alloc_inode = ocfs2_get_system_file_inode(osb,
2485                                 EXTENT_ALLOC_SYSTEM_INODE,
2486                                 le16_to_cpu(xb->xb_suballoc_slot));
2487         if (!xb_alloc_inode) {
2488                 ret = -ENOMEM;
2489                 mlog_errno(ret);
2490                 goto out;
2491         }
2492         mutex_lock(&xb_alloc_inode->i_mutex);
2493
2494         ret = ocfs2_inode_lock(xb_alloc_inode, &xb_alloc_bh, 1);
2495         if (ret < 0) {
2496                 mlog_errno(ret);
2497                 goto out_mutex;
2498         }
2499
2500         handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE);
2501         if (IS_ERR(handle)) {
2502                 ret = PTR_ERR(handle);
2503                 mlog_errno(ret);
2504                 goto out_unlock;
2505         }
2506
2507         ret = ocfs2_free_suballoc_bits(handle, xb_alloc_inode, xb_alloc_bh,
2508                                        bit, bg_blkno, 1);
2509         if (ret < 0)
2510                 mlog_errno(ret);
2511
2512         ocfs2_commit_trans(osb, handle);
2513 out_unlock:
2514         ocfs2_inode_unlock(xb_alloc_inode, 1);
2515         brelse(xb_alloc_bh);
2516 out_mutex:
2517         mutex_unlock(&xb_alloc_inode->i_mutex);
2518         iput(xb_alloc_inode);
2519 out:
2520         brelse(blk_bh);
2521         return ret;
2522 }
2523
2524 /*
2525  * ocfs2_xattr_remove()
2526  *
2527  * Free extended attribute resources associated with this inode.
2528  */
2529 int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh)
2530 {
2531         struct ocfs2_inode_info *oi = OCFS2_I(inode);
2532         struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
2533         struct ocfs2_refcount_tree *ref_tree = NULL;
2534         struct buffer_head *ref_root_bh = NULL;
2535         struct ocfs2_caching_info *ref_ci = NULL;
2536         handle_t *handle;
2537         int ret;
2538
2539         if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
2540                 return 0;
2541
2542         if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
2543                 return 0;
2544
2545         if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL) {
2546                 ret = ocfs2_lock_refcount_tree(OCFS2_SB(inode->i_sb),
2547                                                le64_to_cpu(di->i_refcount_loc),
2548                                                1, &ref_tree, &ref_root_bh);
2549                 if (ret) {
2550                         mlog_errno(ret);
2551                         goto out;
2552                 }
2553                 ref_ci = &ref_tree->rf_ci;
2554
2555         }
2556
2557         if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
2558                 ret = ocfs2_xattr_ibody_remove(inode, di_bh,
2559                                                ref_ci, ref_root_bh);
2560                 if (ret < 0) {
2561                         mlog_errno(ret);
2562                         goto out;
2563                 }
2564         }
2565
2566         if (di->i_xattr_loc) {
2567                 ret = ocfs2_xattr_free_block(inode,
2568                                              le64_to_cpu(di->i_xattr_loc),
2569                                              ref_ci, ref_root_bh);
2570                 if (ret < 0) {
2571                         mlog_errno(ret);
2572                         goto out;
2573                 }
2574         }
2575
2576         handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)),
2577                                    OCFS2_INODE_UPDATE_CREDITS);
2578         if (IS_ERR(handle)) {
2579                 ret = PTR_ERR(handle);
2580                 mlog_errno(ret);
2581                 goto out;
2582         }
2583         ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
2584                                       OCFS2_JOURNAL_ACCESS_WRITE);
2585         if (ret) {
2586                 mlog_errno(ret);
2587                 goto out_commit;
2588         }
2589
2590         di->i_xattr_loc = 0;
2591
2592         spin_lock(&oi->ip_lock);
2593         oi->ip_dyn_features &= ~(OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL);
2594         di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
2595         spin_unlock(&oi->ip_lock);
2596
2597         ret = ocfs2_journal_dirty(handle, di_bh);
2598         if (ret < 0)
2599                 mlog_errno(ret);
2600 out_commit:
2601         ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
2602 out:
2603         if (ref_tree)
2604                 ocfs2_unlock_refcount_tree(OCFS2_SB(inode->i_sb), ref_tree, 1);
2605         brelse(ref_root_bh);
2606         return ret;
2607 }
2608
2609 static int ocfs2_xattr_has_space_inline(struct inode *inode,
2610                                         struct ocfs2_dinode *di)
2611 {
2612         struct ocfs2_inode_info *oi = OCFS2_I(inode);
2613         unsigned int xattrsize = OCFS2_SB(inode->i_sb)->s_xattr_inline_size;
2614         int free;
2615
2616         if (xattrsize < OCFS2_MIN_XATTR_INLINE_SIZE)
2617                 return 0;
2618
2619         if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
2620                 struct ocfs2_inline_data *idata = &di->id2.i_data;
2621                 free = le16_to_cpu(idata->id_count) - le64_to_cpu(di->i_size);
2622         } else if (ocfs2_inode_is_fast_symlink(inode)) {
2623                 free = ocfs2_fast_symlink_chars(inode->i_sb) -
2624                         le64_to_cpu(di->i_size);
2625         } else {
2626                 struct ocfs2_extent_list *el = &di->id2.i_list;
2627                 free = (le16_to_cpu(el->l_count) -
2628                         le16_to_cpu(el->l_next_free_rec)) *
2629                         sizeof(struct ocfs2_extent_rec);
2630         }
2631         if (free >= xattrsize)
2632                 return 1;
2633
2634         return 0;
2635 }
2636
2637 /*
2638  * ocfs2_xattr_ibody_find()
2639  *
2640  * Find extended attribute in inode block and
2641  * fill search info into struct ocfs2_xattr_search.
2642  */
2643 static int ocfs2_xattr_ibody_find(struct inode *inode,
2644                                   int name_index,
2645                                   const char *name,
2646                                   struct ocfs2_xattr_search *xs)
2647 {
2648         struct ocfs2_inode_info *oi = OCFS2_I(inode);
2649         struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
2650         int ret;
2651         int has_space = 0;
2652
2653         if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
2654                 return 0;
2655
2656         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
2657                 down_read(&oi->ip_alloc_sem);
2658                 has_space = ocfs2_xattr_has_space_inline(inode, di);
2659                 up_read(&oi->ip_alloc_sem);
2660                 if (!has_space)
2661                         return 0;
2662         }
2663
2664         xs->xattr_bh = xs->inode_bh;
2665         xs->end = (void *)di + inode->i_sb->s_blocksize;
2666         if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)
2667                 xs->header = (struct ocfs2_xattr_header *)
2668                         (xs->end - le16_to_cpu(di->i_xattr_inline_size));
2669         else
2670                 xs->header = (struct ocfs2_xattr_header *)
2671                         (xs->end - OCFS2_SB(inode->i_sb)->s_xattr_inline_size);
2672         xs->base = (void *)xs->header;
2673         xs->here = xs->header->xh_entries;
2674
2675         /* Find the named attribute. */
2676         if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
2677                 ret = ocfs2_xattr_find_entry(name_index, name, xs);
2678                 if (ret && ret != -ENODATA)
2679                         return ret;
2680                 xs->not_found = ret;
2681         }
2682
2683         return 0;
2684 }
2685
2686 static int ocfs2_xattr_ibody_init(struct inode *inode,
2687                                   struct buffer_head *di_bh,
2688                                   struct ocfs2_xattr_set_ctxt *ctxt)
2689 {
2690         int ret;
2691         struct ocfs2_inode_info *oi = OCFS2_I(inode);
2692         struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
2693         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2694         unsigned int xattrsize = osb->s_xattr_inline_size;
2695
2696         if (!ocfs2_xattr_has_space_inline(inode, di)) {
2697                 ret = -ENOSPC;
2698                 goto out;
2699         }
2700
2701         ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode), di_bh,
2702                                       OCFS2_JOURNAL_ACCESS_WRITE);
2703         if (ret) {
2704                 mlog_errno(ret);
2705                 goto out;
2706         }
2707
2708         /*
2709          * Adjust extent record count or inline data size
2710          * to reserve space for extended attribute.
2711          */
2712         if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
2713                 struct ocfs2_inline_data *idata = &di->id2.i_data;
2714                 le16_add_cpu(&idata->id_count, -xattrsize);
2715         } else if (!(ocfs2_inode_is_fast_symlink(inode))) {
2716                 struct ocfs2_extent_list *el = &di->id2.i_list;
2717                 le16_add_cpu(&el->l_count, -(xattrsize /
2718                                              sizeof(struct ocfs2_extent_rec)));
2719         }
2720         di->i_xattr_inline_size = cpu_to_le16(xattrsize);
2721
2722         spin_lock(&oi->ip_lock);
2723         oi->ip_dyn_features |= OCFS2_INLINE_XATTR_FL|OCFS2_HAS_XATTR_FL;
2724         di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
2725         spin_unlock(&oi->ip_lock);
2726
2727         ret = ocfs2_journal_dirty(ctxt->handle, di_bh);
2728         if (ret < 0)
2729                 mlog_errno(ret);
2730
2731 out:
2732         return ret;
2733 }
2734
2735 /*
2736  * ocfs2_xattr_ibody_set()
2737  *
2738  * Set, replace or remove an extended attribute into inode block.
2739  *
2740  */
2741 static int ocfs2_xattr_ibody_set(struct inode *inode,
2742                                  struct ocfs2_xattr_info *xi,
2743                                  struct ocfs2_xattr_search *xs,
2744                                  struct ocfs2_xattr_set_ctxt *ctxt)
2745 {
2746         int ret;
2747         struct ocfs2_inode_info *oi = OCFS2_I(inode);
2748         struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
2749         struct ocfs2_xa_loc loc;
2750
2751         if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
2752                 return -ENOSPC;
2753
2754         down_write(&oi->ip_alloc_sem);
2755         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
2756                 if (!ocfs2_xattr_has_space_inline(inode, di)) {
2757                         ret = -ENOSPC;
2758                         goto out;
2759                 }
2760         }
2761
2762         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
2763                 ret = ocfs2_xattr_ibody_init(inode, xs->inode_bh, ctxt);
2764                 if (ret) {
2765                         if (ret != -ENOSPC)
2766                                 mlog_errno(ret);
2767                         goto out;
2768                 }
2769         }
2770
2771         ocfs2_init_dinode_xa_loc(&loc, inode, xs->inode_bh,
2772                                  xs->not_found ? NULL : xs->here);
2773         ret = ocfs2_xa_set(&loc, xi, ctxt);
2774         if (ret) {
2775                 if (ret != -ENOSPC)
2776                         mlog_errno(ret);
2777                 goto out;
2778         }
2779         xs->here = loc.xl_entry;
2780
2781 out:
2782         up_write(&oi->ip_alloc_sem);
2783
2784         return ret;
2785 }
2786
2787 /*
2788  * ocfs2_xattr_block_find()
2789  *
2790  * Find extended attribute in external block and
2791  * fill search info into struct ocfs2_xattr_search.
2792  */
2793 static int ocfs2_xattr_block_find(struct inode *inode,
2794                                   int name_index,
2795                                   const char *name,
2796                                   struct ocfs2_xattr_search *xs)
2797 {
2798         struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
2799         struct buffer_head *blk_bh = NULL;
2800         struct ocfs2_xattr_block *xb;
2801         int ret = 0;
2802
2803         if (!di->i_xattr_loc)
2804                 return ret;
2805
2806         ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
2807                                      &blk_bh);
2808         if (ret < 0) {
2809                 mlog_errno(ret);
2810                 return ret;
2811         }
2812
2813         xs->xattr_bh = blk_bh;
2814         xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
2815
2816         if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
2817                 xs->header = &xb->xb_attrs.xb_header;
2818                 xs->base = (void *)xs->header;
2819                 xs->end = (void *)(blk_bh->b_data) + blk_bh->b_size;
2820                 xs->here = xs->header->xh_entries;
2821
2822                 ret = ocfs2_xattr_find_entry(name_index, name, xs);
2823         } else
2824                 ret = ocfs2_xattr_index_block_find(inode, blk_bh,
2825                                                    name_index,
2826                                                    name, xs);
2827
2828         if (ret && ret != -ENODATA) {
2829                 xs->xattr_bh = NULL;
2830                 goto cleanup;
2831         }
2832         xs->not_found = ret;
2833         return 0;
2834 cleanup:
2835         brelse(blk_bh);
2836
2837         return ret;
2838 }
2839
2840 static int ocfs2_create_xattr_block(struct inode *inode,
2841                                     struct buffer_head *inode_bh,
2842                                     struct ocfs2_xattr_set_ctxt *ctxt,
2843                                     int indexed,
2844                                     struct buffer_head **ret_bh)
2845 {
2846         int ret;
2847         u16 suballoc_bit_start;
2848         u32 num_got;
2849         u64 first_blkno;
2850         struct ocfs2_dinode *di =  (struct ocfs2_dinode *)inode_bh->b_data;
2851         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2852         struct buffer_head *new_bh = NULL;
2853         struct ocfs2_xattr_block *xblk;
2854
2855         ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode),
2856                                       inode_bh, OCFS2_JOURNAL_ACCESS_CREATE);
2857         if (ret < 0) {
2858                 mlog_errno(ret);
2859                 goto end;
2860         }
2861
2862         ret = ocfs2_claim_metadata(osb, ctxt->handle, ctxt->meta_ac, 1,
2863                                    &suballoc_bit_start, &num_got,
2864                                    &first_blkno);
2865         if (ret < 0) {
2866                 mlog_errno(ret);
2867                 goto end;
2868         }
2869
2870         new_bh = sb_getblk(inode->i_sb, first_blkno);
2871         ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), new_bh);
2872
2873         ret = ocfs2_journal_access_xb(ctxt->handle, INODE_CACHE(inode),
2874                                       new_bh,
2875                                       OCFS2_JOURNAL_ACCESS_CREATE);
2876         if (ret < 0) {
2877                 mlog_errno(ret);
2878                 goto end;
2879         }
2880
2881         /* Initialize ocfs2_xattr_block */
2882         xblk = (struct ocfs2_xattr_block *)new_bh->b_data;
2883         memset(xblk, 0, inode->i_sb->s_blocksize);
2884         strcpy((void *)xblk, OCFS2_XATTR_BLOCK_SIGNATURE);
2885         xblk->xb_suballoc_slot = cpu_to_le16(ctxt->meta_ac->ac_alloc_slot);
2886         xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start);
2887         xblk->xb_fs_generation = cpu_to_le32(osb->fs_generation);
2888         xblk->xb_blkno = cpu_to_le64(first_blkno);
2889         if (indexed) {
2890                 struct ocfs2_xattr_tree_root *xr = &xblk->xb_attrs.xb_root;
2891                 xr->xt_clusters = cpu_to_le32(1);
2892                 xr->xt_last_eb_blk = 0;
2893                 xr->xt_list.l_tree_depth = 0;
2894                 xr->xt_list.l_count = cpu_to_le16(
2895                                         ocfs2_xattr_recs_per_xb(inode->i_sb));
2896                 xr->xt_list.l_next_free_rec = cpu_to_le16(1);
2897                 xblk->xb_flags = cpu_to_le16(OCFS2_XATTR_INDEXED);
2898         }
2899         ocfs2_journal_dirty(ctxt->handle, new_bh);
2900
2901         /* Add it to the inode */
2902         di->i_xattr_loc = cpu_to_le64(first_blkno);
2903
2904         spin_lock(&OCFS2_I(inode)->ip_lock);
2905         OCFS2_I(inode)->ip_dyn_features |= OCFS2_HAS_XATTR_FL;
2906         di->i_dyn_features = cpu_to_le16(OCFS2_I(inode)->ip_dyn_features);
2907         spin_unlock(&OCFS2_I(inode)->ip_lock);
2908
2909         ocfs2_journal_dirty(ctxt->handle, inode_bh);
2910
2911         *ret_bh = new_bh;
2912         new_bh = NULL;
2913
2914 end:
2915         brelse(new_bh);
2916         return ret;
2917 }
2918
2919 /*
2920  * ocfs2_xattr_block_set()
2921  *
2922  * Set, replace or remove an extended attribute into external block.
2923  *
2924  */
2925 static int ocfs2_xattr_block_set(struct inode *inode,
2926                                  struct ocfs2_xattr_info *xi,
2927                                  struct ocfs2_xattr_search *xs,
2928                                  struct ocfs2_xattr_set_ctxt *ctxt)
2929 {
2930         struct buffer_head *new_bh = NULL;
2931         struct ocfs2_xattr_block *xblk = NULL;
2932         int ret;
2933         struct ocfs2_xa_loc loc;
2934
2935         if (!xs->xattr_bh) {
2936                 ret = ocfs2_create_xattr_block(inode, xs->inode_bh, ctxt,
2937                                                0, &new_bh);
2938                 if (ret) {
2939                         mlog_errno(ret);
2940                         goto end;
2941                 }
2942
2943                 xs->xattr_bh = new_bh;
2944                 xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
2945                 xs->header = &xblk->xb_attrs.xb_header;
2946                 xs->base = (void *)xs->header;
2947                 xs->end = (void *)xblk + inode->i_sb->s_blocksize;
2948                 xs->here = xs->header->xh_entries;
2949         } else
2950                 xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
2951
2952         if (!(le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)) {
2953                 ocfs2_init_xattr_block_xa_loc(&loc, inode, xs->xattr_bh,
2954                                               xs->not_found ? NULL : xs->here);
2955
2956                 ret = ocfs2_xa_set(&loc, xi, ctxt);
2957                 if (!ret)
2958                         xs->here = loc.xl_entry;
2959                 else if (ret != -ENOSPC)
2960                         goto end;
2961                 else {
2962                         ret = ocfs2_xattr_create_index_block(inode, xs, ctxt);
2963                         if (ret)
2964                                 goto end;
2965                 }
2966         }
2967
2968         if (le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)
2969                 ret = ocfs2_xattr_set_entry_index_block(inode, xi, xs, ctxt);
2970
2971 end:
2972         return ret;
2973 }
2974
2975 /* Check whether the new xattr can be inserted into the inode. */
2976 static int ocfs2_xattr_can_be_in_inode(struct inode *inode,
2977                                        struct ocfs2_xattr_info *xi,
2978                                        struct ocfs2_xattr_search *xs)
2979 {
2980         struct ocfs2_xattr_entry *last;
2981         int free, i;
2982         size_t min_offs = xs->end - xs->base;
2983
2984         if (!xs->header)
2985                 return 0;
2986
2987         last = xs->header->xh_entries;
2988
2989         for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
2990                 size_t offs = le16_to_cpu(last->xe_name_offset);
2991                 if (offs < min_offs)
2992                         min_offs = offs;
2993                 last += 1;
2994         }
2995
2996         free = min_offs - ((void *)last - xs->base) - OCFS2_XATTR_HEADER_GAP;
2997         if (free < 0)
2998                 return 0;
2999
3000         BUG_ON(!xs->not_found);
3001
3002         if (free >= (sizeof(struct ocfs2_xattr_entry) + namevalue_size_xi(xi)))
3003                 return 1;
3004
3005         return 0;
3006 }
3007
3008 static int ocfs2_calc_xattr_set_need(struct inode *inode,
3009                                      struct ocfs2_dinode *di,
3010                                      struct ocfs2_xattr_info *xi,
3011                                      struct ocfs2_xattr_search *xis,
3012                                      struct ocfs2_xattr_search *xbs,
3013                                      int *clusters_need,
3014                                      int *meta_need,
3015                                      int *credits_need)
3016 {
3017         int ret = 0, old_in_xb = 0;
3018         int clusters_add = 0, meta_add = 0, credits = 0;
3019         struct buffer_head *bh = NULL;
3020         struct ocfs2_xattr_block *xb = NULL;
3021         struct ocfs2_xattr_entry *xe = NULL;
3022         struct ocfs2_xattr_value_root *xv = NULL;
3023         char *base = NULL;
3024         int name_offset, name_len = 0;
3025         u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb,
3026                                                     xi->xi_value_len);
3027         u64 value_size;
3028
3029         /*
3030          * Calculate the clusters we need to write.
3031          * No matter whether we replace an old one or add a new one,
3032          * we need this for writing.
3033          */
3034         if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE)
3035                 credits += new_clusters *
3036                            ocfs2_clusters_to_blocks(inode->i_sb, 1);
3037
3038         if (xis->not_found && xbs->not_found) {
3039                 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3040
3041                 if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
3042                         clusters_add += new_clusters;
3043                         credits += ocfs2_calc_extend_credits(inode->i_sb,
3044                                                         &def_xv.xv.xr_list,
3045                                                         new_clusters);
3046                 }
3047
3048                 goto meta_guess;
3049         }
3050
3051         if (!xis->not_found) {
3052                 xe = xis->here;
3053                 name_offset = le16_to_cpu(xe->xe_name_offset);
3054                 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
3055                 base = xis->base;
3056                 credits += OCFS2_INODE_UPDATE_CREDITS;
3057         } else {
3058                 int i, block_off = 0;
3059                 xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
3060                 xe = xbs->here;
3061                 name_offset = le16_to_cpu(xe->xe_name_offset);
3062                 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
3063                 i = xbs->here - xbs->header->xh_entries;
3064                 old_in_xb = 1;
3065
3066                 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
3067                         ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
3068                                                         bucket_xh(xbs->bucket),
3069                                                         i, &block_off,
3070                                                         &name_offset);
3071                         base = bucket_block(xbs->bucket, block_off);
3072                         credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3073                 } else {
3074                         base = xbs->base;
3075                         credits += OCFS2_XATTR_BLOCK_UPDATE_CREDITS;
3076                 }
3077         }
3078
3079         /*
3080          * delete a xattr doesn't need metadata and cluster allocation.
3081          * so just calculate the credits and return.
3082          *
3083          * The credits for removing the value tree will be extended
3084          * by ocfs2_remove_extent itself.
3085          */
3086         if (!xi->xi_value) {
3087                 if (!ocfs2_xattr_is_local(xe))
3088                         credits += ocfs2_remove_extent_credits(inode->i_sb);
3089
3090                 goto out;
3091         }
3092
3093         /* do cluster allocation guess first. */
3094         value_size = le64_to_cpu(xe->xe_value_size);
3095
3096         if (old_in_xb) {
3097                 /*
3098                  * In xattr set, we always try to set the xe in inode first,
3099                  * so if it can be inserted into inode successfully, the old
3100                  * one will be removed from the xattr block, and this xattr
3101                  * will be inserted into inode as a new xattr in inode.
3102                  */
3103                 if (ocfs2_xattr_can_be_in_inode(inode, xi, xis)) {
3104                         clusters_add += new_clusters;
3105                         credits += ocfs2_remove_extent_credits(inode->i_sb) +
3106                                     OCFS2_INODE_UPDATE_CREDITS;
3107                         if (!ocfs2_xattr_is_local(xe))
3108                                 credits += ocfs2_calc_extend_credits(
3109                                                         inode->i_sb,
3110                                                         &def_xv.xv.xr_list,
3111                                                         new_clusters);
3112                         goto out;
3113                 }
3114         }
3115
3116         if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
3117                 /* the new values will be stored outside. */
3118                 u32 old_clusters = 0;
3119
3120                 if (!ocfs2_xattr_is_local(xe)) {
3121                         old_clusters =  ocfs2_clusters_for_bytes(inode->i_sb,
3122                                                                  value_size);
3123                         xv = (struct ocfs2_xattr_value_root *)
3124                              (base + name_offset + name_len);
3125                         value_size = OCFS2_XATTR_ROOT_SIZE;
3126                 } else
3127                         xv = &def_xv.xv;
3128
3129                 if (old_clusters >= new_clusters) {
3130                         credits += ocfs2_remove_extent_credits(inode->i_sb);
3131                         goto out;
3132                 } else {
3133                         meta_add += ocfs2_extend_meta_needed(&xv->xr_list);
3134                         clusters_add += new_clusters - old_clusters;
3135                         credits += ocfs2_calc_extend_credits(inode->i_sb,
3136                                                              &xv->xr_list,
3137                                                              new_clusters -
3138                                                              old_clusters);
3139                         if (value_size >= OCFS2_XATTR_ROOT_SIZE)
3140                                 goto out;
3141                 }
3142         } else {
3143                 /*
3144                  * Now the new value will be stored inside. So if the new
3145                  * value is smaller than the size of value root or the old
3146                  * value, we don't need any allocation, otherwise we have
3147                  * to guess metadata allocation.
3148                  */
3149                 if ((ocfs2_xattr_is_local(xe) &&
3150                      (value_size >= xi->xi_value_len)) ||
3151                     (!ocfs2_xattr_is_local(xe) &&
3152                      OCFS2_XATTR_ROOT_SIZE >= xi->xi_value_len))
3153                         goto out;
3154         }
3155
3156 meta_guess:
3157         /* calculate metadata allocation. */
3158         if (di->i_xattr_loc) {
3159                 if (!xbs->xattr_bh) {
3160                         ret = ocfs2_read_xattr_block(inode,
3161                                                      le64_to_cpu(di->i_xattr_loc),
3162                                                      &bh);
3163                         if (ret) {
3164                                 mlog_errno(ret);
3165                                 goto out;
3166                         }
3167
3168                         xb = (struct ocfs2_xattr_block *)bh->b_data;
3169                 } else
3170                         xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
3171
3172                 /*
3173                  * If there is already an xattr tree, good, we can calculate
3174                  * like other b-trees. Otherwise we may have the chance of
3175                  * create a tree, the credit calculation is borrowed from
3176                  * ocfs2_calc_extend_credits with root_el = NULL. And the
3177                  * new tree will be cluster based, so no meta is needed.
3178                  */
3179                 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
3180                         struct ocfs2_extent_list *el =
3181                                  &xb->xb_attrs.xb_root.xt_list;
3182                         meta_add += ocfs2_extend_meta_needed(el);
3183                         credits += ocfs2_calc_extend_credits(inode->i_sb,
3184                                                              el, 1);
3185                 } else
3186                         credits += OCFS2_SUBALLOC_ALLOC + 1;
3187
3188                 /*
3189                  * This cluster will be used either for new bucket or for
3190                  * new xattr block.
3191                  * If the cluster size is the same as the bucket size, one
3192                  * more is needed since we may need to extend the bucket
3193                  * also.
3194                  */
3195                 clusters_add += 1;
3196                 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3197                 if (OCFS2_XATTR_BUCKET_SIZE ==
3198                         OCFS2_SB(inode->i_sb)->s_clustersize) {
3199                         credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3200                         clusters_add += 1;
3201                 }
3202         } else {
3203                 meta_add += 1;
3204                 credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
3205         }
3206 out:
3207         if (clusters_need)
3208                 *clusters_need = clusters_add;
3209         if (meta_need)
3210                 *meta_need = meta_add;
3211         if (credits_need)
3212                 *credits_need = credits;
3213         brelse(bh);
3214         return ret;
3215 }
3216
3217 static int ocfs2_init_xattr_set_ctxt(struct inode *inode,
3218                                      struct ocfs2_dinode *di,
3219                                      struct ocfs2_xattr_info *xi,
3220                                      struct ocfs2_xattr_search *xis,
3221                                      struct ocfs2_xattr_search *xbs,
3222                                      struct ocfs2_xattr_set_ctxt *ctxt,
3223                                      int extra_meta,
3224                                      int *credits)
3225 {
3226         int clusters_add, meta_add, ret;
3227         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3228
3229         memset(ctxt, 0, sizeof(struct ocfs2_xattr_set_ctxt));
3230
3231         ocfs2_init_dealloc_ctxt(&ctxt->dealloc);
3232
3233         ret = ocfs2_calc_xattr_set_need(inode, di, xi, xis, xbs,
3234                                         &clusters_add, &meta_add, credits);
3235         if (ret) {
3236                 mlog_errno(ret);
3237                 return ret;
3238         }
3239
3240         meta_add += extra_meta;
3241         mlog(0, "Set xattr %s, reserve meta blocks = %d, clusters = %d, "
3242              "credits = %d\n", xi->xi_name, meta_add, clusters_add, *credits);
3243
3244         if (meta_add) {
3245                 ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add,
3246                                                         &ctxt->meta_ac);
3247                 if (ret) {
3248                         mlog_errno(ret);
3249                         goto out;
3250                 }
3251         }
3252
3253         if (clusters_add) {
3254                 ret = ocfs2_reserve_clusters(osb, clusters_add, &ctxt->data_ac);
3255                 if (ret)
3256                         mlog_errno(ret);
3257         }
3258 out:
3259         if (ret) {
3260                 if (ctxt->meta_ac) {
3261                         ocfs2_free_alloc_context(ctxt->meta_ac);
3262                         ctxt->meta_ac = NULL;
3263                 }
3264
3265                 /*
3266                  * We cannot have an error and a non null ctxt->data_ac.
3267                  */
3268         }
3269
3270         return ret;
3271 }
3272
3273 static int __ocfs2_xattr_set_handle(struct inode *inode,
3274                                     struct ocfs2_dinode *di,
3275                                     struct ocfs2_xattr_info *xi,
3276                                     struct ocfs2_xattr_search *xis,
3277                                     struct ocfs2_xattr_search *xbs,
3278                                     struct ocfs2_xattr_set_ctxt *ctxt)
3279 {
3280         int ret = 0, credits, old_found;
3281
3282         if (!xi->xi_value) {
3283                 /* Remove existing extended attribute */
3284                 if (!xis->not_found)
3285                         ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
3286                 else if (!xbs->not_found)
3287                         ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
3288         } else {
3289                 /* We always try to set extended attribute into inode first*/
3290                 ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
3291                 if (!ret && !xbs->not_found) {
3292                         /*
3293                          * If succeed and that extended attribute existing in
3294                          * external block, then we will remove it.
3295                          */
3296                         xi->xi_value = NULL;
3297                         xi->xi_value_len = 0;
3298
3299                         old_found = xis->not_found;
3300                         xis->not_found = -ENODATA;
3301                         ret = ocfs2_calc_xattr_set_need(inode,
3302                                                         di,
3303                                                         xi,
3304                                                         xis,
3305                                                         xbs,
3306                                                         NULL,
3307                                                         NULL,
3308                                                         &credits);
3309                         xis->not_found = old_found;
3310                         if (ret) {
3311                                 mlog_errno(ret);
3312                                 goto out;
3313                         }
3314
3315                         ret = ocfs2_extend_trans(ctxt->handle, credits +
3316                                         ctxt->handle->h_buffer_credits);
3317                         if (ret) {
3318                                 mlog_errno(ret);
3319                                 goto out;
3320                         }
3321                         ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
3322                 } else if (ret == -ENOSPC) {
3323                         if (di->i_xattr_loc && !xbs->xattr_bh) {
3324                                 ret = ocfs2_xattr_block_find(inode,
3325                                                              xi->xi_name_index,
3326                                                              xi->xi_name, xbs);
3327                                 if (ret)
3328                                         goto out;
3329
3330                                 old_found = xis->not_found;
3331                                 xis->not_found = -ENODATA;
3332                                 ret = ocfs2_calc_xattr_set_need(inode,
3333                                                                 di,
3334                                                                 xi,
3335                                                                 xis,
3336                                                                 xbs,
3337                                                                 NULL,
3338                                                                 NULL,
3339                                                                 &credits);
3340                                 xis->not_found = old_found;
3341                                 if (ret) {
3342                                         mlog_errno(ret);
3343                                         goto out;
3344                                 }
3345
3346                                 ret = ocfs2_extend_trans(ctxt->handle, credits +
3347                                         ctxt->handle->h_buffer_credits);
3348                                 if (ret) {
3349                                         mlog_errno(ret);
3350                                         goto out;
3351                                 }
3352                         }
3353                         /*
3354                          * If no space in inode, we will set extended attribute
3355                          * into external block.
3356                          */
3357                         ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
3358                         if (ret)
3359                                 goto out;
3360                         if (!xis->not_found) {
3361                                 /*
3362                                  * If succeed and that extended attribute
3363                                  * existing in inode, we will remove it.
3364                                  */
3365                                 xi->xi_value = NULL;
3366                                 xi->xi_value_len = 0;
3367                                 xbs->not_found = -ENODATA;
3368                                 ret = ocfs2_calc_xattr_set_need(inode,
3369                                                                 di,
3370                                                                 xi,
3371                                                                 xis,
3372                                                                 xbs,
3373                                                                 NULL,
3374                                                                 NULL,
3375                                                                 &credits);
3376                                 if (ret) {
3377                                         mlog_errno(ret);
3378                                         goto out;
3379                                 }
3380
3381                                 ret = ocfs2_extend_trans(ctxt->handle, credits +
3382                                                 ctxt->handle->h_buffer_credits);
3383                                 if (ret) {
3384                                         mlog_errno(ret);
3385                                         goto out;
3386                                 }
3387                                 ret = ocfs2_xattr_ibody_set(inode, xi,
3388                                                             xis, ctxt);
3389                         }
3390                 }
3391         }
3392
3393         if (!ret) {
3394                 /* Update inode ctime. */
3395                 ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode),
3396                                               xis->inode_bh,
3397                                               OCFS2_JOURNAL_ACCESS_WRITE);
3398                 if (ret) {
3399                         mlog_errno(ret);
3400                         goto out;
3401                 }
3402
3403                 inode->i_ctime = CURRENT_TIME;
3404                 di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
3405                 di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
3406                 ocfs2_journal_dirty(ctxt->handle, xis->inode_bh);
3407         }
3408 out:
3409         return ret;
3410 }
3411
3412 /*
3413  * This function only called duing creating inode
3414  * for init security/acl xattrs of the new inode.
3415  * All transanction credits have been reserved in mknod.
3416  */
3417 int ocfs2_xattr_set_handle(handle_t *handle,
3418                            struct inode *inode,
3419                            struct buffer_head *di_bh,
3420                            int name_index,
3421                            const char *name,
3422                            const void *value,
3423                            size_t value_len,
3424                            int flags,
3425                            struct ocfs2_alloc_context *meta_ac,
3426                            struct ocfs2_alloc_context *data_ac)
3427 {
3428         struct ocfs2_dinode *di;
3429         int ret;
3430
3431         struct ocfs2_xattr_info xi = {
3432                 .xi_name_index = name_index,
3433                 .xi_name = name,
3434                 .xi_name_len = strlen(name),
3435                 .xi_value = value,
3436                 .xi_value_len = value_len,
3437         };
3438
3439         struct ocfs2_xattr_search xis = {
3440                 .not_found = -ENODATA,
3441         };
3442
3443         struct ocfs2_xattr_search xbs = {
3444                 .not_found = -ENODATA,
3445         };
3446
3447         struct ocfs2_xattr_set_ctxt ctxt = {
3448                 .handle = handle,
3449                 .meta_ac = meta_ac,
3450                 .data_ac = data_ac,
3451         };
3452
3453         if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
3454                 return -EOPNOTSUPP;
3455
3456         /*
3457          * In extreme situation, may need xattr bucket when
3458          * block size is too small. And we have already reserved
3459          * the credits for bucket in mknod.
3460          */
3461         if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) {
3462                 xbs.bucket = ocfs2_xattr_bucket_new(inode);
3463                 if (!xbs.bucket) {
3464                         mlog_errno(-ENOMEM);
3465                         return -ENOMEM;
3466                 }
3467         }
3468
3469         xis.inode_bh = xbs.inode_bh = di_bh;
3470         di = (struct ocfs2_dinode *)di_bh->b_data;
3471
3472         down_write(&OCFS2_I(inode)->ip_xattr_sem);
3473
3474         ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
3475         if (ret)
3476                 goto cleanup;
3477         if (xis.not_found) {
3478                 ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs);
3479                 if (ret)
3480                         goto cleanup;
3481         }
3482
3483         ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt);
3484
3485 cleanup:
3486         up_write(&OCFS2_I(inode)->ip_xattr_sem);
3487         brelse(xbs.xattr_bh);
3488         ocfs2_xattr_bucket_free(xbs.bucket);
3489
3490         return ret;
3491 }
3492
3493 /*
3494  * ocfs2_xattr_set()
3495  *
3496  * Set, replace or remove an extended attribute for this inode.
3497  * value is NULL to remove an existing extended attribute, else either
3498  * create or replace an extended attribute.
3499  */
3500 int ocfs2_xattr_set(struct inode *inode,
3501                     int name_index,
3502                     const char *name,
3503                     const void *value,
3504                     size_t value_len,
3505                     int flags)
3506 {
3507         struct buffer_head *di_bh = NULL;
3508         struct ocfs2_dinode *di;
3509         int ret, credits, ref_meta = 0, ref_credits = 0;
3510         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3511         struct inode *tl_inode = osb->osb_tl_inode;
3512         struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, };
3513         struct ocfs2_refcount_tree *ref_tree = NULL;
3514
3515         struct ocfs2_xattr_info xi = {
3516                 .xi_name_index = name_index,
3517                 .xi_name = name,
3518                 .xi_name_len = strlen(name),
3519                 .xi_value = value,
3520                 .xi_value_len = value_len,
3521         };
3522
3523         struct ocfs2_xattr_search xis = {
3524                 .not_found = -ENODATA,
3525         };
3526
3527         struct ocfs2_xattr_search xbs = {
3528                 .not_found = -ENODATA,
3529         };
3530
3531         if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
3532                 return -EOPNOTSUPP;
3533
3534         /*
3535          * Only xbs will be used on indexed trees.  xis doesn't need a
3536          * bucket.
3537          */
3538         xbs.bucket = ocfs2_xattr_bucket_new(inode);
3539         if (!xbs.bucket) {
3540                 mlog_errno(-ENOMEM);
3541                 return -ENOMEM;
3542         }
3543
3544         ret = ocfs2_inode_lock(inode, &di_bh, 1);
3545         if (ret < 0) {
3546                 mlog_errno(ret);
3547                 goto cleanup_nolock;
3548         }
3549         xis.inode_bh = xbs.inode_bh = di_bh;
3550         di = (struct ocfs2_dinode *)di_bh->b_data;
3551
3552         down_write(&OCFS2_I(inode)->ip_xattr_sem);
3553         /*
3554          * Scan inode and external block to find the same name
3555          * extended attribute and collect search infomation.
3556          */
3557         ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
3558         if (ret)
3559                 goto cleanup;
3560         if (xis.not_found) {
3561                 ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs);
3562                 if (ret)
3563                         goto cleanup;
3564         }
3565
3566         if (xis.not_found && xbs.not_found) {
3567                 ret = -ENODATA;
3568                 if (flags & XATTR_REPLACE)
3569                         goto cleanup;
3570                 ret = 0;
3571                 if (!value)
3572                         goto cleanup;
3573         } else {
3574                 ret = -EEXIST;
3575                 if (flags & XATTR_CREATE)
3576                         goto cleanup;
3577         }
3578
3579         /* Check whether the value is refcounted and do some prepartion. */
3580         if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL &&
3581             (!xis.not_found || !xbs.not_found)) {
3582                 ret = ocfs2_prepare_refcount_xattr(inode, di, &xi,
3583                                                    &xis, &xbs, &ref_tree,
3584                                                    &ref_meta, &ref_credits);
3585                 if (ret) {
3586                         mlog_errno(ret);
3587                         goto cleanup;
3588                 }
3589         }
3590
3591         mutex_lock(&tl_inode->i_mutex);
3592
3593         if (ocfs2_truncate_log_needs_flush(osb)) {
3594                 ret = __ocfs2_flush_truncate_log(osb);
3595                 if (ret < 0) {
3596                         mutex_unlock(&tl_inode->i_mutex);
3597                         mlog_errno(ret);
3598                         goto cleanup;
3599                 }
3600         }
3601         mutex_unlock(&tl_inode->i_mutex);
3602
3603         ret = ocfs2_init_xattr_set_ctxt(inode, di, &xi, &xis,
3604                                         &xbs, &ctxt, ref_meta, &credits);
3605         if (ret) {
3606                 mlog_errno(ret);
3607                 goto cleanup;
3608         }
3609
3610         /* we need to update inode's ctime field, so add credit for it. */
3611         credits += OCFS2_INODE_UPDATE_CREDITS;
3612         ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits);
3613         if (IS_ERR(ctxt.handle)) {
3614                 ret = PTR_ERR(ctxt.handle);
3615                 mlog_errno(ret);
3616                 goto cleanup;
3617         }
3618
3619         ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt);
3620
3621         ocfs2_commit_trans(osb, ctxt.handle);
3622
3623         if (ctxt.data_ac)
3624                 ocfs2_free_alloc_context(ctxt.data_ac);
3625         if (ctxt.meta_ac)
3626                 ocfs2_free_alloc_context(ctxt.meta_ac);
3627         if (ocfs2_dealloc_has_cluster(&ctxt.dealloc))
3628                 ocfs2_schedule_truncate_log_flush(osb, 1);
3629         ocfs2_run_deallocs(osb, &ctxt.dealloc);
3630
3631 cleanup:
3632         if (ref_tree)
3633                 ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
3634         up_write(&OCFS2_I(inode)->ip_xattr_sem);
3635         if (!value && !ret) {
3636                 ret = ocfs2_try_remove_refcount_tree(inode, di_bh);
3637                 if (ret)
3638                         mlog_errno(ret);
3639         }
3640         ocfs2_inode_unlock(inode, 1);
3641 cleanup_nolock:
3642         brelse(di_bh);
3643         brelse(xbs.xattr_bh);
3644         ocfs2_xattr_bucket_free(xbs.bucket);
3645
3646         return ret;
3647 }
3648
3649 /*
3650  * Find the xattr extent rec which may contains name_hash.
3651  * e_cpos will be the first name hash of the xattr rec.
3652  * el must be the ocfs2_xattr_header.xb_attrs.xb_root.xt_list.
3653  */
3654 static int ocfs2_xattr_get_rec(struct inode *inode,
3655                                u32 name_hash,
3656                                u64 *p_blkno,
3657                                u32 *e_cpos,
3658                                u32 *num_clusters,
3659                                struct ocfs2_extent_list *el)
3660 {
3661         int ret = 0, i;
3662         struct buffer_head *eb_bh = NULL;
3663         struct ocfs2_extent_block *eb;
3664         struct ocfs2_extent_rec *rec = NULL;
3665         u64 e_blkno = 0;
3666
3667         if (el->l_tree_depth) {
3668                 ret = ocfs2_find_leaf(INODE_CACHE(inode), el, name_hash,
3669                                       &eb_bh);
3670                 if (ret) {
3671                         mlog_errno(ret);
3672                         goto out;
3673                 }
3674
3675                 eb = (struct ocfs2_extent_block *) eb_bh->b_data;
3676                 el = &eb->h_list;
3677
3678                 if (el->l_tree_depth) {
3679                         ocfs2_error(inode->i_sb,
3680                                     "Inode %lu has non zero tree depth in "
3681                                     "xattr tree block %llu\n", inode->i_ino,
3682                                     (unsigned long long)eb_bh->b_blocknr);
3683                         ret = -EROFS;
3684                         goto out;
3685                 }
3686         }
3687
3688         for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) {
3689                 rec = &el->l_recs[i];
3690
3691                 if (le32_to_cpu(rec->e_cpos) <= name_hash) {
3692                         e_blkno = le64_to_cpu(rec->e_blkno);
3693                         break;
3694                 }
3695         }
3696
3697         if (!e_blkno) {
3698                 ocfs2_error(inode->i_sb, "Inode %lu has bad extent "
3699                             "record (%u, %u, 0) in xattr", inode->i_ino,
3700                             le32_to_cpu(rec->e_cpos),
3701                             ocfs2_rec_clusters(el, rec));
3702                 ret = -EROFS;
3703                 goto out;
3704         }
3705
3706         *p_blkno = le64_to_cpu(rec->e_blkno);
3707         *num_clusters = le16_to_cpu(rec->e_leaf_clusters);
3708         if (e_cpos)
3709                 *e_cpos = le32_to_cpu(rec->e_cpos);
3710 out:
3711         brelse(eb_bh);
3712         return ret;
3713 }
3714
/*
 * Function type for a per-bucket callback: it receives the inode, one
 * xattr bucket and an opaque caller-supplied argument, and returns an int.
 */
typedef int xattr_bucket_func(struct inode *inode,
                              struct ocfs2_xattr_bucket *bucket,
                              void *para);
3718
3719 static int ocfs2_find_xe_in_bucket(struct inode *inode,
3720                                    struct ocfs2_xattr_bucket *bucket,
3721                                    int name_index,
3722                                    const char *name,
3723                                    u32 name_hash,
3724                                    u16 *xe_index,
3725                                    int *found)
3726 {
3727         int i, ret = 0, cmp = 1, block_off, new_offset;
3728         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
3729         size_t name_len = strlen(name);
3730         struct ocfs2_xattr_entry *xe = NULL;
3731         char *xe_name;
3732
3733         /*
3734          * We don't use binary search in the bucket because there
3735          * may be multiple entries with the same name hash.
3736          */
3737         for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
3738                 xe = &xh->xh_entries[i];
3739
3740                 if (name_hash > le32_to_cpu(xe->xe_name_hash))
3741                         continue;
3742                 else if (name_hash < le32_to_cpu(xe->xe_name_hash))
3743                         break;
3744
3745                 cmp = name_index - ocfs2_xattr_get_type(xe);
3746                 if (!cmp)
3747                         cmp = name_len - xe->xe_name_len;
3748                 if (cmp)
3749                         continue;
3750
3751                 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
3752                                                         xh,
3753                                                         i,
3754                                                         &block_off,
3755                                                         &new_offset);
3756                 if (ret) {
3757                         mlog_errno(ret);
3758                         break;
3759                 }
3760
3761
3762                 xe_name = bucket_block(bucket, block_off) + new_offset;
3763                 if (!memcmp(name, xe_name, name_len)) {
3764                         *xe_index = i;
3765                         *found = 1;
3766                         ret = 0;
3767                         break;
3768                 }
3769         }
3770
3771         return ret;
3772 }
3773
3774 /*
3775  * Find the specified xattr entry in a series of buckets.
3776  * This series start from p_blkno and last for num_clusters.
3777  * The ocfs2_xattr_header.xh_num_buckets of the first bucket contains
3778  * the num of the valid buckets.
3779  *
3780  * Return the buffer_head this xattr should reside in. And if the xattr's
3781  * hash is in the gap of 2 buckets, return the lower bucket.
3782  */
3783 static int ocfs2_xattr_bucket_find(struct inode *inode,
3784                                    int name_index,
3785                                    const char *name,
3786                                    u32 name_hash,
3787                                    u64 p_blkno,
3788                                    u32 first_hash,
3789                                    u32 num_clusters,
3790                                    struct ocfs2_xattr_search *xs)
3791 {
3792         int ret, found = 0;
3793         struct ocfs2_xattr_header *xh = NULL;
3794         struct ocfs2_xattr_entry *xe = NULL;
3795         u16 index = 0;
3796         u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3797         int low_bucket = 0, bucket, high_bucket;
3798         struct ocfs2_xattr_bucket *search;
3799         u32 last_hash;
3800         u64 blkno, lower_blkno = 0;
3801
3802         search = ocfs2_xattr_bucket_new(inode);
3803         if (!search) {
3804                 ret = -ENOMEM;
3805                 mlog_errno(ret);
3806                 goto out;
3807         }
3808
3809         ret = ocfs2_read_xattr_bucket(search, p_blkno);
3810         if (ret) {
3811                 mlog_errno(ret);
3812                 goto out;
3813         }
3814
3815         xh = bucket_xh(search);
3816         high_bucket = le16_to_cpu(xh->xh_num_buckets) - 1;
3817         while (low_bucket <= high_bucket) {
3818                 ocfs2_xattr_bucket_relse(search);
3819
3820                 bucket = (low_bucket + high_bucket) / 2;
3821                 blkno = p_blkno + bucket * blk_per_bucket;
3822                 ret = ocfs2_read_xattr_bucket(search, blkno);
3823                 if (ret) {
3824                         mlog_errno(ret);
3825                         goto out;
3826                 }
3827
3828                 xh = bucket_xh(search);
3829                 xe = &xh->xh_entries[0];
3830                 if (name_hash < le32_to_cpu(xe->xe_name_hash)) {
3831                         high_bucket = bucket - 1;
3832                         continue;
3833                 }
3834
3835                 /*
3836                  * Check whether the hash of the last entry in our
3837                  * bucket is larger than the search one. for an empty
3838                  * bucket, the last one is also the first one.
3839                  */
3840                 if (xh->xh_count)
3841                         xe = &xh->xh_entries[le16_to_cpu(xh->xh_count) - 1];
3842
3843                 last_hash = le32_to_cpu(xe->xe_name_hash);
3844
3845                 /* record lower_blkno which may be the insert place. */
3846                 lower_blkno = blkno;
3847
3848                 if (name_hash > le32_to_cpu(xe->xe_name_hash)) {
3849                         low_bucket = bucket + 1;
3850                         continue;
3851                 }
3852
3853                 /* the searched xattr should reside in this bucket if exists. */
3854                 ret = ocfs2_find_xe_in_bucket(inode, search,
3855                                               name_index, name, name_hash,
3856                                               &index, &found);
3857                 if (ret) {
3858                         mlog_errno(ret);
3859                         goto out;
3860                 }
3861                 break;
3862         }
3863
3864         /*
3865          * Record the bucket we have found.
3866          * When the xattr's hash value is in the gap of 2 buckets, we will
3867          * always set it to the previous bucket.
3868          */
3869         if (!lower_blkno)
3870                 lower_blkno = p_blkno;
3871
3872         /* This should be in cache - we just read it during the search */
3873         ret = ocfs2_read_xattr_bucket(xs->bucket, lower_blkno);
3874         if (ret) {
3875                 mlog_errno(ret);
3876                 goto out;
3877         }
3878
3879         xs->header = bucket_xh(xs->bucket);
3880         xs->base = bucket_block(xs->bucket, 0);
3881         xs->end = xs->base + inode->i_sb->s_blocksize;
3882
3883         if (found) {
3884                 xs->here = &xs->header->xh_entries[index];
3885                 mlog(0, "find xattr %s in bucket %llu, entry = %u\n", name,
3886                      (unsigned long long)bucket_blkno(xs->bucket), index);
3887         } else
3888                 ret = -ENODATA;
3889
3890 out:
3891         ocfs2_xattr_bucket_free(search);
3892         return ret;
3893 }
3894
3895 static int ocfs2_xattr_index_block_find(struct inode *inode,
3896                                         struct buffer_head *root_bh,
3897                                         int name_index,
3898                                         const char *name,
3899                                         struct ocfs2_xattr_search *xs)
3900 {
3901         int ret;
3902         struct ocfs2_xattr_block *xb =
3903                         (struct ocfs2_xattr_block *)root_bh->b_data;
3904         struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
3905         struct ocfs2_extent_list *el = &xb_root->xt_list;
3906         u64 p_blkno = 0;
3907         u32 first_hash, num_clusters = 0;
3908         u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name));
3909
3910         if (le16_to_cpu(el->l_next_free_rec) == 0)
3911                 return -ENODATA;
3912
3913         mlog(0, "find xattr %s, hash = %u, index = %d in xattr tree\n",
3914              name, name_hash, name_index);
3915
3916         ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &first_hash,
3917                                   &num_clusters, el);
3918         if (ret) {
3919                 mlog_errno(ret);
3920                 goto out;
3921         }
3922
3923         BUG_ON(p_blkno == 0 || num_clusters == 0 || first_hash > name_hash);
3924
3925         mlog(0, "find xattr extent rec %u clusters from %llu, the first hash "
3926              "in the rec is %u\n", num_clusters, (unsigned long long)p_blkno,
3927              first_hash);
3928
3929         ret = ocfs2_xattr_bucket_find(inode, name_index, name, name_hash,
3930                                       p_blkno, first_hash, num_clusters, xs);
3931
3932 out:
3933         return ret;
3934 }
3935
3936 static int ocfs2_iterate_xattr_buckets(struct inode *inode,
3937                                        u64 blkno,
3938                                        u32 clusters,
3939                                        xattr_bucket_func *func,
3940                                        void *para)
3941 {
3942         int i, ret = 0;
3943         u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb));
3944         u32 num_buckets = clusters * bpc;
3945         struct ocfs2_xattr_bucket *bucket;
3946
3947         bucket = ocfs2_xattr_bucket_new(inode);
3948         if (!bucket) {
3949                 mlog_errno(-ENOMEM);
3950                 return -ENOMEM;
3951         }
3952
3953         mlog(0, "iterating xattr buckets in %u clusters starting from %llu\n",
3954              clusters, (unsigned long long)blkno);
3955
3956         for (i = 0; i < num_buckets; i++, blkno += bucket->bu_blocks) {
3957                 ret = ocfs2_read_xattr_bucket(bucket, blkno);
3958                 if (ret) {
3959                         mlog_errno(ret);
3960                         break;
3961                 }
3962
3963                 /*
3964                  * The real bucket num in this series of blocks is stored
3965                  * in the 1st bucket.
3966                  */
3967                 if (i == 0)
3968                         num_buckets = le16_to_cpu(bucket_xh(bucket)->xh_num_buckets);
3969
3970                 mlog(0, "iterating xattr bucket %llu, first hash %u\n",
3971                      (unsigned long long)blkno,
3972                      le32_to_cpu(bucket_xh(bucket)->xh_entries[0].xe_name_hash));
3973                 if (func) {
3974                         ret = func(inode, bucket, para);
3975                         if (ret && ret != -ERANGE)
3976                                 mlog_errno(ret);
3977                         /* Fall through to bucket_relse() */
3978                 }
3979
3980                 ocfs2_xattr_bucket_relse(bucket);
3981                 if (ret)
3982                         break;
3983         }
3984
3985         ocfs2_xattr_bucket_free(bucket);
3986         return ret;
3987 }
3988
/*
 * State carried (as the opaque "para" argument) through the xattr tree
 * iteration while listing attribute names.  All three fields are handed
 * to ocfs2_xattr_list_entry() for every listed entry.
 */
struct ocfs2_xattr_tree_list {
	/* Output buffer supplied by the caller of the listing. */
	char *buffer;
	/* Size of buffer, as supplied by the caller. */
	size_t buffer_size;
	/*
	 * Starts at 0 and is updated through a pointer by
	 * ocfs2_xattr_list_entry(); its final value is what the listing
	 * returns on success (presumably the number of bytes used - confirm
	 * against ocfs2_xattr_list_entry()).
	 */
	size_t result;
};
3994
3995 static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb,
3996                                              struct ocfs2_xattr_header *xh,
3997                                              int index,
3998                                              int *block_off,
3999                                              int *new_offset)
4000 {
4001         u16 name_offset;
4002
4003         if (index < 0 || index >= le16_to_cpu(xh->xh_count))
4004                 return -EINVAL;
4005
4006         name_offset = le16_to_cpu(xh->xh_entries[index].xe_name_offset);
4007
4008         *block_off = name_offset >> sb->s_blocksize_bits;
4009         *new_offset = name_offset % sb->s_blocksize;
4010
4011         return 0;
4012 }
4013
4014 static int ocfs2_list_xattr_bucket(struct inode *inode,
4015                                    struct ocfs2_xattr_bucket *bucket,
4016                                    void *para)
4017 {
4018         int ret = 0, type;
4019         struct ocfs2_xattr_tree_list *xl = (struct ocfs2_xattr_tree_list *)para;
4020         int i, block_off, new_offset;
4021         const char *prefix, *name;
4022
4023         for (i = 0 ; i < le16_to_cpu(bucket_xh(bucket)->xh_count); i++) {
4024                 struct ocfs2_xattr_entry *entry = &bucket_xh(bucket)->xh_entries[i];
4025                 type = ocfs2_xattr_get_type(entry);
4026                 prefix = ocfs2_xattr_prefix(type);
4027
4028                 if (prefix) {
4029                         ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
4030                                                                 bucket_xh(bucket),
4031                                                                 i,
4032                                                                 &block_off,
4033                                                                 &new_offset);
4034                         if (ret)
4035                                 break;
4036
4037                         name = (const char *)bucket_block(bucket, block_off) +
4038                                 new_offset;
4039                         ret = ocfs2_xattr_list_entry(xl->buffer,
4040                                                      xl->buffer_size,
4041                                                      &xl->result,
4042                                                      prefix, name,
4043                                                      entry->xe_name_len);
4044                         if (ret)
4045                                 break;
4046                 }
4047         }
4048
4049         return ret;
4050 }
4051
4052 static int ocfs2_iterate_xattr_index_block(struct inode *inode,
4053                                            struct buffer_head *blk_bh,
4054                                            xattr_tree_rec_func *rec_func,
4055                                            void *para)
4056 {
4057         struct ocfs2_xattr_block *xb =
4058                         (struct ocfs2_xattr_block *)blk_bh->b_data;
4059         struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list;
4060         int ret = 0;
4061         u32 name_hash = UINT_MAX, e_cpos = 0, num_clusters = 0;
4062         u64 p_blkno = 0;
4063
4064         if (!el->l_next_free_rec || !rec_func)
4065                 return 0;
4066
4067         while (name_hash > 0) {
4068                 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno,
4069                                           &e_cpos, &num_clusters, el);
4070                 if (ret) {
4071                         mlog_errno(ret);
4072                         break;
4073                 }
4074
4075                 ret = rec_func(inode, blk_bh, p_blkno, e_cpos,
4076                                num_clusters, para);
4077                 if (ret) {
4078                         if (ret != -ERANGE)
4079                                 mlog_errno(ret);
4080                         break;
4081                 }
4082
4083                 if (e_cpos == 0)
4084                         break;
4085
4086                 name_hash = e_cpos - 1;
4087         }
4088
4089         return ret;
4090
4091 }
4092
4093 static int ocfs2_list_xattr_tree_rec(struct inode *inode,
4094                                      struct buffer_head *root_bh,
4095                                      u64 blkno, u32 cpos, u32 len, void *para)
4096 {
4097         return ocfs2_iterate_xattr_buckets(inode, blkno, len,
4098                                            ocfs2_list_xattr_bucket, para);
4099 }
4100
4101 static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
4102                                              struct buffer_head *blk_bh,
4103                                              char *buffer,
4104                                              size_t buffer_size)
4105 {
4106         int ret;
4107         struct ocfs2_xattr_tree_list xl = {
4108                 .buffer = buffer,
4109                 .buffer_size = buffer_size,
4110                 .result = 0,
4111         };
4112
4113         ret = ocfs2_iterate_xattr_index_block(inode, blk_bh,
4114                                               ocfs2_list_xattr_tree_rec, &xl);
4115         if (ret) {
4116                 mlog_errno(ret);
4117                 goto out;
4118         }
4119
4120         ret = xl.result;
4121 out:
4122         return ret;
4123 }
4124
4125 static int cmp_xe(const void *a, const void *b)
4126 {
4127         const struct ocfs2_xattr_entry *l = a, *r = b;
4128         u32 l_hash = le32_to_cpu(l->xe_name_hash);
4129         u32 r_hash = le32_to_cpu(r->xe_name_hash);
4130
4131         if (l_hash > r_hash)
4132                 return 1;
4133         if (l_hash < r_hash)
4134                 return -1;
4135         return 0;
4136 }
4137
4138 static void swap_xe(void *a, void *b, int size)
4139 {
4140         struct ocfs2_xattr_entry *l = a, *r = b, tmp;
4141
4142         tmp = *l;
4143         memcpy(l, r, sizeof(struct ocfs2_xattr_entry));
4144         memcpy(r, &tmp, sizeof(struct ocfs2_xattr_entry));
4145 }
4146
4147 /*
4148  * When the ocfs2_xattr_block is filled up, new bucket will be created
4149  * and all the xattr entries will be moved to the new bucket.
4150  * The header goes at the start of the bucket, and the names+values are
4151  * filled from the end.  This is why *target starts as the last buffer.
4152  * Note: we need to sort the entries since they are not saved in order
4153  * in the ocfs2_xattr_block.
4154  */
4155 static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode,
4156                                            struct buffer_head *xb_bh,
4157                                            struct ocfs2_xattr_bucket *bucket)
4158 {
4159         int i, blocksize = inode->i_sb->s_blocksize;
4160         int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4161         u16 offset, size, off_change;
4162         struct ocfs2_xattr_entry *xe;
4163         struct ocfs2_xattr_block *xb =
4164                                 (struct ocfs2_xattr_block *)xb_bh->b_data;
4165         struct ocfs2_xattr_header *xb_xh = &xb->xb_attrs.xb_header;
4166         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
4167         u16 count = le16_to_cpu(xb_xh->xh_count);
4168         char *src = xb_bh->b_data;
4169         char *target = bucket_block(bucket, blks - 1);
4170
4171         mlog(0, "cp xattr from block %llu to bucket %llu\n",
4172              (unsigned long long)xb_bh->b_blocknr,
4173              (unsigned long long)bucket_blkno(bucket));
4174
4175         for (i = 0; i < blks; i++)
4176                 memset(bucket_block(bucket, i), 0, blocksize);
4177
4178         /*
4179          * Since the xe_name_offset is based on ocfs2_xattr_header,
4180          * there is a offset change corresponding to the change of
4181          * ocfs2_xattr_header's position.
4182          */
4183         off_change = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
4184         xe = &xb_xh->xh_entries[count - 1];
4185         offset = le16_to_cpu(xe->xe_name_offset) + off_change;
4186         size = blocksize - offset;
4187
4188         /* copy all the names and values. */
4189         memcpy(target + offset, src + offset, size);
4190
4191         /* Init new header now. */
4192         xh->xh_count = xb_xh->xh_count;
4193         xh->xh_num_buckets = cpu_to_le16(1);
4194         xh->xh_name_value_len = cpu_to_le16(size);
4195         xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE - size);
4196
4197         /* copy all the entries. */
4198         target = bucket_block(bucket, 0);
4199         offset = offsetof(struct ocfs2_xattr_header, xh_entries);
4200         size = count * sizeof(struct ocfs2_xattr_entry);
4201         memcpy(target + offset, (char *)xb_xh + offset, size);
4202
4203         /* Change the xe offset for all the xe because of the move. */
4204         off_change = OCFS2_XATTR_BUCKET_SIZE - blocksize +
4205                  offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
4206         for (i = 0; i < count; i++)
4207                 le16_add_cpu(&xh->xh_entries[i].xe_name_offset, off_change);
4208
4209         mlog(0, "copy entry: start = %u, size = %u, offset_change = %u\n",
4210              offset, size, off_change);
4211
4212         sort(target + offset, count, sizeof(struct ocfs2_xattr_entry),
4213              cmp_xe, swap_xe);
4214 }
4215
4216 /*
4217  * After we move xattr from block to index btree, we have to
4218  * update ocfs2_xattr_search to the new xe and base.
4219  *
4220  * When the entry is in xattr block, xattr_bh indicates the storage place.
4221  * While if the entry is in index b-tree, "bucket" indicates the
4222  * real place of the xattr.
4223  */
4224 static void ocfs2_xattr_update_xattr_search(struct inode *inode,
4225                                             struct ocfs2_xattr_search *xs,
4226                                             struct buffer_head *old_bh)
4227 {
4228         char *buf = old_bh->b_data;
4229         struct ocfs2_xattr_block *old_xb = (struct ocfs2_xattr_block *)buf;
4230         struct ocfs2_xattr_header *old_xh = &old_xb->xb_attrs.xb_header;
4231         int i;
4232
4233         xs->header = bucket_xh(xs->bucket);
4234         xs->base = bucket_block(xs->bucket, 0);
4235         xs->end = xs->base + inode->i_sb->s_blocksize;
4236
4237         if (xs->not_found)
4238                 return;
4239
4240         i = xs->here - old_xh->xh_entries;
4241         xs->here = &xs->header->xh_entries[i];
4242 }
4243
4244 static int ocfs2_xattr_create_index_block(struct inode *inode,
4245                                           struct ocfs2_xattr_search *xs,
4246                                           struct ocfs2_xattr_set_ctxt *ctxt)
4247 {
4248         int ret;
4249         u32 bit_off, len;
4250         u64 blkno;
4251         handle_t *handle = ctxt->handle;
4252         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4253         struct ocfs2_inode_info *oi = OCFS2_I(inode);
4254         struct buffer_head *xb_bh = xs->xattr_bh;
4255         struct ocfs2_xattr_block *xb =
4256                         (struct ocfs2_xattr_block *)xb_bh->b_data;
4257         struct ocfs2_xattr_tree_root *xr;
4258         u16 xb_flags = le16_to_cpu(xb->xb_flags);
4259
4260         mlog(0, "create xattr index block for %llu\n",
4261              (unsigned long long)xb_bh->b_blocknr);
4262
4263         BUG_ON(xb_flags & OCFS2_XATTR_INDEXED);
4264         BUG_ON(!xs->bucket);
4265
4266         /*
4267          * XXX:
4268          * We can use this lock for now, and maybe move to a dedicated mutex
4269          * if performance becomes a problem later.
4270          */
4271         down_write(&oi->ip_alloc_sem);
4272
4273         ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), xb_bh,
4274                                       OCFS2_JOURNAL_ACCESS_WRITE);
4275         if (ret) {
4276                 mlog_errno(ret);
4277                 goto out;
4278         }
4279
4280         ret = __ocfs2_claim_clusters(osb, handle, ctxt->data_ac,
4281                                      1, 1, &bit_off, &len);
4282         if (ret) {
4283                 mlog_errno(ret);
4284                 goto out;
4285         }
4286
4287         /*
4288          * The bucket may spread in many blocks, and
4289          * we will only touch the 1st block and the last block
4290          * in the whole bucket(one for entry and one for data).
4291          */
4292         blkno = ocfs2_clusters_to_blocks(inode->i_sb, bit_off);
4293
4294         mlog(0, "allocate 1 cluster from %llu to xattr block\n",
4295              (unsigned long long)blkno);
4296
4297         ret = ocfs2_init_xattr_bucket(xs->bucket, blkno);
4298         if (ret) {
4299                 mlog_errno(ret);
4300                 goto out;
4301         }
4302
4303         ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
4304                                                 OCFS2_JOURNAL_ACCESS_CREATE);
4305         if (ret) {
4306                 mlog_errno(ret);
4307                 goto out;
4308         }
4309
4310         ocfs2_cp_xattr_block_to_bucket(inode, xb_bh, xs->bucket);
4311         ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);
4312
4313         ocfs2_xattr_update_xattr_search(inode, xs, xb_bh);
4314
4315         /* Change from ocfs2_xattr_header to ocfs2_xattr_tree_root */
4316         memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize -
4317                offsetof(struct ocfs2_xattr_block, xb_attrs));
4318
4319         xr = &xb->xb_attrs.xb_root;
4320         xr->xt_clusters = cpu_to_le32(1);
4321         xr->xt_last_eb_blk = 0;
4322         xr->xt_list.l_tree_depth = 0;
4323         xr->xt_list.l_count = cpu_to_le16(ocfs2_xattr_recs_per_xb(inode->i_sb));
4324         xr->xt_list.l_next_free_rec = cpu_to_le16(1);
4325
4326         xr->xt_list.l_recs[0].e_cpos = 0;
4327         xr->xt_list.l_recs[0].e_blkno = cpu_to_le64(blkno);
4328         xr->xt_list.l_recs[0].e_leaf_clusters = cpu_to_le16(1);
4329
4330         xb->xb_flags = cpu_to_le16(xb_flags | OCFS2_XATTR_INDEXED);
4331
4332         ocfs2_journal_dirty(handle, xb_bh);
4333
4334 out:
4335         up_write(&oi->ip_alloc_sem);
4336
4337         return ret;
4338 }
4339
4340 static int cmp_xe_offset(const void *a, const void *b)
4341 {
4342         const struct ocfs2_xattr_entry *l = a, *r = b;
4343         u32 l_name_offset = le16_to_cpu(l->xe_name_offset);
4344         u32 r_name_offset = le16_to_cpu(r->xe_name_offset);
4345
4346         if (l_name_offset < r_name_offset)
4347                 return 1;
4348         if (l_name_offset > r_name_offset)
4349                 return -1;
4350         return 0;
4351 }
4352
4353 /*
4354  * defrag a xattr bucket if we find that the bucket has some
4355  * holes beteen name/value pairs.
4356  * We will move all the name/value pairs to the end of the bucket
4357  * so that we can spare some space for insertion.
4358  */
4359 static int ocfs2_defrag_xattr_bucket(struct inode *inode,
4360                                      handle_t *handle,
4361                                      struct ocfs2_xattr_bucket *bucket)
4362 {
4363         int ret, i;
4364         size_t end, offset, len;
4365         struct ocfs2_xattr_header *xh;
4366         char *entries, *buf, *bucket_buf = NULL;
4367         u64 blkno = bucket_blkno(bucket);
4368         u16 xh_free_start;
4369         size_t blocksize = inode->i_sb->s_blocksize;
4370         struct ocfs2_xattr_entry *xe;
4371
4372         /*
4373          * In order to make the operation more efficient and generic,
4374          * we copy all the blocks into a contiguous memory and do the
4375          * defragment there, so if anything is error, we will not touch
4376          * the real block.
4377          */
4378         bucket_buf = kmalloc(OCFS2_XATTR_BUCKET_SIZE, GFP_NOFS);
4379         if (!bucket_buf) {
4380                 ret = -EIO;
4381                 goto out;
4382         }
4383
4384         buf = bucket_buf;
4385         for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize)
4386                 memcpy(buf, bucket_block(bucket, i), blocksize);
4387
4388         ret = ocfs2_xattr_bucket_journal_access(handle, bucket,
4389                                                 OCFS2_JOURNAL_ACCESS_WRITE);
4390         if (ret < 0) {
4391                 mlog_errno(ret);
4392                 goto out;
4393         }
4394
4395         xh = (struct ocfs2_xattr_header *)bucket_buf;
4396         entries = (char *)xh->xh_entries;
4397         xh_free_start = le16_to_cpu(xh->xh_free_start);
4398
4399         mlog(0, "adjust xattr bucket in %llu, count = %u, "
4400              "xh_free_start = %u, xh_name_value_len = %u.\n",
4401              (unsigned long long)blkno, le16_to_cpu(xh->xh_count),
4402              xh_free_start, le16_to_cpu(xh->xh_name_value_len));
4403
4404         /*
4405          * sort all the entries by their offset.
4406          * the largest will be the first, so that we can
4407          * move them to the end one by one.
4408          */
4409         sort(entries, le16_to_cpu(xh->xh_count),
4410              sizeof(struct ocfs2_xattr_entry),
4411              cmp_xe_offset, swap_xe);
4412
4413         /* Move all name/values to the end of the bucket. */
4414         xe = xh->xh_entries;
4415         end = OCFS2_XATTR_BUCKET_SIZE;
4416         for (i = 0; i < le16_to_cpu(xh->xh_count); i++, xe++) {
4417                 offset = le16_to_cpu(xe->xe_name_offset);
4418                 len = namevalue_size_xe(xe);
4419
4420                 /*
4421                  * We must make sure that the name/value pair
4422                  * exist in the same block. So adjust end to
4423                  * the previous block end if needed.
4424                  */
4425                 if (((end - len) / blocksize !=
4426                         (end - 1) / blocksize))
4427                         end = end - end % blocksize;
4428
4429                 if (end > offset + len) {
4430                         memmove(bucket_buf + end - len,
4431                                 bucket_buf + offset, len);
4432                         xe->xe_name_offset = cpu_to_le16(end - len);
4433                 }
4434
4435                 mlog_bug_on_msg(end < offset + len, "Defrag check failed for "
4436                                 "bucket %llu\n", (unsigned long long)blkno);
4437
4438                 end -= len;
4439         }
4440
4441         mlog_bug_on_msg(xh_free_start > end, "Defrag check failed for "
4442                         "bucket %llu\n", (unsigned long long)blkno);
4443
4444         if (xh_free_start == end)
4445                 goto out;
4446
4447         memset(bucket_buf + xh_free_start, 0, end - xh_free_start);
4448         xh->xh_free_start = cpu_to_le16(end);
4449
4450         /* sort the entries by their name_hash. */
4451         sort(entries, le16_to_cpu(xh->xh_count),
4452              sizeof(struct ocfs2_xattr_entry),
4453              cmp_xe, swap_xe);
4454
4455         buf = bucket_buf;
4456         for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize)
4457                 memcpy(bucket_block(bucket, i), buf, blocksize);
4458         ocfs2_xattr_bucket_journal_dirty(handle, bucket);
4459
4460 out:
4461         kfree(bucket_buf);
4462         return ret;
4463 }
4464
4465 /*
4466  * prev_blkno points to the start of an existing extent.  new_blkno
4467  * points to a newly allocated extent.  Because we know each of our
4468  * clusters contains more than bucket, we can easily split one cluster
4469  * at a bucket boundary.  So we take the last cluster of the existing
4470  * extent and split it down the middle.  We move the last half of the
4471  * buckets in the last cluster of the existing extent over to the new
4472  * extent.
4473  *
4474  * first_bh is the buffer at prev_blkno so we can update the existing
4475  * extent's bucket count.  header_bh is the bucket were we were hoping
4476  * to insert our xattr.  If the bucket move places the target in the new
4477  * extent, we'll update first_bh and header_bh after modifying the old
4478  * extent.
4479  *
4480  * first_hash will be set as the 1st xe's name_hash in the new extent.
4481  */
4482 static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode,
4483                                                handle_t *handle,
4484                                                struct ocfs2_xattr_bucket *first,
4485                                                struct ocfs2_xattr_bucket *target,
4486                                                u64 new_blkno,
4487                                                u32 num_clusters,
4488                                                u32 *first_hash)
4489 {
4490         int ret;
4491         struct super_block *sb = inode->i_sb;
4492         int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(sb);
4493         int num_buckets = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(sb));
4494         int to_move = num_buckets / 2;
4495         u64 src_blkno;
4496         u64 last_cluster_blkno = bucket_blkno(first) +
4497                 ((num_clusters - 1) * ocfs2_clusters_to_blocks(sb, 1));
4498
4499         BUG_ON(le16_to_cpu(bucket_xh(first)->xh_num_buckets) < num_buckets);
4500         BUG_ON(OCFS2_XATTR_BUCKET_SIZE == OCFS2_SB(sb)->s_clustersize);
4501
4502         mlog(0, "move half of xattrs in cluster %llu to %llu\n",
4503              (unsigned long long)last_cluster_blkno, (unsigned long long)new_blkno);
4504
4505         ret = ocfs2_mv_xattr_buckets(inode, handle, bucket_blkno(first),
4506                                      last_cluster_blkno, new_blkno,
4507                                      to_move, first_hash);
4508         if (ret) {
4509                 mlog_errno(ret);
4510                 goto out;
4511         }
4512
4513         /* This is the first bucket that got moved */
4514         src_blkno = last_cluster_blkno + (to_move * blks_per_bucket);
4515
4516         /*
4517          * If the target bucket was part of the moved buckets, we need to
4518          * update first and target.
4519          */
4520         if (bucket_blkno(target) >= src_blkno) {
4521                 /* Find the block for the new target bucket */
4522                 src_blkno = new_blkno +
4523                         (bucket_blkno(target) - src_blkno);
4524
4525                 ocfs2_xattr_bucket_relse(first);
4526                 ocfs2_xattr_bucket_relse(target);
4527
4528                 /*
4529                  * These shouldn't fail - the buffers are in the
4530                  * journal from ocfs2_cp_xattr_bucket().
4531                  */
4532                 ret = ocfs2_read_xattr_bucket(first, new_blkno);
4533                 if (ret) {
4534                         mlog_errno(ret);
4535                         goto out;
4536                 }
4537                 ret = ocfs2_read_xattr_bucket(target, src_blkno);
4538                 if (ret)
4539                         mlog_errno(ret);
4540
4541         }
4542
4543 out:
4544         return ret;
4545 }
4546
4547 /*
4548  * Find the suitable pos when we divide a bucket into 2.
4549  * We have to make sure the xattrs with the same hash value exist
4550  * in the same bucket.
4551  *
4552  * If this ocfs2_xattr_header covers more than one hash value, find a
4553  * place where the hash value changes.  Try to find the most even split.
4554  * The most common case is that all entries have different hash values,
4555  * and the first check we make will find a place to split.
4556  */
4557 static int ocfs2_xattr_find_divide_pos(struct ocfs2_xattr_header *xh)
4558 {
4559         struct ocfs2_xattr_entry *entries = xh->xh_entries;
4560         int count = le16_to_cpu(xh->xh_count);
4561         int delta, middle = count / 2;
4562
4563         /*
4564          * We start at the middle.  Each step gets farther away in both
4565          * directions.  We therefore hit the change in hash value
4566          * nearest to the middle.  Note that this loop does not execute for
4567          * count < 2.
4568          */
4569         for (delta = 0; delta < middle; delta++) {
4570                 /* Let's check delta earlier than middle */
4571                 if (cmp_xe(&entries[middle - delta - 1],
4572                            &entries[middle - delta]))
4573                         return middle - delta;
4574
4575                 /* For even counts, don't walk off the end */
4576                 if ((middle + delta + 1) == count)
4577                         continue;
4578
4579                 /* Now try delta past middle */
4580                 if (cmp_xe(&entries[middle + delta],
4581                            &entries[middle + delta + 1]))
4582                         return middle + delta + 1;
4583         }
4584
4585         /* Every entry had the same hash */
4586         return count;
4587 }
4588
4589 /*
4590  * Move some xattrs in old bucket(blk) to new bucket(new_blk).
4591  * first_hash will record the 1st hash of the new bucket.
4592  *
4593  * Normally half of the xattrs will be moved.  But we have to make
4594  * sure that the xattrs with the same hash value are stored in the
4595  * same bucket. If all the xattrs in this bucket have the same hash
4596  * value, the new bucket will be initialized as an empty one and the
4597  * first_hash will be initialized as (hash_value+1).
4598  */
4599 static int ocfs2_divide_xattr_bucket(struct inode *inode,
4600                                     handle_t *handle,
4601                                     u64 blk,
4602                                     u64 new_blk,
4603                                     u32 *first_hash,
4604                                     int new_bucket_head)
4605 {
4606         int ret, i;
4607         int count, start, len, name_value_len = 0, name_offset = 0;
4608         struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL;
4609         struct ocfs2_xattr_header *xh;
4610         struct ocfs2_xattr_entry *xe;
4611         int blocksize = inode->i_sb->s_blocksize;
4612
4613         mlog(0, "move some of xattrs from bucket %llu to %llu\n",
4614              (unsigned long long)blk, (unsigned long long)new_blk);
4615
4616         s_bucket = ocfs2_xattr_bucket_new(inode);
4617         t_bucket = ocfs2_xattr_bucket_new(inode);
4618         if (!s_bucket || !t_bucket) {
4619                 ret = -ENOMEM;
4620                 mlog_errno(ret);
4621                 goto out;
4622         }
4623
4624         ret = ocfs2_read_xattr_bucket(s_bucket, blk);
4625         if (ret) {
4626                 mlog_errno(ret);
4627                 goto out;
4628         }
4629
4630         ret = ocfs2_xattr_bucket_journal_access(handle, s_bucket,
4631                                                 OCFS2_JOURNAL_ACCESS_WRITE);
4632         if (ret) {
4633                 mlog_errno(ret);
4634                 goto out;
4635         }
4636
4637         /*
4638          * Even if !new_bucket_head, we're overwriting t_bucket.  Thus,
4639          * there's no need to read it.
4640          */
4641         ret = ocfs2_init_xattr_bucket(t_bucket, new_blk);
4642         if (ret) {
4643                 mlog_errno(ret);
4644                 goto out;
4645         }
4646
4647         /*
4648          * Hey, if we're overwriting t_bucket, what difference does
4649          * ACCESS_CREATE vs ACCESS_WRITE make?  See the comment in the
4650          * same part of ocfs2_cp_xattr_bucket().
4651          */
4652         ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket,
4653                                                 new_bucket_head ?
4654                                                 OCFS2_JOURNAL_ACCESS_CREATE :
4655                                                 OCFS2_JOURNAL_ACCESS_WRITE);
4656         if (ret) {
4657                 mlog_errno(ret);
4658                 goto out;
4659         }
4660
4661         xh = bucket_xh(s_bucket);
4662         count = le16_to_cpu(xh->xh_count);
4663         start = ocfs2_xattr_find_divide_pos(xh);
4664
4665         if (start == count) {
4666                 xe = &xh->xh_entries[start-1];
4667
4668                 /*
4669                  * initialized a new empty bucket here.
4670                  * The hash value is set as one larger than
4671                  * that of the last entry in the previous bucket.
4672                  */
4673                 for (i = 0; i < t_bucket->bu_blocks; i++)
4674                         memset(bucket_block(t_bucket, i), 0, blocksize);
4675
4676                 xh = bucket_xh(t_bucket);
4677                 xh->xh_free_start = cpu_to_le16(blocksize);
4678                 xh->xh_entries[0].xe_name_hash = xe->xe_name_hash;
4679                 le32_add_cpu(&xh->xh_entries[0].xe_name_hash, 1);
4680
4681                 goto set_num_buckets;
4682         }
4683
4684         /* copy the whole bucket to the new first. */
4685         ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket);
4686
4687         /* update the new bucket. */
4688         xh = bucket_xh(t_bucket);
4689
4690         /*
4691          * Calculate the total name/value len and xh_free_start for
4692          * the old bucket first.
4693          */
4694         name_offset = OCFS2_XATTR_BUCKET_SIZE;
4695         name_value_len = 0;
4696         for (i = 0; i < start; i++) {
4697                 xe = &xh->xh_entries[i];
4698                 name_value_len += namevalue_size_xe(xe);
4699                 if (le16_to_cpu(xe->xe_name_offset) < name_offset)
4700                         name_offset = le16_to_cpu(xe->xe_name_offset);
4701         }
4702
4703         /*
4704          * Now begin the modification to the new bucket.
4705          *
4706          * In the new bucket, We just move the xattr entry to the beginning
4707          * and don't touch the name/value. So there will be some holes in the
4708          * bucket, and they will be removed when ocfs2_defrag_xattr_bucket is
4709          * called.
4710          */
4711         xe = &xh->xh_entries[start];
4712         len = sizeof(struct ocfs2_xattr_entry) * (count - start);
4713         mlog(0, "mv xattr entry len %d from %d to %d\n", len,
4714              (int)((char *)xe - (char *)xh),
4715              (int)((char *)xh->xh_entries - (char *)xh));
4716         memmove((char *)xh->xh_entries, (char *)xe, len);
4717         xe = &xh->xh_entries[count - start];
4718         len = sizeof(struct ocfs2_xattr_entry) * start;
4719         memset((char *)xe, 0, len);
4720
4721         le16_add_cpu(&xh->xh_count, -start);
4722         le16_add_cpu(&xh->xh_name_value_len, -name_value_len);
4723
4724         /* Calculate xh_free_start for the new bucket. */
4725         xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE);
4726         for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
4727                 xe = &xh->xh_entries[i];
4728                 if (le16_to_cpu(xe->xe_name_offset) <
4729                     le16_to_cpu(xh->xh_free_start))
4730                         xh->xh_free_start = xe->xe_name_offset;
4731         }
4732
4733 set_num_buckets:
4734         /* set xh->xh_num_buckets for the new xh. */
4735         if (new_bucket_head)
4736                 xh->xh_num_buckets = cpu_to_le16(1);
4737         else
4738                 xh->xh_num_buckets = 0;
4739
4740         ocfs2_xattr_bucket_journal_dirty(handle, t_bucket);
4741
4742         /* store the first_hash of the new bucket. */
4743         if (first_hash)
4744                 *first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash);
4745
4746         /*
4747          * Now only update the 1st block of the old bucket.  If we
4748          * just added a new empty bucket, there is no need to modify
4749          * it.
4750          */
4751         if (start == count)
4752                 goto out;
4753
4754         xh = bucket_xh(s_bucket);
4755         memset(&xh->xh_entries[start], 0,
4756                sizeof(struct ocfs2_xattr_entry) * (count - start));
4757         xh->xh_count = cpu_to_le16(start);
4758         xh->xh_free_start = cpu_to_le16(name_offset);
4759         xh->xh_name_value_len = cpu_to_le16(name_value_len);
4760
4761         ocfs2_xattr_bucket_journal_dirty(handle, s_bucket);
4762
4763 out:
4764         ocfs2_xattr_bucket_free(s_bucket);
4765         ocfs2_xattr_bucket_free(t_bucket);
4766
4767         return ret;
4768 }
4769
4770 /*
4771  * Copy xattr from one bucket to another bucket.
4772  *
4773  * The caller must make sure that the journal transaction
4774  * has enough space for journaling.
4775  */
4776 static int ocfs2_cp_xattr_bucket(struct inode *inode,
4777                                  handle_t *handle,
4778                                  u64 s_blkno,
4779                                  u64 t_blkno,
4780                                  int t_is_new)
4781 {
4782         int ret;
4783         struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL;
4784
4785         BUG_ON(s_blkno == t_blkno);
4786
4787         mlog(0, "cp bucket %llu to %llu, target is %d\n",
4788              (unsigned long long)s_blkno, (unsigned long long)t_blkno,
4789              t_is_new);
4790
4791         s_bucket = ocfs2_xattr_bucket_new(inode);
4792         t_bucket = ocfs2_xattr_bucket_new(inode);
4793         if (!s_bucket || !t_bucket) {
4794                 ret = -ENOMEM;
4795                 mlog_errno(ret);
4796                 goto out;
4797         }
4798
4799         ret = ocfs2_read_xattr_bucket(s_bucket, s_blkno);
4800         if (ret)
4801                 goto out;
4802
4803         /*
4804          * Even if !t_is_new, we're overwriting t_bucket.  Thus,
4805          * there's no need to read it.
4806          */
4807         ret = ocfs2_init_xattr_bucket(t_bucket, t_blkno);
4808         if (ret)
4809                 goto out;
4810
4811         /*
4812          * Hey, if we're overwriting t_bucket, what difference does
4813          * ACCESS_CREATE vs ACCESS_WRITE make?  Well, if we allocated a new
4814          * cluster to fill, we came here from
4815          * ocfs2_mv_xattr_buckets(), and it is really new -
4816          * ACCESS_CREATE is required.  But we also might have moved data
4817          * out of t_bucket before extending back into it.
4818          * ocfs2_add_new_xattr_bucket() can do this - its call to
4819          * ocfs2_add_new_xattr_cluster() may have created a new extent
4820          * and copied out the end of the old extent.  Then it re-extends
4821          * the old extent back to create space for new xattrs.  That's
4822          * how we get here, and the bucket isn't really new.
4823          */
4824         ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket,
4825                                                 t_is_new ?
4826                                                 OCFS2_JOURNAL_ACCESS_CREATE :
4827                                                 OCFS2_JOURNAL_ACCESS_WRITE);
4828         if (ret)
4829                 goto out;
4830
4831         ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket);
4832         ocfs2_xattr_bucket_journal_dirty(handle, t_bucket);
4833
4834 out:
4835         ocfs2_xattr_bucket_free(t_bucket);
4836         ocfs2_xattr_bucket_free(s_bucket);
4837
4838         return ret;
4839 }
4840
4841 /*
4842  * src_blk points to the start of an existing extent.  last_blk points to
4843  * last cluster in that extent.  to_blk points to a newly allocated
4844  * extent.  We copy the buckets from the cluster at last_blk to the new
4845  * extent.  If start_bucket is non-zero, we skip that many buckets before
4846  * we start copying.  The new extent's xh_num_buckets gets set to the
4847  * number of buckets we copied.  The old extent's xh_num_buckets shrinks
4848  * by the same amount.
4849  */
4850 static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle,
4851                                   u64 src_blk, u64 last_blk, u64 to_blk,
4852                                   unsigned int start_bucket,
4853                                   u32 *first_hash)
4854 {
4855         int i, ret, credits;
4856         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4857         int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4858         int num_buckets = ocfs2_xattr_buckets_per_cluster(osb);
4859         struct ocfs2_xattr_bucket *old_first, *new_first;
4860
4861         mlog(0, "mv xattrs from cluster %llu to %llu\n",
4862              (unsigned long long)last_blk, (unsigned long long)to_blk);
4863
4864         BUG_ON(start_bucket >= num_buckets);
4865         if (start_bucket) {
4866                 num_buckets -= start_bucket;
4867                 last_blk += (start_bucket * blks_per_bucket);
4868         }
4869
4870         /* The first bucket of the original extent */
4871         old_first = ocfs2_xattr_bucket_new(inode);
4872         /* The first bucket of the new extent */
4873         new_first = ocfs2_xattr_bucket_new(inode);
4874         if (!old_first || !new_first) {
4875                 ret = -ENOMEM;
4876                 mlog_errno(ret);
4877                 goto out;
4878         }
4879
4880         ret = ocfs2_read_xattr_bucket(old_first, src_blk);
4881         if (ret) {
4882                 mlog_errno(ret);
4883                 goto out;
4884         }
4885
4886         /*
4887          * We need to update the first bucket of the old extent and all
4888          * the buckets going to the new extent.
4889          */
4890         credits = ((num_buckets + 1) * blks_per_bucket) +
4891                 handle->h_buffer_credits;
4892         ret = ocfs2_extend_trans(handle, credits);
4893         if (ret) {
4894                 mlog_errno(ret);
4895                 goto out;
4896         }
4897
4898         ret = ocfs2_xattr_bucket_journal_access(handle, old_first,
4899                                                 OCFS2_JOURNAL_ACCESS_WRITE);
4900         if (ret) {
4901                 mlog_errno(ret);
4902                 goto out;
4903         }
4904
4905         for (i = 0; i < num_buckets; i++) {
4906                 ret = ocfs2_cp_xattr_bucket(inode, handle,
4907                                             last_blk + (i * blks_per_bucket),
4908                                             to_blk + (i * blks_per_bucket),
4909                                             1);
4910                 if (ret) {
4911                         mlog_errno(ret);
4912                         goto out;
4913                 }
4914         }
4915
4916         /*
4917          * Get the new bucket ready before we dirty anything
4918          * (This actually shouldn't fail, because we already dirtied
4919          * it once in ocfs2_cp_xattr_bucket()).
4920          */
4921         ret = ocfs2_read_xattr_bucket(new_first, to_blk);
4922         if (ret) {
4923                 mlog_errno(ret);
4924                 goto out;
4925         }
4926         ret = ocfs2_xattr_bucket_journal_access(handle, new_first,
4927                                                 OCFS2_JOURNAL_ACCESS_WRITE);
4928         if (ret) {
4929                 mlog_errno(ret);
4930                 goto out;
4931         }
4932
4933         /* Now update the headers */
4934         le16_add_cpu(&bucket_xh(old_first)->xh_num_buckets, -num_buckets);
4935         ocfs2_xattr_bucket_journal_dirty(handle, old_first);
4936
4937         bucket_xh(new_first)->xh_num_buckets = cpu_to_le16(num_buckets);
4938         ocfs2_xattr_bucket_journal_dirty(handle, new_first);
4939
4940         if (first_hash)
4941                 *first_hash = le32_to_cpu(bucket_xh(new_first)->xh_entries[0].xe_name_hash);
4942
4943 out:
4944         ocfs2_xattr_bucket_free(new_first);
4945         ocfs2_xattr_bucket_free(old_first);
4946         return ret;
4947 }
4948
4949 /*
4950  * Move some xattrs in this cluster to the new cluster.
4951  * This function should only be called when bucket size == cluster size.
4952  * Otherwise ocfs2_mv_xattr_bucket_cross_cluster should be used instead.
4953  */
4954 static int ocfs2_divide_xattr_cluster(struct inode *inode,
4955                                       handle_t *handle,
4956                                       u64 prev_blk,
4957                                       u64 new_blk,
4958                                       u32 *first_hash)
4959 {
4960         u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4961         int ret, credits = 2 * blk_per_bucket + handle->h_buffer_credits;
4962
4963         BUG_ON(OCFS2_XATTR_BUCKET_SIZE < OCFS2_SB(inode->i_sb)->s_clustersize);
4964
4965         ret = ocfs2_extend_trans(handle, credits);
4966         if (ret) {
4967                 mlog_errno(ret);
4968                 return ret;
4969         }
4970
4971         /* Move half of the xattr in start_blk to the next bucket. */
4972         return  ocfs2_divide_xattr_bucket(inode, handle, prev_blk,
4973                                           new_blk, first_hash, 1);
4974 }
4975
4976 /*
4977  * Move some xattrs from the old cluster to the new one since they are not
4978  * contiguous in ocfs2 xattr tree.
4979  *
4980  * new_blk starts a new separate cluster, and we will move some xattrs from
4981  * prev_blk to it. v_start will be set as the first name hash value in this
4982  * new cluster so that it can be used as e_cpos during tree insertion and
4983  * don't collide with our original b-tree operations. first_bh and header_bh
4984  * will also be updated since they will be used in ocfs2_extend_xattr_bucket
4985  * to extend the insert bucket.
4986  *
4987  * The problem is how much xattr should we move to the new one and when should
4988  * we update first_bh and header_bh?
4989  * 1. If cluster size > bucket size, that means the previous cluster has more
4990  *    than 1 bucket, so just move half nums of bucket into the new cluster and
4991  *    update the first_bh and header_bh if the insert bucket has been moved
4992  *    to the new cluster.
4993  * 2. If cluster_size == bucket_size:
4994  *    a) If the previous extent rec has more than one cluster and the insert
4995  *       place isn't in the last cluster, copy the entire last cluster to the
4996  *       new one. This time, we don't need to upate the first_bh and header_bh
4997  *       since they will not be moved into the new cluster.
4998  *    b) Otherwise, move the bottom half of the xattrs in the last cluster into
4999  *       the new one. And we set the extend flag to zero if the insert place is
5000  *       moved into the new allocated cluster since no extend is needed.
5001  */
5002 static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode,
5003                                             handle_t *handle,
5004                                             struct ocfs2_xattr_bucket *first,
5005                                             struct ocfs2_xattr_bucket *target,
5006                                             u64 new_blk,
5007                                             u32 prev_clusters,
5008                                             u32 *v_start,
5009                                             int *extend)
5010 {
5011         int ret;
5012
5013         mlog(0, "adjust xattrs from cluster %llu len %u to %llu\n",
5014              (unsigned long long)bucket_blkno(first), prev_clusters,
5015              (unsigned long long)new_blk);
5016
5017         if (ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)) > 1) {
5018                 ret = ocfs2_mv_xattr_bucket_cross_cluster(inode,
5019                                                           handle,
5020                                                           first, target,
5021                                                           new_blk,
5022                                                           prev_clusters,
5023                                                           v_start);
5024                 if (ret)
5025                         mlog_errno(ret);
5026         } else {
5027                 /* The start of the last cluster in the first extent */
5028                 u64 last_blk = bucket_blkno(first) +
5029                         ((prev_clusters - 1) *
5030                          ocfs2_clusters_to_blocks(inode->i_sb, 1));
5031
5032                 if (prev_clusters > 1 && bucket_blkno(target) != last_blk) {
5033                         ret = ocfs2_mv_xattr_buckets(inode, handle,
5034                                                      bucket_blkno(first),
5035                                                      last_blk, new_blk, 0,
5036                                                      v_start);
5037                         if (ret)
5038                                 mlog_errno(ret);
5039                 } else {
5040                         ret = ocfs2_divide_xattr_cluster(inode, handle,
5041                                                          last_blk, new_blk,
5042                                                          v_start);
5043                         if (ret)
5044                                 mlog_errno(ret);
5045
5046                         if ((bucket_blkno(target) == last_blk) && extend)
5047                                 *extend = 0;
5048                 }
5049         }
5050
5051         return ret;
5052 }
5053
5054 /*
5055  * Add a new cluster for xattr storage.
5056  *
5057  * If the new cluster is contiguous with the previous one, it will be
5058  * appended to the same extent record, and num_clusters will be updated.
5059  * If not, we will insert a new extent for it and move some xattrs in
5060  * the last cluster into the new allocated one.
5061  * We also need to limit the maximum size of a btree leaf, otherwise we'll
5062  * lose the benefits of hashing because we'll have to search large leaves.
5063  * So now the maximum size is OCFS2_MAX_XATTR_TREE_LEAF_SIZE(or clustersize,
5064  * if it's bigger).
5065  *
5066  * first_bh is the first block of the previous extent rec and header_bh
5067  * indicates the bucket we will insert the new xattrs. They will be updated
5068  * when the header_bh is moved into the new cluster.
5069  */
5070 static int ocfs2_add_new_xattr_cluster(struct inode *inode,
5071                                        struct buffer_head *root_bh,
5072                                        struct ocfs2_xattr_bucket *first,
5073                                        struct ocfs2_xattr_bucket *target,
5074                                        u32 *num_clusters,
5075                                        u32 prev_cpos,
5076                                        int *extend,
5077                                        struct ocfs2_xattr_set_ctxt *ctxt)
5078 {
5079         int ret;
5080         u16 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
5081         u32 prev_clusters = *num_clusters;
5082         u32 clusters_to_add = 1, bit_off, num_bits, v_start = 0;
5083         u64 block;
5084         handle_t *handle = ctxt->handle;
5085         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5086         struct ocfs2_extent_tree et;
5087
5088         mlog(0, "Add new xattr cluster for %llu, previous xattr hash = %u, "
5089              "previous xattr blkno = %llu\n",
5090              (unsigned long long)OCFS2_I(inode)->ip_blkno,
5091              prev_cpos, (unsigned long long)bucket_blkno(first));
5092
5093         ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh);
5094
5095         ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh,
5096                                       OCFS2_JOURNAL_ACCESS_WRITE);
5097         if (ret < 0) {
5098                 mlog_errno(ret);
5099                 goto leave;
5100         }
5101
5102         ret = __ocfs2_claim_clusters(osb, handle, ctxt->data_ac, 1,
5103                                      clusters_to_add, &bit_off, &num_bits);
5104         if (ret < 0) {
5105                 if (ret != -ENOSPC)
5106                         mlog_errno(ret);
5107                 goto leave;
5108         }
5109
5110         BUG_ON(num_bits > clusters_to_add);
5111
5112         block = ocfs2_clusters_to_blocks(osb->sb, bit_off);
5113         mlog(0, "Allocating %u clusters at block %u for xattr in inode %llu\n",
5114              num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno);
5115
5116         if (bucket_blkno(first) + (prev_clusters * bpc) == block &&
5117             (prev_clusters + num_bits) << osb->s_clustersize_bits <=
5118              OCFS2_MAX_XATTR_TREE_LEAF_SIZE) {
5119                 /*
5120                  * If this cluster is contiguous with the old one and
5121                  * adding this new cluster, we don't surpass the limit of
5122                  * OCFS2_MAX_XATTR_TREE_LEAF_SIZE, cool. We will let it be
5123                  * initialized and used like other buckets in the previous
5124                  * cluster.
5125                  * So add it as a contiguous one. The caller will handle
5126                  * its init process.
5127                  */
5128                 v_start = prev_cpos + prev_clusters;
5129                 *num_clusters = prev_clusters + num_bits;
5130                 mlog(0, "Add contiguous %u clusters to previous extent rec.\n",
5131                      num_bits);
5132         } else {
5133                 ret = ocfs2_adjust_xattr_cross_cluster(inode,
5134                                                        handle,
5135                                                        first,
5136                                                        target,
5137                                                        block,
5138                                                        prev_clusters,
5139                                                        &v_start,
5140                                                        extend);
5141                 if (ret) {
5142                         mlog_errno(ret);
5143                         goto leave;
5144                 }
5145         }
5146
5147         mlog(0, "Insert %u clusters at block %llu for xattr at %u\n",
5148              num_bits, (unsigned long long)block, v_start);
5149         ret = ocfs2_insert_extent(handle, &et, v_start, block,
5150                                   num_bits, 0, ctxt->meta_ac);
5151         if (ret < 0) {
5152                 mlog_errno(ret);
5153                 goto leave;
5154         }
5155
5156         ret = ocfs2_journal_dirty(handle, root_bh);
5157         if (ret < 0)
5158                 mlog_errno(ret);
5159
5160 leave:
5161         return ret;
5162 }
5163
5164 /*
5165  * We are given an extent.  'first' is the bucket at the very front of
5166  * the extent.  The extent has space for an additional bucket past
5167  * bucket_xh(first)->xh_num_buckets.  'target_blkno' is the block number
5168  * of the target bucket.  We wish to shift every bucket past the target
5169  * down one, filling in that additional space.  When we get back to the
5170  * target, we split the target between itself and the now-empty bucket
5171  * at target+1 (aka, target_blkno + blks_per_bucket).
5172  */
5173 static int ocfs2_extend_xattr_bucket(struct inode *inode,
5174                                      handle_t *handle,
5175                                      struct ocfs2_xattr_bucket *first,
5176                                      u64 target_blk,
5177                                      u32 num_clusters)
5178 {
5179         int ret, credits;
5180         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5181         u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
5182         u64 end_blk;
5183         u16 new_bucket = le16_to_cpu(bucket_xh(first)->xh_num_buckets);
5184
5185         mlog(0, "extend xattr bucket in %llu, xattr extend rec starting "
5186              "from %llu, len = %u\n", (unsigned long long)target_blk,
5187              (unsigned long long)bucket_blkno(first), num_clusters);
5188
5189         /* The extent must have room for an additional bucket */
5190         BUG_ON(new_bucket >=
5191                (num_clusters * ocfs2_xattr_buckets_per_cluster(osb)));
5192
5193         /* end_blk points to the last existing bucket */
5194         end_blk = bucket_blkno(first) + ((new_bucket - 1) * blk_per_bucket);
5195
5196         /*
5197          * end_blk is the start of the last existing bucket.
5198          * Thus, (end_blk - target_blk) covers the target bucket and
5199          * every bucket after it up to, but not including, the last
5200          * existing bucket.  Then we add the last existing bucket, the
5201          * new bucket, and the first bucket (3 * blk_per_bucket).
5202          */
5203         credits = (end_blk - target_blk) + (3 * blk_per_bucket) +
5204                   handle->h_buffer_credits;
5205         ret = ocfs2_extend_trans(handle, credits);
5206         if (ret) {
5207                 mlog_errno(ret);
5208                 goto out;
5209         }
5210
5211         ret = ocfs2_xattr_bucket_journal_access(handle, first,
5212                                                 OCFS2_JOURNAL_ACCESS_WRITE);
5213         if (ret) {
5214                 mlog_errno(ret);
5215                 goto out;
5216         }
5217
5218         while (end_blk != target_blk) {
5219                 ret = ocfs2_cp_xattr_bucket(inode, handle, end_blk,
5220                                             end_blk + blk_per_bucket, 0);
5221                 if (ret)
5222                         goto out;
5223                 end_blk -= blk_per_bucket;
5224         }
5225
5226         /* Move half of the xattr in target_blkno to the next bucket. */
5227         ret = ocfs2_divide_xattr_bucket(inode, handle, target_blk,
5228                                         target_blk + blk_per_bucket, NULL, 0);
5229
5230         le16_add_cpu(&bucket_xh(first)->xh_num_buckets, 1);
5231         ocfs2_xattr_bucket_journal_dirty(handle, first);
5232
5233 out:
5234         return ret;
5235 }
5236
5237 /*
5238  * Add new xattr bucket in an extent record and adjust the buckets
5239  * accordingly.  xb_bh is the ocfs2_xattr_block, and target is the
5240  * bucket we want to insert into.
5241  *
5242  * In the easy case, we will move all the buckets after target down by
5243  * one. Half of target's xattrs will be moved to the next bucket.
5244  *
5245  * If current cluster is full, we'll allocate a new one.  This may not
5246  * be contiguous.  The underlying calls will make sure that there is
5247  * space for the insert, shifting buckets around if necessary.
5248  * 'target' may be moved by those calls.
5249  */
5250 static int ocfs2_add_new_xattr_bucket(struct inode *inode,
5251                                       struct buffer_head *xb_bh,
5252                                       struct ocfs2_xattr_bucket *target,
5253                                       struct ocfs2_xattr_set_ctxt *ctxt)
5254 {
5255         struct ocfs2_xattr_block *xb =
5256                         (struct ocfs2_xattr_block *)xb_bh->b_data;
5257         struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
5258         struct ocfs2_extent_list *el = &xb_root->xt_list;
5259         u32 name_hash =
5260                 le32_to_cpu(bucket_xh(target)->xh_entries[0].xe_name_hash);
5261         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5262         int ret, num_buckets, extend = 1;
5263         u64 p_blkno;
5264         u32 e_cpos, num_clusters;
5265         /* The bucket at the front of the extent */
5266         struct ocfs2_xattr_bucket *first;
5267
5268         mlog(0, "Add new xattr bucket starting from %llu\n",
5269              (unsigned long long)bucket_blkno(target));
5270
5271         /* The first bucket of the original extent */
5272         first = ocfs2_xattr_bucket_new(inode);
5273         if (!first) {
5274                 ret = -ENOMEM;
5275                 mlog_errno(ret);
5276                 goto out;
5277         }
5278
5279         ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &e_cpos,
5280                                   &num_clusters, el);
5281         if (ret) {
5282                 mlog_errno(ret);
5283                 goto out;
5284         }
5285
5286         ret = ocfs2_read_xattr_bucket(first, p_blkno);
5287         if (ret) {
5288                 mlog_errno(ret);
5289                 goto out;
5290         }
5291
5292         num_buckets = ocfs2_xattr_buckets_per_cluster(osb) * num_clusters;
5293         if (num_buckets == le16_to_cpu(bucket_xh(first)->xh_num_buckets)) {
5294                 /*
5295                  * This can move first+target if the target bucket moves
5296                  * to the new extent.
5297                  */
5298                 ret = ocfs2_add_new_xattr_cluster(inode,
5299                                                   xb_bh,
5300                                                   first,
5301                                                   target,
5302                                                   &num_clusters,
5303                                                   e_cpos,
5304                                                   &extend,
5305                                                   ctxt);
5306                 if (ret) {
5307                         mlog_errno(ret);
5308                         goto out;
5309                 }
5310         }
5311
5312         if (extend) {
5313                 ret = ocfs2_extend_xattr_bucket(inode,
5314                                                 ctxt->handle,
5315                                                 first,
5316                                                 bucket_blkno(target),
5317                                                 num_clusters);
5318                 if (ret)
5319                         mlog_errno(ret);
5320         }
5321
5322 out:
5323         ocfs2_xattr_bucket_free(first);
5324
5325         return ret;
5326 }
5327
5328 static inline char *ocfs2_xattr_bucket_get_val(struct inode *inode,
5329                                         struct ocfs2_xattr_bucket *bucket,
5330                                         int offs)
5331 {
5332         int block_off = offs >> inode->i_sb->s_blocksize_bits;
5333
5334         offs = offs % inode->i_sb->s_blocksize;
5335         return bucket_block(bucket, block_off) + offs;
5336 }
5337
5338 /*
5339  * Truncate the specified xe_off entry in xattr bucket.
5340  * bucket is indicated by header_bh and len is the new length.
5341  * Both the ocfs2_xattr_value_root and the entry will be updated here.
5342  *
5343  * Copy the new updated xe and xe_value_root to new_xe and new_xv if needed.
5344  */
5345 static int ocfs2_xattr_bucket_value_truncate(struct inode *inode,
5346                                              struct ocfs2_xattr_bucket *bucket,
5347                                              int xe_off,
5348                                              int len,
5349                                              struct ocfs2_xattr_set_ctxt *ctxt)
5350 {
5351         int ret, offset;
5352         u64 value_blk;
5353         struct ocfs2_xattr_entry *xe;
5354         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
5355         size_t blocksize = inode->i_sb->s_blocksize;
5356         struct ocfs2_xattr_value_buf vb = {
5357                 .vb_access = ocfs2_journal_access,
5358         };
5359
5360         xe = &xh->xh_entries[xe_off];
5361
5362         BUG_ON(!xe || ocfs2_xattr_is_local(xe));
5363
5364         offset = le16_to_cpu(xe->xe_name_offset) +
5365                  OCFS2_XATTR_SIZE(xe->xe_name_len);
5366
5367         value_blk = offset / blocksize;
5368
5369         /* We don't allow ocfs2_xattr_value to be stored in different block. */
5370         BUG_ON(value_blk != (offset + OCFS2_XATTR_ROOT_SIZE - 1) / blocksize);
5371
5372         vb.vb_bh = bucket->bu_bhs[value_blk];
5373         BUG_ON(!vb.vb_bh);
5374
5375         vb.vb_xv = (struct ocfs2_xattr_value_root *)
5376                 (vb.vb_bh->b_data + offset % blocksize);
5377
5378         /*
5379          * From here on out we have to dirty the bucket.  The generic
5380          * value calls only modify one of the bucket's bhs, but we need
5381          * to send the bucket at once.  So if they error, they *could* have
5382          * modified something.  We have to assume they did, and dirty
5383          * the whole bucket.  This leaves us in a consistent state.
5384          */
5385         mlog(0, "truncate %u in xattr bucket %llu to %d bytes.\n",
5386              xe_off, (unsigned long long)bucket_blkno(bucket), len);
5387         ret = ocfs2_xattr_value_truncate(inode, &vb, len, ctxt);
5388         if (ret) {
5389                 mlog_errno(ret);
5390                 goto out;
5391         }
5392
5393         ret = ocfs2_xattr_bucket_journal_access(ctxt->handle, bucket,
5394                                                 OCFS2_JOURNAL_ACCESS_WRITE);
5395         if (ret) {
5396                 mlog_errno(ret);
5397                 goto out;
5398         }
5399
5400         xe->xe_value_size = cpu_to_le64(len);
5401
5402         ocfs2_xattr_bucket_journal_dirty(ctxt->handle, bucket);
5403
5404 out:
5405         return ret;
5406 }
5407
5408 static int ocfs2_rm_xattr_cluster(struct inode *inode,
5409                                   struct buffer_head *root_bh,
5410                                   u64 blkno,
5411                                   u32 cpos,
5412                                   u32 len,
5413                                   void *para)
5414 {
5415         int ret;
5416         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5417         struct inode *tl_inode = osb->osb_tl_inode;
5418         handle_t *handle;
5419         struct ocfs2_xattr_block *xb =
5420                         (struct ocfs2_xattr_block *)root_bh->b_data;
5421         struct ocfs2_alloc_context *meta_ac = NULL;
5422         struct ocfs2_cached_dealloc_ctxt dealloc;
5423         struct ocfs2_extent_tree et;
5424
5425         ret = ocfs2_iterate_xattr_buckets(inode, blkno, len,
5426                                           ocfs2_delete_xattr_in_bucket, para);
5427         if (ret) {
5428                 mlog_errno(ret);
5429                 return ret;
5430         }
5431
5432         ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh);
5433
5434         ocfs2_init_dealloc_ctxt(&dealloc);
5435
5436         mlog(0, "rm xattr extent rec at %u len = %u, start from %llu\n",
5437              cpos, len, (unsigned long long)blkno);
5438
5439         ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode), blkno,
5440                                                len);
5441
5442         ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac);
5443         if (ret) {
5444                 mlog_errno(ret);
5445                 return ret;
5446         }
5447
5448         mutex_lock(&tl_inode->i_mutex);
5449
5450         if (ocfs2_truncate_log_needs_flush(osb)) {
5451                 ret = __ocfs2_flush_truncate_log(osb);
5452                 if (ret < 0) {
5453                         mlog_errno(ret);
5454                         goto out;
5455                 }
5456         }
5457
5458         handle = ocfs2_start_trans(osb, ocfs2_remove_extent_credits(osb->sb));
5459         if (IS_ERR(handle)) {
5460                 ret = -ENOMEM;
5461                 mlog_errno(ret);
5462                 goto out;
5463         }
5464
5465         ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh,
5466                                       OCFS2_JOURNAL_ACCESS_WRITE);
5467         if (ret) {
5468                 mlog_errno(ret);
5469                 goto out_commit;
5470         }
5471
5472         ret = ocfs2_remove_extent(handle, &et, cpos, len, meta_ac,
5473                                   &dealloc);
5474         if (ret) {
5475                 mlog_errno(ret);
5476                 goto out_commit;
5477         }
5478
5479         le32_add_cpu(&xb->xb_attrs.xb_root.xt_clusters, -len);
5480
5481         ret = ocfs2_journal_dirty(handle, root_bh);
5482         if (ret) {
5483                 mlog_errno(ret);
5484                 goto out_commit;
5485         }
5486
5487         ret = ocfs2_truncate_log_append(osb, handle, blkno, len);
5488         if (ret)
5489                 mlog_errno(ret);
5490
5491 out_commit:
5492         ocfs2_commit_trans(osb, handle);
5493 out:
5494         ocfs2_schedule_truncate_log_flush(osb, 1);
5495
5496         mutex_unlock(&tl_inode->i_mutex);
5497
5498         if (meta_ac)
5499                 ocfs2_free_alloc_context(meta_ac);
5500
5501         ocfs2_run_deallocs(osb, &dealloc);
5502
5503         return ret;
5504 }
5505
5506 /*
5507  * check whether the xattr bucket is filled up with the same hash value.
5508  * If we want to insert the xattr with the same hash, return -ENOSPC.
5509  * If we want to insert a xattr with different hash value, go ahead
5510  * and ocfs2_divide_xattr_bucket will handle this.
5511  */
5512 static int ocfs2_check_xattr_bucket_collision(struct inode *inode,
5513                                               struct ocfs2_xattr_bucket *bucket,
5514                                               const char *name)
5515 {
5516         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
5517         u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name));
5518
5519         if (name_hash != le32_to_cpu(xh->xh_entries[0].xe_name_hash))
5520                 return 0;
5521
5522         if (xh->xh_entries[le16_to_cpu(xh->xh_count) - 1].xe_name_hash ==
5523             xh->xh_entries[0].xe_name_hash) {
5524                 mlog(ML_ERROR, "Too much hash collision in xattr bucket %llu, "
5525                      "hash = %u\n",
5526                      (unsigned long long)bucket_blkno(bucket),
5527                      le32_to_cpu(xh->xh_entries[0].xe_name_hash));
5528                 return -ENOSPC;
5529         }
5530
5531         return 0;
5532 }
5533
5534 /*
5535  * Try to set the entry in the current bucket.  If we fail, the caller
5536  * will handle getting us another bucket.
5537  */
5538 static int ocfs2_xattr_set_entry_bucket(struct inode *inode,
5539                                         struct ocfs2_xattr_info *xi,
5540                                         struct ocfs2_xattr_search *xs,
5541                                         struct ocfs2_xattr_set_ctxt *ctxt)
5542 {
5543         int ret;
5544         struct ocfs2_xa_loc loc;
5545
5546         mlog_entry("Set xattr %s in xattr bucket\n", xi->xi_name);
5547
5548         ocfs2_init_xattr_bucket_xa_loc(&loc, xs->bucket,
5549                                        xs->not_found ? NULL : xs->here);
5550         ret = ocfs2_xa_set(&loc, xi, ctxt);
5551         if (!ret) {
5552                 xs->here = loc.xl_entry;
5553                 goto out;
5554         }
5555         if (ret != -ENOSPC) {
5556                 mlog_errno(ret);
5557                 goto out;
5558         }
5559
5560         /* Ok, we need space.  Let's try defragmenting the bucket. */
5561         ret = ocfs2_defrag_xattr_bucket(inode, ctxt->handle,
5562                                         xs->bucket);
5563         if (ret) {
5564                 mlog_errno(ret);
5565                 goto out;
5566         }
5567
5568         ret = ocfs2_xa_set(&loc, xi, ctxt);
5569         if (!ret) {
5570                 xs->here = loc.xl_entry;
5571                 goto out;
5572         }
5573         if (ret != -ENOSPC)
5574                 mlog_errno(ret);
5575
5576
5577 out:
5578         mlog_exit(ret);
5579         return ret;
5580 }
5581
5582 static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
5583                                              struct ocfs2_xattr_info *xi,
5584                                              struct ocfs2_xattr_search *xs,
5585                                              struct ocfs2_xattr_set_ctxt *ctxt)
5586 {
5587         int ret;
5588
5589         mlog_entry("Set xattr %s in xattr index block\n", xi->xi_name);
5590
5591         ret = ocfs2_xattr_set_entry_bucket(inode, xi, xs, ctxt);
5592         if (!ret)
5593                 goto out;
5594         if (ret != -ENOSPC) {
5595                 mlog_errno(ret);
5596                 goto out;
5597         }
5598
5599         /* Ack, need more space.  Let's try to get another bucket! */
5600
5601         /*
5602          * We do not allow for overlapping ranges between buckets. And
5603          * the maximum number of collisions we will allow for then is
5604          * one bucket's worth, so check it here whether we need to
5605          * add a new bucket for the insert.
5606          */
5607         ret = ocfs2_check_xattr_bucket_collision(inode,
5608                                                  xs->bucket,
5609                                                  xi->xi_name);
5610         if (ret) {
5611                 mlog_errno(ret);
5612                 goto out;
5613         }
5614
5615         ret = ocfs2_add_new_xattr_bucket(inode,
5616                                          xs->xattr_bh,
5617                                          xs->bucket,
5618                                          ctxt);
5619         if (ret) {
5620                 mlog_errno(ret);
5621                 goto out;
5622         }
5623
5624         /*
5625          * ocfs2_add_new_xattr_bucket() will have updated
5626          * xs->bucket if it moved, but it will not have updated
5627          * any of the other search fields.  Thus, we drop it and
5628          * re-search.  Everything should be cached, so it'll be
5629          * quick.
5630          */
5631         ocfs2_xattr_bucket_relse(xs->bucket);
5632         ret = ocfs2_xattr_index_block_find(inode, xs->xattr_bh,
5633                                            xi->xi_name_index,
5634                                            xi->xi_name, xs);
5635         if (ret && ret != -ENODATA)
5636                 goto out;
5637         xs->not_found = ret;
5638
5639         /* Ok, we have a new bucket, let's try again */
5640         ret = ocfs2_xattr_set_entry_bucket(inode, xi, xs, ctxt);
5641         if (ret && (ret != -ENOSPC))
5642                 mlog_errno(ret);
5643
5644 out:
5645         mlog_exit(ret);
5646         return ret;
5647 }
5648
5649 static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
5650                                         struct ocfs2_xattr_bucket *bucket,
5651                                         void *para)
5652 {
5653         int ret = 0, ref_credits;
5654         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
5655         u16 i;
5656         struct ocfs2_xattr_entry *xe;
5657         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5658         struct ocfs2_xattr_set_ctxt ctxt = {NULL, NULL,};
5659         int credits = ocfs2_remove_extent_credits(osb->sb) +
5660                 ocfs2_blocks_per_xattr_bucket(inode->i_sb);
5661         struct ocfs2_xattr_value_root *xv;
5662         struct ocfs2_rm_xattr_bucket_para *args =
5663                         (struct ocfs2_rm_xattr_bucket_para *)para;
5664
5665         ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
5666
5667         for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
5668                 xe = &xh->xh_entries[i];
5669                 if (ocfs2_xattr_is_local(xe))
5670                         continue;
5671
5672                 ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket,
5673                                                       i, &xv, NULL);
5674
5675                 ret = ocfs2_lock_xattr_remove_allocators(inode, xv,
5676                                                          args->ref_ci,
5677                                                          args->ref_root_bh,
5678                                                          &ctxt.meta_ac,
5679                                                          &ref_credits);
5680
5681                 ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits);
5682                 if (IS_ERR(ctxt.handle)) {
5683                         ret = PTR_ERR(ctxt.handle);
5684                         mlog_errno(ret);
5685                         break;
5686                 }
5687
5688                 ret = ocfs2_xattr_bucket_value_truncate(inode, bucket,
5689                                                         i, 0, &ctxt);
5690
5691                 ocfs2_commit_trans(osb, ctxt.handle);
5692                 if (ctxt.meta_ac) {
5693                         ocfs2_free_alloc_context(ctxt.meta_ac);
5694                         ctxt.meta_ac = NULL;
5695                 }
5696                 if (ret) {
5697                         mlog_errno(ret);
5698                         break;
5699                 }
5700         }
5701
5702         if (ctxt.meta_ac)
5703                 ocfs2_free_alloc_context(ctxt.meta_ac);
5704         ocfs2_schedule_truncate_log_flush(osb, 1);
5705         ocfs2_run_deallocs(osb, &ctxt.dealloc);
5706         return ret;
5707 }
5708
5709 /*
5710  * Whenever we modify a xattr value root in the bucket(e.g, CoW
5711  * or change the extent record flag), we need to recalculate
5712  * the metaecc for the whole bucket. So it is done here.
5713  *
5714  * Note:
5715  * We have to give the extra credits for the caller.
5716  */
5717 static int ocfs2_xattr_bucket_post_refcount(struct inode *inode,
5718                                             handle_t *handle,
5719                                             void *para)
5720 {
5721         int ret;
5722         struct ocfs2_xattr_bucket *bucket =
5723                         (struct ocfs2_xattr_bucket *)para;
5724
5725         ret = ocfs2_xattr_bucket_journal_access(handle, bucket,
5726                                                 OCFS2_JOURNAL_ACCESS_WRITE);
5727         if (ret) {
5728                 mlog_errno(ret);
5729                 return ret;
5730         }
5731
5732         ocfs2_xattr_bucket_journal_dirty(handle, bucket);
5733
5734         return 0;
5735 }
5736
5737 /*
5738  * Special action we need if the xattr value is refcounted.
5739  *
5740  * 1. If the xattr is refcounted, lock the tree.
5741  * 2. CoW the xattr if we are setting the new value and the value
5742  *    will be stored outside.
5743  * 3. In other case, decrease_refcount will work for us, so just
5744  *    lock the refcount tree, calculate the meta and credits is OK.
5745  *
5746  * We have to do CoW before ocfs2_init_xattr_set_ctxt since
5747  * currently CoW is a completed transaction, while this function
5748  * will also lock the allocators and let us deadlock. So we will
5749  * CoW the whole xattr value.
5750  */
5751 static int ocfs2_prepare_refcount_xattr(struct inode *inode,
5752                                         struct ocfs2_dinode *di,
5753                                         struct ocfs2_xattr_info *xi,
5754                                         struct ocfs2_xattr_search *xis,
5755                                         struct ocfs2_xattr_search *xbs,
5756                                         struct ocfs2_refcount_tree **ref_tree,
5757                                         int *meta_add,
5758                                         int *credits)
5759 {
5760         int ret = 0;
5761         struct ocfs2_xattr_block *xb;
5762         struct ocfs2_xattr_entry *xe;
5763         char *base;
5764         u32 p_cluster, num_clusters;
5765         unsigned int ext_flags;
5766         int name_offset, name_len;
5767         struct ocfs2_xattr_value_buf vb;
5768         struct ocfs2_xattr_bucket *bucket = NULL;
5769         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5770         struct ocfs2_post_refcount refcount;
5771         struct ocfs2_post_refcount *p = NULL;
5772         struct buffer_head *ref_root_bh = NULL;
5773
5774         if (!xis->not_found) {
5775                 xe = xis->here;
5776                 name_offset = le16_to_cpu(xe->xe_name_offset);
5777                 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
5778                 base = xis->base;
5779                 vb.vb_bh = xis->inode_bh;
5780                 vb.vb_access = ocfs2_journal_access_di;
5781         } else {
5782                 int i, block_off = 0;
5783                 xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
5784                 xe = xbs->here;
5785                 name_offset = le16_to_cpu(xe->xe_name_offset);
5786                 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
5787                 i = xbs->here - xbs->header->xh_entries;
5788
5789                 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
5790                         ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
5791                                                         bucket_xh(xbs->bucket),
5792                                                         i, &block_off,
5793                                                         &name_offset);
5794                         if (ret) {
5795                                 mlog_errno(ret);
5796                                 goto out;
5797                         }
5798                         base = bucket_block(xbs->bucket, block_off);
5799                         vb.vb_bh = xbs->bucket->bu_bhs[block_off];
5800                         vb.vb_access = ocfs2_journal_access;
5801
5802                         if (ocfs2_meta_ecc(osb)) {
5803                                 /*create parameters for ocfs2_post_refcount. */
5804                                 bucket = xbs->bucket;
5805                                 refcount.credits = bucket->bu_blocks;
5806                                 refcount.para = bucket;
5807                                 refcount.func =
5808                                         ocfs2_xattr_bucket_post_refcount;
5809                                 p = &refcount;
5810                         }
5811                 } else {
5812                         base = xbs->base;
5813                         vb.vb_bh = xbs->xattr_bh;
5814                         vb.vb_access = ocfs2_journal_access_xb;
5815                 }
5816         }
5817
5818         if (ocfs2_xattr_is_local(xe))
5819                 goto out;
5820
5821         vb.vb_xv = (struct ocfs2_xattr_value_root *)
5822                                 (base + name_offset + name_len);
5823
5824         ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster,
5825                                        &num_clusters, &vb.vb_xv->xr_list,
5826                                        &ext_flags);
5827         if (ret) {
5828                 mlog_errno(ret);
5829                 goto out;
5830         }
5831
5832         /*
5833          * We just need to check the 1st extent record, since we always
5834          * CoW the whole xattr. So there shouldn't be a xattr with
5835          * some REFCOUNT extent recs after the 1st one.
5836          */
5837         if (!(ext_flags & OCFS2_EXT_REFCOUNTED))
5838                 goto out;
5839
5840         ret = ocfs2_lock_refcount_tree(osb, le64_to_cpu(di->i_refcount_loc),
5841                                        1, ref_tree, &ref_root_bh);
5842         if (ret) {
5843                 mlog_errno(ret);
5844                 goto out;
5845         }
5846
5847         /*
5848          * If we are deleting the xattr or the new size will be stored inside,
5849          * cool, leave it there, the xattr truncate process will remove them
5850          * for us(it still needs the refcount tree lock and the meta, credits).
5851          * And the worse case is that every cluster truncate will split the
5852          * refcount tree, and make the original extent become 3. So we will need
5853          * 2 * cluster more extent recs at most.
5854          */
5855         if (!xi->xi_value || xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE) {
5856
5857                 ret = ocfs2_refcounted_xattr_delete_need(inode,
5858                                                          &(*ref_tree)->rf_ci,
5859                                                          ref_root_bh, vb.vb_xv,
5860                                                          meta_add, credits);
5861                 if (ret)
5862                         mlog_errno(ret);
5863                 goto out;
5864         }
5865
5866         ret = ocfs2_refcount_cow_xattr(inode, di, &vb,
5867                                        *ref_tree, ref_root_bh, 0,
5868                                        le32_to_cpu(vb.vb_xv->xr_clusters), p);
5869         if (ret)
5870                 mlog_errno(ret);
5871
5872 out:
5873         brelse(ref_root_bh);
5874         return ret;
5875 }
5876
5877 /*
5878  * Add the REFCOUNTED flags for all the extent rec in ocfs2_xattr_value_root.
5879  * The physical clusters will be added to refcount tree.
5880  */
5881 static int ocfs2_xattr_value_attach_refcount(struct inode *inode,
5882                                 struct ocfs2_xattr_value_root *xv,
5883                                 struct ocfs2_extent_tree *value_et,
5884                                 struct ocfs2_caching_info *ref_ci,
5885                                 struct buffer_head *ref_root_bh,
5886                                 struct ocfs2_cached_dealloc_ctxt *dealloc,
5887                                 struct ocfs2_post_refcount *refcount)
5888 {
5889         int ret = 0;
5890         u32 clusters = le32_to_cpu(xv->xr_clusters);
5891         u32 cpos, p_cluster, num_clusters;
5892         struct ocfs2_extent_list *el = &xv->xr_list;
5893         unsigned int ext_flags;
5894
5895         cpos = 0;
5896         while (cpos < clusters) {
5897                 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
5898                                                &num_clusters, el, &ext_flags);
5899
5900                 cpos += num_clusters;
5901                 if ((ext_flags & OCFS2_EXT_REFCOUNTED))
5902                         continue;
5903
5904                 BUG_ON(!p_cluster);
5905
5906                 ret = ocfs2_add_refcount_flag(inode, value_et,
5907                                               ref_ci, ref_root_bh,
5908                                               cpos - num_clusters,
5909                                               p_cluster, num_clusters,
5910                                               dealloc, refcount);
5911                 if (ret) {
5912                         mlog_errno(ret);
5913                         break;
5914                 }
5915         }
5916
5917         return ret;
5918 }
5919
5920 /*
5921  * Given a normal ocfs2_xattr_header, refcount all the entries which
5922  * have value stored outside.
5923  * Used for xattrs stored in inode and ocfs2_xattr_block.
5924  */
5925 static int ocfs2_xattr_attach_refcount_normal(struct inode *inode,
5926                                 struct ocfs2_xattr_value_buf *vb,
5927                                 struct ocfs2_xattr_header *header,
5928                                 struct ocfs2_caching_info *ref_ci,
5929                                 struct buffer_head *ref_root_bh,
5930                                 struct ocfs2_cached_dealloc_ctxt *dealloc)
5931 {
5932
5933         struct ocfs2_xattr_entry *xe;
5934         struct ocfs2_xattr_value_root *xv;
5935         struct ocfs2_extent_tree et;
5936         int i, ret = 0;
5937
5938         for (i = 0; i < le16_to_cpu(header->xh_count); i++) {
5939                 xe = &header->xh_entries[i];
5940
5941                 if (ocfs2_xattr_is_local(xe))
5942                         continue;
5943
5944                 xv = (struct ocfs2_xattr_value_root *)((void *)header +
5945                         le16_to_cpu(xe->xe_name_offset) +
5946                         OCFS2_XATTR_SIZE(xe->xe_name_len));
5947
5948                 vb->vb_xv = xv;
5949                 ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
5950
5951                 ret = ocfs2_xattr_value_attach_refcount(inode, xv, &et,
5952                                                         ref_ci, ref_root_bh,
5953                                                         dealloc, NULL);
5954                 if (ret) {
5955                         mlog_errno(ret);
5956                         break;
5957                 }
5958         }
5959
5960         return ret;
5961 }
5962
5963 static int ocfs2_xattr_inline_attach_refcount(struct inode *inode,
5964                                 struct buffer_head *fe_bh,
5965                                 struct ocfs2_caching_info *ref_ci,
5966                                 struct buffer_head *ref_root_bh,
5967                                 struct ocfs2_cached_dealloc_ctxt *dealloc)
5968 {
5969         struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data;
5970         struct ocfs2_xattr_header *header = (struct ocfs2_xattr_header *)
5971                                 (fe_bh->b_data + inode->i_sb->s_blocksize -
5972                                 le16_to_cpu(di->i_xattr_inline_size));
5973         struct ocfs2_xattr_value_buf vb = {
5974                 .vb_bh = fe_bh,
5975                 .vb_access = ocfs2_journal_access_di,
5976         };
5977
5978         return ocfs2_xattr_attach_refcount_normal(inode, &vb, header,
5979                                                   ref_ci, ref_root_bh, dealloc);
5980 }
5981
/*
 * Arguments bundled for the per-bucket refcount callback
 * (ocfs2_xattr_bucket_value_refcount), which receives them through an
 * opaque void pointer and forwards each field to
 * ocfs2_xattr_value_attach_refcount().
 */
struct ocfs2_xattr_tree_value_refcount_para {
        struct ocfs2_caching_info *ref_ci;      /* passed on as ref_ci */
        struct buffer_head *ref_root_bh;        /* passed on as ref_root_bh */
        struct ocfs2_cached_dealloc_ctxt *dealloc; /* passed on as dealloc */
};
5987
5988 static int ocfs2_get_xattr_tree_value_root(struct super_block *sb,
5989                                            struct ocfs2_xattr_bucket *bucket,
5990                                            int offset,
5991                                            struct ocfs2_xattr_value_root **xv,
5992                                            struct buffer_head **bh)
5993 {
5994         int ret, block_off, name_offset;
5995         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
5996         struct ocfs2_xattr_entry *xe = &xh->xh_entries[offset];
5997         void *base;
5998
5999         ret = ocfs2_xattr_bucket_get_name_value(sb,
6000                                                 bucket_xh(bucket),
6001                                                 offset,
6002                                                 &block_off,
6003                                                 &name_offset);
6004         if (ret) {
6005                 mlog_errno(ret);
6006                 goto out;
6007         }
6008
6009         base = bucket_block(bucket, block_off);
6010
6011         *xv = (struct ocfs2_xattr_value_root *)(base + name_offset +
6012                          OCFS2_XATTR_SIZE(xe->xe_name_len));
6013
6014         if (bh)
6015                 *bh = bucket->bu_bhs[block_off];
6016 out:
6017         return ret;
6018 }
6019
6020 /*
6021  * For a given xattr bucket, refcount all the entries which
6022  * have value stored outside.
6023  */
6024 static int ocfs2_xattr_bucket_value_refcount(struct inode *inode,
6025                                              struct ocfs2_xattr_bucket *bucket,
6026                                              void *para)
6027 {
6028         int i, ret = 0;
6029         struct ocfs2_extent_tree et;
6030         struct ocfs2_xattr_tree_value_refcount_para *ref =
6031                         (struct ocfs2_xattr_tree_value_refcount_para *)para;
6032         struct ocfs2_xattr_header *xh =
6033                         (struct ocfs2_xattr_header *)bucket->bu_bhs[0]->b_data;
6034         struct ocfs2_xattr_entry *xe;
6035         struct ocfs2_xattr_value_buf vb = {
6036                 .vb_access = ocfs2_journal_access,
6037         };
6038         struct ocfs2_post_refcount refcount = {
6039                 .credits = bucket->bu_blocks,
6040                 .para = bucket,
6041                 .func = ocfs2_xattr_bucket_post_refcount,
6042         };
6043         struct ocfs2_post_refcount *p = NULL;
6044
6045         /* We only need post_refcount if we support metaecc. */
6046         if (ocfs2_meta_ecc(OCFS2_SB(inode->i_sb)))
6047                 p = &refcount;
6048
6049         mlog(0, "refcount bucket %llu, count = %u\n",
6050              (unsigned long long)bucket_blkno(bucket),
6051              le16_to_cpu(xh->xh_count));
6052         for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
6053                 xe = &xh->xh_entries[i];
6054
6055                 if (ocfs2_xattr_is_local(xe))
6056                         continue;
6057
6058                 ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket, i,
6059                                                       &vb.vb_xv, &vb.vb_bh);
6060                 if (ret) {
6061                         mlog_errno(ret);
6062                         break;
6063                 }
6064
6065                 ocfs2_init_xattr_value_extent_tree(&et,
6066                                                    INODE_CACHE(inode), &vb);
6067
6068                 ret = ocfs2_xattr_value_attach_refcount(inode, vb.vb_xv,
6069                                                         &et, ref->ref_ci,
6070                                                         ref->ref_root_bh,
6071                                                         ref->dealloc, p);
6072                 if (ret) {
6073                         mlog_errno(ret);
6074                         break;
6075                 }
6076         }
6077
6078         return ret;
6079
6080 }
6081
6082 static int ocfs2_refcount_xattr_tree_rec(struct inode *inode,
6083                                      struct buffer_head *root_bh,
6084                                      u64 blkno, u32 cpos, u32 len, void *para)
6085 {
6086         return ocfs2_iterate_xattr_buckets(inode, blkno, len,
6087                                            ocfs2_xattr_bucket_value_refcount,
6088                                            para);
6089 }
6090
6091 static int ocfs2_xattr_block_attach_refcount(struct inode *inode,
6092                                 struct buffer_head *blk_bh,
6093                                 struct ocfs2_caching_info *ref_ci,
6094                                 struct buffer_head *ref_root_bh,
6095                                 struct ocfs2_cached_dealloc_ctxt *dealloc)
6096 {
6097         int ret = 0;
6098         struct ocfs2_xattr_block *xb =
6099                                 (struct ocfs2_xattr_block *)blk_bh->b_data;
6100
6101         if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
6102                 struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header;
6103                 struct ocfs2_xattr_value_buf vb = {
6104                         .vb_bh = blk_bh,
6105                         .vb_access = ocfs2_journal_access_xb,
6106                 };
6107
6108                 ret = ocfs2_xattr_attach_refcount_normal(inode, &vb, header,
6109                                                          ref_ci, ref_root_bh,
6110                                                          dealloc);
6111         } else {
6112                 struct ocfs2_xattr_tree_value_refcount_para para = {
6113                         .ref_ci = ref_ci,
6114                         .ref_root_bh = ref_root_bh,
6115                         .dealloc = dealloc,
6116                 };
6117
6118                 ret = ocfs2_iterate_xattr_index_block(inode, blk_bh,
6119                                                 ocfs2_refcount_xattr_tree_rec,
6120                                                 &para);
6121         }
6122
6123         return ret;
6124 }
6125
6126 int ocfs2_xattr_attach_refcount_tree(struct inode *inode,
6127                                      struct buffer_head *fe_bh,
6128                                      struct ocfs2_caching_info *ref_ci,
6129                                      struct buffer_head *ref_root_bh,
6130                                      struct ocfs2_cached_dealloc_ctxt *dealloc)
6131 {
6132         int ret = 0;
6133         struct ocfs2_inode_info *oi = OCFS2_I(inode);
6134         struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data;
6135         struct buffer_head *blk_bh = NULL;
6136
6137         if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
6138                 ret = ocfs2_xattr_inline_attach_refcount(inode, fe_bh,
6139                                                          ref_ci, ref_root_bh,
6140                                                          dealloc);
6141                 if (ret) {
6142                         mlog_errno(ret);
6143                         goto out;
6144                 }
6145         }
6146
6147         if (!di->i_xattr_loc)
6148                 goto out;
6149
6150         ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
6151                                      &blk_bh);
6152         if (ret < 0) {
6153                 mlog_errno(ret);
6154                 goto out;
6155         }
6156
6157         ret = ocfs2_xattr_block_attach_refcount(inode, blk_bh, ref_ci,
6158                                                 ref_root_bh, dealloc);
6159         if (ret)
6160                 mlog_errno(ret);
6161
6162         brelse(blk_bh);
6163 out:
6164
6165         return ret;
6166 }
6167
/*
 * Predicate deciding whether an xattr entry is carried over by a reflink.
 * ocfs2_reflink_xattr_header() removes the entry from the new header when
 * this returns 0.
 */
typedef int (should_xattr_reflinked)(struct ocfs2_xattr_entry *xe);
/*
 * Store the information we need in xattr reflink.
 * old_bh and new_bh are inode bh for the old and new inode.
 */
struct ocfs2_xattr_reflink {
        struct inode *old_inode;
        struct inode *new_inode;
        struct buffer_head *old_bh;
        struct buffer_head *new_bh;
        /*
         * ref_ci, ref_root_bh and dealloc are handed to
         * ocfs2_increase_refcount() for every extent that gets shared.
         */
        struct ocfs2_caching_info *ref_ci;
        struct buffer_head *ref_root_bh;
        struct ocfs2_cached_dealloc_ctxt *dealloc;
        /* Optional filter; when NULL no xattr entry is filtered out. */
        should_xattr_reflinked *xattr_reflinked;
};
6183
/*
 * Given a xattr header and xe offset,
 * return the proper xv and the corresponding bh.
 * xattr in inode, block and xattr tree have different implementations.
 *
 * Callers in this file pass NULL for ret_bh when they only need xv, and
 * treat a non-zero return value as an error.  para is opaque data that is
 * passed through to the implementation.
 */
typedef int (get_xattr_value_root)(struct super_block *sb,
                                   struct buffer_head *bh,
                                   struct ocfs2_xattr_header *xh,
                                   int offset,
                                   struct ocfs2_xattr_value_root **xv,
                                   struct buffer_head **ret_bh,
                                   void *para);
6196
6197 /*
6198  * Calculate all the xattr value root metadata stored in this xattr header and
6199  * credits we need if we create them from the scratch.
6200  * We use get_xattr_value_root so that all types of xattr container can use it.
6201  */
6202 static int ocfs2_value_metas_in_xattr_header(struct super_block *sb,
6203                                              struct buffer_head *bh,
6204                                              struct ocfs2_xattr_header *xh,
6205                                              int *metas, int *credits,
6206                                              int *num_recs,
6207                                              get_xattr_value_root *func,
6208                                              void *para)
6209 {
6210         int i, ret = 0;
6211         struct ocfs2_xattr_value_root *xv;
6212         struct ocfs2_xattr_entry *xe;
6213
6214         for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
6215                 xe = &xh->xh_entries[i];
6216                 if (ocfs2_xattr_is_local(xe))
6217                         continue;
6218
6219                 ret = func(sb, bh, xh, i, &xv, NULL, para);
6220                 if (ret) {
6221                         mlog_errno(ret);
6222                         break;
6223                 }
6224
6225                 *metas += le16_to_cpu(xv->xr_list.l_tree_depth) *
6226                           le16_to_cpu(xv->xr_list.l_next_free_rec);
6227
6228                 *credits += ocfs2_calc_extend_credits(sb,
6229                                                 &def_xv.xv.xr_list,
6230                                                 le32_to_cpu(xv->xr_clusters));
6231
6232                 /*
6233                  * If the value is a tree with depth > 1, We don't go deep
6234                  * to the extent block, so just calculate a maximum record num.
6235                  */
6236                 if (!xv->xr_list.l_tree_depth)
6237                         *num_recs += le16_to_cpu(xv->xr_list.l_next_free_rec);
6238                 else
6239                         *num_recs += ocfs2_clusters_for_bytes(sb,
6240                                                               XATTR_SIZE_MAX);
6241         }
6242
6243         return ret;
6244 }
6245
6246 /* Used by xattr inode and block to return the right xv and buffer_head. */
6247 static int ocfs2_get_xattr_value_root(struct super_block *sb,
6248                                       struct buffer_head *bh,
6249                                       struct ocfs2_xattr_header *xh,
6250                                       int offset,
6251                                       struct ocfs2_xattr_value_root **xv,
6252                                       struct buffer_head **ret_bh,
6253                                       void *para)
6254 {
6255         struct ocfs2_xattr_entry *xe = &xh->xh_entries[offset];
6256
6257         *xv = (struct ocfs2_xattr_value_root *)((void *)xh +
6258                 le16_to_cpu(xe->xe_name_offset) +
6259                 OCFS2_XATTR_SIZE(xe->xe_name_len));
6260
6261         if (ret_bh)
6262                 *ret_bh = bh;
6263
6264         return 0;
6265 }
6266
6267 /*
6268  * Lock the meta_ac and caculate how much credits we need for reflink xattrs.
6269  * It is only used for inline xattr and xattr block.
6270  */
6271 static int ocfs2_reflink_lock_xattr_allocators(struct ocfs2_super *osb,
6272                                         struct ocfs2_xattr_header *xh,
6273                                         struct buffer_head *ref_root_bh,
6274                                         int *credits,
6275                                         struct ocfs2_alloc_context **meta_ac)
6276 {
6277         int ret, meta_add = 0, num_recs = 0;
6278         struct ocfs2_refcount_block *rb =
6279                         (struct ocfs2_refcount_block *)ref_root_bh->b_data;
6280
6281         *credits = 0;
6282
6283         ret = ocfs2_value_metas_in_xattr_header(osb->sb, NULL, xh,
6284                                                 &meta_add, credits, &num_recs,
6285                                                 ocfs2_get_xattr_value_root,
6286                                                 NULL);
6287         if (ret) {
6288                 mlog_errno(ret);
6289                 goto out;
6290         }
6291
6292         /*
6293          * We need to add/modify num_recs in refcount tree, so just calculate
6294          * an approximate number we need for refcount tree change.
6295          * Sometimes we need to split the tree, and after split,  half recs
6296          * will be moved to the new block, and a new block can only provide
6297          * half number of recs. So we multiple new blocks by 2.
6298          */
6299         num_recs = num_recs / ocfs2_refcount_recs_per_rb(osb->sb) * 2;
6300         meta_add += num_recs;
6301         *credits += num_recs + num_recs * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS;
6302         if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL)
6303                 *credits += le16_to_cpu(rb->rf_list.l_tree_depth) *
6304                             le16_to_cpu(rb->rf_list.l_next_free_rec) + 1;
6305         else
6306                 *credits += 1;
6307
6308         ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add, meta_ac);
6309         if (ret)
6310                 mlog_errno(ret);
6311
6312 out:
6313         return ret;
6314 }
6315
/*
 * Given a xattr header, reflink all the xattrs in this container.
 * It can be used for inode, block and bucket.
 *
 * NOTE:
 * Before we call this function, the caller has memcpy the xattr in
 * old_xh to the new_xh.
 *
 * If args.xattr_reflinked is set, call it to decide whether the xe should
 * be reflinked or not. If not, remove it from the new xattr header.
 *
 * func locates the value root of an entry in either header (para is passed
 * through to it); vb is filled in and used to set up the extent tree of a
 * new value whose source has a non-zero tree depth.  Returns 0 on success
 * or the first error returned by a helper.
 */
static int ocfs2_reflink_xattr_header(handle_t *handle,
                                      struct ocfs2_xattr_reflink *args,
                                      struct buffer_head *old_bh,
                                      struct ocfs2_xattr_header *xh,
                                      struct buffer_head *new_bh,
                                      struct ocfs2_xattr_header *new_xh,
                                      struct ocfs2_xattr_value_buf *vb,
                                      struct ocfs2_alloc_context *meta_ac,
                                      get_xattr_value_root *func,
                                      void *para)
{
        int ret = 0, i, j;
        struct super_block *sb = args->old_inode->i_sb;
        struct buffer_head *value_bh;
        struct ocfs2_xattr_entry *xe, *last;
        struct ocfs2_xattr_value_root *xv, *new_xv;
        struct ocfs2_extent_tree data_et;
        u32 clusters, cpos, p_cluster, num_clusters;
        unsigned int ext_flags = 0;

        mlog(0, "reflink xattr in container %llu, count = %u\n",
             (unsigned long long)old_bh->b_blocknr, le16_to_cpu(xh->xh_count));

        /* Slot just past the entries of the new header as it is on entry. */
        last = &new_xh->xh_entries[le16_to_cpu(new_xh->xh_count)];
        /* i walks the old header, j the matching slot in the new header. */
        for (i = 0, j = 0; i < le16_to_cpu(xh->xh_count); i++, j++) {
                xe = &xh->xh_entries[i];

                if (args->xattr_reflinked && !args->xattr_reflinked(xe)) {
                        /* Drop the entry from the new header only. */
                        xe = &new_xh->xh_entries[j];

                        le16_add_cpu(&new_xh->xh_count, -1);
                        if (new_xh->xh_count) {
                                memmove(xe, xe + 1,
                                        (void *)last - (void *)xe);
                                memset(last, 0,
                                       sizeof(struct ocfs2_xattr_entry));
                        }

                        /*
                         * We don't want j to increase in the next round since
                         * it is already moved ahead.
                         */
                        j--;
                        continue;
                }

                /* Local values have no value root to reflink. */
                if (ocfs2_xattr_is_local(xe))
                        continue;

                ret = func(sb, old_bh, xh, i, &xv, NULL, para);
                if (ret) {
                        mlog_errno(ret);
                        break;
                }

                ret = func(sb, new_bh, new_xh, j, &new_xv, &value_bh, para);
                if (ret) {
                        mlog_errno(ret);
                        break;
                }

                /*
                 * For the xattr which has l_tree_depth = 0, all the extent
                 * recs have already been copied to the new xh with the
                 * appropriate OCFS2_EXT_REFCOUNTED flag; we just need to
                 * increase the refcount in the refcount tree.
                 *
                 * For the xattr which has l_tree_depth > 0, we need
                 * to initialize it to the empty default value root,
                 * and then insert the extents one by one.
                 */
                if (xv->xr_list.l_tree_depth) {
                        memcpy(new_xv, &def_xv, sizeof(def_xv));
                        vb->vb_xv = new_xv;
                        vb->vb_bh = value_bh;
                        ocfs2_init_xattr_value_extent_tree(&data_et,
                                        INODE_CACHE(args->new_inode), vb);
                }

                /* Walk the old value extent by extent. */
                clusters = le32_to_cpu(xv->xr_clusters);
                cpos = 0;
                while (cpos < clusters) {
                        ret = ocfs2_xattr_get_clusters(args->old_inode,
                                                       cpos,
                                                       &p_cluster,
                                                       &num_clusters,
                                                       &xv->xr_list,
                                                       &ext_flags);
                        if (ret) {
                                mlog_errno(ret);
                                goto out;
                        }

                        BUG_ON(!p_cluster);

                        /* Only tree-shaped values need the extent re-inserted. */
                        if (xv->xr_list.l_tree_depth) {
                                ret = ocfs2_insert_extent(handle,
                                                &data_et, cpos,
                                                ocfs2_clusters_to_blocks(
                                                        args->old_inode->i_sb,
                                                        p_cluster),
                                                num_clusters, ext_flags,
                                                meta_ac);
                                if (ret) {
                                        mlog_errno(ret);
                                        goto out;
                                }
                        }

                        ret = ocfs2_increase_refcount(handle, args->ref_ci,
                                                      args->ref_root_bh,
                                                      p_cluster, num_clusters,
                                                      meta_ac, args->dealloc);
                        if (ret) {
                                mlog_errno(ret);
                                goto out;
                        }

                        cpos += num_clusters;
                }
        }

out:
        return ret;
}
6452
/*
 * Reflink the inline xattrs of args->old_inode into args->new_inode.
 *
 * The inline xattr area occupies the last i_xattr_inline_size bytes of the
 * old inode block; it is copied to the same offset in the new inode block
 * inside one transaction, the values are reflinked with
 * ocfs2_reflink_xattr_header(), and the new inode gets OCFS2_HAS_XATTR_FL
 * and OCFS2_INLINE_XATTR_FL.  Returns 0 on success or a helper's error.
 */
static int ocfs2_reflink_xattr_inline(struct ocfs2_xattr_reflink *args)
{
        int ret = 0, credits = 0;
        handle_t *handle;
        struct ocfs2_super *osb = OCFS2_SB(args->old_inode->i_sb);
        struct ocfs2_dinode *di = (struct ocfs2_dinode *)args->old_bh->b_data;
        int inline_size = le16_to_cpu(di->i_xattr_inline_size);
        /* The inline xattr header starts inline_size bytes before block end. */
        int header_off = osb->sb->s_blocksize - inline_size;
        struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *)
                                        (args->old_bh->b_data + header_off);
        struct ocfs2_xattr_header *new_xh = (struct ocfs2_xattr_header *)
                                        (args->new_bh->b_data + header_off);
        struct ocfs2_alloc_context *meta_ac = NULL;
        struct ocfs2_inode_info *new_oi;
        struct ocfs2_dinode *new_di;
        struct ocfs2_xattr_value_buf vb = {
                .vb_bh = args->new_bh,
                .vb_access = ocfs2_journal_access_di,
        };

        ret = ocfs2_reflink_lock_xattr_allocators(osb, xh, args->ref_root_bh,
                                                  &credits, &meta_ac);
        if (ret) {
                mlog_errno(ret);
                goto out;
        }

        handle = ocfs2_start_trans(osb, credits);
        if (IS_ERR(handle)) {
                ret = PTR_ERR(handle);
                mlog_errno(ret);
                goto out;
        }

        /* Get journal write access before touching the new inode block. */
        ret = ocfs2_journal_access_di(handle, INODE_CACHE(args->new_inode),
                                      args->new_bh, OCFS2_JOURNAL_ACCESS_WRITE);
        if (ret) {
                mlog_errno(ret);
                goto out_commit;
        }

        /* Copy the whole inline xattr area; the values are fixed up below. */
        memcpy(args->new_bh->b_data + header_off,
               args->old_bh->b_data + header_off, inline_size);

        new_di = (struct ocfs2_dinode *)args->new_bh->b_data;
        new_di->i_xattr_inline_size = cpu_to_le16(inline_size);

        ret = ocfs2_reflink_xattr_header(handle, args, args->old_bh, xh,
                                         args->new_bh, new_xh, &vb, meta_ac,
                                         ocfs2_get_xattr_value_root, NULL);
        if (ret) {
                mlog_errno(ret);
                goto out_commit;
        }

        /* Update in-memory and on-disk feature flags together under ip_lock. */
        new_oi = OCFS2_I(args->new_inode);
        spin_lock(&new_oi->ip_lock);
        new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL | OCFS2_INLINE_XATTR_FL;
        new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features);
        spin_unlock(&new_oi->ip_lock);

        ocfs2_journal_dirty(handle, args->new_bh);

out_commit:
        ocfs2_commit_trans(osb, handle);

out:
        if (meta_ac)
                ocfs2_free_alloc_context(meta_ac);
        return ret;
}
6524
6525 static int ocfs2_create_empty_xattr_block(struct inode *inode,
6526                                           struct buffer_head *fe_bh,
6527                                           struct buffer_head **ret_bh,
6528                                           int indexed)
6529 {
6530         int ret;
6531         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
6532         struct ocfs2_xattr_set_ctxt ctxt;
6533
6534         memset(&ctxt, 0, sizeof(ctxt));
6535         ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &ctxt.meta_ac);
6536         if (ret < 0) {
6537                 mlog_errno(ret);
6538                 return ret;
6539         }
6540
6541         ctxt.handle = ocfs2_start_trans(osb, OCFS2_XATTR_BLOCK_CREATE_CREDITS);
6542         if (IS_ERR(ctxt.handle)) {
6543                 ret = PTR_ERR(ctxt.handle);
6544                 mlog_errno(ret);
6545                 goto out;
6546         }
6547
6548         mlog(0, "create new xattr block for inode %llu, index = %d\n",
6549              (unsigned long long)fe_bh->b_blocknr, indexed);
6550         ret = ocfs2_create_xattr_block(inode, fe_bh, &ctxt, indexed,
6551                                        ret_bh);
6552         if (ret)
6553                 mlog_errno(ret);
6554
6555         ocfs2_commit_trans(osb, ctxt.handle);
6556 out:
6557         ocfs2_free_alloc_context(ctxt.meta_ac);
6558         return ret;
6559 }
6560
/*
 * Reflink the xattrs held in the header of xattr block blk_bh into the
 * already existing block new_blk_bh of the new inode.
 *
 * Everything from xb_attrs.xb_header to the end of the block is copied,
 * the values are reflinked with ocfs2_reflink_xattr_header(), and
 * OCFS2_HAS_XATTR_FL is set on the new inode if it was not set before.
 * Returns 0 on success or a helper's error.
 */
static int ocfs2_reflink_xattr_block(struct ocfs2_xattr_reflink *args,
                                     struct buffer_head *blk_bh,
                                     struct buffer_head *new_blk_bh)
{
        int ret = 0, credits = 0;
        handle_t *handle;
        struct ocfs2_inode_info *new_oi = OCFS2_I(args->new_inode);
        struct ocfs2_dinode *new_di;
        struct ocfs2_super *osb = OCFS2_SB(args->new_inode->i_sb);
        int header_off = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
        struct ocfs2_xattr_block *xb =
                        (struct ocfs2_xattr_block *)blk_bh->b_data;
        struct ocfs2_xattr_header *xh = &xb->xb_attrs.xb_header;
        struct ocfs2_xattr_block *new_xb =
                        (struct ocfs2_xattr_block *)new_blk_bh->b_data;
        struct ocfs2_xattr_header *new_xh = &new_xb->xb_attrs.xb_header;
        /*
         * NOTE(review): meta_ac is not initialised here; the "out" path
         * relies on ocfs2_reflink_lock_xattr_allocators() having stored a
         * valid context whenever it returns 0 -- confirm.
         */
        struct ocfs2_alloc_context *meta_ac;
        struct ocfs2_xattr_value_buf vb = {
                .vb_bh = new_blk_bh,
                .vb_access = ocfs2_journal_access_xb,
        };

        ret = ocfs2_reflink_lock_xattr_allocators(osb, xh, args->ref_root_bh,
                                                  &credits, &meta_ac);
        if (ret) {
                mlog_errno(ret);
                return ret;
        }

        /* One more credits in case we need to add xattr flags in new inode. */
        handle = ocfs2_start_trans(osb, credits + 1);
        if (IS_ERR(handle)) {
                ret = PTR_ERR(handle);
                mlog_errno(ret);
                goto out;
        }

        /* The inode block is only modified when the flag has to be added. */
        if (!(new_oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) {
                ret = ocfs2_journal_access_di(handle,
                                              INODE_CACHE(args->new_inode),
                                              args->new_bh,
                                              OCFS2_JOURNAL_ACCESS_WRITE);
                if (ret) {
                        mlog_errno(ret);
                        goto out_commit;
                }
        }

        ret = ocfs2_journal_access_xb(handle, INODE_CACHE(args->new_inode),
                                      new_blk_bh, OCFS2_JOURNAL_ACCESS_WRITE);
        if (ret) {
                mlog_errno(ret);
                goto out_commit;
        }

        /* Copy header, entries and values; the block's own head is kept. */
        memcpy(new_blk_bh->b_data + header_off, blk_bh->b_data + header_off,
               osb->sb->s_blocksize - header_off);

        ret = ocfs2_reflink_xattr_header(handle, args, blk_bh, xh,
                                         new_blk_bh, new_xh, &vb, meta_ac,
                                         ocfs2_get_xattr_value_root, NULL);
        if (ret) {
                mlog_errno(ret);
                goto out_commit;
        }

        ocfs2_journal_dirty(handle, new_blk_bh);

        if (!(new_oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) {
                new_di = (struct ocfs2_dinode *)args->new_bh->b_data;
                /* Keep in-memory and on-disk flags in sync under ip_lock. */
                spin_lock(&new_oi->ip_lock);
                new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL;
                new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features);
                spin_unlock(&new_oi->ip_lock);

                ocfs2_journal_dirty(handle, args->new_bh);
        }

out_commit:
        ocfs2_commit_trans(osb, handle);

out:
        ocfs2_free_alloc_context(meta_ac);
        return ret;
}
6646
/*
 * State carried as the opaque "para" argument while reflinking an xattr
 * tree.
 */
struct ocfs2_reflink_xattr_tree_args {
        /* The overall reflink description (inodes, refcount tree, ...). */
        struct ocfs2_xattr_reflink *reflink;
        /* presumably the old and new xattr root blocks -- TODO confirm */
        struct buffer_head *old_blk_bh;
        struct buffer_head *new_blk_bh;
        /*
         * Bucket currently read from the old tree and the bucket being
         * built for the new tree; ocfs2_get_reflink_xattr_value_root()
         * tells them apart by comparing against old_bucket->bu_bhs[0].
         */
        struct ocfs2_xattr_bucket *old_bucket;
        struct ocfs2_xattr_bucket *new_bucket;
};
6654
6655 /*
6656  * NOTE:
6657  * We have to handle the case that both old bucket and new bucket
6658  * will call this function to get the right ret_bh.
6659  * So The caller must give us the right bh.
6660  */
6661 static int ocfs2_get_reflink_xattr_value_root(struct super_block *sb,
6662                                         struct buffer_head *bh,
6663                                         struct ocfs2_xattr_header *xh,
6664                                         int offset,
6665                                         struct ocfs2_xattr_value_root **xv,
6666                                         struct buffer_head **ret_bh,
6667                                         void *para)
6668 {
6669         struct ocfs2_reflink_xattr_tree_args *args =
6670                         (struct ocfs2_reflink_xattr_tree_args *)para;
6671         struct ocfs2_xattr_bucket *bucket;
6672
6673         if (bh == args->old_bucket->bu_bhs[0])
6674                 bucket = args->old_bucket;
6675         else
6676                 bucket = args->new_bucket;
6677
6678         return ocfs2_get_xattr_tree_value_root(sb, bucket, offset,
6679                                                xv, ret_bh);
6680 }
6681
/*
 * Running totals accumulated by ocfs2_calc_value_tree_metas() over the
 * buckets of one xattr extent record.
 */
struct ocfs2_value_tree_metas {
        int num_metas;  /* metadata blocks to reserve */
        int credits;    /* journal credits */
        int num_recs;   /* extent records counted for the refcount tree */
};
6687
/*
 * get_xattr_value_root implementation for a single bucket: para is the
 * bucket itself, and the lookup is delegated to
 * ocfs2_get_xattr_tree_value_root().  bh and xh are not used.
 */
static int ocfs2_value_tree_metas_in_bucket(struct super_block *sb,
                                        struct buffer_head *bh,
                                        struct ocfs2_xattr_header *xh,
                                        int offset,
                                        struct ocfs2_xattr_value_root **xv,
                                        struct buffer_head **ret_bh,
                                        void *para)
{
        struct ocfs2_xattr_bucket *bkt = para;
        int status;

        status = ocfs2_get_xattr_tree_value_root(sb, bkt, offset,
                                                 xv, ret_bh);

        return status;
}
6702
6703 static int ocfs2_calc_value_tree_metas(struct inode *inode,
6704                                       struct ocfs2_xattr_bucket *bucket,
6705                                       void *para)
6706 {
6707         struct ocfs2_value_tree_metas *metas =
6708                         (struct ocfs2_value_tree_metas *)para;
6709         struct ocfs2_xattr_header *xh =
6710                         (struct ocfs2_xattr_header *)bucket->bu_bhs[0]->b_data;
6711
6712         /* Add the credits for this bucket first. */
6713         metas->credits += bucket->bu_blocks;
6714         return ocfs2_value_metas_in_xattr_header(inode->i_sb, bucket->bu_bhs[0],
6715                                         xh, &metas->num_metas,
6716                                         &metas->credits, &metas->num_recs,
6717                                         ocfs2_value_tree_metas_in_bucket,
6718                                         bucket);
6719 }
6720
6721 /*
6722  * Given a xattr extent rec starting from blkno and having len clusters,
6723  * iterate all the buckets calculate how much metadata we need for reflinking
6724  * all the ocfs2_xattr_value_root and lock the allocators accordingly.
6725  */
6726 static int ocfs2_lock_reflink_xattr_rec_allocators(
6727                                 struct ocfs2_reflink_xattr_tree_args *args,
6728                                 struct ocfs2_extent_tree *xt_et,
6729                                 u64 blkno, u32 len, int *credits,
6730                                 struct ocfs2_alloc_context **meta_ac,
6731                                 struct ocfs2_alloc_context **data_ac)
6732 {
6733         int ret, num_free_extents;
6734         struct ocfs2_value_tree_metas metas;
6735         struct ocfs2_super *osb = OCFS2_SB(args->reflink->old_inode->i_sb);
6736         struct ocfs2_refcount_block *rb;
6737
6738         memset(&metas, 0, sizeof(metas));
6739
6740         ret = ocfs2_iterate_xattr_buckets(args->reflink->old_inode, blkno, len,
6741                                           ocfs2_calc_value_tree_metas, &metas);
6742         if (ret) {
6743                 mlog_errno(ret);
6744                 goto out;
6745         }
6746
6747         *credits = metas.credits;
6748
6749         /*
6750          * Calculate we need for refcount tree change.
6751          *
6752          * We need to add/modify num_recs in refcount tree, so just calculate
6753          * an approximate number we need for refcount tree change.
6754          * Sometimes we need to split the tree, and after split,  half recs
6755          * will be moved to the new block, and a new block can only provide
6756          * half number of recs. So we multiple new blocks by 2.
6757          * In the end, we have to add credits for modifying the already
6758          * existed refcount block.
6759          */
6760         rb = (struct ocfs2_refcount_block *)args->reflink->ref_root_bh->b_data;
6761         metas.num_recs =
6762                 (metas.num_recs + ocfs2_refcount_recs_per_rb(osb->sb) - 1) /
6763                  ocfs2_refcount_recs_per_rb(osb->sb) * 2;
6764         metas.num_metas += metas.num_recs;
6765         *credits += metas.num_recs +
6766                     metas.num_recs * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS;
6767         if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL)
6768                 *credits += le16_to_cpu(rb->rf_list.l_tree_depth) *
6769                             le16_to_cpu(rb->rf_list.l_next_free_rec) + 1;
6770         else
6771                 *credits += 1;
6772
6773         /* count in the xattr tree change. */
6774         num_free_extents = ocfs2_num_free_extents(osb, xt_et);
6775         if (num_free_extents < 0) {
6776                 ret = num_free_extents;
6777                 mlog_errno(ret);
6778                 goto out;
6779         }
6780
6781         if (num_free_extents < len)
6782                 metas.num_metas += ocfs2_extend_meta_needed(xt_et->et_root_el);
6783
6784         *credits += ocfs2_calc_extend_credits(osb->sb,
6785                                               xt_et->et_root_el, len);
6786
6787         if (metas.num_metas) {
6788                 ret = ocfs2_reserve_new_metadata_blocks(osb, metas.num_metas,
6789                                                         meta_ac);
6790                 if (ret) {
6791                         mlog_errno(ret);
6792                         goto out;
6793                 }
6794         }
6795
6796         if (len) {
6797                 ret = ocfs2_reserve_clusters(osb, len, data_ac);
6798                 if (ret)
6799                         mlog_errno(ret);
6800         }
6801 out:
6802         if (ret) {
6803                 if (*meta_ac) {
6804                         ocfs2_free_alloc_context(*meta_ac);
6805                         meta_ac = NULL;
6806                 }
6807         }
6808
6809         return ret;
6810 }
6811
6812 static int ocfs2_reflink_xattr_buckets(handle_t *handle,
6813                                 u64 blkno, u64 new_blkno, u32 clusters,
6814                                 struct ocfs2_alloc_context *meta_ac,
6815                                 struct ocfs2_alloc_context *data_ac,
6816                                 struct ocfs2_reflink_xattr_tree_args *args)
6817 {
6818         int i, j, ret = 0;
6819         struct super_block *sb = args->reflink->old_inode->i_sb;
6820         u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(sb));
6821         u32 num_buckets = clusters * bpc;
6822         int bpb = args->old_bucket->bu_blocks;
6823         struct ocfs2_xattr_value_buf vb = {
6824                 .vb_access = ocfs2_journal_access,
6825         };
6826
6827         for (i = 0; i < num_buckets; i++, blkno += bpb, new_blkno += bpb) {
6828                 ret = ocfs2_read_xattr_bucket(args->old_bucket, blkno);
6829                 if (ret) {
6830                         mlog_errno(ret);
6831                         break;
6832                 }
6833
6834                 ret = ocfs2_init_xattr_bucket(args->new_bucket, new_blkno);
6835                 if (ret) {
6836                         mlog_errno(ret);
6837                         break;
6838                 }
6839
6840                 /*
6841                  * The real bucket num in this series of blocks is stored
6842                  * in the 1st bucket.
6843                  */
6844                 if (i == 0)
6845                         num_buckets = le16_to_cpu(
6846                                 bucket_xh(args->old_bucket)->xh_num_buckets);
6847
6848                 ret = ocfs2_xattr_bucket_journal_access(handle,
6849                                                 args->new_bucket,
6850                                                 OCFS2_JOURNAL_ACCESS_CREATE);
6851                 if (ret) {
6852                         mlog_errno(ret);
6853                         break;
6854                 }
6855
6856                 for (j = 0; j < bpb; j++)
6857                         memcpy(bucket_block(args->new_bucket, j),
6858                                bucket_block(args->old_bucket, j),
6859                                sb->s_blocksize);
6860
6861                 ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket);
6862
6863                 ret = ocfs2_reflink_xattr_header(handle, args->reflink,
6864                                         args->old_bucket->bu_bhs[0],
6865                                         bucket_xh(args->old_bucket),
6866                                         args->new_bucket->bu_bhs[0],
6867                                         bucket_xh(args->new_bucket),
6868                                         &vb, meta_ac,
6869                                         ocfs2_get_reflink_xattr_value_root,
6870                                         args);
6871                 if (ret) {
6872                         mlog_errno(ret);
6873                         break;
6874                 }
6875
6876                 /*
6877                  * Re-access and dirty the bucket to calculate metaecc.
6878                  * Because we may extend the transaction in reflink_xattr_header
6879                  * which will let the already accessed block gone.
6880                  */
6881                 ret = ocfs2_xattr_bucket_journal_access(handle,
6882                                                 args->new_bucket,
6883                                                 OCFS2_JOURNAL_ACCESS_WRITE);
6884                 if (ret) {
6885                         mlog_errno(ret);
6886                         break;
6887                 }
6888
6889                 ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket);
6890                 ocfs2_xattr_bucket_relse(args->old_bucket);
6891                 ocfs2_xattr_bucket_relse(args->new_bucket);
6892         }
6893
6894         ocfs2_xattr_bucket_relse(args->old_bucket);
6895         ocfs2_xattr_bucket_relse(args->new_bucket);
6896         return ret;
6897 }
6898 /*
6899  * Create the same xattr extent record in the new inode's xattr tree.
6900  */
6901 static int ocfs2_reflink_xattr_rec(struct inode *inode,
6902                                    struct buffer_head *root_bh,
6903                                    u64 blkno,
6904                                    u32 cpos,
6905                                    u32 len,
6906                                    void *para)
6907 {
6908         int ret, credits = 0;
6909         u32 p_cluster, num_clusters;
6910         u64 new_blkno;
6911         handle_t *handle;
6912         struct ocfs2_reflink_xattr_tree_args *args =
6913                         (struct ocfs2_reflink_xattr_tree_args *)para;
6914         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
6915         struct ocfs2_alloc_context *meta_ac = NULL;
6916         struct ocfs2_alloc_context *data_ac = NULL;
6917         struct ocfs2_extent_tree et;
6918
6919         ocfs2_init_xattr_tree_extent_tree(&et,
6920                                           INODE_CACHE(args->reflink->new_inode),
6921                                           args->new_blk_bh);
6922
6923         ret = ocfs2_lock_reflink_xattr_rec_allocators(args, &et, blkno,
6924                                                       len, &credits,
6925                                                       &meta_ac, &data_ac);
6926         if (ret) {
6927                 mlog_errno(ret);
6928                 goto out;
6929         }
6930
6931         handle = ocfs2_start_trans(osb, credits);
6932         if (IS_ERR(handle)) {
6933                 ret = PTR_ERR(handle);
6934                 mlog_errno(ret);
6935                 goto out;
6936         }
6937
6938         ret = ocfs2_claim_clusters(osb, handle, data_ac,
6939                                    len, &p_cluster, &num_clusters);
6940         if (ret) {
6941                 mlog_errno(ret);
6942                 goto out_commit;
6943         }
6944
6945         new_blkno = ocfs2_clusters_to_blocks(osb->sb, p_cluster);
6946
6947         mlog(0, "reflink xattr buckets %llu to %llu, len %u\n",
6948              (unsigned long long)blkno, (unsigned long long)new_blkno, len);
6949         ret = ocfs2_reflink_xattr_buckets(handle, blkno, new_blkno, len,
6950                                           meta_ac, data_ac, args);
6951         if (ret) {
6952                 mlog_errno(ret);
6953                 goto out_commit;
6954         }
6955
6956         mlog(0, "insert new xattr extent rec start %llu len %u to %u\n",
6957              (unsigned long long)new_blkno, len, cpos);
6958         ret = ocfs2_insert_extent(handle, &et, cpos, new_blkno,
6959                                   len, 0, meta_ac);
6960         if (ret)
6961                 mlog_errno(ret);
6962
6963 out_commit:
6964         ocfs2_commit_trans(osb, handle);
6965
6966 out:
6967         if (meta_ac)
6968                 ocfs2_free_alloc_context(meta_ac);
6969         if (data_ac)
6970                 ocfs2_free_alloc_context(data_ac);
6971         return ret;
6972 }
6973
6974 /*
6975  * Create reflinked xattr buckets.
6976  * We will add bucket one by one, and refcount all the xattrs in the bucket
6977  * if they are stored outside.
6978  */
6979 static int ocfs2_reflink_xattr_tree(struct ocfs2_xattr_reflink *args,
6980                                     struct buffer_head *blk_bh,
6981                                     struct buffer_head *new_blk_bh)
6982 {
6983         int ret;
6984         struct ocfs2_reflink_xattr_tree_args para;
6985
6986         memset(&para, 0, sizeof(para));
6987         para.reflink = args;
6988         para.old_blk_bh = blk_bh;
6989         para.new_blk_bh = new_blk_bh;
6990
6991         para.old_bucket = ocfs2_xattr_bucket_new(args->old_inode);
6992         if (!para.old_bucket) {
6993                 mlog_errno(-ENOMEM);
6994                 return -ENOMEM;
6995         }
6996
6997         para.new_bucket = ocfs2_xattr_bucket_new(args->new_inode);
6998         if (!para.new_bucket) {
6999                 ret = -ENOMEM;
7000                 mlog_errno(ret);
7001                 goto out;
7002         }
7003
7004         ret = ocfs2_iterate_xattr_index_block(args->old_inode, blk_bh,
7005                                               ocfs2_reflink_xattr_rec,
7006                                               &para);
7007         if (ret)
7008                 mlog_errno(ret);
7009
7010 out:
7011         ocfs2_xattr_bucket_free(para.old_bucket);
7012         ocfs2_xattr_bucket_free(para.new_bucket);
7013         return ret;
7014 }
7015
7016 static int ocfs2_reflink_xattr_in_block(struct ocfs2_xattr_reflink *args,
7017                                         struct buffer_head *blk_bh)
7018 {
7019         int ret, indexed = 0;
7020         struct buffer_head *new_blk_bh = NULL;
7021         struct ocfs2_xattr_block *xb =
7022                         (struct ocfs2_xattr_block *)blk_bh->b_data;
7023
7024
7025         if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)
7026                 indexed = 1;
7027
7028         ret = ocfs2_create_empty_xattr_block(args->new_inode, args->new_bh,
7029                                              &new_blk_bh, indexed);
7030         if (ret) {
7031                 mlog_errno(ret);
7032                 goto out;
7033         }
7034
7035         if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED))
7036                 ret = ocfs2_reflink_xattr_block(args, blk_bh, new_blk_bh);
7037         else
7038                 ret = ocfs2_reflink_xattr_tree(args, blk_bh, new_blk_bh);
7039         if (ret)
7040                 mlog_errno(ret);
7041
7042 out:
7043         brelse(new_blk_bh);
7044         return ret;
7045 }
7046
7047 static int ocfs2_reflink_xattr_no_security(struct ocfs2_xattr_entry *xe)
7048 {
7049         int type = ocfs2_xattr_get_type(xe);
7050
7051         return type != OCFS2_XATTR_INDEX_SECURITY &&
7052                type != OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS &&
7053                type != OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT;
7054 }
7055
7056 int ocfs2_reflink_xattrs(struct inode *old_inode,
7057                          struct buffer_head *old_bh,
7058                          struct inode *new_inode,
7059                          struct buffer_head *new_bh,
7060                          bool preserve_security)
7061 {
7062         int ret;
7063         struct ocfs2_xattr_reflink args;
7064         struct ocfs2_inode_info *oi = OCFS2_I(old_inode);
7065         struct ocfs2_dinode *di = (struct ocfs2_dinode *)old_bh->b_data;
7066         struct buffer_head *blk_bh = NULL;
7067         struct ocfs2_cached_dealloc_ctxt dealloc;
7068         struct ocfs2_refcount_tree *ref_tree;
7069         struct buffer_head *ref_root_bh = NULL;
7070
7071         ret = ocfs2_lock_refcount_tree(OCFS2_SB(old_inode->i_sb),
7072                                        le64_to_cpu(di->i_refcount_loc),
7073                                        1, &ref_tree, &ref_root_bh);
7074         if (ret) {
7075                 mlog_errno(ret);
7076                 goto out;
7077         }
7078
7079         ocfs2_init_dealloc_ctxt(&dealloc);
7080
7081         args.old_inode = old_inode;
7082         args.new_inode = new_inode;
7083         args.old_bh = old_bh;
7084         args.new_bh = new_bh;
7085         args.ref_ci = &ref_tree->rf_ci;
7086         args.ref_root_bh = ref_root_bh;
7087         args.dealloc = &dealloc;
7088         if (preserve_security)
7089                 args.xattr_reflinked = NULL;
7090         else
7091                 args.xattr_reflinked = ocfs2_reflink_xattr_no_security;
7092
7093         if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
7094                 ret = ocfs2_reflink_xattr_inline(&args);
7095                 if (ret) {
7096                         mlog_errno(ret);
7097                         goto out_unlock;
7098                 }
7099         }
7100
7101         if (!di->i_xattr_loc)
7102                 goto out_unlock;
7103
7104         ret = ocfs2_read_xattr_block(old_inode, le64_to_cpu(di->i_xattr_loc),
7105                                      &blk_bh);
7106         if (ret < 0) {
7107                 mlog_errno(ret);
7108                 goto out_unlock;
7109         }
7110
7111         ret = ocfs2_reflink_xattr_in_block(&args, blk_bh);
7112         if (ret)
7113                 mlog_errno(ret);
7114
7115         brelse(blk_bh);
7116
7117 out_unlock:
7118         ocfs2_unlock_refcount_tree(OCFS2_SB(old_inode->i_sb),
7119                                    ref_tree, 1);
7120         brelse(ref_root_bh);
7121
7122         if (ocfs2_dealloc_has_cluster(&dealloc)) {
7123                 ocfs2_schedule_truncate_log_flush(OCFS2_SB(old_inode->i_sb), 1);
7124                 ocfs2_run_deallocs(OCFS2_SB(old_inode->i_sb), &dealloc);
7125         }
7126
7127 out:
7128         return ret;
7129 }
7130
7131 /*
7132  * Initialize security and acl for a already created inode.
7133  * Used for reflink a non-preserve-security file.
7134  *
7135  * It uses common api like ocfs2_xattr_set, so the caller
7136  * must not hold any lock expect i_mutex.
7137  */
7138 int ocfs2_init_security_and_acl(struct inode *dir,
7139                                 struct inode *inode)
7140 {
7141         int ret = 0;
7142         struct buffer_head *dir_bh = NULL;
7143         struct ocfs2_security_xattr_info si = {
7144                 .enable = 1,
7145         };
7146
7147         ret = ocfs2_init_security_get(inode, dir, &si);
7148         if (!ret) {
7149                 ret = ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY,
7150                                       si.name, si.value, si.value_len,
7151                                       XATTR_CREATE);
7152                 if (ret) {
7153                         mlog_errno(ret);
7154                         goto leave;
7155                 }
7156         } else if (ret != -EOPNOTSUPP) {
7157                 mlog_errno(ret);
7158                 goto leave;
7159         }
7160
7161         ret = ocfs2_inode_lock(dir, &dir_bh, 0);
7162         if (ret) {
7163                 mlog_errno(ret);
7164                 goto leave;
7165         }
7166
7167         ret = ocfs2_init_acl(NULL, inode, dir, NULL, dir_bh, NULL, NULL);
7168         if (ret)
7169                 mlog_errno(ret);
7170
7171         ocfs2_inode_unlock(dir, 0);
7172         brelse(dir_bh);
7173 leave:
7174         return ret;
7175 }
7176 /*
7177  * 'security' attributes support
7178  */
7179 static size_t ocfs2_xattr_security_list(struct dentry *dentry, char *list,
7180                                         size_t list_size, const char *name,
7181                                         size_t name_len, int type)
7182 {
7183         const size_t prefix_len = XATTR_SECURITY_PREFIX_LEN;
7184         const size_t total_len = prefix_len + name_len + 1;
7185
7186         if (list && total_len <= list_size) {
7187                 memcpy(list, XATTR_SECURITY_PREFIX, prefix_len);
7188                 memcpy(list + prefix_len, name, name_len);
7189                 list[prefix_len + name_len] = '\0';
7190         }
7191         return total_len;
7192 }
7193
7194 static int ocfs2_xattr_security_get(struct dentry *dentry, const char *name,
7195                                     void *buffer, size_t size, int type)
7196 {
7197         if (strcmp(name, "") == 0)
7198                 return -EINVAL;
7199         return ocfs2_xattr_get(dentry->d_inode, OCFS2_XATTR_INDEX_SECURITY,
7200                                name, buffer, size);
7201 }
7202
7203 static int ocfs2_xattr_security_set(struct dentry *dentry, const char *name,
7204                 const void *value, size_t size, int flags, int type)
7205 {
7206         if (strcmp(name, "") == 0)
7207                 return -EINVAL;
7208
7209         return ocfs2_xattr_set(dentry->d_inode, OCFS2_XATTR_INDEX_SECURITY,
7210                                name, value, size, flags);
7211 }
7212
7213 int ocfs2_init_security_get(struct inode *inode,
7214                             struct inode *dir,
7215                             struct ocfs2_security_xattr_info *si)
7216 {
7217         /* check whether ocfs2 support feature xattr */
7218         if (!ocfs2_supports_xattr(OCFS2_SB(dir->i_sb)))
7219                 return -EOPNOTSUPP;
7220         return security_inode_init_security(inode, dir, &si->name, &si->value,
7221                                             &si->value_len);
7222 }
7223
7224 int ocfs2_init_security_set(handle_t *handle,
7225                             struct inode *inode,
7226                             struct buffer_head *di_bh,
7227                             struct ocfs2_security_xattr_info *si,
7228                             struct ocfs2_alloc_context *xattr_ac,
7229                             struct ocfs2_alloc_context *data_ac)
7230 {
7231         return ocfs2_xattr_set_handle(handle, inode, di_bh,
7232                                      OCFS2_XATTR_INDEX_SECURITY,
7233                                      si->name, si->value, si->value_len, 0,
7234                                      xattr_ac, data_ac);
7235 }
7236
7237 struct xattr_handler ocfs2_xattr_security_handler = {
7238         .prefix = XATTR_SECURITY_PREFIX,
7239         .list   = ocfs2_xattr_security_list,
7240         .get    = ocfs2_xattr_security_get,
7241         .set    = ocfs2_xattr_security_set,
7242 };
7243
7244 /*
7245  * 'trusted' attributes support
7246  */
7247 static size_t ocfs2_xattr_trusted_list(struct dentry *dentry, char *list,
7248                                        size_t list_size, const char *name,
7249                                        size_t name_len, int type)
7250 {
7251         const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
7252         const size_t total_len = prefix_len + name_len + 1;
7253
7254         if (list && total_len <= list_size) {
7255                 memcpy(list, XATTR_TRUSTED_PREFIX, prefix_len);
7256                 memcpy(list + prefix_len, name, name_len);
7257                 list[prefix_len + name_len] = '\0';
7258         }
7259         return total_len;
7260 }
7261
7262 static int ocfs2_xattr_trusted_get(struct dentry *dentry, const char *name,
7263                 void *buffer, size_t size, int type)
7264 {
7265         if (strcmp(name, "") == 0)
7266                 return -EINVAL;
7267         return ocfs2_xattr_get(dentry->d_inode, OCFS2_XATTR_INDEX_TRUSTED,
7268                                name, buffer, size);
7269 }
7270
7271 static int ocfs2_xattr_trusted_set(struct dentry *dentry, const char *name,
7272                 const void *value, size_t size, int flags, int type)
7273 {
7274         if (strcmp(name, "") == 0)
7275                 return -EINVAL;
7276
7277         return ocfs2_xattr_set(dentry->d_inode, OCFS2_XATTR_INDEX_TRUSTED,
7278                                name, value, size, flags);
7279 }
7280
7281 struct xattr_handler ocfs2_xattr_trusted_handler = {
7282         .prefix = XATTR_TRUSTED_PREFIX,
7283         .list   = ocfs2_xattr_trusted_list,
7284         .get    = ocfs2_xattr_trusted_get,
7285         .set    = ocfs2_xattr_trusted_set,
7286 };
7287
7288 /*
7289  * 'user' attributes support
7290  */
7291 static size_t ocfs2_xattr_user_list(struct dentry *dentry, char *list,
7292                                     size_t list_size, const char *name,
7293                                     size_t name_len, int type)
7294 {
7295         const size_t prefix_len = XATTR_USER_PREFIX_LEN;
7296         const size_t total_len = prefix_len + name_len + 1;
7297         struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
7298
7299         if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
7300                 return 0;
7301
7302         if (list && total_len <= list_size) {
7303                 memcpy(list, XATTR_USER_PREFIX, prefix_len);
7304                 memcpy(list + prefix_len, name, name_len);
7305                 list[prefix_len + name_len] = '\0';
7306         }
7307         return total_len;
7308 }
7309
7310 static int ocfs2_xattr_user_get(struct dentry *dentry, const char *name,
7311                 void *buffer, size_t size, int type)
7312 {
7313         struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
7314
7315         if (strcmp(name, "") == 0)
7316                 return -EINVAL;
7317         if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
7318                 return -EOPNOTSUPP;
7319         return ocfs2_xattr_get(dentry->d_inode, OCFS2_XATTR_INDEX_USER, name,
7320                                buffer, size);
7321 }
7322
7323 static int ocfs2_xattr_user_set(struct dentry *dentry, const char *name,
7324                 const void *value, size_t size, int flags, int type)
7325 {
7326         struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
7327
7328         if (strcmp(name, "") == 0)
7329                 return -EINVAL;
7330         if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
7331                 return -EOPNOTSUPP;
7332
7333         return ocfs2_xattr_set(dentry->d_inode, OCFS2_XATTR_INDEX_USER,
7334                                name, value, size, flags);
7335 }
7336
7337 struct xattr_handler ocfs2_xattr_user_handler = {
7338         .prefix = XATTR_USER_PREFIX,
7339         .list   = ocfs2_xattr_user_list,
7340         .get    = ocfs2_xattr_user_get,
7341         .set    = ocfs2_xattr_user_set,
7342 };