]> git.karo-electronics.de Git - karo-tx-linux.git/blob - fs/ocfs2/xattr.c
ocfs2: Add a name_len field to ocfs2_xattr_info.
[karo-tx-linux.git] / fs / ocfs2 / xattr.c
1 /* -*- mode: c; c-basic-offset: 8; -*-
2  * vim: noexpandtab sw=8 ts=8 sts=0:
3  *
4  * xattr.c
5  *
6  * Copyright (C) 2004, 2008 Oracle.  All rights reserved.
7  *
8  * CREDITS:
9  * Lots of code in this file is copy from linux/fs/ext3/xattr.c.
10  * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
11  *
12  * This program is free software; you can redistribute it and/or
13  * modify it under the terms of the GNU General Public
14  * License version 2 as published by the Free Software Foundation.
15  *
16  * This program is distributed in the hope that it will be useful,
17  * but WITHOUT ANY WARRANTY; without even the implied warranty of
18  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19  * General Public License for more details.
20  */
21
22 #include <linux/capability.h>
23 #include <linux/fs.h>
24 #include <linux/types.h>
25 #include <linux/slab.h>
26 #include <linux/highmem.h>
27 #include <linux/pagemap.h>
28 #include <linux/uio.h>
29 #include <linux/sched.h>
30 #include <linux/splice.h>
31 #include <linux/mount.h>
32 #include <linux/writeback.h>
33 #include <linux/falloc.h>
34 #include <linux/sort.h>
35 #include <linux/init.h>
36 #include <linux/module.h>
37 #include <linux/string.h>
38 #include <linux/security.h>
39
40 #define MLOG_MASK_PREFIX ML_XATTR
41 #include <cluster/masklog.h>
42
43 #include "ocfs2.h"
44 #include "alloc.h"
45 #include "blockcheck.h"
46 #include "dlmglue.h"
47 #include "file.h"
48 #include "symlink.h"
49 #include "sysfile.h"
50 #include "inode.h"
51 #include "journal.h"
52 #include "ocfs2_fs.h"
53 #include "suballoc.h"
54 #include "uptodate.h"
55 #include "buffer_head_io.h"
56 #include "super.h"
57 #include "xattr.h"
58 #include "refcounttree.h"
59 #include "acl.h"
60
/*
 * An xattr value root laid out with one extent record directly behind
 * it.  sizeof() of this structure is what OCFS2_XATTR_ROOT_SIZE reports.
 */
struct ocfs2_xattr_def_value_root {
	struct ocfs2_xattr_value_root	xv;
	struct ocfs2_extent_rec		er;
};
65
/*
 * In-memory handle on one xattr bucket: the owning inode plus the
 * buffer_heads for each block of the bucket.
 */
struct ocfs2_xattr_bucket {
	/* The inode these xattrs are associated with */
	struct inode *bu_inode;

	/* The actual buffers that make up the bucket */
	struct buffer_head *bu_bhs[OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET];

	/* How many blocks make up one bucket for this filesystem */
	int bu_blocks;
};
76
/*
 * Resources handed down through the xattr modification paths.
 */
struct ocfs2_xattr_set_ctxt {
	/* Journal handle the modification runs under */
	handle_t *handle;
	/* Allocation contexts passed on to the extent tree code */
	struct ocfs2_alloc_context *meta_ac;
	struct ocfs2_alloc_context *data_ac;
	/* Collects clusters/extents released while removing value ranges */
	struct ocfs2_cached_dealloc_ctxt dealloc;
};
83
/* Bytes taken by a value root together with its single extent record */
#define OCFS2_XATTR_ROOT_SIZE	(sizeof(struct ocfs2_xattr_def_value_root))
/*
 * Values no longer than this are sized as stored alongside the name;
 * longer ones are sized as a value root (see
 * ocfs2_xattr_entry_real_size()).
 */
#define OCFS2_XATTR_INLINE_SIZE	80
/* Bytes held back from the free-space estimates below */
#define OCFS2_XATTR_HEADER_GAP	4
/* Space left for entries in the minimum inline xattr area */
#define OCFS2_XATTR_FREE_IN_IBODY	(OCFS2_MIN_XATTR_INLINE_SIZE \
					 - sizeof(struct ocfs2_xattr_header) \
					 - OCFS2_XATTR_HEADER_GAP)
/* Space left for entries in one xattr block; ptr is a struct inode * */
#define OCFS2_XATTR_FREE_IN_BLOCK(ptr)	((ptr)->i_sb->s_blocksize \
					 - sizeof(struct ocfs2_xattr_block) \
					 - sizeof(struct ocfs2_xattr_header) \
					 - OCFS2_XATTR_HEADER_GAP)
94
/* Template value root: an extent list with room for one record. */
static struct ocfs2_xattr_def_value_root def_xv = {
	.xv.xr_list.l_count = cpu_to_le16(1),
};
98
/* NULL-terminated list of every xattr handler ocfs2 provides. */
struct xattr_handler *ocfs2_xattr_handlers[] = {
	&ocfs2_xattr_user_handler,
	&ocfs2_xattr_acl_access_handler,
	&ocfs2_xattr_acl_default_handler,
	&ocfs2_xattr_trusted_handler,
	&ocfs2_xattr_security_handler,
	NULL
};
107
/*
 * Handler lookup table indexed by OCFS2_XATTR_INDEX_*.  Slots without an
 * initializer stay NULL; ocfs2_xattr_prefix() copes with that.
 */
static struct xattr_handler *ocfs2_xattr_handler_map[OCFS2_XATTR_MAX] = {
	[OCFS2_XATTR_INDEX_USER]	= &ocfs2_xattr_user_handler,
	[OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS]
					= &ocfs2_xattr_acl_access_handler,
	[OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT]
					= &ocfs2_xattr_acl_default_handler,
	[OCFS2_XATTR_INDEX_TRUSTED]	= &ocfs2_xattr_trusted_handler,
	[OCFS2_XATTR_INDEX_SECURITY]	= &ocfs2_xattr_security_handler,
};
117
/*
 * Describes one extended attribute: its namespace index, its name (with
 * the length kept alongside) and its value.
 */
struct ocfs2_xattr_info {
	int		xi_name_index;
	const char	*xi_name;
	int		xi_name_len;
	const void	*xi_value;
	size_t		xi_value_len;
};
125
/*
 * Search state passed to the xattr *_find() helpers declared further
 * down in this file.
 */
struct ocfs2_xattr_search {
	struct buffer_head *inode_bh;
	/*
	 * xattr_bh points to the buffer head of the block that holds the
	 * extended attribute.  When the attribute lives in the inode,
	 * xattr_bh is equal to inode_bh.
	 */
	struct buffer_head *xattr_bh;
	struct ocfs2_xattr_header *header;
	struct ocfs2_xattr_bucket *bucket;
	void *base;
	void *end;
	struct ocfs2_xattr_entry *here;
	int not_found;
};
140
/*
 * Operations on the storage behind a struct ocfs2_xa_loc.  Each kind of
 * backing storage supplies its own implementation through
 * ocfs2_xa_loc->xl_ops.
 */
struct ocfs2_xa_loc;
struct ocfs2_xa_loc_operations {
	/*
	 * Return a pointer to the appropriate buffer in loc->xl_storage
	 * at the given offset from loc->xl_header.
	 */
	void *(*xlo_offset_pointer)(struct ocfs2_xa_loc *loc, int offset);

	/*
	 * Remove the name+value at this location.  Do whatever is
	 * appropriate with the remaining name+value pairs.
	 */
	void (*xlo_wipe_namevalue)(struct ocfs2_xa_loc *loc);
};
156
/*
 * Describes an xattr entry location.  This is an in-memory structure
 * that tracks the on-disk structure; it is never written out itself.
 */
struct ocfs2_xa_loc {
	/* The ocfs2_xattr_header inside the on-disk storage. Not NULL. */
	struct ocfs2_xattr_header *xl_header;

	/* Bytes from xl_header to the end of the storage */
	int xl_size;

	/*
	 * The ocfs2_xattr_entry this location describes.  If this is
	 * NULL, this location describes the on-disk structure where it
	 * would have been.
	 */
	struct ocfs2_xattr_entry *xl_entry;

	/*
	 * Internal housekeeping
	 */

	/* Buffer(s) containing this entry */
	void *xl_storage;

	/* Operations on the storage backing this location */
	const struct ocfs2_xa_loc_operations *xl_ops;
};
185
/*
 * Forward declarations for helpers that are defined later in this file
 * but referenced before their definitions.
 */
static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb,
					     struct ocfs2_xattr_header *xh,
					     int index,
					     int *block_off,
					     int *new_offset);

static int ocfs2_xattr_block_find(struct inode *inode,
				  int name_index,
				  const char *name,
				  struct ocfs2_xattr_search *xs);
static int ocfs2_xattr_index_block_find(struct inode *inode,
					struct buffer_head *root_bh,
					int name_index,
					const char *name,
					struct ocfs2_xattr_search *xs);

static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
					struct buffer_head *blk_bh,
					char *buffer,
					size_t buffer_size);

static int ocfs2_xattr_create_index_block(struct inode *inode,
					  struct ocfs2_xattr_search *xs,
					  struct ocfs2_xattr_set_ctxt *ctxt);

static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
					     struct ocfs2_xattr_info *xi,
					     struct ocfs2_xattr_search *xs,
					     struct ocfs2_xattr_set_ctxt *ctxt);

/*
 * Callback type handed to ocfs2_iterate_xattr_index_block(); para is
 * the opaque pointer given to the iterator.
 */
typedef int (xattr_tree_rec_func)(struct inode *inode,
				  struct buffer_head *root_bh,
				  u64 blkno, u32 cpos, u32 len, void *para);
static int ocfs2_iterate_xattr_index_block(struct inode *inode,
					   struct buffer_head *root_bh,
					   xattr_tree_rec_func *rec_func,
					   void *para);
static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
					struct ocfs2_xattr_bucket *bucket,
					void *para);
static int ocfs2_rm_xattr_cluster(struct inode *inode,
				  struct buffer_head *root_bh,
				  u64 blkno,
				  u32 cpos,
				  u32 len,
				  void *para);

static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle,
				  u64 src_blk, u64 last_blk, u64 to_blk,
				  unsigned int start_bucket,
				  u32 *first_hash);
static int ocfs2_prepare_refcount_xattr(struct inode *inode,
					struct ocfs2_dinode *di,
					struct ocfs2_xattr_info *xi,
					struct ocfs2_xattr_search *xis,
					struct ocfs2_xattr_search *xbs,
					struct ocfs2_refcount_tree **ref_tree,
					int *meta_need,
					int *credits);
static int ocfs2_get_xattr_tree_value_root(struct super_block *sb,
					   struct ocfs2_xattr_bucket *bucket,
					   int offset,
					   struct ocfs2_xattr_value_root **xv,
					   struct buffer_head **bh);
250
251 static inline u16 ocfs2_xattr_buckets_per_cluster(struct ocfs2_super *osb)
252 {
253         return (1 << osb->s_clustersize_bits) / OCFS2_XATTR_BUCKET_SIZE;
254 }
255
256 static inline u16 ocfs2_blocks_per_xattr_bucket(struct super_block *sb)
257 {
258         return OCFS2_XATTR_BUCKET_SIZE / (1 << sb->s_blocksize_bits);
259 }
260
261 static inline u16 ocfs2_xattr_max_xe_in_bucket(struct super_block *sb)
262 {
263         u16 len = sb->s_blocksize -
264                  offsetof(struct ocfs2_xattr_header, xh_entries);
265
266         return len / sizeof(struct ocfs2_xattr_entry);
267 }
268
/* Block number of the first block of bucket _b */
#define bucket_blkno(_b) ((_b)->bu_bhs[0]->b_blocknr)
/* Data of the _n'th block of bucket _b */
#define bucket_block(_b, _n) ((_b)->bu_bhs[(_n)]->b_data)
/* The xattr header, which sits at the start of the bucket's first block */
#define bucket_xh(_b) ((struct ocfs2_xattr_header *)bucket_block((_b), 0))
272
273 static struct ocfs2_xattr_bucket *ocfs2_xattr_bucket_new(struct inode *inode)
274 {
275         struct ocfs2_xattr_bucket *bucket;
276         int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
277
278         BUG_ON(blks > OCFS2_XATTR_MAX_BLOCKS_PER_BUCKET);
279
280         bucket = kzalloc(sizeof(struct ocfs2_xattr_bucket), GFP_NOFS);
281         if (bucket) {
282                 bucket->bu_inode = inode;
283                 bucket->bu_blocks = blks;
284         }
285
286         return bucket;
287 }
288
289 static void ocfs2_xattr_bucket_relse(struct ocfs2_xattr_bucket *bucket)
290 {
291         int i;
292
293         for (i = 0; i < bucket->bu_blocks; i++) {
294                 brelse(bucket->bu_bhs[i]);
295                 bucket->bu_bhs[i] = NULL;
296         }
297 }
298
299 static void ocfs2_xattr_bucket_free(struct ocfs2_xattr_bucket *bucket)
300 {
301         if (bucket) {
302                 ocfs2_xattr_bucket_relse(bucket);
303                 bucket->bu_inode = NULL;
304                 kfree(bucket);
305         }
306 }
307
308 /*
309  * A bucket that has never been written to disk doesn't need to be
310  * read.  We just need the buffer_heads.  Don't call this for
311  * buckets that are already on disk.  ocfs2_read_xattr_bucket() initializes
312  * them fully.
313  */
314 static int ocfs2_init_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
315                                    u64 xb_blkno)
316 {
317         int i, rc = 0;
318
319         for (i = 0; i < bucket->bu_blocks; i++) {
320                 bucket->bu_bhs[i] = sb_getblk(bucket->bu_inode->i_sb,
321                                               xb_blkno + i);
322                 if (!bucket->bu_bhs[i]) {
323                         rc = -EIO;
324                         mlog_errno(rc);
325                         break;
326                 }
327
328                 if (!ocfs2_buffer_uptodate(INODE_CACHE(bucket->bu_inode),
329                                            bucket->bu_bhs[i]))
330                         ocfs2_set_new_buffer_uptodate(INODE_CACHE(bucket->bu_inode),
331                                                       bucket->bu_bhs[i]);
332         }
333
334         if (rc)
335                 ocfs2_xattr_bucket_relse(bucket);
336         return rc;
337 }
338
339 /* Read the xattr bucket at xb_blkno */
340 static int ocfs2_read_xattr_bucket(struct ocfs2_xattr_bucket *bucket,
341                                    u64 xb_blkno)
342 {
343         int rc;
344
345         rc = ocfs2_read_blocks(INODE_CACHE(bucket->bu_inode), xb_blkno,
346                                bucket->bu_blocks, bucket->bu_bhs, 0,
347                                NULL);
348         if (!rc) {
349                 spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
350                 rc = ocfs2_validate_meta_ecc_bhs(bucket->bu_inode->i_sb,
351                                                  bucket->bu_bhs,
352                                                  bucket->bu_blocks,
353                                                  &bucket_xh(bucket)->xh_check);
354                 spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
355                 if (rc)
356                         mlog_errno(rc);
357         }
358
359         if (rc)
360                 ocfs2_xattr_bucket_relse(bucket);
361         return rc;
362 }
363
364 static int ocfs2_xattr_bucket_journal_access(handle_t *handle,
365                                              struct ocfs2_xattr_bucket *bucket,
366                                              int type)
367 {
368         int i, rc = 0;
369
370         for (i = 0; i < bucket->bu_blocks; i++) {
371                 rc = ocfs2_journal_access(handle,
372                                           INODE_CACHE(bucket->bu_inode),
373                                           bucket->bu_bhs[i], type);
374                 if (rc) {
375                         mlog_errno(rc);
376                         break;
377                 }
378         }
379
380         return rc;
381 }
382
383 static void ocfs2_xattr_bucket_journal_dirty(handle_t *handle,
384                                              struct ocfs2_xattr_bucket *bucket)
385 {
386         int i;
387
388         spin_lock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
389         ocfs2_compute_meta_ecc_bhs(bucket->bu_inode->i_sb,
390                                    bucket->bu_bhs, bucket->bu_blocks,
391                                    &bucket_xh(bucket)->xh_check);
392         spin_unlock(&OCFS2_SB(bucket->bu_inode->i_sb)->osb_xattr_lock);
393
394         for (i = 0; i < bucket->bu_blocks; i++)
395                 ocfs2_journal_dirty(handle, bucket->bu_bhs[i]);
396 }
397
398 static void ocfs2_xattr_bucket_copy_data(struct ocfs2_xattr_bucket *dest,
399                                          struct ocfs2_xattr_bucket *src)
400 {
401         int i;
402         int blocksize = src->bu_inode->i_sb->s_blocksize;
403
404         BUG_ON(dest->bu_blocks != src->bu_blocks);
405         BUG_ON(dest->bu_inode != src->bu_inode);
406
407         for (i = 0; i < src->bu_blocks; i++) {
408                 memcpy(bucket_block(dest, i), bucket_block(src, i),
409                        blocksize);
410         }
411 }
412
413 static int ocfs2_validate_xattr_block(struct super_block *sb,
414                                       struct buffer_head *bh)
415 {
416         int rc;
417         struct ocfs2_xattr_block *xb =
418                 (struct ocfs2_xattr_block *)bh->b_data;
419
420         mlog(0, "Validating xattr block %llu\n",
421              (unsigned long long)bh->b_blocknr);
422
423         BUG_ON(!buffer_uptodate(bh));
424
425         /*
426          * If the ecc fails, we return the error but otherwise
427          * leave the filesystem running.  We know any error is
428          * local to this block.
429          */
430         rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &xb->xb_check);
431         if (rc)
432                 return rc;
433
434         /*
435          * Errors after here are fatal
436          */
437
438         if (!OCFS2_IS_VALID_XATTR_BLOCK(xb)) {
439                 ocfs2_error(sb,
440                             "Extended attribute block #%llu has bad "
441                             "signature %.*s",
442                             (unsigned long long)bh->b_blocknr, 7,
443                             xb->xb_signature);
444                 return -EINVAL;
445         }
446
447         if (le64_to_cpu(xb->xb_blkno) != bh->b_blocknr) {
448                 ocfs2_error(sb,
449                             "Extended attribute block #%llu has an "
450                             "invalid xb_blkno of %llu",
451                             (unsigned long long)bh->b_blocknr,
452                             (unsigned long long)le64_to_cpu(xb->xb_blkno));
453                 return -EINVAL;
454         }
455
456         if (le32_to_cpu(xb->xb_fs_generation) != OCFS2_SB(sb)->fs_generation) {
457                 ocfs2_error(sb,
458                             "Extended attribute block #%llu has an invalid "
459                             "xb_fs_generation of #%u",
460                             (unsigned long long)bh->b_blocknr,
461                             le32_to_cpu(xb->xb_fs_generation));
462                 return -EINVAL;
463         }
464
465         return 0;
466 }
467
468 static int ocfs2_read_xattr_block(struct inode *inode, u64 xb_blkno,
469                                   struct buffer_head **bh)
470 {
471         int rc;
472         struct buffer_head *tmp = *bh;
473
474         rc = ocfs2_read_block(INODE_CACHE(inode), xb_blkno, &tmp,
475                               ocfs2_validate_xattr_block);
476
477         /* If ocfs2_read_block() got us a new bh, pass it up. */
478         if (!rc && !*bh)
479                 *bh = tmp;
480
481         return rc;
482 }
483
484 static inline const char *ocfs2_xattr_prefix(int name_index)
485 {
486         struct xattr_handler *handler = NULL;
487
488         if (name_index > 0 && name_index < OCFS2_XATTR_MAX)
489                 handler = ocfs2_xattr_handler_map[name_index];
490
491         return handler ? handler->prefix : NULL;
492 }
493
494 static u32 ocfs2_xattr_name_hash(struct inode *inode,
495                                  const char *name,
496                                  int name_len)
497 {
498         /* Get hash value of uuid from super block */
499         u32 hash = OCFS2_SB(inode->i_sb)->uuid_hash;
500         int i;
501
502         /* hash extended attribute name */
503         for (i = 0; i < name_len; i++) {
504                 hash = (hash << OCFS2_HASH_SHIFT) ^
505                        (hash >> (8*sizeof(hash) - OCFS2_HASH_SHIFT)) ^
506                        *name++;
507         }
508
509         return hash;
510 }
511
512 /*
513  * ocfs2_xattr_hash_entry()
514  *
515  * Compute the hash of an extended attribute.
516  */
517 static void ocfs2_xattr_hash_entry(struct inode *inode,
518                                    struct ocfs2_xattr_header *header,
519                                    struct ocfs2_xattr_entry *entry)
520 {
521         u32 hash = 0;
522         char *name = (char *)header + le16_to_cpu(entry->xe_name_offset);
523
524         hash = ocfs2_xattr_name_hash(inode, name, entry->xe_name_len);
525         entry->xe_name_hash = cpu_to_le32(hash);
526
527         return;
528 }
529
530 static int ocfs2_xattr_entry_real_size(int name_len, size_t value_len)
531 {
532         int size = 0;
533
534         if (value_len <= OCFS2_XATTR_INLINE_SIZE)
535                 size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_SIZE(value_len);
536         else
537                 size = OCFS2_XATTR_SIZE(name_len) + OCFS2_XATTR_ROOT_SIZE;
538         size += sizeof(struct ocfs2_xattr_entry);
539
540         return size;
541 }
542
543 int ocfs2_calc_security_init(struct inode *dir,
544                              struct ocfs2_security_xattr_info *si,
545                              int *want_clusters,
546                              int *xattr_credits,
547                              struct ocfs2_alloc_context **xattr_ac)
548 {
549         int ret = 0;
550         struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
551         int s_size = ocfs2_xattr_entry_real_size(strlen(si->name),
552                                                  si->value_len);
553
554         /*
555          * The max space of security xattr taken inline is
556          * 256(name) + 80(value) + 16(entry) = 352 bytes,
557          * So reserve one metadata block for it is ok.
558          */
559         if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE ||
560             s_size > OCFS2_XATTR_FREE_IN_IBODY) {
561                 ret = ocfs2_reserve_new_metadata_blocks(osb, 1, xattr_ac);
562                 if (ret) {
563                         mlog_errno(ret);
564                         return ret;
565                 }
566                 *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
567         }
568
569         /* reserve clusters for xattr value which will be set in B tree*/
570         if (si->value_len > OCFS2_XATTR_INLINE_SIZE) {
571                 int new_clusters = ocfs2_clusters_for_bytes(dir->i_sb,
572                                                             si->value_len);
573
574                 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
575                                                            new_clusters);
576                 *want_clusters += new_clusters;
577         }
578         return ret;
579 }
580
581 int ocfs2_calc_xattr_init(struct inode *dir,
582                           struct buffer_head *dir_bh,
583                           int mode,
584                           struct ocfs2_security_xattr_info *si,
585                           int *want_clusters,
586                           int *xattr_credits,
587                           int *want_meta)
588 {
589         int ret = 0;
590         struct ocfs2_super *osb = OCFS2_SB(dir->i_sb);
591         int s_size = 0, a_size = 0, acl_len = 0, new_clusters;
592
593         if (si->enable)
594                 s_size = ocfs2_xattr_entry_real_size(strlen(si->name),
595                                                      si->value_len);
596
597         if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) {
598                 acl_len = ocfs2_xattr_get_nolock(dir, dir_bh,
599                                         OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT,
600                                         "", NULL, 0);
601                 if (acl_len > 0) {
602                         a_size = ocfs2_xattr_entry_real_size(0, acl_len);
603                         if (S_ISDIR(mode))
604                                 a_size <<= 1;
605                 } else if (acl_len != 0 && acl_len != -ENODATA) {
606                         mlog_errno(ret);
607                         return ret;
608                 }
609         }
610
611         if (!(s_size + a_size))
612                 return ret;
613
614         /*
615          * The max space of security xattr taken inline is
616          * 256(name) + 80(value) + 16(entry) = 352 bytes,
617          * The max space of acl xattr taken inline is
618          * 80(value) + 16(entry) * 2(if directory) = 192 bytes,
619          * when blocksize = 512, may reserve one more cluser for
620          * xattr bucket, otherwise reserve one metadata block
621          * for them is ok.
622          * If this is a new directory with inline data,
623          * we choose to reserve the entire inline area for
624          * directory contents and force an external xattr block.
625          */
626         if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE ||
627             (S_ISDIR(mode) && ocfs2_supports_inline_data(osb)) ||
628             (s_size + a_size) > OCFS2_XATTR_FREE_IN_IBODY) {
629                 *want_meta = *want_meta + 1;
630                 *xattr_credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
631         }
632
633         if (dir->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE &&
634             (s_size + a_size) > OCFS2_XATTR_FREE_IN_BLOCK(dir)) {
635                 *want_clusters += 1;
636                 *xattr_credits += ocfs2_blocks_per_xattr_bucket(dir->i_sb);
637         }
638
639         /*
640          * reserve credits and clusters for xattrs which has large value
641          * and have to be set outside
642          */
643         if (si->enable && si->value_len > OCFS2_XATTR_INLINE_SIZE) {
644                 new_clusters = ocfs2_clusters_for_bytes(dir->i_sb,
645                                                         si->value_len);
646                 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
647                                                            new_clusters);
648                 *want_clusters += new_clusters;
649         }
650         if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL &&
651             acl_len > OCFS2_XATTR_INLINE_SIZE) {
652                 /* for directory, it has DEFAULT and ACCESS two types of acls */
653                 new_clusters = (S_ISDIR(mode) ? 2 : 1) *
654                                 ocfs2_clusters_for_bytes(dir->i_sb, acl_len);
655                 *xattr_credits += ocfs2_clusters_to_blocks(dir->i_sb,
656                                                            new_clusters);
657                 *want_clusters += new_clusters;
658         }
659
660         return ret;
661 }
662
663 static int ocfs2_xattr_extend_allocation(struct inode *inode,
664                                          u32 clusters_to_add,
665                                          struct ocfs2_xattr_value_buf *vb,
666                                          struct ocfs2_xattr_set_ctxt *ctxt)
667 {
668         int status = 0;
669         handle_t *handle = ctxt->handle;
670         enum ocfs2_alloc_restarted why;
671         u32 prev_clusters, logical_start = le32_to_cpu(vb->vb_xv->xr_clusters);
672         struct ocfs2_extent_tree et;
673
674         mlog(0, "(clusters_to_add for xattr= %u)\n", clusters_to_add);
675
676         ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
677
678         status = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
679                               OCFS2_JOURNAL_ACCESS_WRITE);
680         if (status < 0) {
681                 mlog_errno(status);
682                 goto leave;
683         }
684
685         prev_clusters = le32_to_cpu(vb->vb_xv->xr_clusters);
686         status = ocfs2_add_clusters_in_btree(handle,
687                                              &et,
688                                              &logical_start,
689                                              clusters_to_add,
690                                              0,
691                                              ctxt->data_ac,
692                                              ctxt->meta_ac,
693                                              &why);
694         if (status < 0) {
695                 mlog_errno(status);
696                 goto leave;
697         }
698
699         status = ocfs2_journal_dirty(handle, vb->vb_bh);
700         if (status < 0) {
701                 mlog_errno(status);
702                 goto leave;
703         }
704
705         clusters_to_add -= le32_to_cpu(vb->vb_xv->xr_clusters) - prev_clusters;
706
707         /*
708          * We should have already allocated enough space before the transaction,
709          * so no need to restart.
710          */
711         BUG_ON(why != RESTART_NONE || clusters_to_add);
712
713 leave:
714
715         return status;
716 }
717
718 static int __ocfs2_remove_xattr_range(struct inode *inode,
719                                       struct ocfs2_xattr_value_buf *vb,
720                                       u32 cpos, u32 phys_cpos, u32 len,
721                                       unsigned int ext_flags,
722                                       struct ocfs2_xattr_set_ctxt *ctxt)
723 {
724         int ret;
725         u64 phys_blkno = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
726         handle_t *handle = ctxt->handle;
727         struct ocfs2_extent_tree et;
728
729         ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
730
731         ret = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
732                             OCFS2_JOURNAL_ACCESS_WRITE);
733         if (ret) {
734                 mlog_errno(ret);
735                 goto out;
736         }
737
738         ret = ocfs2_remove_extent(handle, &et, cpos, len, ctxt->meta_ac,
739                                   &ctxt->dealloc);
740         if (ret) {
741                 mlog_errno(ret);
742                 goto out;
743         }
744
745         le32_add_cpu(&vb->vb_xv->xr_clusters, -len);
746
747         ret = ocfs2_journal_dirty(handle, vb->vb_bh);
748         if (ret) {
749                 mlog_errno(ret);
750                 goto out;
751         }
752
753         if (ext_flags & OCFS2_EXT_REFCOUNTED)
754                 ret = ocfs2_decrease_refcount(inode, handle,
755                                         ocfs2_blocks_to_clusters(inode->i_sb,
756                                                                  phys_blkno),
757                                         len, ctxt->meta_ac, &ctxt->dealloc, 1);
758         else
759                 ret = ocfs2_cache_cluster_dealloc(&ctxt->dealloc,
760                                                   phys_blkno, len);
761         if (ret)
762                 mlog_errno(ret);
763
764 out:
765         return ret;
766 }
767
768 static int ocfs2_xattr_shrink_size(struct inode *inode,
769                                    u32 old_clusters,
770                                    u32 new_clusters,
771                                    struct ocfs2_xattr_value_buf *vb,
772                                    struct ocfs2_xattr_set_ctxt *ctxt)
773 {
774         int ret = 0;
775         unsigned int ext_flags;
776         u32 trunc_len, cpos, phys_cpos, alloc_size;
777         u64 block;
778
779         if (old_clusters <= new_clusters)
780                 return 0;
781
782         cpos = new_clusters;
783         trunc_len = old_clusters - new_clusters;
784         while (trunc_len) {
785                 ret = ocfs2_xattr_get_clusters(inode, cpos, &phys_cpos,
786                                                &alloc_size,
787                                                &vb->vb_xv->xr_list, &ext_flags);
788                 if (ret) {
789                         mlog_errno(ret);
790                         goto out;
791                 }
792
793                 if (alloc_size > trunc_len)
794                         alloc_size = trunc_len;
795
796                 ret = __ocfs2_remove_xattr_range(inode, vb, cpos,
797                                                  phys_cpos, alloc_size,
798                                                  ext_flags, ctxt);
799                 if (ret) {
800                         mlog_errno(ret);
801                         goto out;
802                 }
803
804                 block = ocfs2_clusters_to_blocks(inode->i_sb, phys_cpos);
805                 ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode),
806                                                        block, alloc_size);
807                 cpos += alloc_size;
808                 trunc_len -= alloc_size;
809         }
810
811 out:
812         return ret;
813 }
814
815 static int ocfs2_xattr_value_truncate(struct inode *inode,
816                                       struct ocfs2_xattr_value_buf *vb,
817                                       int len,
818                                       struct ocfs2_xattr_set_ctxt *ctxt)
819 {
820         int ret;
821         u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb, len);
822         u32 old_clusters = le32_to_cpu(vb->vb_xv->xr_clusters);
823
824         if (new_clusters == old_clusters)
825                 return 0;
826
827         if (new_clusters > old_clusters)
828                 ret = ocfs2_xattr_extend_allocation(inode,
829                                                     new_clusters - old_clusters,
830                                                     vb, ctxt);
831         else
832                 ret = ocfs2_xattr_shrink_size(inode,
833                                               old_clusters, new_clusters,
834                                               vb, ctxt);
835
836         return ret;
837 }
838
/*
 * Append one "<prefix><name>\0" string to a listxattr buffer.
 *
 * @buffer/@size: destination and its total capacity
 * @result:       running byte count; on entry the offset to write at, on
 *                return advanced by the length of this entry (this
 *                happens even when -ERANGE is returned)
 * @name:         attribute name, name_len bytes, not NUL terminated
 *
 * A size of 0 means the caller only wants the required length, so
 * nothing is copied.  Returns 0, or -ERANGE if the entry does not fit.
 */
static int ocfs2_xattr_list_entry(char *buffer, size_t size,
				  size_t *result, const char *prefix,
				  const char *name, int name_len)
{
	size_t start = *result;
	int prefix_len = strlen(prefix);
	char *dst;

	/* prefix + name + trailing NUL */
	*result += prefix_len + name_len + 1;

	if (size == 0)
		return 0;

	if (size < *result)
		return -ERANGE;

	dst = buffer + start;
	memcpy(dst, prefix, prefix_len);
	dst += prefix_len;
	memcpy(dst, name, name_len);
	dst[name_len] = '\0';

	return 0;
}
862
863 static int ocfs2_xattr_list_entries(struct inode *inode,
864                                     struct ocfs2_xattr_header *header,
865                                     char *buffer, size_t buffer_size)
866 {
867         size_t result = 0;
868         int i, type, ret;
869         const char *prefix, *name;
870
871         for (i = 0 ; i < le16_to_cpu(header->xh_count); i++) {
872                 struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
873                 type = ocfs2_xattr_get_type(entry);
874                 prefix = ocfs2_xattr_prefix(type);
875
876                 if (prefix) {
877                         name = (const char *)header +
878                                 le16_to_cpu(entry->xe_name_offset);
879
880                         ret = ocfs2_xattr_list_entry(buffer, buffer_size,
881                                                      &result, prefix, name,
882                                                      entry->xe_name_len);
883                         if (ret)
884                                 return ret;
885                 }
886         }
887
888         return result;
889 }
890
891 int ocfs2_has_inline_xattr_value_outside(struct inode *inode,
892                                          struct ocfs2_dinode *di)
893 {
894         struct ocfs2_xattr_header *xh;
895         int i;
896
897         xh = (struct ocfs2_xattr_header *)
898                  ((void *)di + inode->i_sb->s_blocksize -
899                  le16_to_cpu(di->i_xattr_inline_size));
900
901         for (i = 0; i < le16_to_cpu(xh->xh_count); i++)
902                 if (!ocfs2_xattr_is_local(&xh->xh_entries[i]))
903                         return 1;
904
905         return 0;
906 }
907
908 static int ocfs2_xattr_ibody_list(struct inode *inode,
909                                   struct ocfs2_dinode *di,
910                                   char *buffer,
911                                   size_t buffer_size)
912 {
913         struct ocfs2_xattr_header *header = NULL;
914         struct ocfs2_inode_info *oi = OCFS2_I(inode);
915         int ret = 0;
916
917         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL))
918                 return ret;
919
920         header = (struct ocfs2_xattr_header *)
921                  ((void *)di + inode->i_sb->s_blocksize -
922                  le16_to_cpu(di->i_xattr_inline_size));
923
924         ret = ocfs2_xattr_list_entries(inode, header, buffer, buffer_size);
925
926         return ret;
927 }
928
929 static int ocfs2_xattr_block_list(struct inode *inode,
930                                   struct ocfs2_dinode *di,
931                                   char *buffer,
932                                   size_t buffer_size)
933 {
934         struct buffer_head *blk_bh = NULL;
935         struct ocfs2_xattr_block *xb;
936         int ret = 0;
937
938         if (!di->i_xattr_loc)
939                 return ret;
940
941         ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
942                                      &blk_bh);
943         if (ret < 0) {
944                 mlog_errno(ret);
945                 return ret;
946         }
947
948         xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
949         if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
950                 struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header;
951                 ret = ocfs2_xattr_list_entries(inode, header,
952                                                buffer, buffer_size);
953         } else
954                 ret = ocfs2_xattr_tree_list_index_block(inode, blk_bh,
955                                                    buffer, buffer_size);
956
957         brelse(blk_bh);
958
959         return ret;
960 }
961
962 ssize_t ocfs2_listxattr(struct dentry *dentry,
963                         char *buffer,
964                         size_t size)
965 {
966         int ret = 0, i_ret = 0, b_ret = 0;
967         struct buffer_head *di_bh = NULL;
968         struct ocfs2_dinode *di = NULL;
969         struct ocfs2_inode_info *oi = OCFS2_I(dentry->d_inode);
970
971         if (!ocfs2_supports_xattr(OCFS2_SB(dentry->d_sb)))
972                 return -EOPNOTSUPP;
973
974         if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
975                 return ret;
976
977         ret = ocfs2_inode_lock(dentry->d_inode, &di_bh, 0);
978         if (ret < 0) {
979                 mlog_errno(ret);
980                 return ret;
981         }
982
983         di = (struct ocfs2_dinode *)di_bh->b_data;
984
985         down_read(&oi->ip_xattr_sem);
986         i_ret = ocfs2_xattr_ibody_list(dentry->d_inode, di, buffer, size);
987         if (i_ret < 0)
988                 b_ret = 0;
989         else {
990                 if (buffer) {
991                         buffer += i_ret;
992                         size -= i_ret;
993                 }
994                 b_ret = ocfs2_xattr_block_list(dentry->d_inode, di,
995                                                buffer, size);
996                 if (b_ret < 0)
997                         i_ret = 0;
998         }
999         up_read(&oi->ip_xattr_sem);
1000         ocfs2_inode_unlock(dentry->d_inode, 0);
1001
1002         brelse(di_bh);
1003
1004         return i_ret + b_ret;
1005 }
1006
1007 static int ocfs2_xattr_find_entry(int name_index,
1008                                   const char *name,
1009                                   struct ocfs2_xattr_search *xs)
1010 {
1011         struct ocfs2_xattr_entry *entry;
1012         size_t name_len;
1013         int i, cmp = 1;
1014
1015         if (name == NULL)
1016                 return -EINVAL;
1017
1018         name_len = strlen(name);
1019         entry = xs->here;
1020         for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
1021                 cmp = name_index - ocfs2_xattr_get_type(entry);
1022                 if (!cmp)
1023                         cmp = name_len - entry->xe_name_len;
1024                 if (!cmp)
1025                         cmp = memcmp(name, (xs->base +
1026                                      le16_to_cpu(entry->xe_name_offset)),
1027                                      name_len);
1028                 if (cmp == 0)
1029                         break;
1030                 entry += 1;
1031         }
1032         xs->here = entry;
1033
1034         return cmp ? -ENODATA : 0;
1035 }
1036
1037 static int ocfs2_xattr_get_value_outside(struct inode *inode,
1038                                          struct ocfs2_xattr_value_root *xv,
1039                                          void *buffer,
1040                                          size_t len)
1041 {
1042         u32 cpos, p_cluster, num_clusters, bpc, clusters;
1043         u64 blkno;
1044         int i, ret = 0;
1045         size_t cplen, blocksize;
1046         struct buffer_head *bh = NULL;
1047         struct ocfs2_extent_list *el;
1048
1049         el = &xv->xr_list;
1050         clusters = le32_to_cpu(xv->xr_clusters);
1051         bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
1052         blocksize = inode->i_sb->s_blocksize;
1053
1054         cpos = 0;
1055         while (cpos < clusters) {
1056                 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
1057                                                &num_clusters, el, NULL);
1058                 if (ret) {
1059                         mlog_errno(ret);
1060                         goto out;
1061                 }
1062
1063                 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
1064                 /* Copy ocfs2_xattr_value */
1065                 for (i = 0; i < num_clusters * bpc; i++, blkno++) {
1066                         ret = ocfs2_read_block(INODE_CACHE(inode), blkno,
1067                                                &bh, NULL);
1068                         if (ret) {
1069                                 mlog_errno(ret);
1070                                 goto out;
1071                         }
1072
1073                         cplen = len >= blocksize ? blocksize : len;
1074                         memcpy(buffer, bh->b_data, cplen);
1075                         len -= cplen;
1076                         buffer += cplen;
1077
1078                         brelse(bh);
1079                         bh = NULL;
1080                         if (len == 0)
1081                                 break;
1082                 }
1083                 cpos += num_clusters;
1084         }
1085 out:
1086         return ret;
1087 }
1088
1089 static int ocfs2_xattr_ibody_get(struct inode *inode,
1090                                  int name_index,
1091                                  const char *name,
1092                                  void *buffer,
1093                                  size_t buffer_size,
1094                                  struct ocfs2_xattr_search *xs)
1095 {
1096         struct ocfs2_inode_info *oi = OCFS2_I(inode);
1097         struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1098         struct ocfs2_xattr_value_root *xv;
1099         size_t size;
1100         int ret = 0;
1101
1102         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL))
1103                 return -ENODATA;
1104
1105         xs->end = (void *)di + inode->i_sb->s_blocksize;
1106         xs->header = (struct ocfs2_xattr_header *)
1107                         (xs->end - le16_to_cpu(di->i_xattr_inline_size));
1108         xs->base = (void *)xs->header;
1109         xs->here = xs->header->xh_entries;
1110
1111         ret = ocfs2_xattr_find_entry(name_index, name, xs);
1112         if (ret)
1113                 return ret;
1114         size = le64_to_cpu(xs->here->xe_value_size);
1115         if (buffer) {
1116                 if (size > buffer_size)
1117                         return -ERANGE;
1118                 if (ocfs2_xattr_is_local(xs->here)) {
1119                         memcpy(buffer, (void *)xs->base +
1120                                le16_to_cpu(xs->here->xe_name_offset) +
1121                                OCFS2_XATTR_SIZE(xs->here->xe_name_len), size);
1122                 } else {
1123                         xv = (struct ocfs2_xattr_value_root *)
1124                                 (xs->base + le16_to_cpu(
1125                                  xs->here->xe_name_offset) +
1126                                 OCFS2_XATTR_SIZE(xs->here->xe_name_len));
1127                         ret = ocfs2_xattr_get_value_outside(inode, xv,
1128                                                             buffer, size);
1129                         if (ret < 0) {
1130                                 mlog_errno(ret);
1131                                 return ret;
1132                         }
1133                 }
1134         }
1135
1136         return size;
1137 }
1138
1139 static int ocfs2_xattr_block_get(struct inode *inode,
1140                                  int name_index,
1141                                  const char *name,
1142                                  void *buffer,
1143                                  size_t buffer_size,
1144                                  struct ocfs2_xattr_search *xs)
1145 {
1146         struct ocfs2_xattr_block *xb;
1147         struct ocfs2_xattr_value_root *xv;
1148         size_t size;
1149         int ret = -ENODATA, name_offset, name_len, i;
1150         int uninitialized_var(block_off);
1151
1152         xs->bucket = ocfs2_xattr_bucket_new(inode);
1153         if (!xs->bucket) {
1154                 ret = -ENOMEM;
1155                 mlog_errno(ret);
1156                 goto cleanup;
1157         }
1158
1159         ret = ocfs2_xattr_block_find(inode, name_index, name, xs);
1160         if (ret) {
1161                 mlog_errno(ret);
1162                 goto cleanup;
1163         }
1164
1165         if (xs->not_found) {
1166                 ret = -ENODATA;
1167                 goto cleanup;
1168         }
1169
1170         xb = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
1171         size = le64_to_cpu(xs->here->xe_value_size);
1172         if (buffer) {
1173                 ret = -ERANGE;
1174                 if (size > buffer_size)
1175                         goto cleanup;
1176
1177                 name_offset = le16_to_cpu(xs->here->xe_name_offset);
1178                 name_len = OCFS2_XATTR_SIZE(xs->here->xe_name_len);
1179                 i = xs->here - xs->header->xh_entries;
1180
1181                 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
1182                         ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
1183                                                                 bucket_xh(xs->bucket),
1184                                                                 i,
1185                                                                 &block_off,
1186                                                                 &name_offset);
1187                         xs->base = bucket_block(xs->bucket, block_off);
1188                 }
1189                 if (ocfs2_xattr_is_local(xs->here)) {
1190                         memcpy(buffer, (void *)xs->base +
1191                                name_offset + name_len, size);
1192                 } else {
1193                         xv = (struct ocfs2_xattr_value_root *)
1194                                 (xs->base + name_offset + name_len);
1195                         ret = ocfs2_xattr_get_value_outside(inode, xv,
1196                                                             buffer, size);
1197                         if (ret < 0) {
1198                                 mlog_errno(ret);
1199                                 goto cleanup;
1200                         }
1201                 }
1202         }
1203         ret = size;
1204 cleanup:
1205         ocfs2_xattr_bucket_free(xs->bucket);
1206
1207         brelse(xs->xattr_bh);
1208         xs->xattr_bh = NULL;
1209         return ret;
1210 }
1211
1212 int ocfs2_xattr_get_nolock(struct inode *inode,
1213                            struct buffer_head *di_bh,
1214                            int name_index,
1215                            const char *name,
1216                            void *buffer,
1217                            size_t buffer_size)
1218 {
1219         int ret;
1220         struct ocfs2_dinode *di = NULL;
1221         struct ocfs2_inode_info *oi = OCFS2_I(inode);
1222         struct ocfs2_xattr_search xis = {
1223                 .not_found = -ENODATA,
1224         };
1225         struct ocfs2_xattr_search xbs = {
1226                 .not_found = -ENODATA,
1227         };
1228
1229         if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
1230                 return -EOPNOTSUPP;
1231
1232         if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
1233                 ret = -ENODATA;
1234
1235         xis.inode_bh = xbs.inode_bh = di_bh;
1236         di = (struct ocfs2_dinode *)di_bh->b_data;
1237
1238         down_read(&oi->ip_xattr_sem);
1239         ret = ocfs2_xattr_ibody_get(inode, name_index, name, buffer,
1240                                     buffer_size, &xis);
1241         if (ret == -ENODATA && di->i_xattr_loc)
1242                 ret = ocfs2_xattr_block_get(inode, name_index, name, buffer,
1243                                             buffer_size, &xbs);
1244         up_read(&oi->ip_xattr_sem);
1245
1246         return ret;
1247 }
1248
1249 /* ocfs2_xattr_get()
1250  *
1251  * Copy an extended attribute into the buffer provided.
1252  * Buffer is NULL to compute the size of buffer required.
1253  */
1254 static int ocfs2_xattr_get(struct inode *inode,
1255                            int name_index,
1256                            const char *name,
1257                            void *buffer,
1258                            size_t buffer_size)
1259 {
1260         int ret;
1261         struct buffer_head *di_bh = NULL;
1262
1263         ret = ocfs2_inode_lock(inode, &di_bh, 0);
1264         if (ret < 0) {
1265                 mlog_errno(ret);
1266                 return ret;
1267         }
1268         ret = ocfs2_xattr_get_nolock(inode, di_bh, name_index,
1269                                      name, buffer, buffer_size);
1270
1271         ocfs2_inode_unlock(inode, 0);
1272
1273         brelse(di_bh);
1274
1275         return ret;
1276 }
1277
1278 static int __ocfs2_xattr_set_value_outside(struct inode *inode,
1279                                            handle_t *handle,
1280                                            struct ocfs2_xattr_value_buf *vb,
1281                                            const void *value,
1282                                            int value_len)
1283 {
1284         int ret = 0, i, cp_len;
1285         u16 blocksize = inode->i_sb->s_blocksize;
1286         u32 p_cluster, num_clusters;
1287         u32 cpos = 0, bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
1288         u32 clusters = ocfs2_clusters_for_bytes(inode->i_sb, value_len);
1289         u64 blkno;
1290         struct buffer_head *bh = NULL;
1291         unsigned int ext_flags;
1292         struct ocfs2_xattr_value_root *xv = vb->vb_xv;
1293
1294         BUG_ON(clusters > le32_to_cpu(xv->xr_clusters));
1295
1296         while (cpos < clusters) {
1297                 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
1298                                                &num_clusters, &xv->xr_list,
1299                                                &ext_flags);
1300                 if (ret) {
1301                         mlog_errno(ret);
1302                         goto out;
1303                 }
1304
1305                 BUG_ON(ext_flags & OCFS2_EXT_REFCOUNTED);
1306
1307                 blkno = ocfs2_clusters_to_blocks(inode->i_sb, p_cluster);
1308
1309                 for (i = 0; i < num_clusters * bpc; i++, blkno++) {
1310                         ret = ocfs2_read_block(INODE_CACHE(inode), blkno,
1311                                                &bh, NULL);
1312                         if (ret) {
1313                                 mlog_errno(ret);
1314                                 goto out;
1315                         }
1316
1317                         ret = ocfs2_journal_access(handle,
1318                                                    INODE_CACHE(inode),
1319                                                    bh,
1320                                                    OCFS2_JOURNAL_ACCESS_WRITE);
1321                         if (ret < 0) {
1322                                 mlog_errno(ret);
1323                                 goto out;
1324                         }
1325
1326                         cp_len = value_len > blocksize ? blocksize : value_len;
1327                         memcpy(bh->b_data, value, cp_len);
1328                         value_len -= cp_len;
1329                         value += cp_len;
1330                         if (cp_len < blocksize)
1331                                 memset(bh->b_data + cp_len, 0,
1332                                        blocksize - cp_len);
1333
1334                         ret = ocfs2_journal_dirty(handle, bh);
1335                         if (ret < 0) {
1336                                 mlog_errno(ret);
1337                                 goto out;
1338                         }
1339                         brelse(bh);
1340                         bh = NULL;
1341
1342                         /*
1343                          * XXX: do we need to empty all the following
1344                          * blocks in this cluster?
1345                          */
1346                         if (!value_len)
1347                                 break;
1348                 }
1349                 cpos += num_clusters;
1350         }
1351 out:
1352         brelse(bh);
1353
1354         return ret;
1355 }
1356
1357 static int ocfs2_xattr_cleanup(struct inode *inode,
1358                                handle_t *handle,
1359                                struct ocfs2_xattr_info *xi,
1360                                struct ocfs2_xattr_search *xs,
1361                                struct ocfs2_xattr_value_buf *vb,
1362                                size_t offs)
1363 {
1364         int ret = 0;
1365         void *val = xs->base + offs;
1366         size_t size = OCFS2_XATTR_SIZE(xi->xi_name_len) +
1367                 OCFS2_XATTR_ROOT_SIZE;
1368
1369         ret = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
1370                             OCFS2_JOURNAL_ACCESS_WRITE);
1371         if (ret) {
1372                 mlog_errno(ret);
1373                 goto out;
1374         }
1375         /* Decrease xattr count */
1376         le16_add_cpu(&xs->header->xh_count, -1);
1377         /* Remove the xattr entry and tree root which has already be set*/
1378         memset((void *)xs->here, 0, sizeof(struct ocfs2_xattr_entry));
1379         memset(val, 0, size);
1380
1381         ret = ocfs2_journal_dirty(handle, vb->vb_bh);
1382         if (ret < 0)
1383                 mlog_errno(ret);
1384 out:
1385         return ret;
1386 }
1387
1388 static int ocfs2_xattr_update_entry(struct inode *inode,
1389                                     handle_t *handle,
1390                                     struct ocfs2_xattr_info *xi,
1391                                     struct ocfs2_xattr_search *xs,
1392                                     struct ocfs2_xattr_value_buf *vb,
1393                                     size_t offs)
1394 {
1395         int ret;
1396
1397         ret = vb->vb_access(handle, INODE_CACHE(inode), vb->vb_bh,
1398                             OCFS2_JOURNAL_ACCESS_WRITE);
1399         if (ret) {
1400                 mlog_errno(ret);
1401                 goto out;
1402         }
1403
1404         xs->here->xe_name_offset = cpu_to_le16(offs);
1405         xs->here->xe_value_size = cpu_to_le64(xi->xi_value_len);
1406         if (xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE)
1407                 ocfs2_xattr_set_local(xs->here, 1);
1408         else
1409                 ocfs2_xattr_set_local(xs->here, 0);
1410         ocfs2_xattr_hash_entry(inode, xs->header, xs->here);
1411
1412         ret = ocfs2_journal_dirty(handle, vb->vb_bh);
1413         if (ret < 0)
1414                 mlog_errno(ret);
1415 out:
1416         return ret;
1417 }
1418
1419 /*
1420  * ocfs2_xattr_set_value_outside()
1421  *
1422  * Set large size value in B tree.
1423  */
1424 static int ocfs2_xattr_set_value_outside(struct inode *inode,
1425                                          struct ocfs2_xattr_info *xi,
1426                                          struct ocfs2_xattr_search *xs,
1427                                          struct ocfs2_xattr_set_ctxt *ctxt,
1428                                          struct ocfs2_xattr_value_buf *vb,
1429                                          size_t offs)
1430 {
1431         void *val = xs->base + offs;
1432         struct ocfs2_xattr_value_root *xv = NULL;
1433         size_t size = OCFS2_XATTR_SIZE(xi->xi_name_len) +
1434                 OCFS2_XATTR_ROOT_SIZE;
1435         int ret = 0;
1436
1437         memset(val, 0, size);
1438         memcpy(val, xi->xi_name, xi->xi_name_len);
1439         xv = (struct ocfs2_xattr_value_root *)
1440                 (val + OCFS2_XATTR_SIZE(xi->xi_name_len));
1441         xv->xr_clusters = 0;
1442         xv->xr_last_eb_blk = 0;
1443         xv->xr_list.l_tree_depth = 0;
1444         xv->xr_list.l_count = cpu_to_le16(1);
1445         xv->xr_list.l_next_free_rec = 0;
1446         vb->vb_xv = xv;
1447
1448         ret = ocfs2_xattr_value_truncate(inode, vb, xi->xi_value_len, ctxt);
1449         if (ret < 0) {
1450                 mlog_errno(ret);
1451                 return ret;
1452         }
1453         ret = ocfs2_xattr_update_entry(inode, ctxt->handle, xi, xs, vb, offs);
1454         if (ret < 0) {
1455                 mlog_errno(ret);
1456                 return ret;
1457         }
1458         ret = __ocfs2_xattr_set_value_outside(inode, ctxt->handle, vb,
1459                                               xi->xi_value, xi->xi_value_len);
1460         if (ret < 0)
1461                 mlog_errno(ret);
1462
1463         return ret;
1464 }
1465
1466 /*
1467  * Wipe the name+value pair and allow the storage to reclaim it.  This
1468  * must be followed by either removal of the entry or a call to
1469  * ocfs2_xa_add_namevalue().
1470  */
1471 static void ocfs2_xa_wipe_namevalue(struct ocfs2_xa_loc *loc)
1472 {
1473         loc->xl_ops->xlo_wipe_namevalue(loc);
1474 }
1475
1476 static void *ocfs2_xa_block_offset_pointer(struct ocfs2_xa_loc *loc,
1477                                            int offset)
1478 {
1479         BUG_ON(offset >= loc->xl_size);
1480         return (char *)loc->xl_header + offset;
1481 }
1482
1483 /*
1484  * Block storage for xattrs keeps the name+value pairs compacted.  When
1485  * we remove one, we have to shift any that preceded it towards the end.
1486  */
1487 static void ocfs2_xa_block_wipe_namevalue(struct ocfs2_xa_loc *loc)
1488 {
1489         int i, offset;
1490         int namevalue_offset, first_namevalue_offset, namevalue_size;
1491         struct ocfs2_xattr_entry *entry = loc->xl_entry;
1492         struct ocfs2_xattr_header *xh = loc->xl_header;
1493         u64 value_size = le64_to_cpu(entry->xe_value_size);
1494         int count = le16_to_cpu(xh->xh_count);
1495
1496         namevalue_offset = le16_to_cpu(entry->xe_name_offset);
1497         namevalue_size = OCFS2_XATTR_SIZE(entry->xe_name_len);
1498         if (value_size > OCFS2_XATTR_INLINE_SIZE)
1499                 namevalue_size += OCFS2_XATTR_ROOT_SIZE;
1500         else
1501                 namevalue_size += OCFS2_XATTR_SIZE(value_size);
1502
1503         for (i = 0, first_namevalue_offset = loc->xl_size;
1504              i < count; i++) {
1505                 offset = le16_to_cpu(xh->xh_entries[i].xe_name_offset);
1506                 if (offset < first_namevalue_offset)
1507                         first_namevalue_offset = offset;
1508         }
1509
1510         /* Shift the name+value pairs */
1511         memmove((char *)xh + first_namevalue_offset + namevalue_size,
1512                 (char *)xh + first_namevalue_offset,
1513                 namevalue_offset - first_namevalue_offset);
1514         memset((char *)xh + first_namevalue_offset, 0, namevalue_size);
1515
1516         /* Now tell xh->xh_entries about it */
1517         for (i = 0; i < count; i++) {
1518                 offset = le16_to_cpu(xh->xh_entries[i].xe_name_offset);
1519                 if (offset < namevalue_offset)
1520                         le16_add_cpu(&xh->xh_entries[i].xe_name_offset,
1521                                      namevalue_size);
1522         }
1523
1524         /*
1525          * Note that we don't update xh_free_start or xh_name_value_len
1526          * because they're not used in block-stored xattrs.
1527          */
1528 }
1529
1530 /*
1531  * Operations for xattrs stored in blocks.  This includes inline inode
1532  * storage and unindexed ocfs2_xattr_blocks.
1533  */
1534 static const struct ocfs2_xa_loc_operations ocfs2_xa_block_loc_ops = {
1535         .xlo_offset_pointer     = ocfs2_xa_block_offset_pointer,
1536         .xlo_wipe_namevalue     = ocfs2_xa_block_wipe_namevalue,
1537 };
1538
1539 static void *ocfs2_xa_bucket_offset_pointer(struct ocfs2_xa_loc *loc,
1540                                             int offset)
1541 {
1542         struct ocfs2_xattr_bucket *bucket = loc->xl_storage;
1543         int block, block_offset;
1544
1545         BUG_ON(offset >= OCFS2_XATTR_BUCKET_SIZE);
1546
1547         /* The header is at the front of the bucket */
1548         block = offset >> bucket->bu_inode->i_sb->s_blocksize_bits;
1549         block_offset = offset % bucket->bu_inode->i_sb->s_blocksize;
1550
1551         return bucket_block(bucket, block) + block_offset;
1552 }
1553
1554 static void ocfs2_xa_bucket_wipe_namevalue(struct ocfs2_xa_loc *loc)
1555 {
1556         int namevalue_size;
1557         struct ocfs2_xattr_entry *entry = loc->xl_entry;
1558         u64 value_size = le64_to_cpu(entry->xe_value_size);
1559
1560         namevalue_size = OCFS2_XATTR_SIZE(entry->xe_name_len);
1561         if (value_size > OCFS2_XATTR_INLINE_SIZE)
1562                 namevalue_size += OCFS2_XATTR_ROOT_SIZE;
1563         else
1564                 namevalue_size += OCFS2_XATTR_SIZE(value_size);
1565
1566         le16_add_cpu(&loc->xl_header->xh_name_value_len, -namevalue_size);
1567 }
1568
1569 /* Operations for xattrs stored in buckets. */
1570 static const struct ocfs2_xa_loc_operations ocfs2_xa_bucket_loc_ops = {
1571         .xlo_offset_pointer     = ocfs2_xa_bucket_offset_pointer,
1572         .xlo_wipe_namevalue     = ocfs2_xa_bucket_wipe_namevalue,
1573 };
1574
1575 static void ocfs2_xa_remove_entry(struct ocfs2_xa_loc *loc)
1576 {
1577         int index, count;
1578         struct ocfs2_xattr_header *xh = loc->xl_header;
1579         struct ocfs2_xattr_entry *entry = loc->xl_entry;
1580
1581         ocfs2_xa_wipe_namevalue(loc);
1582         loc->xl_entry = NULL;
1583
1584         le16_add_cpu(&xh->xh_count, -1);
1585         count = le16_to_cpu(xh->xh_count);
1586
1587         /*
1588          * Only zero out the entry if there are more remaining.  This is
1589          * important for an empty bucket, as it keeps track of the
1590          * bucket's hash value.  It doesn't hurt empty block storage.
1591          */
1592         if (count) {
1593                 index = ((char *)entry - (char *)&xh->xh_entries) /
1594                         sizeof(struct ocfs2_xattr_entry);
1595                 memmove(&xh->xh_entries[index], &xh->xh_entries[index + 1],
1596                         (count - index) * sizeof(struct ocfs2_xattr_entry));
1597                 memset(&xh->xh_entries[count], 0,
1598                        sizeof(struct ocfs2_xattr_entry));
1599         }
1600 }
1601
1602 static void ocfs2_init_dinode_xa_loc(struct ocfs2_xa_loc *loc,
1603                                      struct inode *inode,
1604                                      struct buffer_head *bh,
1605                                      struct ocfs2_xattr_entry *entry)
1606 {
1607         struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
1608
1609         loc->xl_ops = &ocfs2_xa_block_loc_ops;
1610         loc->xl_storage = bh;
1611         loc->xl_entry = entry;
1612
1613         if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_XATTR_FL)
1614                 loc->xl_size = le16_to_cpu(di->i_xattr_inline_size);
1615         else {
1616                 BUG_ON(entry);
1617                 loc->xl_size = OCFS2_SB(inode->i_sb)->s_xattr_inline_size;
1618         }
1619         loc->xl_header =
1620                 (struct ocfs2_xattr_header *)(bh->b_data + bh->b_size -
1621                                               loc->xl_size);
1622 }
1623
1624 static void ocfs2_init_xattr_block_xa_loc(struct ocfs2_xa_loc *loc,
1625                                           struct buffer_head *bh,
1626                                           struct ocfs2_xattr_entry *entry)
1627 {
1628         struct ocfs2_xattr_block *xb =
1629                 (struct ocfs2_xattr_block *)bh->b_data;
1630
1631         BUG_ON(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED);
1632
1633         loc->xl_ops = &ocfs2_xa_block_loc_ops;
1634         loc->xl_storage = bh;
1635         loc->xl_header = &(xb->xb_attrs.xb_header);
1636         loc->xl_entry = entry;
1637         loc->xl_size = bh->b_size - offsetof(struct ocfs2_xattr_block,
1638                                              xb_attrs.xb_header);
1639 }
1640
1641 static void ocfs2_init_xattr_bucket_xa_loc(struct ocfs2_xa_loc *loc,
1642                                            struct ocfs2_xattr_bucket *bucket,
1643                                            struct ocfs2_xattr_entry *entry)
1644 {
1645         loc->xl_ops = &ocfs2_xa_bucket_loc_ops;
1646         loc->xl_storage = bucket;
1647         loc->xl_header = bucket_xh(bucket);
1648         loc->xl_entry = entry;
1649         loc->xl_size = OCFS2_XATTR_BUCKET_SIZE;
1650 }
1651
1652 /*
1653  * ocfs2_xattr_set_entry_local()
1654  *
1655  * Set, replace or remove extended attribute in local.
1656  */
1657 static void ocfs2_xattr_set_entry_local(struct inode *inode,
1658                                         struct ocfs2_xattr_info *xi,
1659                                         struct ocfs2_xattr_search *xs,
1660                                         struct ocfs2_xattr_entry *last,
1661                                         size_t min_offs)
1662 {
1663         struct ocfs2_xa_loc loc;
1664
1665         if (xs->xattr_bh == xs->inode_bh)
1666                 ocfs2_init_dinode_xa_loc(&loc, inode, xs->inode_bh,
1667                                          xs->not_found ? NULL : xs->here);
1668         else
1669                 ocfs2_init_xattr_block_xa_loc(&loc, xs->xattr_bh,
1670                                               xs->not_found ? NULL : xs->here);
1671         if (xi->xi_value && xs->not_found) {
1672                 /* Insert the new xattr entry. */
1673                 le16_add_cpu(&xs->header->xh_count, 1);
1674                 ocfs2_xattr_set_type(last, xi->xi_name_index);
1675                 ocfs2_xattr_set_local(last, 1);
1676                 last->xe_name_len = xi->xi_name_len;
1677         } else {
1678                 void *first_val;
1679                 void *val;
1680                 size_t offs, size;
1681
1682                 first_val = xs->base + min_offs;
1683                 offs = le16_to_cpu(xs->here->xe_name_offset);
1684                 val = xs->base + offs;
1685
1686                 if (le64_to_cpu(xs->here->xe_value_size) >
1687                     OCFS2_XATTR_INLINE_SIZE)
1688                         size = OCFS2_XATTR_SIZE(xi->xi_name_len) +
1689                                 OCFS2_XATTR_ROOT_SIZE;
1690                 else
1691                         size = OCFS2_XATTR_SIZE(xi->xi_name_len) +
1692                         OCFS2_XATTR_SIZE(le64_to_cpu(xs->here->xe_value_size));
1693
1694                 if (xi->xi_value && size == OCFS2_XATTR_SIZE(xi->xi_name_len) +
1695                                 OCFS2_XATTR_SIZE(xi->xi_value_len)) {
1696                         /* The old and the new value have the
1697                            same size. Just replace the value. */
1698                         ocfs2_xattr_set_local(xs->here, 1);
1699                         xs->here->xe_value_size = cpu_to_le64(xi->xi_value_len);
1700                         /* Clear value bytes. */
1701                         memset(val + OCFS2_XATTR_SIZE(xi->xi_name_len),
1702                                0,
1703                                OCFS2_XATTR_SIZE(xi->xi_value_len));
1704                         memcpy(val + OCFS2_XATTR_SIZE(xi->xi_name_len),
1705                                xi->xi_value,
1706                                xi->xi_value_len);
1707                         return;
1708                 }
1709
1710                 if (!xi->xi_value)
1711                         ocfs2_xa_remove_entry(&loc);
1712                 else
1713                         ocfs2_xa_wipe_namevalue(&loc);
1714
1715                 min_offs += size;
1716         }
1717         if (xi->xi_value) {
1718                 /* Insert the new name+value. */
1719                 size_t size = OCFS2_XATTR_SIZE(xi->xi_name_len) +
1720                                 OCFS2_XATTR_SIZE(xi->xi_value_len);
1721                 void *val = xs->base + min_offs - size;
1722
1723                 xs->here->xe_name_offset = cpu_to_le16(min_offs - size);
1724                 memset(val, 0, size);
1725                 memcpy(val, xi->xi_name, xi->xi_name_len);
1726                 memcpy(val + OCFS2_XATTR_SIZE(xi->xi_name_len),
1727                        xi->xi_value,
1728                        xi->xi_value_len);
1729                 xs->here->xe_value_size = cpu_to_le64(xi->xi_value_len);
1730                 ocfs2_xattr_set_local(xs->here, 1);
1731                 ocfs2_xattr_hash_entry(inode, xs->header, xs->here);
1732         }
1733
1734         return;
1735 }
1736
1737 /*
1738  * ocfs2_xattr_set_entry()
1739  *
1740  * Set extended attribute entry into inode or block.
1741  *
1742  * If extended attribute value size > OCFS2_XATTR_INLINE_SIZE,
1743  * We first insert tree root(ocfs2_xattr_value_root) with set_entry_local(),
1744  * then set value in B tree with set_value_outside().
1745  */
1746 static int ocfs2_xattr_set_entry(struct inode *inode,
1747                                  struct ocfs2_xattr_info *xi,
1748                                  struct ocfs2_xattr_search *xs,
1749                                  struct ocfs2_xattr_set_ctxt *ctxt,
1750                                  int flag)
1751 {
1752         struct ocfs2_xattr_entry *last;
1753         struct ocfs2_inode_info *oi = OCFS2_I(inode);
1754         struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
1755         size_t min_offs = xs->end - xs->base;
1756         size_t size_l = 0;
1757         handle_t *handle = ctxt->handle;
1758         int free, i, ret;
1759         struct ocfs2_xattr_info xi_l = {
1760                 .xi_name_index = xi->xi_name_index,
1761                 .xi_name = xi->xi_name,
1762                 .xi_name_len = xi->xi_name_len,
1763                 .xi_value = xi->xi_value,
1764                 .xi_value_len = xi->xi_value_len,
1765         };
1766         struct ocfs2_xattr_value_buf vb = {
1767                 .vb_bh = xs->xattr_bh,
1768                 .vb_access = ocfs2_journal_access_di,
1769         };
1770
1771         if (!(flag & OCFS2_INLINE_XATTR_FL)) {
1772                 BUG_ON(xs->xattr_bh == xs->inode_bh);
1773                 vb.vb_access = ocfs2_journal_access_xb;
1774         } else
1775                 BUG_ON(xs->xattr_bh != xs->inode_bh);
1776
1777         /* Compute min_offs, last and free space. */
1778         last = xs->header->xh_entries;
1779
1780         for (i = 0 ; i < le16_to_cpu(xs->header->xh_count); i++) {
1781                 size_t offs = le16_to_cpu(last->xe_name_offset);
1782                 if (offs < min_offs)
1783                         min_offs = offs;
1784                 last += 1;
1785         }
1786
1787         free = min_offs - ((void *)last - xs->base) - OCFS2_XATTR_HEADER_GAP;
1788         if (free < 0)
1789                 return -EIO;
1790
1791         if (!xs->not_found) {
1792                 size_t size = 0;
1793                 if (ocfs2_xattr_is_local(xs->here))
1794                         size = OCFS2_XATTR_SIZE(xi->xi_name_len) +
1795                         OCFS2_XATTR_SIZE(le64_to_cpu(xs->here->xe_value_size));
1796                 else
1797                         size = OCFS2_XATTR_SIZE(xi->xi_name_len) +
1798                                 OCFS2_XATTR_ROOT_SIZE;
1799                 free += (size + sizeof(struct ocfs2_xattr_entry));
1800         }
1801         /* Check free space in inode or block */
1802         if (xi->xi_value && xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
1803                 if (free < sizeof(struct ocfs2_xattr_entry) +
1804                            OCFS2_XATTR_SIZE(xi->xi_name_len) +
1805                            OCFS2_XATTR_ROOT_SIZE) {
1806                         ret = -ENOSPC;
1807                         goto out;
1808                 }
1809                 size_l = OCFS2_XATTR_SIZE(xi->xi_name_len) +
1810                         OCFS2_XATTR_ROOT_SIZE;
1811                 xi_l.xi_value = (void *)&def_xv;
1812                 xi_l.xi_value_len = OCFS2_XATTR_ROOT_SIZE;
1813         } else if (xi->xi_value) {
1814                 if (free < sizeof(struct ocfs2_xattr_entry) +
1815                            OCFS2_XATTR_SIZE(xi->xi_name_len) +
1816                            OCFS2_XATTR_SIZE(xi->xi_value_len)) {
1817                         ret = -ENOSPC;
1818                         goto out;
1819                 }
1820         }
1821
1822         if (!xs->not_found) {
1823                 /* For existing extended attribute */
1824                 size_t size = OCFS2_XATTR_SIZE(xi->xi_name_len) +
1825                         OCFS2_XATTR_SIZE(le64_to_cpu(xs->here->xe_value_size));
1826                 size_t offs = le16_to_cpu(xs->here->xe_name_offset);
1827                 void *val = xs->base + offs;
1828
1829                 if (ocfs2_xattr_is_local(xs->here) && size == size_l) {
1830                         /* Replace existing local xattr with tree root */
1831                         ret = ocfs2_xattr_set_value_outside(inode, xi, xs,
1832                                                             ctxt, &vb, offs);
1833                         if (ret < 0)
1834                                 mlog_errno(ret);
1835                         goto out;
1836                 } else if (!ocfs2_xattr_is_local(xs->here)) {
1837                         /* For existing xattr which has value outside */
1838                         vb.vb_xv = (struct ocfs2_xattr_value_root *)
1839                                 (val + OCFS2_XATTR_SIZE(xi->xi_name_len));
1840
1841                         if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
1842                                 /*
1843                                  * If new value need set outside also,
1844                                  * first truncate old value to new value,
1845                                  * then set new value with set_value_outside().
1846                                  */
1847                                 ret = ocfs2_xattr_value_truncate(inode,
1848                                                         &vb,
1849                                                         xi->xi_value_len,
1850                                                         ctxt);
1851                                 if (ret < 0) {
1852                                         mlog_errno(ret);
1853                                         goto out;
1854                                 }
1855
1856                                 ret = ocfs2_xattr_update_entry(inode,
1857                                                                handle,
1858                                                                xi,
1859                                                                xs,
1860                                                                &vb,
1861                                                                offs);
1862                                 if (ret < 0) {
1863                                         mlog_errno(ret);
1864                                         goto out;
1865                                 }
1866
1867                                 ret = __ocfs2_xattr_set_value_outside(inode,
1868                                                         handle,
1869                                                         &vb,
1870                                                         xi->xi_value,
1871                                                         xi->xi_value_len);
1872                                 if (ret < 0)
1873                                         mlog_errno(ret);
1874                                 goto out;
1875                         } else {
1876                                 /*
1877                                  * If new value need set in local,
1878                                  * just trucate old value to zero.
1879                                  */
1880                                  ret = ocfs2_xattr_value_truncate(inode,
1881                                                                   &vb,
1882                                                                   0,
1883                                                                   ctxt);
1884                                 if (ret < 0)
1885                                         mlog_errno(ret);
1886                         }
1887                 }
1888         }
1889
1890         ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), xs->inode_bh,
1891                                       OCFS2_JOURNAL_ACCESS_WRITE);
1892         if (ret) {
1893                 mlog_errno(ret);
1894                 goto out;
1895         }
1896
1897         if (!(flag & OCFS2_INLINE_XATTR_FL)) {
1898                 ret = vb.vb_access(handle, INODE_CACHE(inode), vb.vb_bh,
1899                                    OCFS2_JOURNAL_ACCESS_WRITE);
1900                 if (ret) {
1901                         mlog_errno(ret);
1902                         goto out;
1903                 }
1904         }
1905
1906         /*
1907          * Set value in local, include set tree root in local.
1908          * This is the first step for value size >INLINE_SIZE.
1909          */
1910         ocfs2_xattr_set_entry_local(inode, &xi_l, xs, last, min_offs);
1911
1912         if (!(flag & OCFS2_INLINE_XATTR_FL)) {
1913                 ret = ocfs2_journal_dirty(handle, xs->xattr_bh);
1914                 if (ret < 0) {
1915                         mlog_errno(ret);
1916                         goto out;
1917                 }
1918         }
1919
1920         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) &&
1921             (flag & OCFS2_INLINE_XATTR_FL)) {
1922                 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1923                 unsigned int xattrsize = osb->s_xattr_inline_size;
1924
1925                 /*
1926                  * Adjust extent record count or inline data size
1927                  * to reserve space for extended attribute.
1928                  */
1929                 if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
1930                         struct ocfs2_inline_data *idata = &di->id2.i_data;
1931                         le16_add_cpu(&idata->id_count, -xattrsize);
1932                 } else if (!(ocfs2_inode_is_fast_symlink(inode))) {
1933                         struct ocfs2_extent_list *el = &di->id2.i_list;
1934                         le16_add_cpu(&el->l_count, -(xattrsize /
1935                                         sizeof(struct ocfs2_extent_rec)));
1936                 }
1937                 di->i_xattr_inline_size = cpu_to_le16(xattrsize);
1938         }
1939         /* Update xattr flag */
1940         spin_lock(&oi->ip_lock);
1941         oi->ip_dyn_features |= flag;
1942         di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
1943         spin_unlock(&oi->ip_lock);
1944
1945         ret = ocfs2_journal_dirty(handle, xs->inode_bh);
1946         if (ret < 0)
1947                 mlog_errno(ret);
1948
1949         if (!ret && xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
1950                 /*
1951                  * Set value outside in B tree.
1952                  * This is the second step for value size > INLINE_SIZE.
1953                  */
1954                 size_t offs = le16_to_cpu(xs->here->xe_name_offset);
1955                 ret = ocfs2_xattr_set_value_outside(inode, xi, xs, ctxt,
1956                                                     &vb, offs);
1957                 if (ret < 0) {
1958                         int ret2;
1959
1960                         mlog_errno(ret);
1961                         /*
1962                          * If set value outside failed, we have to clean
1963                          * the junk tree root we have already set in local.
1964                          */
1965                         ret2 = ocfs2_xattr_cleanup(inode, ctxt->handle,
1966                                                    xi, xs, &vb, offs);
1967                         if (ret2 < 0)
1968                                 mlog_errno(ret2);
1969                 }
1970         }
1971 out:
1972         return ret;
1973 }
1974
1975 /*
1976  * In xattr remove, if it is stored outside and refcounted, we may have
1977  * the chance to split the refcount tree. So need the allocators.
1978  */
1979 static int ocfs2_lock_xattr_remove_allocators(struct inode *inode,
1980                                         struct ocfs2_xattr_value_root *xv,
1981                                         struct ocfs2_caching_info *ref_ci,
1982                                         struct buffer_head *ref_root_bh,
1983                                         struct ocfs2_alloc_context **meta_ac,
1984                                         int *ref_credits)
1985 {
1986         int ret, meta_add = 0;
1987         u32 p_cluster, num_clusters;
1988         unsigned int ext_flags;
1989
1990         *ref_credits = 0;
1991         ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster,
1992                                        &num_clusters,
1993                                        &xv->xr_list,
1994                                        &ext_flags);
1995         if (ret) {
1996                 mlog_errno(ret);
1997                 goto out;
1998         }
1999
2000         if (!(ext_flags & OCFS2_EXT_REFCOUNTED))
2001                 goto out;
2002
2003         ret = ocfs2_refcounted_xattr_delete_need(inode, ref_ci,
2004                                                  ref_root_bh, xv,
2005                                                  &meta_add, ref_credits);
2006         if (ret) {
2007                 mlog_errno(ret);
2008                 goto out;
2009         }
2010
2011         ret = ocfs2_reserve_new_metadata_blocks(OCFS2_SB(inode->i_sb),
2012                                                 meta_add, meta_ac);
2013         if (ret)
2014                 mlog_errno(ret);
2015
2016 out:
2017         return ret;
2018 }
2019
2020 static int ocfs2_remove_value_outside(struct inode*inode,
2021                                       struct ocfs2_xattr_value_buf *vb,
2022                                       struct ocfs2_xattr_header *header,
2023                                       struct ocfs2_caching_info *ref_ci,
2024                                       struct buffer_head *ref_root_bh)
2025 {
2026         int ret = 0, i, ref_credits;
2027         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2028         struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, };
2029         void *val;
2030
2031         ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
2032
2033         for (i = 0; i < le16_to_cpu(header->xh_count); i++) {
2034                 struct ocfs2_xattr_entry *entry = &header->xh_entries[i];
2035
2036                 if (ocfs2_xattr_is_local(entry))
2037                         continue;
2038
2039                 val = (void *)header +
2040                         le16_to_cpu(entry->xe_name_offset);
2041                 vb->vb_xv = (struct ocfs2_xattr_value_root *)
2042                         (val + OCFS2_XATTR_SIZE(entry->xe_name_len));
2043
2044                 ret = ocfs2_lock_xattr_remove_allocators(inode, vb->vb_xv,
2045                                                          ref_ci, ref_root_bh,
2046                                                          &ctxt.meta_ac,
2047                                                          &ref_credits);
2048
2049                 ctxt.handle = ocfs2_start_trans(osb, ref_credits +
2050                                         ocfs2_remove_extent_credits(osb->sb));
2051                 if (IS_ERR(ctxt.handle)) {
2052                         ret = PTR_ERR(ctxt.handle);
2053                         mlog_errno(ret);
2054                         break;
2055                 }
2056
2057                 ret = ocfs2_xattr_value_truncate(inode, vb, 0, &ctxt);
2058                 if (ret < 0) {
2059                         mlog_errno(ret);
2060                         break;
2061                 }
2062
2063                 ocfs2_commit_trans(osb, ctxt.handle);
2064                 if (ctxt.meta_ac) {
2065                         ocfs2_free_alloc_context(ctxt.meta_ac);
2066                         ctxt.meta_ac = NULL;
2067                 }
2068         }
2069
2070         if (ctxt.meta_ac)
2071                 ocfs2_free_alloc_context(ctxt.meta_ac);
2072         ocfs2_schedule_truncate_log_flush(osb, 1);
2073         ocfs2_run_deallocs(osb, &ctxt.dealloc);
2074         return ret;
2075 }
2076
2077 static int ocfs2_xattr_ibody_remove(struct inode *inode,
2078                                     struct buffer_head *di_bh,
2079                                     struct ocfs2_caching_info *ref_ci,
2080                                     struct buffer_head *ref_root_bh)
2081 {
2082
2083         struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
2084         struct ocfs2_xattr_header *header;
2085         int ret;
2086         struct ocfs2_xattr_value_buf vb = {
2087                 .vb_bh = di_bh,
2088                 .vb_access = ocfs2_journal_access_di,
2089         };
2090
2091         header = (struct ocfs2_xattr_header *)
2092                  ((void *)di + inode->i_sb->s_blocksize -
2093                  le16_to_cpu(di->i_xattr_inline_size));
2094
2095         ret = ocfs2_remove_value_outside(inode, &vb, header,
2096                                          ref_ci, ref_root_bh);
2097
2098         return ret;
2099 }
2100
/*
 * Refcount-tree context handed as the opaque argument to the callback
 * of ocfs2_iterate_xattr_index_block() when removing an indexed xattr
 * block (see ocfs2_xattr_block_remove()).
 */
struct ocfs2_rm_xattr_bucket_para {
        struct ocfs2_caching_info *ref_ci;      /* refcount tree cache, may be NULL */
        struct buffer_head *ref_root_bh;        /* refcount tree root, may be NULL */
};
2105
2106 static int ocfs2_xattr_block_remove(struct inode *inode,
2107                                     struct buffer_head *blk_bh,
2108                                     struct ocfs2_caching_info *ref_ci,
2109                                     struct buffer_head *ref_root_bh)
2110 {
2111         struct ocfs2_xattr_block *xb;
2112         int ret = 0;
2113         struct ocfs2_xattr_value_buf vb = {
2114                 .vb_bh = blk_bh,
2115                 .vb_access = ocfs2_journal_access_xb,
2116         };
2117         struct ocfs2_rm_xattr_bucket_para args = {
2118                 .ref_ci = ref_ci,
2119                 .ref_root_bh = ref_root_bh,
2120         };
2121
2122         xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
2123         if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
2124                 struct ocfs2_xattr_header *header = &(xb->xb_attrs.xb_header);
2125                 ret = ocfs2_remove_value_outside(inode, &vb, header,
2126                                                  ref_ci, ref_root_bh);
2127         } else
2128                 ret = ocfs2_iterate_xattr_index_block(inode,
2129                                                 blk_bh,
2130                                                 ocfs2_rm_xattr_cluster,
2131                                                 &args);
2132
2133         return ret;
2134 }
2135
2136 static int ocfs2_xattr_free_block(struct inode *inode,
2137                                   u64 block,
2138                                   struct ocfs2_caching_info *ref_ci,
2139                                   struct buffer_head *ref_root_bh)
2140 {
2141         struct inode *xb_alloc_inode;
2142         struct buffer_head *xb_alloc_bh = NULL;
2143         struct buffer_head *blk_bh = NULL;
2144         struct ocfs2_xattr_block *xb;
2145         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2146         handle_t *handle;
2147         int ret = 0;
2148         u64 blk, bg_blkno;
2149         u16 bit;
2150
2151         ret = ocfs2_read_xattr_block(inode, block, &blk_bh);
2152         if (ret < 0) {
2153                 mlog_errno(ret);
2154                 goto out;
2155         }
2156
2157         ret = ocfs2_xattr_block_remove(inode, blk_bh, ref_ci, ref_root_bh);
2158         if (ret < 0) {
2159                 mlog_errno(ret);
2160                 goto out;
2161         }
2162
2163         xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
2164         blk = le64_to_cpu(xb->xb_blkno);
2165         bit = le16_to_cpu(xb->xb_suballoc_bit);
2166         bg_blkno = ocfs2_which_suballoc_group(blk, bit);
2167
2168         xb_alloc_inode = ocfs2_get_system_file_inode(osb,
2169                                 EXTENT_ALLOC_SYSTEM_INODE,
2170                                 le16_to_cpu(xb->xb_suballoc_slot));
2171         if (!xb_alloc_inode) {
2172                 ret = -ENOMEM;
2173                 mlog_errno(ret);
2174                 goto out;
2175         }
2176         mutex_lock(&xb_alloc_inode->i_mutex);
2177
2178         ret = ocfs2_inode_lock(xb_alloc_inode, &xb_alloc_bh, 1);
2179         if (ret < 0) {
2180                 mlog_errno(ret);
2181                 goto out_mutex;
2182         }
2183
2184         handle = ocfs2_start_trans(osb, OCFS2_SUBALLOC_FREE);
2185         if (IS_ERR(handle)) {
2186                 ret = PTR_ERR(handle);
2187                 mlog_errno(ret);
2188                 goto out_unlock;
2189         }
2190
2191         ret = ocfs2_free_suballoc_bits(handle, xb_alloc_inode, xb_alloc_bh,
2192                                        bit, bg_blkno, 1);
2193         if (ret < 0)
2194                 mlog_errno(ret);
2195
2196         ocfs2_commit_trans(osb, handle);
2197 out_unlock:
2198         ocfs2_inode_unlock(xb_alloc_inode, 1);
2199         brelse(xb_alloc_bh);
2200 out_mutex:
2201         mutex_unlock(&xb_alloc_inode->i_mutex);
2202         iput(xb_alloc_inode);
2203 out:
2204         brelse(blk_bh);
2205         return ret;
2206 }
2207
2208 /*
2209  * ocfs2_xattr_remove()
2210  *
2211  * Free extended attribute resources associated with this inode.
2212  */
2213 int ocfs2_xattr_remove(struct inode *inode, struct buffer_head *di_bh)
2214 {
2215         struct ocfs2_inode_info *oi = OCFS2_I(inode);
2216         struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
2217         struct ocfs2_refcount_tree *ref_tree = NULL;
2218         struct buffer_head *ref_root_bh = NULL;
2219         struct ocfs2_caching_info *ref_ci = NULL;
2220         handle_t *handle;
2221         int ret;
2222
2223         if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
2224                 return 0;
2225
2226         if (!(oi->ip_dyn_features & OCFS2_HAS_XATTR_FL))
2227                 return 0;
2228
2229         if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL) {
2230                 ret = ocfs2_lock_refcount_tree(OCFS2_SB(inode->i_sb),
2231                                                le64_to_cpu(di->i_refcount_loc),
2232                                                1, &ref_tree, &ref_root_bh);
2233                 if (ret) {
2234                         mlog_errno(ret);
2235                         goto out;
2236                 }
2237                 ref_ci = &ref_tree->rf_ci;
2238
2239         }
2240
2241         if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
2242                 ret = ocfs2_xattr_ibody_remove(inode, di_bh,
2243                                                ref_ci, ref_root_bh);
2244                 if (ret < 0) {
2245                         mlog_errno(ret);
2246                         goto out;
2247                 }
2248         }
2249
2250         if (di->i_xattr_loc) {
2251                 ret = ocfs2_xattr_free_block(inode,
2252                                              le64_to_cpu(di->i_xattr_loc),
2253                                              ref_ci, ref_root_bh);
2254                 if (ret < 0) {
2255                         mlog_errno(ret);
2256                         goto out;
2257                 }
2258         }
2259
2260         handle = ocfs2_start_trans((OCFS2_SB(inode->i_sb)),
2261                                    OCFS2_INODE_UPDATE_CREDITS);
2262         if (IS_ERR(handle)) {
2263                 ret = PTR_ERR(handle);
2264                 mlog_errno(ret);
2265                 goto out;
2266         }
2267         ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
2268                                       OCFS2_JOURNAL_ACCESS_WRITE);
2269         if (ret) {
2270                 mlog_errno(ret);
2271                 goto out_commit;
2272         }
2273
2274         di->i_xattr_loc = 0;
2275
2276         spin_lock(&oi->ip_lock);
2277         oi->ip_dyn_features &= ~(OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL);
2278         di->i_dyn_features = cpu_to_le16(oi->ip_dyn_features);
2279         spin_unlock(&oi->ip_lock);
2280
2281         ret = ocfs2_journal_dirty(handle, di_bh);
2282         if (ret < 0)
2283                 mlog_errno(ret);
2284 out_commit:
2285         ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
2286 out:
2287         if (ref_tree)
2288                 ocfs2_unlock_refcount_tree(OCFS2_SB(inode->i_sb), ref_tree, 1);
2289         brelse(ref_root_bh);
2290         return ret;
2291 }
2292
2293 static int ocfs2_xattr_has_space_inline(struct inode *inode,
2294                                         struct ocfs2_dinode *di)
2295 {
2296         struct ocfs2_inode_info *oi = OCFS2_I(inode);
2297         unsigned int xattrsize = OCFS2_SB(inode->i_sb)->s_xattr_inline_size;
2298         int free;
2299
2300         if (xattrsize < OCFS2_MIN_XATTR_INLINE_SIZE)
2301                 return 0;
2302
2303         if (oi->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
2304                 struct ocfs2_inline_data *idata = &di->id2.i_data;
2305                 free = le16_to_cpu(idata->id_count) - le64_to_cpu(di->i_size);
2306         } else if (ocfs2_inode_is_fast_symlink(inode)) {
2307                 free = ocfs2_fast_symlink_chars(inode->i_sb) -
2308                         le64_to_cpu(di->i_size);
2309         } else {
2310                 struct ocfs2_extent_list *el = &di->id2.i_list;
2311                 free = (le16_to_cpu(el->l_count) -
2312                         le16_to_cpu(el->l_next_free_rec)) *
2313                         sizeof(struct ocfs2_extent_rec);
2314         }
2315         if (free >= xattrsize)
2316                 return 1;
2317
2318         return 0;
2319 }
2320
2321 /*
2322  * ocfs2_xattr_ibody_find()
2323  *
2324  * Find extended attribute in inode block and
2325  * fill search info into struct ocfs2_xattr_search.
2326  */
2327 static int ocfs2_xattr_ibody_find(struct inode *inode,
2328                                   int name_index,
2329                                   const char *name,
2330                                   struct ocfs2_xattr_search *xs)
2331 {
2332         struct ocfs2_inode_info *oi = OCFS2_I(inode);
2333         struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
2334         int ret;
2335         int has_space = 0;
2336
2337         if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
2338                 return 0;
2339
2340         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
2341                 down_read(&oi->ip_alloc_sem);
2342                 has_space = ocfs2_xattr_has_space_inline(inode, di);
2343                 up_read(&oi->ip_alloc_sem);
2344                 if (!has_space)
2345                         return 0;
2346         }
2347
2348         xs->xattr_bh = xs->inode_bh;
2349         xs->end = (void *)di + inode->i_sb->s_blocksize;
2350         if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)
2351                 xs->header = (struct ocfs2_xattr_header *)
2352                         (xs->end - le16_to_cpu(di->i_xattr_inline_size));
2353         else
2354                 xs->header = (struct ocfs2_xattr_header *)
2355                         (xs->end - OCFS2_SB(inode->i_sb)->s_xattr_inline_size);
2356         xs->base = (void *)xs->header;
2357         xs->here = xs->header->xh_entries;
2358
2359         /* Find the named attribute. */
2360         if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
2361                 ret = ocfs2_xattr_find_entry(name_index, name, xs);
2362                 if (ret && ret != -ENODATA)
2363                         return ret;
2364                 xs->not_found = ret;
2365         }
2366
2367         return 0;
2368 }
2369
2370 /*
2371  * ocfs2_xattr_ibody_set()
2372  *
2373  * Set, replace or remove an extended attribute into inode block.
2374  *
2375  */
2376 static int ocfs2_xattr_ibody_set(struct inode *inode,
2377                                  struct ocfs2_xattr_info *xi,
2378                                  struct ocfs2_xattr_search *xs,
2379                                  struct ocfs2_xattr_set_ctxt *ctxt)
2380 {
2381         struct ocfs2_inode_info *oi = OCFS2_I(inode);
2382         struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
2383         int ret;
2384
2385         if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE)
2386                 return -ENOSPC;
2387
2388         down_write(&oi->ip_alloc_sem);
2389         if (!(oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL)) {
2390                 if (!ocfs2_xattr_has_space_inline(inode, di)) {
2391                         ret = -ENOSPC;
2392                         goto out;
2393                 }
2394         }
2395
2396         ret = ocfs2_xattr_set_entry(inode, xi, xs, ctxt,
2397                                 (OCFS2_INLINE_XATTR_FL | OCFS2_HAS_XATTR_FL));
2398 out:
2399         up_write(&oi->ip_alloc_sem);
2400
2401         return ret;
2402 }
2403
2404 /*
2405  * ocfs2_xattr_block_find()
2406  *
2407  * Find extended attribute in external block and
2408  * fill search info into struct ocfs2_xattr_search.
2409  */
2410 static int ocfs2_xattr_block_find(struct inode *inode,
2411                                   int name_index,
2412                                   const char *name,
2413                                   struct ocfs2_xattr_search *xs)
2414 {
2415         struct ocfs2_dinode *di = (struct ocfs2_dinode *)xs->inode_bh->b_data;
2416         struct buffer_head *blk_bh = NULL;
2417         struct ocfs2_xattr_block *xb;
2418         int ret = 0;
2419
2420         if (!di->i_xattr_loc)
2421                 return ret;
2422
2423         ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
2424                                      &blk_bh);
2425         if (ret < 0) {
2426                 mlog_errno(ret);
2427                 return ret;
2428         }
2429
2430         xs->xattr_bh = blk_bh;
2431         xb = (struct ocfs2_xattr_block *)blk_bh->b_data;
2432
2433         if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
2434                 xs->header = &xb->xb_attrs.xb_header;
2435                 xs->base = (void *)xs->header;
2436                 xs->end = (void *)(blk_bh->b_data) + blk_bh->b_size;
2437                 xs->here = xs->header->xh_entries;
2438
2439                 ret = ocfs2_xattr_find_entry(name_index, name, xs);
2440         } else
2441                 ret = ocfs2_xattr_index_block_find(inode, blk_bh,
2442                                                    name_index,
2443                                                    name, xs);
2444
2445         if (ret && ret != -ENODATA) {
2446                 xs->xattr_bh = NULL;
2447                 goto cleanup;
2448         }
2449         xs->not_found = ret;
2450         return 0;
2451 cleanup:
2452         brelse(blk_bh);
2453
2454         return ret;
2455 }
2456
2457 static int ocfs2_create_xattr_block(handle_t *handle,
2458                                     struct inode *inode,
2459                                     struct buffer_head *inode_bh,
2460                                     struct ocfs2_alloc_context *meta_ac,
2461                                     struct buffer_head **ret_bh,
2462                                     int indexed)
2463 {
2464         int ret;
2465         u16 suballoc_bit_start;
2466         u32 num_got;
2467         u64 first_blkno;
2468         struct ocfs2_dinode *di =  (struct ocfs2_dinode *)inode_bh->b_data;
2469         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2470         struct buffer_head *new_bh = NULL;
2471         struct ocfs2_xattr_block *xblk;
2472
2473         ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), inode_bh,
2474                                       OCFS2_JOURNAL_ACCESS_CREATE);
2475         if (ret < 0) {
2476                 mlog_errno(ret);
2477                 goto end;
2478         }
2479
2480         ret = ocfs2_claim_metadata(osb, handle, meta_ac, 1,
2481                                    &suballoc_bit_start, &num_got,
2482                                    &first_blkno);
2483         if (ret < 0) {
2484                 mlog_errno(ret);
2485                 goto end;
2486         }
2487
2488         new_bh = sb_getblk(inode->i_sb, first_blkno);
2489         ocfs2_set_new_buffer_uptodate(INODE_CACHE(inode), new_bh);
2490
2491         ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode),
2492                                       new_bh,
2493                                       OCFS2_JOURNAL_ACCESS_CREATE);
2494         if (ret < 0) {
2495                 mlog_errno(ret);
2496                 goto end;
2497         }
2498
2499         /* Initialize ocfs2_xattr_block */
2500         xblk = (struct ocfs2_xattr_block *)new_bh->b_data;
2501         memset(xblk, 0, inode->i_sb->s_blocksize);
2502         strcpy((void *)xblk, OCFS2_XATTR_BLOCK_SIGNATURE);
2503         xblk->xb_suballoc_slot = cpu_to_le16(meta_ac->ac_alloc_slot);
2504         xblk->xb_suballoc_bit = cpu_to_le16(suballoc_bit_start);
2505         xblk->xb_fs_generation = cpu_to_le32(osb->fs_generation);
2506         xblk->xb_blkno = cpu_to_le64(first_blkno);
2507
2508         if (indexed) {
2509                 struct ocfs2_xattr_tree_root *xr = &xblk->xb_attrs.xb_root;
2510                 xr->xt_clusters = cpu_to_le32(1);
2511                 xr->xt_last_eb_blk = 0;
2512                 xr->xt_list.l_tree_depth = 0;
2513                 xr->xt_list.l_count = cpu_to_le16(
2514                                         ocfs2_xattr_recs_per_xb(inode->i_sb));
2515                 xr->xt_list.l_next_free_rec = cpu_to_le16(1);
2516                 xblk->xb_flags = cpu_to_le16(OCFS2_XATTR_INDEXED);
2517         }
2518
2519         ret = ocfs2_journal_dirty(handle, new_bh);
2520         if (ret < 0) {
2521                 mlog_errno(ret);
2522                 goto end;
2523         }
2524         di->i_xattr_loc = cpu_to_le64(first_blkno);
2525         ocfs2_journal_dirty(handle, inode_bh);
2526
2527         *ret_bh = new_bh;
2528         new_bh = NULL;
2529
2530 end:
2531         brelse(new_bh);
2532         return ret;
2533 }
2534
2535 /*
2536  * ocfs2_xattr_block_set()
2537  *
2538  * Set, replace or remove an extended attribute into external block.
2539  *
2540  */
2541 static int ocfs2_xattr_block_set(struct inode *inode,
2542                                  struct ocfs2_xattr_info *xi,
2543                                  struct ocfs2_xattr_search *xs,
2544                                  struct ocfs2_xattr_set_ctxt *ctxt)
2545 {
2546         struct buffer_head *new_bh = NULL;
2547         handle_t *handle = ctxt->handle;
2548         struct ocfs2_xattr_block *xblk = NULL;
2549         int ret;
2550
2551         if (!xs->xattr_bh) {
2552                 ret = ocfs2_create_xattr_block(handle, inode, xs->inode_bh,
2553                                                ctxt->meta_ac, &new_bh, 0);
2554                 if (ret) {
2555                         mlog_errno(ret);
2556                         goto end;
2557                 }
2558
2559                 xs->xattr_bh = new_bh;
2560                 xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
2561                 xs->header = &xblk->xb_attrs.xb_header;
2562                 xs->base = (void *)xs->header;
2563                 xs->end = (void *)xblk + inode->i_sb->s_blocksize;
2564                 xs->here = xs->header->xh_entries;
2565         } else
2566                 xblk = (struct ocfs2_xattr_block *)xs->xattr_bh->b_data;
2567
2568         if (!(le16_to_cpu(xblk->xb_flags) & OCFS2_XATTR_INDEXED)) {
2569                 /* Set extended attribute into external block */
2570                 ret = ocfs2_xattr_set_entry(inode, xi, xs, ctxt,
2571                                             OCFS2_HAS_XATTR_FL);
2572                 if (!ret || ret != -ENOSPC)
2573                         goto end;
2574
2575                 ret = ocfs2_xattr_create_index_block(inode, xs, ctxt);
2576                 if (ret)
2577                         goto end;
2578         }
2579
2580         ret = ocfs2_xattr_set_entry_index_block(inode, xi, xs, ctxt);
2581
2582 end:
2583
2584         return ret;
2585 }
2586
2587 /* Check whether the new xattr can be inserted into the inode. */
2588 static int ocfs2_xattr_can_be_in_inode(struct inode *inode,
2589                                        struct ocfs2_xattr_info *xi,
2590                                        struct ocfs2_xattr_search *xs)
2591 {
2592         u64 value_size;
2593         struct ocfs2_xattr_entry *last;
2594         int free, i;
2595         size_t min_offs = xs->end - xs->base;
2596
2597         if (!xs->header)
2598                 return 0;
2599
2600         last = xs->header->xh_entries;
2601
2602         for (i = 0; i < le16_to_cpu(xs->header->xh_count); i++) {
2603                 size_t offs = le16_to_cpu(last->xe_name_offset);
2604                 if (offs < min_offs)
2605                         min_offs = offs;
2606                 last += 1;
2607         }
2608
2609         free = min_offs - ((void *)last - xs->base) - OCFS2_XATTR_HEADER_GAP;
2610         if (free < 0)
2611                 return 0;
2612
2613         BUG_ON(!xs->not_found);
2614
2615         if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE)
2616                 value_size = OCFS2_XATTR_ROOT_SIZE;
2617         else
2618                 value_size = OCFS2_XATTR_SIZE(xi->xi_value_len);
2619
2620         if (free >= sizeof(struct ocfs2_xattr_entry) +
2621                    OCFS2_XATTR_SIZE(xi->xi_name_len) + value_size)
2622                 return 1;
2623
2624         return 0;
2625 }
2626
2627 static int ocfs2_calc_xattr_set_need(struct inode *inode,
2628                                      struct ocfs2_dinode *di,
2629                                      struct ocfs2_xattr_info *xi,
2630                                      struct ocfs2_xattr_search *xis,
2631                                      struct ocfs2_xattr_search *xbs,
2632                                      int *clusters_need,
2633                                      int *meta_need,
2634                                      int *credits_need)
2635 {
2636         int ret = 0, old_in_xb = 0;
2637         int clusters_add = 0, meta_add = 0, credits = 0;
2638         struct buffer_head *bh = NULL;
2639         struct ocfs2_xattr_block *xb = NULL;
2640         struct ocfs2_xattr_entry *xe = NULL;
2641         struct ocfs2_xattr_value_root *xv = NULL;
2642         char *base = NULL;
2643         int name_offset, name_len = 0;
2644         u32 new_clusters = ocfs2_clusters_for_bytes(inode->i_sb,
2645                                                     xi->xi_value_len);
2646         u64 value_size;
2647
2648         /*
2649          * Calculate the clusters we need to write.
2650          * No matter whether we replace an old one or add a new one,
2651          * we need this for writing.
2652          */
2653         if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE)
2654                 credits += new_clusters *
2655                            ocfs2_clusters_to_blocks(inode->i_sb, 1);
2656
2657         if (xis->not_found && xbs->not_found) {
2658                 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2659
2660                 if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
2661                         clusters_add += new_clusters;
2662                         credits += ocfs2_calc_extend_credits(inode->i_sb,
2663                                                         &def_xv.xv.xr_list,
2664                                                         new_clusters);
2665                 }
2666
2667                 goto meta_guess;
2668         }
2669
2670         if (!xis->not_found) {
2671                 xe = xis->here;
2672                 name_offset = le16_to_cpu(xe->xe_name_offset);
2673                 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
2674                 base = xis->base;
2675                 credits += OCFS2_INODE_UPDATE_CREDITS;
2676         } else {
2677                 int i, block_off = 0;
2678                 xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
2679                 xe = xbs->here;
2680                 name_offset = le16_to_cpu(xe->xe_name_offset);
2681                 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
2682                 i = xbs->here - xbs->header->xh_entries;
2683                 old_in_xb = 1;
2684
2685                 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
2686                         ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
2687                                                         bucket_xh(xbs->bucket),
2688                                                         i, &block_off,
2689                                                         &name_offset);
2690                         base = bucket_block(xbs->bucket, block_off);
2691                         credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2692                 } else {
2693                         base = xbs->base;
2694                         credits += OCFS2_XATTR_BLOCK_UPDATE_CREDITS;
2695                 }
2696         }
2697
2698         /*
2699          * delete a xattr doesn't need metadata and cluster allocation.
2700          * so just calculate the credits and return.
2701          *
2702          * The credits for removing the value tree will be extended
2703          * by ocfs2_remove_extent itself.
2704          */
2705         if (!xi->xi_value) {
2706                 if (!ocfs2_xattr_is_local(xe))
2707                         credits += ocfs2_remove_extent_credits(inode->i_sb);
2708
2709                 goto out;
2710         }
2711
2712         /* do cluster allocation guess first. */
2713         value_size = le64_to_cpu(xe->xe_value_size);
2714
2715         if (old_in_xb) {
2716                 /*
2717                  * In xattr set, we always try to set the xe in inode first,
2718                  * so if it can be inserted into inode successfully, the old
2719                  * one will be removed from the xattr block, and this xattr
2720                  * will be inserted into inode as a new xattr in inode.
2721                  */
2722                 if (ocfs2_xattr_can_be_in_inode(inode, xi, xis)) {
2723                         clusters_add += new_clusters;
2724                         credits += ocfs2_remove_extent_credits(inode->i_sb) +
2725                                     OCFS2_INODE_UPDATE_CREDITS;
2726                         if (!ocfs2_xattr_is_local(xe))
2727                                 credits += ocfs2_calc_extend_credits(
2728                                                         inode->i_sb,
2729                                                         &def_xv.xv.xr_list,
2730                                                         new_clusters);
2731                         goto out;
2732                 }
2733         }
2734
2735         if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE) {
2736                 /* the new values will be stored outside. */
2737                 u32 old_clusters = 0;
2738
2739                 if (!ocfs2_xattr_is_local(xe)) {
2740                         old_clusters =  ocfs2_clusters_for_bytes(inode->i_sb,
2741                                                                  value_size);
2742                         xv = (struct ocfs2_xattr_value_root *)
2743                              (base + name_offset + name_len);
2744                         value_size = OCFS2_XATTR_ROOT_SIZE;
2745                 } else
2746                         xv = &def_xv.xv;
2747
2748                 if (old_clusters >= new_clusters) {
2749                         credits += ocfs2_remove_extent_credits(inode->i_sb);
2750                         goto out;
2751                 } else {
2752                         meta_add += ocfs2_extend_meta_needed(&xv->xr_list);
2753                         clusters_add += new_clusters - old_clusters;
2754                         credits += ocfs2_calc_extend_credits(inode->i_sb,
2755                                                              &xv->xr_list,
2756                                                              new_clusters -
2757                                                              old_clusters);
2758                         if (value_size >= OCFS2_XATTR_ROOT_SIZE)
2759                                 goto out;
2760                 }
2761         } else {
2762                 /*
2763                  * Now the new value will be stored inside. So if the new
2764                  * value is smaller than the size of value root or the old
2765                  * value, we don't need any allocation, otherwise we have
2766                  * to guess metadata allocation.
2767                  */
2768                 if ((ocfs2_xattr_is_local(xe) &&
2769                      (value_size >= xi->xi_value_len)) ||
2770                     (!ocfs2_xattr_is_local(xe) &&
2771                      OCFS2_XATTR_ROOT_SIZE >= xi->xi_value_len))
2772                         goto out;
2773         }
2774
2775 meta_guess:
2776         /* calculate metadata allocation. */
2777         if (di->i_xattr_loc) {
2778                 if (!xbs->xattr_bh) {
2779                         ret = ocfs2_read_xattr_block(inode,
2780                                                      le64_to_cpu(di->i_xattr_loc),
2781                                                      &bh);
2782                         if (ret) {
2783                                 mlog_errno(ret);
2784                                 goto out;
2785                         }
2786
2787                         xb = (struct ocfs2_xattr_block *)bh->b_data;
2788                 } else
2789                         xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
2790
2791                 /*
2792                  * If there is already an xattr tree, good, we can calculate
2793                  * like other b-trees. Otherwise we may have the chance of
2794                  * create a tree, the credit calculation is borrowed from
2795                  * ocfs2_calc_extend_credits with root_el = NULL. And the
2796                  * new tree will be cluster based, so no meta is needed.
2797                  */
2798                 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
2799                         struct ocfs2_extent_list *el =
2800                                  &xb->xb_attrs.xb_root.xt_list;
2801                         meta_add += ocfs2_extend_meta_needed(el);
2802                         credits += ocfs2_calc_extend_credits(inode->i_sb,
2803                                                              el, 1);
2804                 } else
2805                         credits += OCFS2_SUBALLOC_ALLOC + 1;
2806
2807                 /*
2808                  * This cluster will be used either for new bucket or for
2809                  * new xattr block.
2810                  * If the cluster size is the same as the bucket size, one
2811                  * more is needed since we may need to extend the bucket
2812                  * also.
2813                  */
2814                 clusters_add += 1;
2815                 credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2816                 if (OCFS2_XATTR_BUCKET_SIZE ==
2817                         OCFS2_SB(inode->i_sb)->s_clustersize) {
2818                         credits += ocfs2_blocks_per_xattr_bucket(inode->i_sb);
2819                         clusters_add += 1;
2820                 }
2821         } else {
2822                 meta_add += 1;
2823                 credits += OCFS2_XATTR_BLOCK_CREATE_CREDITS;
2824         }
2825 out:
2826         if (clusters_need)
2827                 *clusters_need = clusters_add;
2828         if (meta_need)
2829                 *meta_need = meta_add;
2830         if (credits_need)
2831                 *credits_need = credits;
2832         brelse(bh);
2833         return ret;
2834 }
2835
2836 static int ocfs2_init_xattr_set_ctxt(struct inode *inode,
2837                                      struct ocfs2_dinode *di,
2838                                      struct ocfs2_xattr_info *xi,
2839                                      struct ocfs2_xattr_search *xis,
2840                                      struct ocfs2_xattr_search *xbs,
2841                                      struct ocfs2_xattr_set_ctxt *ctxt,
2842                                      int extra_meta,
2843                                      int *credits)
2844 {
2845         int clusters_add, meta_add, ret;
2846         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
2847
2848         memset(ctxt, 0, sizeof(struct ocfs2_xattr_set_ctxt));
2849
2850         ocfs2_init_dealloc_ctxt(&ctxt->dealloc);
2851
2852         ret = ocfs2_calc_xattr_set_need(inode, di, xi, xis, xbs,
2853                                         &clusters_add, &meta_add, credits);
2854         if (ret) {
2855                 mlog_errno(ret);
2856                 return ret;
2857         }
2858
2859         meta_add += extra_meta;
2860         mlog(0, "Set xattr %s, reserve meta blocks = %d, clusters = %d, "
2861              "credits = %d\n", xi->xi_name, meta_add, clusters_add, *credits);
2862
2863         if (meta_add) {
2864                 ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add,
2865                                                         &ctxt->meta_ac);
2866                 if (ret) {
2867                         mlog_errno(ret);
2868                         goto out;
2869                 }
2870         }
2871
2872         if (clusters_add) {
2873                 ret = ocfs2_reserve_clusters(osb, clusters_add, &ctxt->data_ac);
2874                 if (ret)
2875                         mlog_errno(ret);
2876         }
2877 out:
2878         if (ret) {
2879                 if (ctxt->meta_ac) {
2880                         ocfs2_free_alloc_context(ctxt->meta_ac);
2881                         ctxt->meta_ac = NULL;
2882                 }
2883
2884                 /*
2885                  * We cannot have an error and a non null ctxt->data_ac.
2886                  */
2887         }
2888
2889         return ret;
2890 }
2891
/*
 * __ocfs2_xattr_set_handle()
 *
 * Apply the set/replace/remove described by @xi inside the transaction
 * carried by @ctxt, using the search results @xis (inode body) and
 * @xbs (external xattr block).
 *
 * - Removal (@xi->xi_value is NULL): the entry is removed from the
 *   location it was found in, the inode body being checked first.
 * - Set/replace: the inode body is tried first.  If that succeeds and an
 *   old copy exists in the xattr block, the old copy is removed.  If the
 *   inode body reports -ENOSPC, the xattr is stored in the xattr block
 *   and any old copy in the inode body is removed.
 *
 * Side effects on the arguments: the "remove the old copy" paths clear
 * @xi->xi_value and @xi->xi_value_len, and the -ENOSPC path may leave
 * @xbs->not_found set to -ENODATA.
 *
 * On success the inode ctime is updated.  Returns 0 or a negative error
 * code.
 */
2892 static int __ocfs2_xattr_set_handle(struct inode *inode,
2893                                     struct ocfs2_dinode *di,
2894                                     struct ocfs2_xattr_info *xi,
2895                                     struct ocfs2_xattr_search *xis,
2896                                     struct ocfs2_xattr_search *xbs,
2897                                     struct ocfs2_xattr_set_ctxt *ctxt)
2898 {
2899         int ret = 0, credits, old_found;
2900
2901         if (!xi->xi_value) {
2902                 /* Remove existing extended attribute */
2903                 if (!xis->not_found)
2904                         ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
2905                 else if (!xbs->not_found)
2906                         ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
2907         } else {
2908                 /* We always try to set extended attribute into inode first*/
2909                 ret = ocfs2_xattr_ibody_set(inode, xi, xis, ctxt);
2910                 if (!ret && !xbs->not_found) {
2911                         /*
2912                          * If succeed and that extended attribute existing in
2913                          * external block, then we will remove it.
2914                          */
2915                         xi->xi_value = NULL;
2916                         xi->xi_value_len = 0;
2917
                             /*
                              * ocfs2_calc_xattr_set_need() looks at the
                              * inode-body entry whenever xis->not_found is 0,
                              * so temporarily mark it as not found to get the
                              * credits for the xattr-block copy instead.
                              */
2918                         old_found = xis->not_found;
2919                         xis->not_found = -ENODATA;
2920                         ret = ocfs2_calc_xattr_set_need(inode,
2921                                                         di,
2922                                                         xi,
2923                                                         xis,
2924                                                         xbs,
2925                                                         NULL,
2926                                                         NULL,
2927                                                         &credits);
2928                         xis->not_found = old_found;
2929                         if (ret) {
2930                                 mlog_errno(ret);
2931                                 goto out;
2932                         }
2933
                             /*
                              * Request the credits just computed on top of
                              * what the handle currently holds.
                              */
2934                         ret = ocfs2_extend_trans(ctxt->handle, credits +
2935                                         ctxt->handle->h_buffer_credits);
2936                         if (ret) {
2937                                 mlog_errno(ret);
2938                                 goto out;
2939                         }
2940                         ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
2941                 } else if (ret == -ENOSPC) {
                             /*
                              * The inode has an xattr block that has not been
                              * searched yet: look it up now and make sure the
                              * transaction has credits for working on it.
                              */
2942                         if (di->i_xattr_loc && !xbs->xattr_bh) {
2943                                 ret = ocfs2_xattr_block_find(inode,
2944                                                              xi->xi_name_index,
2945                                                              xi->xi_name, xbs);
2946                                 if (ret)
2947                                         goto out;
2948
2949                                 old_found = xis->not_found;
2950                                 xis->not_found = -ENODATA;
2951                                 ret = ocfs2_calc_xattr_set_need(inode,
2952                                                                 di,
2953                                                                 xi,
2954                                                                 xis,
2955                                                                 xbs,
2956                                                                 NULL,
2957                                                                 NULL,
2958                                                                 &credits);
2959                                 xis->not_found = old_found;
2960                                 if (ret) {
2961                                         mlog_errno(ret);
2962                                         goto out;
2963                                 }
2964
2965                                 ret = ocfs2_extend_trans(ctxt->handle, credits +
2966                                         ctxt->handle->h_buffer_credits);
2967                                 if (ret) {
2968                                         mlog_errno(ret);
2969                                         goto out;
2970                                 }
2971                         }
2972                         /*
2973                          * If no space in inode, we will set extended attribute
2974                          * into external block.
2975                          */
2976                         ret = ocfs2_xattr_block_set(inode, xi, xbs, ctxt);
2977                         if (ret)
2978                                 goto out;
2979                         if (!xis->not_found) {
2980                                 /*
2981                                  * If succeed and that extended attribute
2982                                  * existing in inode, we will remove it.
2983                                  */
2984                                 xi->xi_value = NULL;
2985                                 xi->xi_value_len = 0;
                                     /*
                                      * NOTE(review): unlike xis->not_found in
                                      * the cases above, xbs->not_found is not
                                      * restored afterwards - confirm callers
                                      * do not rely on its previous value.
                                      */
2986                                 xbs->not_found = -ENODATA;
2987                                 ret = ocfs2_calc_xattr_set_need(inode,
2988                                                                 di,
2989                                                                 xi,
2990                                                                 xis,
2991                                                                 xbs,
2992                                                                 NULL,
2993                                                                 NULL,
2994                                                                 &credits);
2995                                 if (ret) {
2996                                         mlog_errno(ret);
2997                                         goto out;
2998                                 }
2999
3000                                 ret = ocfs2_extend_trans(ctxt->handle, credits +
3001                                                 ctxt->handle->h_buffer_credits);
3002                                 if (ret) {
3003                                         mlog_errno(ret);
3004                                         goto out;
3005                                 }
3006                                 ret = ocfs2_xattr_ibody_set(inode, xi,
3007                                                             xis, ctxt);
3008                         }
3009                 }
3010         }
3011
3012         if (!ret) {
3013                 /* Update inode ctime. */
3014                 ret = ocfs2_journal_access_di(ctxt->handle, INODE_CACHE(inode),
3015                                               xis->inode_bh,
3016                                               OCFS2_JOURNAL_ACCESS_WRITE);
3017                 if (ret) {
3018                         mlog_errno(ret);
3019                         goto out;
3020                 }
3021
3022                 inode->i_ctime = CURRENT_TIME;
3023                 di->i_ctime = cpu_to_le64(inode->i_ctime.tv_sec);
3024                 di->i_ctime_nsec = cpu_to_le32(inode->i_ctime.tv_nsec);
                     /* The return value of this journal_dirty call is not checked. */
3025                 ocfs2_journal_dirty(ctxt->handle, xis->inode_bh);
3026         }
3027 out:
3028         return ret;
3029 }
3030
3031 /*
3032  * This function only called duing creating inode
3033  * for init security/acl xattrs of the new inode.
3034  * All transanction credits have been reserved in mknod.
3035  */
3036 int ocfs2_xattr_set_handle(handle_t *handle,
3037                            struct inode *inode,
3038                            struct buffer_head *di_bh,
3039                            int name_index,
3040                            const char *name,
3041                            const void *value,
3042                            size_t value_len,
3043                            int flags,
3044                            struct ocfs2_alloc_context *meta_ac,
3045                            struct ocfs2_alloc_context *data_ac)
3046 {
3047         struct ocfs2_dinode *di;
3048         int ret;
3049
3050         struct ocfs2_xattr_info xi = {
3051                 .xi_name_index = name_index,
3052                 .xi_name = name,
3053                 .xi_name_len = strlen(name),
3054                 .xi_value = value,
3055                 .xi_value_len = value_len,
3056         };
3057
3058         struct ocfs2_xattr_search xis = {
3059                 .not_found = -ENODATA,
3060         };
3061
3062         struct ocfs2_xattr_search xbs = {
3063                 .not_found = -ENODATA,
3064         };
3065
3066         struct ocfs2_xattr_set_ctxt ctxt = {
3067                 .handle = handle,
3068                 .meta_ac = meta_ac,
3069                 .data_ac = data_ac,
3070         };
3071
3072         if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
3073                 return -EOPNOTSUPP;
3074
3075         /*
3076          * In extreme situation, may need xattr bucket when
3077          * block size is too small. And we have already reserved
3078          * the credits for bucket in mknod.
3079          */
3080         if (inode->i_sb->s_blocksize == OCFS2_MIN_BLOCKSIZE) {
3081                 xbs.bucket = ocfs2_xattr_bucket_new(inode);
3082                 if (!xbs.bucket) {
3083                         mlog_errno(-ENOMEM);
3084                         return -ENOMEM;
3085                 }
3086         }
3087
3088         xis.inode_bh = xbs.inode_bh = di_bh;
3089         di = (struct ocfs2_dinode *)di_bh->b_data;
3090
3091         down_write(&OCFS2_I(inode)->ip_xattr_sem);
3092
3093         ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
3094         if (ret)
3095                 goto cleanup;
3096         if (xis.not_found) {
3097                 ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs);
3098                 if (ret)
3099                         goto cleanup;
3100         }
3101
3102         ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt);
3103
3104 cleanup:
3105         up_write(&OCFS2_I(inode)->ip_xattr_sem);
3106         brelse(xbs.xattr_bh);
3107         ocfs2_xattr_bucket_free(xbs.bucket);
3108
3109         return ret;
3110 }
3111
3112 /*
3113  * ocfs2_xattr_set()
3114  *
3115  * Set, replace or remove an extended attribute for this inode.
3116  * value is NULL to remove an existing extended attribute, else either
3117  * create or replace an extended attribute.
3118  */
3119 int ocfs2_xattr_set(struct inode *inode,
3120                     int name_index,
3121                     const char *name,
3122                     const void *value,
3123                     size_t value_len,
3124                     int flags)
3125 {
3126         struct buffer_head *di_bh = NULL;
3127         struct ocfs2_dinode *di;
3128         int ret, credits, ref_meta = 0, ref_credits = 0;
3129         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3130         struct inode *tl_inode = osb->osb_tl_inode;
3131         struct ocfs2_xattr_set_ctxt ctxt = { NULL, NULL, };
3132         struct ocfs2_refcount_tree *ref_tree = NULL;
3133
3134         struct ocfs2_xattr_info xi = {
3135                 .xi_name_index = name_index,
3136                 .xi_name = name,
3137                 .xi_name_len = strlen(name),
3138                 .xi_value = value,
3139                 .xi_value_len = value_len,
3140         };
3141
3142         struct ocfs2_xattr_search xis = {
3143                 .not_found = -ENODATA,
3144         };
3145
3146         struct ocfs2_xattr_search xbs = {
3147                 .not_found = -ENODATA,
3148         };
3149
3150         if (!ocfs2_supports_xattr(OCFS2_SB(inode->i_sb)))
3151                 return -EOPNOTSUPP;
3152
3153         /*
3154          * Only xbs will be used on indexed trees.  xis doesn't need a
3155          * bucket.
3156          */
3157         xbs.bucket = ocfs2_xattr_bucket_new(inode);
3158         if (!xbs.bucket) {
3159                 mlog_errno(-ENOMEM);
3160                 return -ENOMEM;
3161         }
3162
3163         ret = ocfs2_inode_lock(inode, &di_bh, 1);
3164         if (ret < 0) {
3165                 mlog_errno(ret);
3166                 goto cleanup_nolock;
3167         }
3168         xis.inode_bh = xbs.inode_bh = di_bh;
3169         di = (struct ocfs2_dinode *)di_bh->b_data;
3170
3171         down_write(&OCFS2_I(inode)->ip_xattr_sem);
3172         /*
3173          * Scan inode and external block to find the same name
3174          * extended attribute and collect search infomation.
3175          */
3176         ret = ocfs2_xattr_ibody_find(inode, name_index, name, &xis);
3177         if (ret)
3178                 goto cleanup;
3179         if (xis.not_found) {
3180                 ret = ocfs2_xattr_block_find(inode, name_index, name, &xbs);
3181                 if (ret)
3182                         goto cleanup;
3183         }
3184
3185         if (xis.not_found && xbs.not_found) {
3186                 ret = -ENODATA;
3187                 if (flags & XATTR_REPLACE)
3188                         goto cleanup;
3189                 ret = 0;
3190                 if (!value)
3191                         goto cleanup;
3192         } else {
3193                 ret = -EEXIST;
3194                 if (flags & XATTR_CREATE)
3195                         goto cleanup;
3196         }
3197
3198         /* Check whether the value is refcounted and do some prepartion. */
3199         if (OCFS2_I(inode)->ip_dyn_features & OCFS2_HAS_REFCOUNT_FL &&
3200             (!xis.not_found || !xbs.not_found)) {
3201                 ret = ocfs2_prepare_refcount_xattr(inode, di, &xi,
3202                                                    &xis, &xbs, &ref_tree,
3203                                                    &ref_meta, &ref_credits);
3204                 if (ret) {
3205                         mlog_errno(ret);
3206                         goto cleanup;
3207                 }
3208         }
3209
3210         mutex_lock(&tl_inode->i_mutex);
3211
3212         if (ocfs2_truncate_log_needs_flush(osb)) {
3213                 ret = __ocfs2_flush_truncate_log(osb);
3214                 if (ret < 0) {
3215                         mutex_unlock(&tl_inode->i_mutex);
3216                         mlog_errno(ret);
3217                         goto cleanup;
3218                 }
3219         }
3220         mutex_unlock(&tl_inode->i_mutex);
3221
3222         ret = ocfs2_init_xattr_set_ctxt(inode, di, &xi, &xis,
3223                                         &xbs, &ctxt, ref_meta, &credits);
3224         if (ret) {
3225                 mlog_errno(ret);
3226                 goto cleanup;
3227         }
3228
3229         /* we need to update inode's ctime field, so add credit for it. */
3230         credits += OCFS2_INODE_UPDATE_CREDITS;
3231         ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits);
3232         if (IS_ERR(ctxt.handle)) {
3233                 ret = PTR_ERR(ctxt.handle);
3234                 mlog_errno(ret);
3235                 goto cleanup;
3236         }
3237
3238         ret = __ocfs2_xattr_set_handle(inode, di, &xi, &xis, &xbs, &ctxt);
3239
3240         ocfs2_commit_trans(osb, ctxt.handle);
3241
3242         if (ctxt.data_ac)
3243                 ocfs2_free_alloc_context(ctxt.data_ac);
3244         if (ctxt.meta_ac)
3245                 ocfs2_free_alloc_context(ctxt.meta_ac);
3246         if (ocfs2_dealloc_has_cluster(&ctxt.dealloc))
3247                 ocfs2_schedule_truncate_log_flush(osb, 1);
3248         ocfs2_run_deallocs(osb, &ctxt.dealloc);
3249
3250 cleanup:
3251         if (ref_tree)
3252                 ocfs2_unlock_refcount_tree(osb, ref_tree, 1);
3253         up_write(&OCFS2_I(inode)->ip_xattr_sem);
3254         if (!value && !ret) {
3255                 ret = ocfs2_try_remove_refcount_tree(inode, di_bh);
3256                 if (ret)
3257                         mlog_errno(ret);
3258         }
3259         ocfs2_inode_unlock(inode, 1);
3260 cleanup_nolock:
3261         brelse(di_bh);
3262         brelse(xbs.xattr_bh);
3263         ocfs2_xattr_bucket_free(xbs.bucket);
3264
3265         return ret;
3266 }
3267
3268 /*
3269  * Find the xattr extent rec which may contains name_hash.
3270  * e_cpos will be the first name hash of the xattr rec.
3271  * el must be the ocfs2_xattr_header.xb_attrs.xb_root.xt_list.
3272  */
3273 static int ocfs2_xattr_get_rec(struct inode *inode,
3274                                u32 name_hash,
3275                                u64 *p_blkno,
3276                                u32 *e_cpos,
3277                                u32 *num_clusters,
3278                                struct ocfs2_extent_list *el)
3279 {
3280         int ret = 0, i;
3281         struct buffer_head *eb_bh = NULL;
3282         struct ocfs2_extent_block *eb;
3283         struct ocfs2_extent_rec *rec = NULL;
3284         u64 e_blkno = 0;
3285
3286         if (el->l_tree_depth) {
3287                 ret = ocfs2_find_leaf(INODE_CACHE(inode), el, name_hash,
3288                                       &eb_bh);
3289                 if (ret) {
3290                         mlog_errno(ret);
3291                         goto out;
3292                 }
3293
3294                 eb = (struct ocfs2_extent_block *) eb_bh->b_data;
3295                 el = &eb->h_list;
3296
3297                 if (el->l_tree_depth) {
3298                         ocfs2_error(inode->i_sb,
3299                                     "Inode %lu has non zero tree depth in "
3300                                     "xattr tree block %llu\n", inode->i_ino,
3301                                     (unsigned long long)eb_bh->b_blocknr);
3302                         ret = -EROFS;
3303                         goto out;
3304                 }
3305         }
3306
3307         for (i = le16_to_cpu(el->l_next_free_rec) - 1; i >= 0; i--) {
3308                 rec = &el->l_recs[i];
3309
3310                 if (le32_to_cpu(rec->e_cpos) <= name_hash) {
3311                         e_blkno = le64_to_cpu(rec->e_blkno);
3312                         break;
3313                 }
3314         }
3315
3316         if (!e_blkno) {
3317                 ocfs2_error(inode->i_sb, "Inode %lu has bad extent "
3318                             "record (%u, %u, 0) in xattr", inode->i_ino,
3319                             le32_to_cpu(rec->e_cpos),
3320                             ocfs2_rec_clusters(el, rec));
3321                 ret = -EROFS;
3322                 goto out;
3323         }
3324
3325         *p_blkno = le64_to_cpu(rec->e_blkno);
3326         *num_clusters = le16_to_cpu(rec->e_leaf_clusters);
3327         if (e_cpos)
3328                 *e_cpos = le32_to_cpu(rec->e_cpos);
3329 out:
3330         brelse(eb_bh);
3331         return ret;
3332 }
3333
/*
 * Per-bucket callback type used by ocfs2_iterate_xattr_buckets().
 * It receives the inode, the bucket that was just read and the opaque
 * 'para' pointer given to the iterator.  A non-zero return value stops
 * the iteration and is handed back to the iterator's caller.
 */
3334 typedef int (xattr_bucket_func)(struct inode *inode,
3335                                 struct ocfs2_xattr_bucket *bucket,
3336                                 void *para);
3337
3338 static int ocfs2_find_xe_in_bucket(struct inode *inode,
3339                                    struct ocfs2_xattr_bucket *bucket,
3340                                    int name_index,
3341                                    const char *name,
3342                                    u32 name_hash,
3343                                    u16 *xe_index,
3344                                    int *found)
3345 {
3346         int i, ret = 0, cmp = 1, block_off, new_offset;
3347         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
3348         size_t name_len = strlen(name);
3349         struct ocfs2_xattr_entry *xe = NULL;
3350         char *xe_name;
3351
3352         /*
3353          * We don't use binary search in the bucket because there
3354          * may be multiple entries with the same name hash.
3355          */
3356         for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
3357                 xe = &xh->xh_entries[i];
3358
3359                 if (name_hash > le32_to_cpu(xe->xe_name_hash))
3360                         continue;
3361                 else if (name_hash < le32_to_cpu(xe->xe_name_hash))
3362                         break;
3363
3364                 cmp = name_index - ocfs2_xattr_get_type(xe);
3365                 if (!cmp)
3366                         cmp = name_len - xe->xe_name_len;
3367                 if (cmp)
3368                         continue;
3369
3370                 ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
3371                                                         xh,
3372                                                         i,
3373                                                         &block_off,
3374                                                         &new_offset);
3375                 if (ret) {
3376                         mlog_errno(ret);
3377                         break;
3378                 }
3379
3380
3381                 xe_name = bucket_block(bucket, block_off) + new_offset;
3382                 if (!memcmp(name, xe_name, name_len)) {
3383                         *xe_index = i;
3384                         *found = 1;
3385                         ret = 0;
3386                         break;
3387                 }
3388         }
3389
3390         return ret;
3391 }
3392
3393 /*
3394  * Find the specified xattr entry in a series of buckets.
3395  * This series start from p_blkno and last for num_clusters.
3396  * The ocfs2_xattr_header.xh_num_buckets of the first bucket contains
3397  * the num of the valid buckets.
3398  *
3399  * Return the buffer_head this xattr should reside in. And if the xattr's
3400  * hash is in the gap of 2 buckets, return the lower bucket.
3401  */
3402 static int ocfs2_xattr_bucket_find(struct inode *inode,
3403                                    int name_index,
3404                                    const char *name,
3405                                    u32 name_hash,
3406                                    u64 p_blkno,
3407                                    u32 first_hash,
3408                                    u32 num_clusters,
3409                                    struct ocfs2_xattr_search *xs)
3410 {
3411         int ret, found = 0;
3412         struct ocfs2_xattr_header *xh = NULL;
3413         struct ocfs2_xattr_entry *xe = NULL;
3414         u16 index = 0;
3415         u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3416         int low_bucket = 0, bucket, high_bucket;
3417         struct ocfs2_xattr_bucket *search;
3418         u32 last_hash;
3419         u64 blkno, lower_blkno = 0;
3420
3421         search = ocfs2_xattr_bucket_new(inode);
3422         if (!search) {
3423                 ret = -ENOMEM;
3424                 mlog_errno(ret);
3425                 goto out;
3426         }
3427
3428         ret = ocfs2_read_xattr_bucket(search, p_blkno);
3429         if (ret) {
3430                 mlog_errno(ret);
3431                 goto out;
3432         }
3433
3434         xh = bucket_xh(search);
3435         high_bucket = le16_to_cpu(xh->xh_num_buckets) - 1;
3436         while (low_bucket <= high_bucket) {
3437                 ocfs2_xattr_bucket_relse(search);
3438
3439                 bucket = (low_bucket + high_bucket) / 2;
3440                 blkno = p_blkno + bucket * blk_per_bucket;
3441                 ret = ocfs2_read_xattr_bucket(search, blkno);
3442                 if (ret) {
3443                         mlog_errno(ret);
3444                         goto out;
3445                 }
3446
3447                 xh = bucket_xh(search);
3448                 xe = &xh->xh_entries[0];
3449                 if (name_hash < le32_to_cpu(xe->xe_name_hash)) {
3450                         high_bucket = bucket - 1;
3451                         continue;
3452                 }
3453
3454                 /*
3455                  * Check whether the hash of the last entry in our
3456                  * bucket is larger than the search one. for an empty
3457                  * bucket, the last one is also the first one.
3458                  */
3459                 if (xh->xh_count)
3460                         xe = &xh->xh_entries[le16_to_cpu(xh->xh_count) - 1];
3461
3462                 last_hash = le32_to_cpu(xe->xe_name_hash);
3463
3464                 /* record lower_blkno which may be the insert place. */
3465                 lower_blkno = blkno;
3466
3467                 if (name_hash > le32_to_cpu(xe->xe_name_hash)) {
3468                         low_bucket = bucket + 1;
3469                         continue;
3470                 }
3471
3472                 /* the searched xattr should reside in this bucket if exists. */
3473                 ret = ocfs2_find_xe_in_bucket(inode, search,
3474                                               name_index, name, name_hash,
3475                                               &index, &found);
3476                 if (ret) {
3477                         mlog_errno(ret);
3478                         goto out;
3479                 }
3480                 break;
3481         }
3482
3483         /*
3484          * Record the bucket we have found.
3485          * When the xattr's hash value is in the gap of 2 buckets, we will
3486          * always set it to the previous bucket.
3487          */
3488         if (!lower_blkno)
3489                 lower_blkno = p_blkno;
3490
3491         /* This should be in cache - we just read it during the search */
3492         ret = ocfs2_read_xattr_bucket(xs->bucket, lower_blkno);
3493         if (ret) {
3494                 mlog_errno(ret);
3495                 goto out;
3496         }
3497
3498         xs->header = bucket_xh(xs->bucket);
3499         xs->base = bucket_block(xs->bucket, 0);
3500         xs->end = xs->base + inode->i_sb->s_blocksize;
3501
3502         if (found) {
3503                 xs->here = &xs->header->xh_entries[index];
3504                 mlog(0, "find xattr %s in bucket %llu, entry = %u\n", name,
3505                      (unsigned long long)bucket_blkno(xs->bucket), index);
3506         } else
3507                 ret = -ENODATA;
3508
3509 out:
3510         ocfs2_xattr_bucket_free(search);
3511         return ret;
3512 }
3513
3514 static int ocfs2_xattr_index_block_find(struct inode *inode,
3515                                         struct buffer_head *root_bh,
3516                                         int name_index,
3517                                         const char *name,
3518                                         struct ocfs2_xattr_search *xs)
3519 {
3520         int ret;
3521         struct ocfs2_xattr_block *xb =
3522                         (struct ocfs2_xattr_block *)root_bh->b_data;
3523         struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
3524         struct ocfs2_extent_list *el = &xb_root->xt_list;
3525         u64 p_blkno = 0;
3526         u32 first_hash, num_clusters = 0;
3527         u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name));
3528
3529         if (le16_to_cpu(el->l_next_free_rec) == 0)
3530                 return -ENODATA;
3531
3532         mlog(0, "find xattr %s, hash = %u, index = %d in xattr tree\n",
3533              name, name_hash, name_index);
3534
3535         ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &first_hash,
3536                                   &num_clusters, el);
3537         if (ret) {
3538                 mlog_errno(ret);
3539                 goto out;
3540         }
3541
3542         BUG_ON(p_blkno == 0 || num_clusters == 0 || first_hash > name_hash);
3543
3544         mlog(0, "find xattr extent rec %u clusters from %llu, the first hash "
3545              "in the rec is %u\n", num_clusters, (unsigned long long)p_blkno,
3546              first_hash);
3547
3548         ret = ocfs2_xattr_bucket_find(inode, name_index, name, name_hash,
3549                                       p_blkno, first_hash, num_clusters, xs);
3550
3551 out:
3552         return ret;
3553 }
3554
3555 static int ocfs2_iterate_xattr_buckets(struct inode *inode,
3556                                        u64 blkno,
3557                                        u32 clusters,
3558                                        xattr_bucket_func *func,
3559                                        void *para)
3560 {
3561         int i, ret = 0;
3562         u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb));
3563         u32 num_buckets = clusters * bpc;
3564         struct ocfs2_xattr_bucket *bucket;
3565
3566         bucket = ocfs2_xattr_bucket_new(inode);
3567         if (!bucket) {
3568                 mlog_errno(-ENOMEM);
3569                 return -ENOMEM;
3570         }
3571
3572         mlog(0, "iterating xattr buckets in %u clusters starting from %llu\n",
3573              clusters, (unsigned long long)blkno);
3574
3575         for (i = 0; i < num_buckets; i++, blkno += bucket->bu_blocks) {
3576                 ret = ocfs2_read_xattr_bucket(bucket, blkno);
3577                 if (ret) {
3578                         mlog_errno(ret);
3579                         break;
3580                 }
3581
3582                 /*
3583                  * The real bucket num in this series of blocks is stored
3584                  * in the 1st bucket.
3585                  */
3586                 if (i == 0)
3587                         num_buckets = le16_to_cpu(bucket_xh(bucket)->xh_num_buckets);
3588
3589                 mlog(0, "iterating xattr bucket %llu, first hash %u\n",
3590                      (unsigned long long)blkno,
3591                      le32_to_cpu(bucket_xh(bucket)->xh_entries[0].xe_name_hash));
3592                 if (func) {
3593                         ret = func(inode, bucket, para);
3594                         if (ret && ret != -ERANGE)
3595                                 mlog_errno(ret);
3596                         /* Fall through to bucket_relse() */
3597                 }
3598
3599                 ocfs2_xattr_bucket_relse(bucket);
3600                 if (ret)
3601                         break;
3602         }
3603
3604         ocfs2_xattr_bucket_free(bucket);
3605         return ret;
3606 }
3607
/*
 * State shared by the callbacks that list the names stored in an
 * indexed xattr tree.  The three fields are handed to
 * ocfs2_xattr_list_entry() for every listed entry, and 'result' is
 * what ocfs2_xattr_tree_list_index_block() finally returns.
 */
3608 struct ocfs2_xattr_tree_list {
3609         char *buffer;		/* output buffer supplied by the caller */
3610         size_t buffer_size;	/* size given together with 'buffer' */
3611         size_t result;		/* running value updated by ocfs2_xattr_list_entry() */
3612 };
3613
3614 static int ocfs2_xattr_bucket_get_name_value(struct super_block *sb,
3615                                              struct ocfs2_xattr_header *xh,
3616                                              int index,
3617                                              int *block_off,
3618                                              int *new_offset)
3619 {
3620         u16 name_offset;
3621
3622         if (index < 0 || index >= le16_to_cpu(xh->xh_count))
3623                 return -EINVAL;
3624
3625         name_offset = le16_to_cpu(xh->xh_entries[index].xe_name_offset);
3626
3627         *block_off = name_offset >> sb->s_blocksize_bits;
3628         *new_offset = name_offset % sb->s_blocksize;
3629
3630         return 0;
3631 }
3632
3633 static int ocfs2_list_xattr_bucket(struct inode *inode,
3634                                    struct ocfs2_xattr_bucket *bucket,
3635                                    void *para)
3636 {
3637         int ret = 0, type;
3638         struct ocfs2_xattr_tree_list *xl = (struct ocfs2_xattr_tree_list *)para;
3639         int i, block_off, new_offset;
3640         const char *prefix, *name;
3641
3642         for (i = 0 ; i < le16_to_cpu(bucket_xh(bucket)->xh_count); i++) {
3643                 struct ocfs2_xattr_entry *entry = &bucket_xh(bucket)->xh_entries[i];
3644                 type = ocfs2_xattr_get_type(entry);
3645                 prefix = ocfs2_xattr_prefix(type);
3646
3647                 if (prefix) {
3648                         ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
3649                                                                 bucket_xh(bucket),
3650                                                                 i,
3651                                                                 &block_off,
3652                                                                 &new_offset);
3653                         if (ret)
3654                                 break;
3655
3656                         name = (const char *)bucket_block(bucket, block_off) +
3657                                 new_offset;
3658                         ret = ocfs2_xattr_list_entry(xl->buffer,
3659                                                      xl->buffer_size,
3660                                                      &xl->result,
3661                                                      prefix, name,
3662                                                      entry->xe_name_len);
3663                         if (ret)
3664                                 break;
3665                 }
3666         }
3667
3668         return ret;
3669 }
3670
3671 static int ocfs2_iterate_xattr_index_block(struct inode *inode,
3672                                            struct buffer_head *blk_bh,
3673                                            xattr_tree_rec_func *rec_func,
3674                                            void *para)
3675 {
3676         struct ocfs2_xattr_block *xb =
3677                         (struct ocfs2_xattr_block *)blk_bh->b_data;
3678         struct ocfs2_extent_list *el = &xb->xb_attrs.xb_root.xt_list;
3679         int ret = 0;
3680         u32 name_hash = UINT_MAX, e_cpos = 0, num_clusters = 0;
3681         u64 p_blkno = 0;
3682
3683         if (!el->l_next_free_rec || !rec_func)
3684                 return 0;
3685
3686         while (name_hash > 0) {
3687                 ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno,
3688                                           &e_cpos, &num_clusters, el);
3689                 if (ret) {
3690                         mlog_errno(ret);
3691                         break;
3692                 }
3693
3694                 ret = rec_func(inode, blk_bh, p_blkno, e_cpos,
3695                                num_clusters, para);
3696                 if (ret) {
3697                         if (ret != -ERANGE)
3698                                 mlog_errno(ret);
3699                         break;
3700                 }
3701
3702                 if (e_cpos == 0)
3703                         break;
3704
3705                 name_hash = e_cpos - 1;
3706         }
3707
3708         return ret;
3709
3710 }
3711
3712 static int ocfs2_list_xattr_tree_rec(struct inode *inode,
3713                                      struct buffer_head *root_bh,
3714                                      u64 blkno, u32 cpos, u32 len, void *para)
3715 {
3716         return ocfs2_iterate_xattr_buckets(inode, blkno, len,
3717                                            ocfs2_list_xattr_bucket, para);
3718 }
3719
3720 static int ocfs2_xattr_tree_list_index_block(struct inode *inode,
3721                                              struct buffer_head *blk_bh,
3722                                              char *buffer,
3723                                              size_t buffer_size)
3724 {
3725         int ret;
3726         struct ocfs2_xattr_tree_list xl = {
3727                 .buffer = buffer,
3728                 .buffer_size = buffer_size,
3729                 .result = 0,
3730         };
3731
3732         ret = ocfs2_iterate_xattr_index_block(inode, blk_bh,
3733                                               ocfs2_list_xattr_tree_rec, &xl);
3734         if (ret) {
3735                 mlog_errno(ret);
3736                 goto out;
3737         }
3738
3739         ret = xl.result;
3740 out:
3741         return ret;
3742 }
3743
3744 static int cmp_xe(const void *a, const void *b)
3745 {
3746         const struct ocfs2_xattr_entry *l = a, *r = b;
3747         u32 l_hash = le32_to_cpu(l->xe_name_hash);
3748         u32 r_hash = le32_to_cpu(r->xe_name_hash);
3749
3750         if (l_hash > r_hash)
3751                 return 1;
3752         if (l_hash < r_hash)
3753                 return -1;
3754         return 0;
3755 }
3756
3757 static void swap_xe(void *a, void *b, int size)
3758 {
3759         struct ocfs2_xattr_entry *l = a, *r = b, tmp;
3760
3761         tmp = *l;
3762         memcpy(l, r, sizeof(struct ocfs2_xattr_entry));
3763         memcpy(r, &tmp, sizeof(struct ocfs2_xattr_entry));
3764 }
3765
3766 /*
3767  * When the ocfs2_xattr_block is filled up, new bucket will be created
3768  * and all the xattr entries will be moved to the new bucket.
3769  * The header goes at the start of the bucket, and the names+values are
3770  * filled from the end.  This is why *target starts as the last buffer.
3771  * Note: we need to sort the entries since they are not saved in order
3772  * in the ocfs2_xattr_block.
3773  */
3774 static void ocfs2_cp_xattr_block_to_bucket(struct inode *inode,
3775                                            struct buffer_head *xb_bh,
3776                                            struct ocfs2_xattr_bucket *bucket)
3777 {
3778         int i, blocksize = inode->i_sb->s_blocksize;
3779         int blks = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3780         u16 offset, size, off_change;
3781         struct ocfs2_xattr_entry *xe;
3782         struct ocfs2_xattr_block *xb =
3783                                 (struct ocfs2_xattr_block *)xb_bh->b_data;
3784         struct ocfs2_xattr_header *xb_xh = &xb->xb_attrs.xb_header;
3785         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
3786         u16 count = le16_to_cpu(xb_xh->xh_count);
3787         char *src = xb_bh->b_data;
3788         char *target = bucket_block(bucket, blks - 1);
3789
3790         mlog(0, "cp xattr from block %llu to bucket %llu\n",
3791              (unsigned long long)xb_bh->b_blocknr,
3792              (unsigned long long)bucket_blkno(bucket));
3793
3794         for (i = 0; i < blks; i++)
3795                 memset(bucket_block(bucket, i), 0, blocksize);
3796
3797         /*
3798          * Since the xe_name_offset is based on ocfs2_xattr_header,
3799          * there is a offset change corresponding to the change of
3800          * ocfs2_xattr_header's position.
3801          */
3802         off_change = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
3803         xe = &xb_xh->xh_entries[count - 1];
3804         offset = le16_to_cpu(xe->xe_name_offset) + off_change;
3805         size = blocksize - offset;
3806
3807         /* copy all the names and values. */
3808         memcpy(target + offset, src + offset, size);
3809
3810         /* Init new header now. */
3811         xh->xh_count = xb_xh->xh_count;
3812         xh->xh_num_buckets = cpu_to_le16(1);
3813         xh->xh_name_value_len = cpu_to_le16(size);
3814         xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE - size);
3815
3816         /* copy all the entries. */
3817         target = bucket_block(bucket, 0);
3818         offset = offsetof(struct ocfs2_xattr_header, xh_entries);
3819         size = count * sizeof(struct ocfs2_xattr_entry);
3820         memcpy(target + offset, (char *)xb_xh + offset, size);
3821
3822         /* Change the xe offset for all the xe because of the move. */
3823         off_change = OCFS2_XATTR_BUCKET_SIZE - blocksize +
3824                  offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
3825         for (i = 0; i < count; i++)
3826                 le16_add_cpu(&xh->xh_entries[i].xe_name_offset, off_change);
3827
3828         mlog(0, "copy entry: start = %u, size = %u, offset_change = %u\n",
3829              offset, size, off_change);
3830
3831         sort(target + offset, count, sizeof(struct ocfs2_xattr_entry),
3832              cmp_xe, swap_xe);
3833 }
3834
3835 /*
3836  * After we move xattr from block to index btree, we have to
3837  * update ocfs2_xattr_search to the new xe and base.
3838  *
3839  * When the entry is in xattr block, xattr_bh indicates the storage place.
3840  * While if the entry is in index b-tree, "bucket" indicates the
3841  * real place of the xattr.
3842  */
3843 static void ocfs2_xattr_update_xattr_search(struct inode *inode,
3844                                             struct ocfs2_xattr_search *xs,
3845                                             struct buffer_head *old_bh)
3846 {
3847         char *buf = old_bh->b_data;
3848         struct ocfs2_xattr_block *old_xb = (struct ocfs2_xattr_block *)buf;
3849         struct ocfs2_xattr_header *old_xh = &old_xb->xb_attrs.xb_header;
3850         int i;
3851
3852         xs->header = bucket_xh(xs->bucket);
3853         xs->base = bucket_block(xs->bucket, 0);
3854         xs->end = xs->base + inode->i_sb->s_blocksize;
3855
3856         if (xs->not_found)
3857                 return;
3858
3859         i = xs->here - old_xh->xh_entries;
3860         xs->here = &xs->header->xh_entries[i];
3861 }
3862
3863 static int ocfs2_xattr_create_index_block(struct inode *inode,
3864                                           struct ocfs2_xattr_search *xs,
3865                                           struct ocfs2_xattr_set_ctxt *ctxt)
3866 {
3867         int ret;
3868         u32 bit_off, len;
3869         u64 blkno;
3870         handle_t *handle = ctxt->handle;
3871         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
3872         struct ocfs2_inode_info *oi = OCFS2_I(inode);
3873         struct buffer_head *xb_bh = xs->xattr_bh;
3874         struct ocfs2_xattr_block *xb =
3875                         (struct ocfs2_xattr_block *)xb_bh->b_data;
3876         struct ocfs2_xattr_tree_root *xr;
3877         u16 xb_flags = le16_to_cpu(xb->xb_flags);
3878
3879         mlog(0, "create xattr index block for %llu\n",
3880              (unsigned long long)xb_bh->b_blocknr);
3881
3882         BUG_ON(xb_flags & OCFS2_XATTR_INDEXED);
3883         BUG_ON(!xs->bucket);
3884
3885         /*
3886          * XXX:
3887          * We can use this lock for now, and maybe move to a dedicated mutex
3888          * if performance becomes a problem later.
3889          */
3890         down_write(&oi->ip_alloc_sem);
3891
3892         ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), xb_bh,
3893                                       OCFS2_JOURNAL_ACCESS_WRITE);
3894         if (ret) {
3895                 mlog_errno(ret);
3896                 goto out;
3897         }
3898
3899         ret = __ocfs2_claim_clusters(osb, handle, ctxt->data_ac,
3900                                      1, 1, &bit_off, &len);
3901         if (ret) {
3902                 mlog_errno(ret);
3903                 goto out;
3904         }
3905
3906         /*
3907          * The bucket may spread in many blocks, and
3908          * we will only touch the 1st block and the last block
3909          * in the whole bucket(one for entry and one for data).
3910          */
3911         blkno = ocfs2_clusters_to_blocks(inode->i_sb, bit_off);
3912
3913         mlog(0, "allocate 1 cluster from %llu to xattr block\n",
3914              (unsigned long long)blkno);
3915
3916         ret = ocfs2_init_xattr_bucket(xs->bucket, blkno);
3917         if (ret) {
3918                 mlog_errno(ret);
3919                 goto out;
3920         }
3921
3922         ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
3923                                                 OCFS2_JOURNAL_ACCESS_CREATE);
3924         if (ret) {
3925                 mlog_errno(ret);
3926                 goto out;
3927         }
3928
3929         ocfs2_cp_xattr_block_to_bucket(inode, xb_bh, xs->bucket);
3930         ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);
3931
3932         ocfs2_xattr_update_xattr_search(inode, xs, xb_bh);
3933
3934         /* Change from ocfs2_xattr_header to ocfs2_xattr_tree_root */
3935         memset(&xb->xb_attrs, 0, inode->i_sb->s_blocksize -
3936                offsetof(struct ocfs2_xattr_block, xb_attrs));
3937
3938         xr = &xb->xb_attrs.xb_root;
3939         xr->xt_clusters = cpu_to_le32(1);
3940         xr->xt_last_eb_blk = 0;
3941         xr->xt_list.l_tree_depth = 0;
3942         xr->xt_list.l_count = cpu_to_le16(ocfs2_xattr_recs_per_xb(inode->i_sb));
3943         xr->xt_list.l_next_free_rec = cpu_to_le16(1);
3944
3945         xr->xt_list.l_recs[0].e_cpos = 0;
3946         xr->xt_list.l_recs[0].e_blkno = cpu_to_le64(blkno);
3947         xr->xt_list.l_recs[0].e_leaf_clusters = cpu_to_le16(1);
3948
3949         xb->xb_flags = cpu_to_le16(xb_flags | OCFS2_XATTR_INDEXED);
3950
3951         ocfs2_journal_dirty(handle, xb_bh);
3952
3953 out:
3954         up_write(&oi->ip_alloc_sem);
3955
3956         return ret;
3957 }
3958
3959 static int cmp_xe_offset(const void *a, const void *b)
3960 {
3961         const struct ocfs2_xattr_entry *l = a, *r = b;
3962         u32 l_name_offset = le16_to_cpu(l->xe_name_offset);
3963         u32 r_name_offset = le16_to_cpu(r->xe_name_offset);
3964
3965         if (l_name_offset < r_name_offset)
3966                 return 1;
3967         if (l_name_offset > r_name_offset)
3968                 return -1;
3969         return 0;
3970 }
3971
3972 /*
3973  * defrag a xattr bucket if we find that the bucket has some
3974  * holes beteen name/value pairs.
3975  * We will move all the name/value pairs to the end of the bucket
3976  * so that we can spare some space for insertion.
3977  */
3978 static int ocfs2_defrag_xattr_bucket(struct inode *inode,
3979                                      handle_t *handle,
3980                                      struct ocfs2_xattr_bucket *bucket)
3981 {
3982         int ret, i;
3983         size_t end, offset, len, value_len;
3984         struct ocfs2_xattr_header *xh;
3985         char *entries, *buf, *bucket_buf = NULL;
3986         u64 blkno = bucket_blkno(bucket);
3987         u16 xh_free_start;
3988         size_t blocksize = inode->i_sb->s_blocksize;
3989         struct ocfs2_xattr_entry *xe;
3990
3991         /*
3992          * In order to make the operation more efficient and generic,
3993          * we copy all the blocks into a contiguous memory and do the
3994          * defragment there, so if anything is error, we will not touch
3995          * the real block.
3996          */
3997         bucket_buf = kmalloc(OCFS2_XATTR_BUCKET_SIZE, GFP_NOFS);
3998         if (!bucket_buf) {
3999                 ret = -EIO;
4000                 goto out;
4001         }
4002
4003         buf = bucket_buf;
4004         for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize)
4005                 memcpy(buf, bucket_block(bucket, i), blocksize);
4006
4007         ret = ocfs2_xattr_bucket_journal_access(handle, bucket,
4008                                                 OCFS2_JOURNAL_ACCESS_WRITE);
4009         if (ret < 0) {
4010                 mlog_errno(ret);
4011                 goto out;
4012         }
4013
4014         xh = (struct ocfs2_xattr_header *)bucket_buf;
4015         entries = (char *)xh->xh_entries;
4016         xh_free_start = le16_to_cpu(xh->xh_free_start);
4017
4018         mlog(0, "adjust xattr bucket in %llu, count = %u, "
4019              "xh_free_start = %u, xh_name_value_len = %u.\n",
4020              (unsigned long long)blkno, le16_to_cpu(xh->xh_count),
4021              xh_free_start, le16_to_cpu(xh->xh_name_value_len));
4022
4023         /*
4024          * sort all the entries by their offset.
4025          * the largest will be the first, so that we can
4026          * move them to the end one by one.
4027          */
4028         sort(entries, le16_to_cpu(xh->xh_count),
4029              sizeof(struct ocfs2_xattr_entry),
4030              cmp_xe_offset, swap_xe);
4031
4032         /* Move all name/values to the end of the bucket. */
4033         xe = xh->xh_entries;
4034         end = OCFS2_XATTR_BUCKET_SIZE;
4035         for (i = 0; i < le16_to_cpu(xh->xh_count); i++, xe++) {
4036                 offset = le16_to_cpu(xe->xe_name_offset);
4037                 if (ocfs2_xattr_is_local(xe))
4038                         value_len = OCFS2_XATTR_SIZE(
4039                                         le64_to_cpu(xe->xe_value_size));
4040                 else
4041                         value_len = OCFS2_XATTR_ROOT_SIZE;
4042                 len = OCFS2_XATTR_SIZE(xe->xe_name_len) + value_len;
4043
4044                 /*
4045                  * We must make sure that the name/value pair
4046                  * exist in the same block. So adjust end to
4047                  * the previous block end if needed.
4048                  */
4049                 if (((end - len) / blocksize !=
4050                         (end - 1) / blocksize))
4051                         end = end - end % blocksize;
4052
4053                 if (end > offset + len) {
4054                         memmove(bucket_buf + end - len,
4055                                 bucket_buf + offset, len);
4056                         xe->xe_name_offset = cpu_to_le16(end - len);
4057                 }
4058
4059                 mlog_bug_on_msg(end < offset + len, "Defrag check failed for "
4060                                 "bucket %llu\n", (unsigned long long)blkno);
4061
4062                 end -= len;
4063         }
4064
4065         mlog_bug_on_msg(xh_free_start > end, "Defrag check failed for "
4066                         "bucket %llu\n", (unsigned long long)blkno);
4067
4068         if (xh_free_start == end)
4069                 goto out;
4070
4071         memset(bucket_buf + xh_free_start, 0, end - xh_free_start);
4072         xh->xh_free_start = cpu_to_le16(end);
4073
4074         /* sort the entries by their name_hash. */
4075         sort(entries, le16_to_cpu(xh->xh_count),
4076              sizeof(struct ocfs2_xattr_entry),
4077              cmp_xe, swap_xe);
4078
4079         buf = bucket_buf;
4080         for (i = 0; i < bucket->bu_blocks; i++, buf += blocksize)
4081                 memcpy(bucket_block(bucket, i), buf, blocksize);
4082         ocfs2_xattr_bucket_journal_dirty(handle, bucket);
4083
4084 out:
4085         kfree(bucket_buf);
4086         return ret;
4087 }
4088
4089 /*
4090  * prev_blkno points to the start of an existing extent.  new_blkno
4091  * points to a newly allocated extent.  Because we know each of our
4092  * clusters contains more than bucket, we can easily split one cluster
4093  * at a bucket boundary.  So we take the last cluster of the existing
4094  * extent and split it down the middle.  We move the last half of the
4095  * buckets in the last cluster of the existing extent over to the new
4096  * extent.
4097  *
4098  * first_bh is the buffer at prev_blkno so we can update the existing
4099  * extent's bucket count.  header_bh is the bucket were we were hoping
4100  * to insert our xattr.  If the bucket move places the target in the new
4101  * extent, we'll update first_bh and header_bh after modifying the old
4102  * extent.
4103  *
4104  * first_hash will be set as the 1st xe's name_hash in the new extent.
4105  */
4106 static int ocfs2_mv_xattr_bucket_cross_cluster(struct inode *inode,
4107                                                handle_t *handle,
4108                                                struct ocfs2_xattr_bucket *first,
4109                                                struct ocfs2_xattr_bucket *target,
4110                                                u64 new_blkno,
4111                                                u32 num_clusters,
4112                                                u32 *first_hash)
4113 {
4114         int ret;
4115         struct super_block *sb = inode->i_sb;
4116         int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(sb);
4117         int num_buckets = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(sb));
4118         int to_move = num_buckets / 2;
4119         u64 src_blkno;
4120         u64 last_cluster_blkno = bucket_blkno(first) +
4121                 ((num_clusters - 1) * ocfs2_clusters_to_blocks(sb, 1));
4122
4123         BUG_ON(le16_to_cpu(bucket_xh(first)->xh_num_buckets) < num_buckets);
4124         BUG_ON(OCFS2_XATTR_BUCKET_SIZE == OCFS2_SB(sb)->s_clustersize);
4125
4126         mlog(0, "move half of xattrs in cluster %llu to %llu\n",
4127              (unsigned long long)last_cluster_blkno, (unsigned long long)new_blkno);
4128
4129         ret = ocfs2_mv_xattr_buckets(inode, handle, bucket_blkno(first),
4130                                      last_cluster_blkno, new_blkno,
4131                                      to_move, first_hash);
4132         if (ret) {
4133                 mlog_errno(ret);
4134                 goto out;
4135         }
4136
4137         /* This is the first bucket that got moved */
4138         src_blkno = last_cluster_blkno + (to_move * blks_per_bucket);
4139
4140         /*
4141          * If the target bucket was part of the moved buckets, we need to
4142          * update first and target.
4143          */
4144         if (bucket_blkno(target) >= src_blkno) {
4145                 /* Find the block for the new target bucket */
4146                 src_blkno = new_blkno +
4147                         (bucket_blkno(target) - src_blkno);
4148
4149                 ocfs2_xattr_bucket_relse(first);
4150                 ocfs2_xattr_bucket_relse(target);
4151
4152                 /*
4153                  * These shouldn't fail - the buffers are in the
4154                  * journal from ocfs2_cp_xattr_bucket().
4155                  */
4156                 ret = ocfs2_read_xattr_bucket(first, new_blkno);
4157                 if (ret) {
4158                         mlog_errno(ret);
4159                         goto out;
4160                 }
4161                 ret = ocfs2_read_xattr_bucket(target, src_blkno);
4162                 if (ret)
4163                         mlog_errno(ret);
4164
4165         }
4166
4167 out:
4168         return ret;
4169 }
4170
4171 /*
4172  * Find the suitable pos when we divide a bucket into 2.
4173  * We have to make sure the xattrs with the same hash value exist
4174  * in the same bucket.
4175  *
4176  * If this ocfs2_xattr_header covers more than one hash value, find a
4177  * place where the hash value changes.  Try to find the most even split.
4178  * The most common case is that all entries have different hash values,
4179  * and the first check we make will find a place to split.
4180  */
4181 static int ocfs2_xattr_find_divide_pos(struct ocfs2_xattr_header *xh)
4182 {
4183         struct ocfs2_xattr_entry *entries = xh->xh_entries;
4184         int count = le16_to_cpu(xh->xh_count);
4185         int delta, middle = count / 2;
4186
4187         /*
4188          * We start at the middle.  Each step gets farther away in both
4189          * directions.  We therefore hit the change in hash value
4190          * nearest to the middle.  Note that this loop does not execute for
4191          * count < 2.
4192          */
4193         for (delta = 0; delta < middle; delta++) {
4194                 /* Let's check delta earlier than middle */
4195                 if (cmp_xe(&entries[middle - delta - 1],
4196                            &entries[middle - delta]))
4197                         return middle - delta;
4198
4199                 /* For even counts, don't walk off the end */
4200                 if ((middle + delta + 1) == count)
4201                         continue;
4202
4203                 /* Now try delta past middle */
4204                 if (cmp_xe(&entries[middle + delta],
4205                            &entries[middle + delta + 1]))
4206                         return middle + delta + 1;
4207         }
4208
4209         /* Every entry had the same hash */
4210         return count;
4211 }
4212
4213 /*
4214  * Move some xattrs in old bucket(blk) to new bucket(new_blk).
4215  * first_hash will record the 1st hash of the new bucket.
4216  *
4217  * Normally half of the xattrs will be moved.  But we have to make
4218  * sure that the xattrs with the same hash value are stored in the
4219  * same bucket. If all the xattrs in this bucket have the same hash
4220  * value, the new bucket will be initialized as an empty one and the
4221  * first_hash will be initialized as (hash_value+1).
4222  */
4223 static int ocfs2_divide_xattr_bucket(struct inode *inode,
4224                                     handle_t *handle,
4225                                     u64 blk,
4226                                     u64 new_blk,
4227                                     u32 *first_hash,
4228                                     int new_bucket_head)
4229 {
4230         int ret, i;
4231         int count, start, len, name_value_len = 0, xe_len, name_offset = 0;
4232         struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL;
4233         struct ocfs2_xattr_header *xh;
4234         struct ocfs2_xattr_entry *xe;
4235         int blocksize = inode->i_sb->s_blocksize;
4236
4237         mlog(0, "move some of xattrs from bucket %llu to %llu\n",
4238              (unsigned long long)blk, (unsigned long long)new_blk);
4239
4240         s_bucket = ocfs2_xattr_bucket_new(inode);
4241         t_bucket = ocfs2_xattr_bucket_new(inode);
4242         if (!s_bucket || !t_bucket) {
4243                 ret = -ENOMEM;
4244                 mlog_errno(ret);
4245                 goto out;
4246         }
4247
4248         ret = ocfs2_read_xattr_bucket(s_bucket, blk);
4249         if (ret) {
4250                 mlog_errno(ret);
4251                 goto out;
4252         }
4253
4254         ret = ocfs2_xattr_bucket_journal_access(handle, s_bucket,
4255                                                 OCFS2_JOURNAL_ACCESS_WRITE);
4256         if (ret) {
4257                 mlog_errno(ret);
4258                 goto out;
4259         }
4260
4261         /*
4262          * Even if !new_bucket_head, we're overwriting t_bucket.  Thus,
4263          * there's no need to read it.
4264          */
4265         ret = ocfs2_init_xattr_bucket(t_bucket, new_blk);
4266         if (ret) {
4267                 mlog_errno(ret);
4268                 goto out;
4269         }
4270
4271         /*
4272          * Hey, if we're overwriting t_bucket, what difference does
4273          * ACCESS_CREATE vs ACCESS_WRITE make?  See the comment in the
4274          * same part of ocfs2_cp_xattr_bucket().
4275          */
4276         ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket,
4277                                                 new_bucket_head ?
4278                                                 OCFS2_JOURNAL_ACCESS_CREATE :
4279                                                 OCFS2_JOURNAL_ACCESS_WRITE);
4280         if (ret) {
4281                 mlog_errno(ret);
4282                 goto out;
4283         }
4284
4285         xh = bucket_xh(s_bucket);
4286         count = le16_to_cpu(xh->xh_count);
4287         start = ocfs2_xattr_find_divide_pos(xh);
4288
4289         if (start == count) {
4290                 xe = &xh->xh_entries[start-1];
4291
4292                 /*
4293                  * initialized a new empty bucket here.
4294                  * The hash value is set as one larger than
4295                  * that of the last entry in the previous bucket.
4296                  */
4297                 for (i = 0; i < t_bucket->bu_blocks; i++)
4298                         memset(bucket_block(t_bucket, i), 0, blocksize);
4299
4300                 xh = bucket_xh(t_bucket);
4301                 xh->xh_free_start = cpu_to_le16(blocksize);
4302                 xh->xh_entries[0].xe_name_hash = xe->xe_name_hash;
4303                 le32_add_cpu(&xh->xh_entries[0].xe_name_hash, 1);
4304
4305                 goto set_num_buckets;
4306         }
4307
4308         /* copy the whole bucket to the new first. */
4309         ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket);
4310
4311         /* update the new bucket. */
4312         xh = bucket_xh(t_bucket);
4313
4314         /*
4315          * Calculate the total name/value len and xh_free_start for
4316          * the old bucket first.
4317          */
4318         name_offset = OCFS2_XATTR_BUCKET_SIZE;
4319         name_value_len = 0;
4320         for (i = 0; i < start; i++) {
4321                 xe = &xh->xh_entries[i];
4322                 xe_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
4323                 if (ocfs2_xattr_is_local(xe))
4324                         xe_len +=
4325                            OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
4326                 else
4327                         xe_len += OCFS2_XATTR_ROOT_SIZE;
4328                 name_value_len += xe_len;
4329                 if (le16_to_cpu(xe->xe_name_offset) < name_offset)
4330                         name_offset = le16_to_cpu(xe->xe_name_offset);
4331         }
4332
4333         /*
4334          * Now begin the modification to the new bucket.
4335          *
4336          * In the new bucket, We just move the xattr entry to the beginning
4337          * and don't touch the name/value. So there will be some holes in the
4338          * bucket, and they will be removed when ocfs2_defrag_xattr_bucket is
4339          * called.
4340          */
4341         xe = &xh->xh_entries[start];
4342         len = sizeof(struct ocfs2_xattr_entry) * (count - start);
4343         mlog(0, "mv xattr entry len %d from %d to %d\n", len,
4344              (int)((char *)xe - (char *)xh),
4345              (int)((char *)xh->xh_entries - (char *)xh));
4346         memmove((char *)xh->xh_entries, (char *)xe, len);
4347         xe = &xh->xh_entries[count - start];
4348         len = sizeof(struct ocfs2_xattr_entry) * start;
4349         memset((char *)xe, 0, len);
4350
4351         le16_add_cpu(&xh->xh_count, -start);
4352         le16_add_cpu(&xh->xh_name_value_len, -name_value_len);
4353
4354         /* Calculate xh_free_start for the new bucket. */
4355         xh->xh_free_start = cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE);
4356         for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
4357                 xe = &xh->xh_entries[i];
4358                 xe_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
4359                 if (ocfs2_xattr_is_local(xe))
4360                         xe_len +=
4361                            OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
4362                 else
4363                         xe_len += OCFS2_XATTR_ROOT_SIZE;
4364                 if (le16_to_cpu(xe->xe_name_offset) <
4365                     le16_to_cpu(xh->xh_free_start))
4366                         xh->xh_free_start = xe->xe_name_offset;
4367         }
4368
4369 set_num_buckets:
4370         /* set xh->xh_num_buckets for the new xh. */
4371         if (new_bucket_head)
4372                 xh->xh_num_buckets = cpu_to_le16(1);
4373         else
4374                 xh->xh_num_buckets = 0;
4375
4376         ocfs2_xattr_bucket_journal_dirty(handle, t_bucket);
4377
4378         /* store the first_hash of the new bucket. */
4379         if (first_hash)
4380                 *first_hash = le32_to_cpu(xh->xh_entries[0].xe_name_hash);
4381
4382         /*
4383          * Now only update the 1st block of the old bucket.  If we
4384          * just added a new empty bucket, there is no need to modify
4385          * it.
4386          */
4387         if (start == count)
4388                 goto out;
4389
4390         xh = bucket_xh(s_bucket);
4391         memset(&xh->xh_entries[start], 0,
4392                sizeof(struct ocfs2_xattr_entry) * (count - start));
4393         xh->xh_count = cpu_to_le16(start);
4394         xh->xh_free_start = cpu_to_le16(name_offset);
4395         xh->xh_name_value_len = cpu_to_le16(name_value_len);
4396
4397         ocfs2_xattr_bucket_journal_dirty(handle, s_bucket);
4398
4399 out:
4400         ocfs2_xattr_bucket_free(s_bucket);
4401         ocfs2_xattr_bucket_free(t_bucket);
4402
4403         return ret;
4404 }
4405
4406 /*
4407  * Copy xattr from one bucket to another bucket.
4408  *
4409  * The caller must make sure that the journal transaction
4410  * has enough space for journaling.
4411  */
4412 static int ocfs2_cp_xattr_bucket(struct inode *inode,
4413                                  handle_t *handle,
4414                                  u64 s_blkno,
4415                                  u64 t_blkno,
4416                                  int t_is_new)
4417 {
4418         int ret;
4419         struct ocfs2_xattr_bucket *s_bucket = NULL, *t_bucket = NULL;
4420
4421         BUG_ON(s_blkno == t_blkno);
4422
4423         mlog(0, "cp bucket %llu to %llu, target is %d\n",
4424              (unsigned long long)s_blkno, (unsigned long long)t_blkno,
4425              t_is_new);
4426
4427         s_bucket = ocfs2_xattr_bucket_new(inode);
4428         t_bucket = ocfs2_xattr_bucket_new(inode);
4429         if (!s_bucket || !t_bucket) {
4430                 ret = -ENOMEM;
4431                 mlog_errno(ret);
4432                 goto out;
4433         }
4434
4435         ret = ocfs2_read_xattr_bucket(s_bucket, s_blkno);
4436         if (ret)
4437                 goto out;
4438
4439         /*
4440          * Even if !t_is_new, we're overwriting t_bucket.  Thus,
4441          * there's no need to read it.
4442          */
4443         ret = ocfs2_init_xattr_bucket(t_bucket, t_blkno);
4444         if (ret)
4445                 goto out;
4446
4447         /*
4448          * Hey, if we're overwriting t_bucket, what difference does
4449          * ACCESS_CREATE vs ACCESS_WRITE make?  Well, if we allocated a new
4450          * cluster to fill, we came here from
4451          * ocfs2_mv_xattr_buckets(), and it is really new -
4452          * ACCESS_CREATE is required.  But we also might have moved data
4453          * out of t_bucket before extending back into it.
4454          * ocfs2_add_new_xattr_bucket() can do this - its call to
4455          * ocfs2_add_new_xattr_cluster() may have created a new extent
4456          * and copied out the end of the old extent.  Then it re-extends
4457          * the old extent back to create space for new xattrs.  That's
4458          * how we get here, and the bucket isn't really new.
4459          */
4460         ret = ocfs2_xattr_bucket_journal_access(handle, t_bucket,
4461                                                 t_is_new ?
4462                                                 OCFS2_JOURNAL_ACCESS_CREATE :
4463                                                 OCFS2_JOURNAL_ACCESS_WRITE);
4464         if (ret)
4465                 goto out;
4466
4467         ocfs2_xattr_bucket_copy_data(t_bucket, s_bucket);
4468         ocfs2_xattr_bucket_journal_dirty(handle, t_bucket);
4469
4470 out:
4471         ocfs2_xattr_bucket_free(t_bucket);
4472         ocfs2_xattr_bucket_free(s_bucket);
4473
4474         return ret;
4475 }
4476
4477 /*
4478  * src_blk points to the start of an existing extent.  last_blk points to
4479  * last cluster in that extent.  to_blk points to a newly allocated
4480  * extent.  We copy the buckets from the cluster at last_blk to the new
4481  * extent.  If start_bucket is non-zero, we skip that many buckets before
4482  * we start copying.  The new extent's xh_num_buckets gets set to the
4483  * number of buckets we copied.  The old extent's xh_num_buckets shrinks
4484  * by the same amount.
4485  */
4486 static int ocfs2_mv_xattr_buckets(struct inode *inode, handle_t *handle,
4487                                   u64 src_blk, u64 last_blk, u64 to_blk,
4488                                   unsigned int start_bucket,
4489                                   u32 *first_hash)
4490 {
4491         int i, ret, credits;
4492         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4493         int blks_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4494         int num_buckets = ocfs2_xattr_buckets_per_cluster(osb);
4495         struct ocfs2_xattr_bucket *old_first, *new_first;
4496
4497         mlog(0, "mv xattrs from cluster %llu to %llu\n",
4498              (unsigned long long)last_blk, (unsigned long long)to_blk);
4499
4500         BUG_ON(start_bucket >= num_buckets);
4501         if (start_bucket) {
4502                 num_buckets -= start_bucket;
4503                 last_blk += (start_bucket * blks_per_bucket);
4504         }
4505
4506         /* The first bucket of the original extent */
4507         old_first = ocfs2_xattr_bucket_new(inode);
4508         /* The first bucket of the new extent */
4509         new_first = ocfs2_xattr_bucket_new(inode);
4510         if (!old_first || !new_first) {
4511                 ret = -ENOMEM;
4512                 mlog_errno(ret);
4513                 goto out;
4514         }
4515
4516         ret = ocfs2_read_xattr_bucket(old_first, src_blk);
4517         if (ret) {
4518                 mlog_errno(ret);
4519                 goto out;
4520         }
4521
4522         /*
4523          * We need to update the first bucket of the old extent and all
4524          * the buckets going to the new extent.
4525          */
4526         credits = ((num_buckets + 1) * blks_per_bucket) +
4527                 handle->h_buffer_credits;
4528         ret = ocfs2_extend_trans(handle, credits);
4529         if (ret) {
4530                 mlog_errno(ret);
4531                 goto out;
4532         }
4533
4534         ret = ocfs2_xattr_bucket_journal_access(handle, old_first,
4535                                                 OCFS2_JOURNAL_ACCESS_WRITE);
4536         if (ret) {
4537                 mlog_errno(ret);
4538                 goto out;
4539         }
4540
4541         for (i = 0; i < num_buckets; i++) {
4542                 ret = ocfs2_cp_xattr_bucket(inode, handle,
4543                                             last_blk + (i * blks_per_bucket),
4544                                             to_blk + (i * blks_per_bucket),
4545                                             1);
4546                 if (ret) {
4547                         mlog_errno(ret);
4548                         goto out;
4549                 }
4550         }
4551
4552         /*
4553          * Get the new bucket ready before we dirty anything
4554          * (This actually shouldn't fail, because we already dirtied
4555          * it once in ocfs2_cp_xattr_bucket()).
4556          */
4557         ret = ocfs2_read_xattr_bucket(new_first, to_blk);
4558         if (ret) {
4559                 mlog_errno(ret);
4560                 goto out;
4561         }
4562         ret = ocfs2_xattr_bucket_journal_access(handle, new_first,
4563                                                 OCFS2_JOURNAL_ACCESS_WRITE);
4564         if (ret) {
4565                 mlog_errno(ret);
4566                 goto out;
4567         }
4568
4569         /* Now update the headers */
4570         le16_add_cpu(&bucket_xh(old_first)->xh_num_buckets, -num_buckets);
4571         ocfs2_xattr_bucket_journal_dirty(handle, old_first);
4572
4573         bucket_xh(new_first)->xh_num_buckets = cpu_to_le16(num_buckets);
4574         ocfs2_xattr_bucket_journal_dirty(handle, new_first);
4575
4576         if (first_hash)
4577                 *first_hash = le32_to_cpu(bucket_xh(new_first)->xh_entries[0].xe_name_hash);
4578
4579 out:
4580         ocfs2_xattr_bucket_free(new_first);
4581         ocfs2_xattr_bucket_free(old_first);
4582         return ret;
4583 }
4584
4585 /*
4586  * Move some xattrs in this cluster to the new cluster.
4587  * This function should only be called when bucket size == cluster size.
4588  * Otherwise ocfs2_mv_xattr_bucket_cross_cluster should be used instead.
4589  */
4590 static int ocfs2_divide_xattr_cluster(struct inode *inode,
4591                                       handle_t *handle,
4592                                       u64 prev_blk,
4593                                       u64 new_blk,
4594                                       u32 *first_hash)
4595 {
4596         u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4597         int ret, credits = 2 * blk_per_bucket + handle->h_buffer_credits;
4598
4599         BUG_ON(OCFS2_XATTR_BUCKET_SIZE < OCFS2_SB(inode->i_sb)->s_clustersize);
4600
4601         ret = ocfs2_extend_trans(handle, credits);
4602         if (ret) {
4603                 mlog_errno(ret);
4604                 return ret;
4605         }
4606
4607         /* Move half of the xattr in start_blk to the next bucket. */
4608         return  ocfs2_divide_xattr_bucket(inode, handle, prev_blk,
4609                                           new_blk, first_hash, 1);
4610 }
4611
4612 /*
4613  * Move some xattrs from the old cluster to the new one since they are not
4614  * contiguous in ocfs2 xattr tree.
4615  *
4616  * new_blk starts a new separate cluster, and we will move some xattrs from
4617  * prev_blk to it. v_start will be set as the first name hash value in this
4618  * new cluster so that it can be used as e_cpos during tree insertion and
4619  * don't collide with our original b-tree operations. first_bh and header_bh
4620  * will also be updated since they will be used in ocfs2_extend_xattr_bucket
4621  * to extend the insert bucket.
4622  *
4623  * The problem is how much xattr should we move to the new one and when should
4624  * we update first_bh and header_bh?
4625  * 1. If cluster size > bucket size, that means the previous cluster has more
4626  *    than 1 bucket, so just move half nums of bucket into the new cluster and
4627  *    update the first_bh and header_bh if the insert bucket has been moved
4628  *    to the new cluster.
4629  * 2. If cluster_size == bucket_size:
4630  *    a) If the previous extent rec has more than one cluster and the insert
4631  *       place isn't in the last cluster, copy the entire last cluster to the
4632  *       new one. This time, we don't need to upate the first_bh and header_bh
4633  *       since they will not be moved into the new cluster.
4634  *    b) Otherwise, move the bottom half of the xattrs in the last cluster into
4635  *       the new one. And we set the extend flag to zero if the insert place is
4636  *       moved into the new allocated cluster since no extend is needed.
4637  */
4638 static int ocfs2_adjust_xattr_cross_cluster(struct inode *inode,
4639                                             handle_t *handle,
4640                                             struct ocfs2_xattr_bucket *first,
4641                                             struct ocfs2_xattr_bucket *target,
4642                                             u64 new_blk,
4643                                             u32 prev_clusters,
4644                                             u32 *v_start,
4645                                             int *extend)
4646 {
4647         int ret;
4648
4649         mlog(0, "adjust xattrs from cluster %llu len %u to %llu\n",
4650              (unsigned long long)bucket_blkno(first), prev_clusters,
4651              (unsigned long long)new_blk);
4652
4653         if (ocfs2_xattr_buckets_per_cluster(OCFS2_SB(inode->i_sb)) > 1) {
4654                 ret = ocfs2_mv_xattr_bucket_cross_cluster(inode,
4655                                                           handle,
4656                                                           first, target,
4657                                                           new_blk,
4658                                                           prev_clusters,
4659                                                           v_start);
4660                 if (ret)
4661                         mlog_errno(ret);
4662         } else {
4663                 /* The start of the last cluster in the first extent */
4664                 u64 last_blk = bucket_blkno(first) +
4665                         ((prev_clusters - 1) *
4666                          ocfs2_clusters_to_blocks(inode->i_sb, 1));
4667
4668                 if (prev_clusters > 1 && bucket_blkno(target) != last_blk) {
4669                         ret = ocfs2_mv_xattr_buckets(inode, handle,
4670                                                      bucket_blkno(first),
4671                                                      last_blk, new_blk, 0,
4672                                                      v_start);
4673                         if (ret)
4674                                 mlog_errno(ret);
4675                 } else {
4676                         ret = ocfs2_divide_xattr_cluster(inode, handle,
4677                                                          last_blk, new_blk,
4678                                                          v_start);
4679                         if (ret)
4680                                 mlog_errno(ret);
4681
4682                         if ((bucket_blkno(target) == last_blk) && extend)
4683                                 *extend = 0;
4684                 }
4685         }
4686
4687         return ret;
4688 }
4689
4690 /*
4691  * Add a new cluster for xattr storage.
4692  *
4693  * If the new cluster is contiguous with the previous one, it will be
4694  * appended to the same extent record, and num_clusters will be updated.
4695  * If not, we will insert a new extent for it and move some xattrs in
4696  * the last cluster into the new allocated one.
4697  * We also need to limit the maximum size of a btree leaf, otherwise we'll
4698  * lose the benefits of hashing because we'll have to search large leaves.
4699  * So now the maximum size is OCFS2_MAX_XATTR_TREE_LEAF_SIZE(or clustersize,
4700  * if it's bigger).
4701  *
4702  * first_bh is the first block of the previous extent rec and header_bh
4703  * indicates the bucket we will insert the new xattrs. They will be updated
4704  * when the header_bh is moved into the new cluster.
4705  */
4706 static int ocfs2_add_new_xattr_cluster(struct inode *inode,
4707                                        struct buffer_head *root_bh,
4708                                        struct ocfs2_xattr_bucket *first,
4709                                        struct ocfs2_xattr_bucket *target,
4710                                        u32 *num_clusters,
4711                                        u32 prev_cpos,
4712                                        int *extend,
4713                                        struct ocfs2_xattr_set_ctxt *ctxt)
4714 {
4715         int ret;
4716         u16 bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
4717         u32 prev_clusters = *num_clusters;
4718         u32 clusters_to_add = 1, bit_off, num_bits, v_start = 0;
4719         u64 block;
4720         handle_t *handle = ctxt->handle;
4721         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4722         struct ocfs2_extent_tree et;
4723
4724         mlog(0, "Add new xattr cluster for %llu, previous xattr hash = %u, "
4725              "previous xattr blkno = %llu\n",
4726              (unsigned long long)OCFS2_I(inode)->ip_blkno,
4727              prev_cpos, (unsigned long long)bucket_blkno(first));
4728
4729         ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh);
4730
4731         ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh,
4732                                       OCFS2_JOURNAL_ACCESS_WRITE);
4733         if (ret < 0) {
4734                 mlog_errno(ret);
4735                 goto leave;
4736         }
4737
4738         ret = __ocfs2_claim_clusters(osb, handle, ctxt->data_ac, 1,
4739                                      clusters_to_add, &bit_off, &num_bits);
4740         if (ret < 0) {
4741                 if (ret != -ENOSPC)
4742                         mlog_errno(ret);
4743                 goto leave;
4744         }
4745
4746         BUG_ON(num_bits > clusters_to_add);
4747
4748         block = ocfs2_clusters_to_blocks(osb->sb, bit_off);
4749         mlog(0, "Allocating %u clusters at block %u for xattr in inode %llu\n",
4750              num_bits, bit_off, (unsigned long long)OCFS2_I(inode)->ip_blkno);
4751
4752         if (bucket_blkno(first) + (prev_clusters * bpc) == block &&
4753             (prev_clusters + num_bits) << osb->s_clustersize_bits <=
4754              OCFS2_MAX_XATTR_TREE_LEAF_SIZE) {
4755                 /*
4756                  * If this cluster is contiguous with the old one and
4757                  * adding this new cluster, we don't surpass the limit of
4758                  * OCFS2_MAX_XATTR_TREE_LEAF_SIZE, cool. We will let it be
4759                  * initialized and used like other buckets in the previous
4760                  * cluster.
4761                  * So add it as a contiguous one. The caller will handle
4762                  * its init process.
4763                  */
4764                 v_start = prev_cpos + prev_clusters;
4765                 *num_clusters = prev_clusters + num_bits;
4766                 mlog(0, "Add contiguous %u clusters to previous extent rec.\n",
4767                      num_bits);
4768         } else {
4769                 ret = ocfs2_adjust_xattr_cross_cluster(inode,
4770                                                        handle,
4771                                                        first,
4772                                                        target,
4773                                                        block,
4774                                                        prev_clusters,
4775                                                        &v_start,
4776                                                        extend);
4777                 if (ret) {
4778                         mlog_errno(ret);
4779                         goto leave;
4780                 }
4781         }
4782
4783         mlog(0, "Insert %u clusters at block %llu for xattr at %u\n",
4784              num_bits, (unsigned long long)block, v_start);
4785         ret = ocfs2_insert_extent(handle, &et, v_start, block,
4786                                   num_bits, 0, ctxt->meta_ac);
4787         if (ret < 0) {
4788                 mlog_errno(ret);
4789                 goto leave;
4790         }
4791
4792         ret = ocfs2_journal_dirty(handle, root_bh);
4793         if (ret < 0)
4794                 mlog_errno(ret);
4795
4796 leave:
4797         return ret;
4798 }
4799
4800 /*
4801  * We are given an extent.  'first' is the bucket at the very front of
4802  * the extent.  The extent has space for an additional bucket past
4803  * bucket_xh(first)->xh_num_buckets.  'target_blkno' is the block number
4804  * of the target bucket.  We wish to shift every bucket past the target
4805  * down one, filling in that additional space.  When we get back to the
4806  * target, we split the target between itself and the now-empty bucket
4807  * at target+1 (aka, target_blkno + blks_per_bucket).
4808  */
4809 static int ocfs2_extend_xattr_bucket(struct inode *inode,
4810                                      handle_t *handle,
4811                                      struct ocfs2_xattr_bucket *first,
4812                                      u64 target_blk,
4813                                      u32 num_clusters)
4814 {
4815         int ret, credits;
4816         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4817         u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
4818         u64 end_blk;
4819         u16 new_bucket = le16_to_cpu(bucket_xh(first)->xh_num_buckets);
4820
4821         mlog(0, "extend xattr bucket in %llu, xattr extend rec starting "
4822              "from %llu, len = %u\n", (unsigned long long)target_blk,
4823              (unsigned long long)bucket_blkno(first), num_clusters);
4824
4825         /* The extent must have room for an additional bucket */
4826         BUG_ON(new_bucket >=
4827                (num_clusters * ocfs2_xattr_buckets_per_cluster(osb)));
4828
4829         /* end_blk points to the last existing bucket */
4830         end_blk = bucket_blkno(first) + ((new_bucket - 1) * blk_per_bucket);
4831
4832         /*
4833          * end_blk is the start of the last existing bucket.
4834          * Thus, (end_blk - target_blk) covers the target bucket and
4835          * every bucket after it up to, but not including, the last
4836          * existing bucket.  Then we add the last existing bucket, the
4837          * new bucket, and the first bucket (3 * blk_per_bucket).
4838          */
4839         credits = (end_blk - target_blk) + (3 * blk_per_bucket) +
4840                   handle->h_buffer_credits;
4841         ret = ocfs2_extend_trans(handle, credits);
4842         if (ret) {
4843                 mlog_errno(ret);
4844                 goto out;
4845         }
4846
4847         ret = ocfs2_xattr_bucket_journal_access(handle, first,
4848                                                 OCFS2_JOURNAL_ACCESS_WRITE);
4849         if (ret) {
4850                 mlog_errno(ret);
4851                 goto out;
4852         }
4853
4854         while (end_blk != target_blk) {
4855                 ret = ocfs2_cp_xattr_bucket(inode, handle, end_blk,
4856                                             end_blk + blk_per_bucket, 0);
4857                 if (ret)
4858                         goto out;
4859                 end_blk -= blk_per_bucket;
4860         }
4861
4862         /* Move half of the xattr in target_blkno to the next bucket. */
4863         ret = ocfs2_divide_xattr_bucket(inode, handle, target_blk,
4864                                         target_blk + blk_per_bucket, NULL, 0);
4865
4866         le16_add_cpu(&bucket_xh(first)->xh_num_buckets, 1);
4867         ocfs2_xattr_bucket_journal_dirty(handle, first);
4868
4869 out:
4870         return ret;
4871 }
4872
4873 /*
4874  * Add new xattr bucket in an extent record and adjust the buckets
4875  * accordingly.  xb_bh is the ocfs2_xattr_block, and target is the
4876  * bucket we want to insert into.
4877  *
4878  * In the easy case, we will move all the buckets after target down by
4879  * one. Half of target's xattrs will be moved to the next bucket.
4880  *
4881  * If current cluster is full, we'll allocate a new one.  This may not
4882  * be contiguous.  The underlying calls will make sure that there is
4883  * space for the insert, shifting buckets around if necessary.
4884  * 'target' may be moved by those calls.
4885  */
4886 static int ocfs2_add_new_xattr_bucket(struct inode *inode,
4887                                       struct buffer_head *xb_bh,
4888                                       struct ocfs2_xattr_bucket *target,
4889                                       struct ocfs2_xattr_set_ctxt *ctxt)
4890 {
4891         struct ocfs2_xattr_block *xb =
4892                         (struct ocfs2_xattr_block *)xb_bh->b_data;
4893         struct ocfs2_xattr_tree_root *xb_root = &xb->xb_attrs.xb_root;
4894         struct ocfs2_extent_list *el = &xb_root->xt_list;
4895         u32 name_hash =
4896                 le32_to_cpu(bucket_xh(target)->xh_entries[0].xe_name_hash);
4897         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
4898         int ret, num_buckets, extend = 1;
4899         u64 p_blkno;
4900         u32 e_cpos, num_clusters;
4901         /* The bucket at the front of the extent */
4902         struct ocfs2_xattr_bucket *first;
4903
4904         mlog(0, "Add new xattr bucket starting from %llu\n",
4905              (unsigned long long)bucket_blkno(target));
4906
4907         /* The first bucket of the original extent */
4908         first = ocfs2_xattr_bucket_new(inode);
4909         if (!first) {
4910                 ret = -ENOMEM;
4911                 mlog_errno(ret);
4912                 goto out;
4913         }
4914
4915         ret = ocfs2_xattr_get_rec(inode, name_hash, &p_blkno, &e_cpos,
4916                                   &num_clusters, el);
4917         if (ret) {
4918                 mlog_errno(ret);
4919                 goto out;
4920         }
4921
4922         ret = ocfs2_read_xattr_bucket(first, p_blkno);
4923         if (ret) {
4924                 mlog_errno(ret);
4925                 goto out;
4926         }
4927
4928         num_buckets = ocfs2_xattr_buckets_per_cluster(osb) * num_clusters;
4929         if (num_buckets == le16_to_cpu(bucket_xh(first)->xh_num_buckets)) {
4930                 /*
4931                  * This can move first+target if the target bucket moves
4932                  * to the new extent.
4933                  */
4934                 ret = ocfs2_add_new_xattr_cluster(inode,
4935                                                   xb_bh,
4936                                                   first,
4937                                                   target,
4938                                                   &num_clusters,
4939                                                   e_cpos,
4940                                                   &extend,
4941                                                   ctxt);
4942                 if (ret) {
4943                         mlog_errno(ret);
4944                         goto out;
4945                 }
4946         }
4947
4948         if (extend) {
4949                 ret = ocfs2_extend_xattr_bucket(inode,
4950                                                 ctxt->handle,
4951                                                 first,
4952                                                 bucket_blkno(target),
4953                                                 num_clusters);
4954                 if (ret)
4955                         mlog_errno(ret);
4956         }
4957
4958 out:
4959         ocfs2_xattr_bucket_free(first);
4960
4961         return ret;
4962 }
4963
4964 static inline char *ocfs2_xattr_bucket_get_val(struct inode *inode,
4965                                         struct ocfs2_xattr_bucket *bucket,
4966                                         int offs)
4967 {
4968         int block_off = offs >> inode->i_sb->s_blocksize_bits;
4969
4970         offs = offs % inode->i_sb->s_blocksize;
4971         return bucket_block(bucket, block_off) + offs;
4972 }
4973
4974 /*
4975  * Handle the normal xattr set, including replace, delete and new.
4976  *
4977  * Note: "local" indicates the real data's locality. So we can't
4978  * just its bucket locality by its length.
4979  */
4980 static void ocfs2_xattr_set_entry_normal(struct inode *inode,
4981                                          struct ocfs2_xattr_info *xi,
4982                                          struct ocfs2_xattr_search *xs,
4983                                          u32 name_hash,
4984                                          int local)
4985 {
4986         struct ocfs2_xattr_entry *last, *xe;
4987         struct ocfs2_xattr_header *xh = xs->header;
4988         u16 count = le16_to_cpu(xh->xh_count), start;
4989         size_t blocksize = inode->i_sb->s_blocksize;
4990         char *val;
4991         size_t offs, size, new_size;
4992         struct ocfs2_xa_loc loc;
4993
4994         ocfs2_init_xattr_bucket_xa_loc(&loc, xs->bucket,
4995                                        xs->not_found ? NULL : xs->here);
4996         last = &xh->xh_entries[count];
4997         if (!xs->not_found) {
4998                 xe = xs->here;
4999                 offs = le16_to_cpu(xe->xe_name_offset);
5000                 if (ocfs2_xattr_is_local(xe))
5001                         size = OCFS2_XATTR_SIZE(xi->xi_name_len) +
5002                         OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
5003                 else
5004                         size = OCFS2_XATTR_SIZE(xi->xi_name_len) +
5005                         OCFS2_XATTR_SIZE(OCFS2_XATTR_ROOT_SIZE);
5006
5007                 /*
5008                  * If the new value will be stored outside, xi->xi_value has
5009                  * been initalized as an empty ocfs2_xattr_value_root, and
5010                  * the same goes with xi->xi_value_len, so we can set
5011                  * new_size safely here.
5012                  * See ocfs2_xattr_set_in_bucket.
5013                  */
5014                 new_size = OCFS2_XATTR_SIZE(xi->xi_name_len) +
5015                            OCFS2_XATTR_SIZE(xi->xi_value_len);
5016
5017                 if (xi->xi_value) {
5018                         ocfs2_xa_wipe_namevalue(&loc);
5019                         if (new_size > size)
5020                                 goto set_new_name_value;
5021
5022                         /* Now replace the old value with new one. */
5023                         if (local)
5024                                 xe->xe_value_size =
5025                                         cpu_to_le64(xi->xi_value_len);
5026                         else
5027                                 xe->xe_value_size = 0;
5028
5029                         val = ocfs2_xattr_bucket_get_val(inode,
5030                                                          xs->bucket, offs);
5031                         memset(val + OCFS2_XATTR_SIZE(xi->xi_name_len), 0,
5032                                size - OCFS2_XATTR_SIZE(xi->xi_name_len));
5033                         if (OCFS2_XATTR_SIZE(xi->xi_value_len) > 0)
5034                                 memcpy(val + OCFS2_XATTR_SIZE(xi->xi_name_len),
5035                                        xi->xi_value, xi->xi_value_len);
5036
5037                         le16_add_cpu(&xh->xh_name_value_len, new_size);
5038                         ocfs2_xattr_set_local(xe, local);
5039                         return;
5040                 } else {
5041                         ocfs2_xa_remove_entry(&loc);
5042                         if (!xh->xh_count)
5043                                 xh->xh_free_start =
5044                                         cpu_to_le16(OCFS2_XATTR_BUCKET_SIZE);
5045
5046                         return;
5047                 }
5048         } else {
5049                 /* find a new entry for insert. */
5050                 int low = 0, high = count - 1, tmp;
5051                 struct ocfs2_xattr_entry *tmp_xe;
5052
5053                 while (low <= high && count) {
5054                         tmp = (low + high) / 2;
5055                         tmp_xe = &xh->xh_entries[tmp];
5056
5057                         if (name_hash > le32_to_cpu(tmp_xe->xe_name_hash))
5058                                 low = tmp + 1;
5059                         else if (name_hash <
5060                                  le32_to_cpu(tmp_xe->xe_name_hash))
5061                                 high = tmp - 1;
5062                         else {
5063                                 low = tmp;
5064                                 break;
5065                         }
5066                 }
5067
5068                 xe = &xh->xh_entries[low];
5069                 if (low != count)
5070                         memmove(xe + 1, xe, (void *)last - (void *)xe);
5071
5072                 le16_add_cpu(&xh->xh_count, 1);
5073                 memset(xe, 0, sizeof(struct ocfs2_xattr_entry));
5074                 xe->xe_name_hash = cpu_to_le32(name_hash);
5075                 xe->xe_name_len = xi->xi_name_len;
5076                 ocfs2_xattr_set_type(xe, xi->xi_name_index);
5077         }
5078
5079 set_new_name_value:
5080         /* Insert the new name+value. */
5081         size = OCFS2_XATTR_SIZE(xi->xi_name_len) +
5082                 OCFS2_XATTR_SIZE(xi->xi_value_len);
5083
5084         /*
5085          * We must make sure that the name/value pair
5086          * exists in the same block.
5087          */
5088         offs = le16_to_cpu(xh->xh_free_start);
5089         start = offs - size;
5090
5091         if (start >> inode->i_sb->s_blocksize_bits !=
5092             (offs - 1) >> inode->i_sb->s_blocksize_bits) {
5093                 offs = offs - offs % blocksize;
5094                 xh->xh_free_start = cpu_to_le16(offs);
5095         }
5096
5097         val = ocfs2_xattr_bucket_get_val(inode, xs->bucket, offs - size);
5098         xe->xe_name_offset = cpu_to_le16(offs - size);
5099
5100         memset(val, 0, size);
5101         memcpy(val, xi->xi_name, xi->xi_name_len);
5102         memcpy(val + OCFS2_XATTR_SIZE(xi->xi_name_len), xi->xi_value,
5103                xi->xi_value_len);
5104
5105         xe->xe_value_size = cpu_to_le64(xi->xi_value_len);
5106         ocfs2_xattr_set_local(xe, local);
5107         xs->here = xe;
5108         le16_add_cpu(&xh->xh_free_start, -size);
5109         le16_add_cpu(&xh->xh_name_value_len, size);
5110
5111         return;
5112 }
5113
5114 /*
5115  * Set the xattr entry in the specified bucket.
5116  * The bucket is indicated by xs->bucket and it should have the enough
5117  * space for the xattr insertion.
5118  */
5119 static int ocfs2_xattr_set_entry_in_bucket(struct inode *inode,
5120                                            handle_t *handle,
5121                                            struct ocfs2_xattr_info *xi,
5122                                            struct ocfs2_xattr_search *xs,
5123                                            u32 name_hash,
5124                                            int local)
5125 {
5126         int ret;
5127         u64 blkno;
5128
5129         mlog(0, "Set xattr entry len = %lu index = %d in bucket %llu\n",
5130              (unsigned long)xi->xi_value_len, xi->xi_name_index,
5131              (unsigned long long)bucket_blkno(xs->bucket));
5132
5133         if (!xs->bucket->bu_bhs[1]) {
5134                 blkno = bucket_blkno(xs->bucket);
5135                 ocfs2_xattr_bucket_relse(xs->bucket);
5136                 ret = ocfs2_read_xattr_bucket(xs->bucket, blkno);
5137                 if (ret) {
5138                         mlog_errno(ret);
5139                         goto out;
5140                 }
5141         }
5142
5143         ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
5144                                                 OCFS2_JOURNAL_ACCESS_WRITE);
5145         if (ret < 0) {
5146                 mlog_errno(ret);
5147                 goto out;
5148         }
5149
5150         ocfs2_xattr_set_entry_normal(inode, xi, xs, name_hash, local);
5151         ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);
5152
5153 out:
5154         return ret;
5155 }
5156
5157 /*
5158  * Truncate the specified xe_off entry in xattr bucket.
5159  * bucket is indicated by header_bh and len is the new length.
5160  * Both the ocfs2_xattr_value_root and the entry will be updated here.
5161  *
5162  * Copy the new updated xe and xe_value_root to new_xe and new_xv if needed.
5163  */
5164 static int ocfs2_xattr_bucket_value_truncate(struct inode *inode,
5165                                              struct ocfs2_xattr_bucket *bucket,
5166                                              int xe_off,
5167                                              int len,
5168                                              struct ocfs2_xattr_set_ctxt *ctxt)
5169 {
5170         int ret, offset;
5171         u64 value_blk;
5172         struct ocfs2_xattr_entry *xe;
5173         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
5174         size_t blocksize = inode->i_sb->s_blocksize;
5175         struct ocfs2_xattr_value_buf vb = {
5176                 .vb_access = ocfs2_journal_access,
5177         };
5178
5179         xe = &xh->xh_entries[xe_off];
5180
5181         BUG_ON(!xe || ocfs2_xattr_is_local(xe));
5182
5183         offset = le16_to_cpu(xe->xe_name_offset) +
5184                  OCFS2_XATTR_SIZE(xe->xe_name_len);
5185
5186         value_blk = offset / blocksize;
5187
5188         /* We don't allow ocfs2_xattr_value to be stored in different block. */
5189         BUG_ON(value_blk != (offset + OCFS2_XATTR_ROOT_SIZE - 1) / blocksize);
5190
5191         vb.vb_bh = bucket->bu_bhs[value_blk];
5192         BUG_ON(!vb.vb_bh);
5193
5194         vb.vb_xv = (struct ocfs2_xattr_value_root *)
5195                 (vb.vb_bh->b_data + offset % blocksize);
5196
5197         /*
5198          * From here on out we have to dirty the bucket.  The generic
5199          * value calls only modify one of the bucket's bhs, but we need
5200          * to send the bucket at once.  So if they error, they *could* have
5201          * modified something.  We have to assume they did, and dirty
5202          * the whole bucket.  This leaves us in a consistent state.
5203          */
5204         mlog(0, "truncate %u in xattr bucket %llu to %d bytes.\n",
5205              xe_off, (unsigned long long)bucket_blkno(bucket), len);
5206         ret = ocfs2_xattr_value_truncate(inode, &vb, len, ctxt);
5207         if (ret) {
5208                 mlog_errno(ret);
5209                 goto out;
5210         }
5211
5212         ret = ocfs2_xattr_bucket_journal_access(ctxt->handle, bucket,
5213                                                 OCFS2_JOURNAL_ACCESS_WRITE);
5214         if (ret) {
5215                 mlog_errno(ret);
5216                 goto out;
5217         }
5218
5219         xe->xe_value_size = cpu_to_le64(len);
5220
5221         ocfs2_xattr_bucket_journal_dirty(ctxt->handle, bucket);
5222
5223 out:
5224         return ret;
5225 }
5226
5227 static int ocfs2_xattr_bucket_value_truncate_xs(struct inode *inode,
5228                                         struct ocfs2_xattr_search *xs,
5229                                         int len,
5230                                         struct ocfs2_xattr_set_ctxt *ctxt)
5231 {
5232         int ret, offset;
5233         struct ocfs2_xattr_entry *xe = xs->here;
5234         struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *)xs->base;
5235
5236         BUG_ON(!xs->bucket->bu_bhs[0] || !xe || ocfs2_xattr_is_local(xe));
5237
5238         offset = xe - xh->xh_entries;
5239         ret = ocfs2_xattr_bucket_value_truncate(inode, xs->bucket,
5240                                                 offset, len, ctxt);
5241         if (ret)
5242                 mlog_errno(ret);
5243
5244         return ret;
5245 }
5246
5247 static int ocfs2_xattr_bucket_set_value_outside(struct inode *inode,
5248                                                 handle_t *handle,
5249                                                 struct ocfs2_xattr_search *xs,
5250                                                 char *val,
5251                                                 int value_len)
5252 {
5253         int ret, offset, block_off;
5254         struct ocfs2_xattr_value_root *xv;
5255         struct ocfs2_xattr_entry *xe = xs->here;
5256         struct ocfs2_xattr_header *xh = bucket_xh(xs->bucket);
5257         void *base;
5258         struct ocfs2_xattr_value_buf vb = {
5259                 .vb_access = ocfs2_journal_access,
5260         };
5261
5262         BUG_ON(!xs->base || !xe || ocfs2_xattr_is_local(xe));
5263
5264         ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb, xh,
5265                                                 xe - xh->xh_entries,
5266                                                 &block_off,
5267                                                 &offset);
5268         if (ret) {
5269                 mlog_errno(ret);
5270                 goto out;
5271         }
5272
5273         base = bucket_block(xs->bucket, block_off);
5274         xv = (struct ocfs2_xattr_value_root *)(base + offset +
5275                  OCFS2_XATTR_SIZE(xe->xe_name_len));
5276
5277         vb.vb_xv = xv;
5278         vb.vb_bh = xs->bucket->bu_bhs[block_off];
5279         ret = __ocfs2_xattr_set_value_outside(inode, handle,
5280                                               &vb, val, value_len);
5281         if (ret)
5282                 mlog_errno(ret);
5283 out:
5284         return ret;
5285 }
5286
5287 static int ocfs2_rm_xattr_cluster(struct inode *inode,
5288                                   struct buffer_head *root_bh,
5289                                   u64 blkno,
5290                                   u32 cpos,
5291                                   u32 len,
5292                                   void *para)
5293 {
5294         int ret;
5295         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5296         struct inode *tl_inode = osb->osb_tl_inode;
5297         handle_t *handle;
5298         struct ocfs2_xattr_block *xb =
5299                         (struct ocfs2_xattr_block *)root_bh->b_data;
5300         struct ocfs2_alloc_context *meta_ac = NULL;
5301         struct ocfs2_cached_dealloc_ctxt dealloc;
5302         struct ocfs2_extent_tree et;
5303
5304         ret = ocfs2_iterate_xattr_buckets(inode, blkno, len,
5305                                           ocfs2_delete_xattr_in_bucket, para);
5306         if (ret) {
5307                 mlog_errno(ret);
5308                 return ret;
5309         }
5310
5311         ocfs2_init_xattr_tree_extent_tree(&et, INODE_CACHE(inode), root_bh);
5312
5313         ocfs2_init_dealloc_ctxt(&dealloc);
5314
5315         mlog(0, "rm xattr extent rec at %u len = %u, start from %llu\n",
5316              cpos, len, (unsigned long long)blkno);
5317
5318         ocfs2_remove_xattr_clusters_from_cache(INODE_CACHE(inode), blkno,
5319                                                len);
5320
5321         ret = ocfs2_lock_allocators(inode, &et, 0, 1, NULL, &meta_ac);
5322         if (ret) {
5323                 mlog_errno(ret);
5324                 return ret;
5325         }
5326
5327         mutex_lock(&tl_inode->i_mutex);
5328
5329         if (ocfs2_truncate_log_needs_flush(osb)) {
5330                 ret = __ocfs2_flush_truncate_log(osb);
5331                 if (ret < 0) {
5332                         mlog_errno(ret);
5333                         goto out;
5334                 }
5335         }
5336
5337         handle = ocfs2_start_trans(osb, ocfs2_remove_extent_credits(osb->sb));
5338         if (IS_ERR(handle)) {
5339                 ret = -ENOMEM;
5340                 mlog_errno(ret);
5341                 goto out;
5342         }
5343
5344         ret = ocfs2_journal_access_xb(handle, INODE_CACHE(inode), root_bh,
5345                                       OCFS2_JOURNAL_ACCESS_WRITE);
5346         if (ret) {
5347                 mlog_errno(ret);
5348                 goto out_commit;
5349         }
5350
5351         ret = ocfs2_remove_extent(handle, &et, cpos, len, meta_ac,
5352                                   &dealloc);
5353         if (ret) {
5354                 mlog_errno(ret);
5355                 goto out_commit;
5356         }
5357
5358         le32_add_cpu(&xb->xb_attrs.xb_root.xt_clusters, -len);
5359
5360         ret = ocfs2_journal_dirty(handle, root_bh);
5361         if (ret) {
5362                 mlog_errno(ret);
5363                 goto out_commit;
5364         }
5365
5366         ret = ocfs2_truncate_log_append(osb, handle, blkno, len);
5367         if (ret)
5368                 mlog_errno(ret);
5369
5370 out_commit:
5371         ocfs2_commit_trans(osb, handle);
5372 out:
5373         ocfs2_schedule_truncate_log_flush(osb, 1);
5374
5375         mutex_unlock(&tl_inode->i_mutex);
5376
5377         if (meta_ac)
5378                 ocfs2_free_alloc_context(meta_ac);
5379
5380         ocfs2_run_deallocs(osb, &dealloc);
5381
5382         return ret;
5383 }
5384
5385 static void ocfs2_xattr_bucket_remove_xs(struct inode *inode,
5386                                          handle_t *handle,
5387                                          struct ocfs2_xattr_search *xs)
5388 {
5389         struct ocfs2_xattr_header *xh = bucket_xh(xs->bucket);
5390         struct ocfs2_xattr_entry *last = &xh->xh_entries[
5391                                                 le16_to_cpu(xh->xh_count) - 1];
5392         int ret = 0;
5393
5394         ret = ocfs2_xattr_bucket_journal_access(handle, xs->bucket,
5395                                                 OCFS2_JOURNAL_ACCESS_WRITE);
5396         if (ret) {
5397                 mlog_errno(ret);
5398                 return;
5399         }
5400
5401         /* Remove the old entry. */
5402         memmove(xs->here, xs->here + 1,
5403                 (void *)last - (void *)xs->here);
5404         memset(last, 0, sizeof(struct ocfs2_xattr_entry));
5405         le16_add_cpu(&xh->xh_count, -1);
5406
5407         ocfs2_xattr_bucket_journal_dirty(handle, xs->bucket);
5408 }
5409
5410 /*
5411  * Set the xattr name/value in the bucket specified in xs.
5412  *
5413  * As the new value in xi may be stored in the bucket or in an outside cluster,
5414  * we divide the whole process into 3 steps:
5415  * 1. insert name/value in the bucket(ocfs2_xattr_set_entry_in_bucket)
5416  * 2. truncate of the outside cluster(ocfs2_xattr_bucket_value_truncate_xs)
5417  * 3. Set the value to the outside cluster(ocfs2_xattr_bucket_set_value_outside)
5418  * 4. If the clusters for the new outside value can't be allocated, we need
5419  *    to free the xattr we allocated in set.
5420  */
5421 static int ocfs2_xattr_set_in_bucket(struct inode *inode,
5422                                      struct ocfs2_xattr_info *xi,
5423                                      struct ocfs2_xattr_search *xs,
5424                                      struct ocfs2_xattr_set_ctxt *ctxt)
5425 {
5426         int ret, local = 1;
5427         size_t value_len;
5428         char *val = (char *)xi->xi_value;
5429         struct ocfs2_xattr_entry *xe = xs->here;
5430         u32 name_hash = ocfs2_xattr_name_hash(inode, xi->xi_name,
5431                                               xi->xi_name_len);
5432
5433         if (!xs->not_found && !ocfs2_xattr_is_local(xe)) {
5434                 /*
5435                  * We need to truncate the xattr storage first.
5436                  *
5437                  * If both the old and new value are stored to
5438                  * outside block, we only need to truncate
5439                  * the storage and then set the value outside.
5440                  *
5441                  * If the new value should be stored within block,
5442                  * we should free all the outside block first and
5443                  * the modification to the xattr block will be done
5444                  * by following steps.
5445                  */
5446                 if (xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE)
5447                         value_len = xi->xi_value_len;
5448                 else
5449                         value_len = 0;
5450
5451                 ret = ocfs2_xattr_bucket_value_truncate_xs(inode, xs,
5452                                                            value_len,
5453                                                            ctxt);
5454                 if (ret)
5455                         goto out;
5456
5457                 if (value_len)
5458                         goto set_value_outside;
5459         }
5460
5461         value_len = xi->xi_value_len;
5462         /* So we have to handle the inside block change now. */
5463         if (value_len > OCFS2_XATTR_INLINE_SIZE) {
5464                 /*
5465                  * If the new value will be stored outside of block,
5466                  * initalize a new empty value root and insert it first.
5467                  */
5468                 local = 0;
5469                 xi->xi_value = &def_xv;
5470                 xi->xi_value_len = OCFS2_XATTR_ROOT_SIZE;
5471         }
5472
5473         ret = ocfs2_xattr_set_entry_in_bucket(inode, ctxt->handle, xi, xs,
5474                                               name_hash, local);
5475         if (ret) {
5476                 mlog_errno(ret);
5477                 goto out;
5478         }
5479
5480         if (value_len <= OCFS2_XATTR_INLINE_SIZE)
5481                 goto out;
5482
5483         /* allocate the space now for the outside block storage. */
5484         ret = ocfs2_xattr_bucket_value_truncate_xs(inode, xs,
5485                                                    value_len, ctxt);
5486         if (ret) {
5487                 mlog_errno(ret);
5488
5489                 if (xs->not_found) {
5490                         /*
5491                          * We can't allocate enough clusters for outside
5492                          * storage and we have allocated xattr already,
5493                          * so need to remove it.
5494                          */
5495                         ocfs2_xattr_bucket_remove_xs(inode, ctxt->handle, xs);
5496                 }
5497                 goto out;
5498         }
5499
5500 set_value_outside:
5501         ret = ocfs2_xattr_bucket_set_value_outside(inode, ctxt->handle,
5502                                                    xs, val, value_len);
5503 out:
5504         return ret;
5505 }
5506
5507 /*
5508  * check whether the xattr bucket is filled up with the same hash value.
5509  * If we want to insert the xattr with the same hash, return -ENOSPC.
5510  * If we want to insert a xattr with different hash value, go ahead
5511  * and ocfs2_divide_xattr_bucket will handle this.
5512  */
5513 static int ocfs2_check_xattr_bucket_collision(struct inode *inode,
5514                                               struct ocfs2_xattr_bucket *bucket,
5515                                               const char *name)
5516 {
5517         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
5518         u32 name_hash = ocfs2_xattr_name_hash(inode, name, strlen(name));
5519
5520         if (name_hash != le32_to_cpu(xh->xh_entries[0].xe_name_hash))
5521                 return 0;
5522
5523         if (xh->xh_entries[le16_to_cpu(xh->xh_count) - 1].xe_name_hash ==
5524             xh->xh_entries[0].xe_name_hash) {
5525                 mlog(ML_ERROR, "Too much hash collision in xattr bucket %llu, "
5526                      "hash = %u\n",
5527                      (unsigned long long)bucket_blkno(bucket),
5528                      le32_to_cpu(xh->xh_entries[0].xe_name_hash));
5529                 return -ENOSPC;
5530         }
5531
5532         return 0;
5533 }
5534
5535 static int ocfs2_xattr_set_entry_index_block(struct inode *inode,
5536                                              struct ocfs2_xattr_info *xi,
5537                                              struct ocfs2_xattr_search *xs,
5538                                              struct ocfs2_xattr_set_ctxt *ctxt)
5539 {
5540         struct ocfs2_xattr_header *xh;
5541         struct ocfs2_xattr_entry *xe;
5542         u16 count, header_size, xh_free_start;
5543         int free, max_free, need, old;
5544         size_t value_size = 0;
5545         size_t blocksize = inode->i_sb->s_blocksize;
5546         int ret, allocation = 0;
5547
5548         mlog_entry("Set xattr %s in xattr index block\n", xi->xi_name);
5549
5550 try_again:
5551         xh = xs->header;
5552         count = le16_to_cpu(xh->xh_count);
5553         xh_free_start = le16_to_cpu(xh->xh_free_start);
5554         header_size = sizeof(struct ocfs2_xattr_header) +
5555                         count * sizeof(struct ocfs2_xattr_entry);
5556         max_free = OCFS2_XATTR_BUCKET_SIZE - header_size -
5557                 le16_to_cpu(xh->xh_name_value_len) - OCFS2_XATTR_HEADER_GAP;
5558
5559         mlog_bug_on_msg(header_size > blocksize, "bucket %llu has header size "
5560                         "of %u which exceed block size\n",
5561                         (unsigned long long)bucket_blkno(xs->bucket),
5562                         header_size);
5563
5564         if (xi->xi_value && xi->xi_value_len > OCFS2_XATTR_INLINE_SIZE)
5565                 value_size = OCFS2_XATTR_ROOT_SIZE;
5566         else if (xi->xi_value)
5567                 value_size = OCFS2_XATTR_SIZE(xi->xi_value_len);
5568
5569         if (xs->not_found)
5570                 need = sizeof(struct ocfs2_xattr_entry) +
5571                         OCFS2_XATTR_SIZE(xi->xi_name_len) + value_size;
5572         else {
5573                 need = value_size + OCFS2_XATTR_SIZE(xi->xi_name_len);
5574
5575                 /*
5576                  * We only replace the old value if the new length is smaller
5577                  * than the old one. Otherwise we will allocate new space in the
5578                  * bucket to store it.
5579                  */
5580                 xe = xs->here;
5581                 if (ocfs2_xattr_is_local(xe))
5582                         old = OCFS2_XATTR_SIZE(le64_to_cpu(xe->xe_value_size));
5583                 else
5584                         old = OCFS2_XATTR_SIZE(OCFS2_XATTR_ROOT_SIZE);
5585
5586                 if (old >= value_size)
5587                         need = 0;
5588         }
5589
5590         free = xh_free_start - header_size - OCFS2_XATTR_HEADER_GAP;
5591         /*
5592          * We need to make sure the new name/value pair
5593          * can exist in the same block.
5594          */
5595         if (xh_free_start % blocksize < need)
5596                 free -= xh_free_start % blocksize;
5597
5598         mlog(0, "xs->not_found = %d, in xattr bucket %llu: free = %d, "
5599              "need = %d, max_free = %d, xh_free_start = %u, xh_name_value_len ="
5600              " %u\n", xs->not_found,
5601              (unsigned long long)bucket_blkno(xs->bucket),
5602              free, need, max_free, le16_to_cpu(xh->xh_free_start),
5603              le16_to_cpu(xh->xh_name_value_len));
5604
5605         if (free < need ||
5606             (xs->not_found &&
5607              count == ocfs2_xattr_max_xe_in_bucket(inode->i_sb))) {
5608                 if (need <= max_free &&
5609                     count < ocfs2_xattr_max_xe_in_bucket(inode->i_sb)) {
5610                         /*
5611                          * We can create the space by defragment. Since only the
5612                          * name/value will be moved, the xe shouldn't be changed
5613                          * in xs.
5614                          */
5615                         ret = ocfs2_defrag_xattr_bucket(inode, ctxt->handle,
5616                                                         xs->bucket);
5617                         if (ret) {
5618                                 mlog_errno(ret);
5619                                 goto out;
5620                         }
5621
5622                         xh_free_start = le16_to_cpu(xh->xh_free_start);
5623                         free = xh_free_start - header_size
5624                                 - OCFS2_XATTR_HEADER_GAP;
5625                         if (xh_free_start % blocksize < need)
5626                                 free -= xh_free_start % blocksize;
5627
5628                         if (free >= need)
5629                                 goto xattr_set;
5630
5631                         mlog(0, "Can't get enough space for xattr insert by "
5632                              "defragment. Need %u bytes, but we have %d, so "
5633                              "allocate new bucket for it.\n", need, free);
5634                 }
5635
5636                 /*
5637                  * We have to add new buckets or clusters and one
5638                  * allocation should leave us enough space for insert.
5639                  */
5640                 BUG_ON(allocation);
5641
5642                 /*
5643                  * We do not allow for overlapping ranges between buckets. And
5644                  * the maximum number of collisions we will allow for then is
5645                  * one bucket's worth, so check it here whether we need to
5646                  * add a new bucket for the insert.
5647                  */
5648                 ret = ocfs2_check_xattr_bucket_collision(inode,
5649                                                          xs->bucket,
5650                                                          xi->xi_name);
5651                 if (ret) {
5652                         mlog_errno(ret);
5653                         goto out;
5654                 }
5655
5656                 ret = ocfs2_add_new_xattr_bucket(inode,
5657                                                  xs->xattr_bh,
5658                                                  xs->bucket,
5659                                                  ctxt);
5660                 if (ret) {
5661                         mlog_errno(ret);
5662                         goto out;
5663                 }
5664
5665                 /*
5666                  * ocfs2_add_new_xattr_bucket() will have updated
5667                  * xs->bucket if it moved, but it will not have updated
5668                  * any of the other search fields.  Thus, we drop it and
5669                  * re-search.  Everything should be cached, so it'll be
5670                  * quick.
5671                  */
5672                 ocfs2_xattr_bucket_relse(xs->bucket);
5673                 ret = ocfs2_xattr_index_block_find(inode, xs->xattr_bh,
5674                                                    xi->xi_name_index,
5675                                                    xi->xi_name, xs);
5676                 if (ret && ret != -ENODATA)
5677                         goto out;
5678                 xs->not_found = ret;
5679                 allocation = 1;
5680                 goto try_again;
5681         }
5682
5683 xattr_set:
5684         ret = ocfs2_xattr_set_in_bucket(inode, xi, xs, ctxt);
5685 out:
5686         mlog_exit(ret);
5687         return ret;
5688 }
5689
5690 static int ocfs2_delete_xattr_in_bucket(struct inode *inode,
5691                                         struct ocfs2_xattr_bucket *bucket,
5692                                         void *para)
5693 {
5694         int ret = 0, ref_credits;
5695         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
5696         u16 i;
5697         struct ocfs2_xattr_entry *xe;
5698         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5699         struct ocfs2_xattr_set_ctxt ctxt = {NULL, NULL,};
5700         int credits = ocfs2_remove_extent_credits(osb->sb) +
5701                 ocfs2_blocks_per_xattr_bucket(inode->i_sb);
5702         struct ocfs2_xattr_value_root *xv;
5703         struct ocfs2_rm_xattr_bucket_para *args =
5704                         (struct ocfs2_rm_xattr_bucket_para *)para;
5705
5706         ocfs2_init_dealloc_ctxt(&ctxt.dealloc);
5707
5708         for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
5709                 xe = &xh->xh_entries[i];
5710                 if (ocfs2_xattr_is_local(xe))
5711                         continue;
5712
5713                 ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket,
5714                                                       i, &xv, NULL);
5715
5716                 ret = ocfs2_lock_xattr_remove_allocators(inode, xv,
5717                                                          args->ref_ci,
5718                                                          args->ref_root_bh,
5719                                                          &ctxt.meta_ac,
5720                                                          &ref_credits);
5721
5722                 ctxt.handle = ocfs2_start_trans(osb, credits + ref_credits);
5723                 if (IS_ERR(ctxt.handle)) {
5724                         ret = PTR_ERR(ctxt.handle);
5725                         mlog_errno(ret);
5726                         break;
5727                 }
5728
5729                 ret = ocfs2_xattr_bucket_value_truncate(inode, bucket,
5730                                                         i, 0, &ctxt);
5731
5732                 ocfs2_commit_trans(osb, ctxt.handle);
5733                 if (ctxt.meta_ac) {
5734                         ocfs2_free_alloc_context(ctxt.meta_ac);
5735                         ctxt.meta_ac = NULL;
5736                 }
5737                 if (ret) {
5738                         mlog_errno(ret);
5739                         break;
5740                 }
5741         }
5742
5743         if (ctxt.meta_ac)
5744                 ocfs2_free_alloc_context(ctxt.meta_ac);
5745         ocfs2_schedule_truncate_log_flush(osb, 1);
5746         ocfs2_run_deallocs(osb, &ctxt.dealloc);
5747         return ret;
5748 }
5749
5750 /*
5751  * Whenever we modify a xattr value root in the bucket(e.g, CoW
5752  * or change the extent record flag), we need to recalculate
5753  * the metaecc for the whole bucket. So it is done here.
5754  *
5755  * Note:
5756  * We have to give the extra credits for the caller.
5757  */
5758 static int ocfs2_xattr_bucket_post_refcount(struct inode *inode,
5759                                             handle_t *handle,
5760                                             void *para)
5761 {
5762         int ret;
5763         struct ocfs2_xattr_bucket *bucket =
5764                         (struct ocfs2_xattr_bucket *)para;
5765
5766         ret = ocfs2_xattr_bucket_journal_access(handle, bucket,
5767                                                 OCFS2_JOURNAL_ACCESS_WRITE);
5768         if (ret) {
5769                 mlog_errno(ret);
5770                 return ret;
5771         }
5772
5773         ocfs2_xattr_bucket_journal_dirty(handle, bucket);
5774
5775         return 0;
5776 }
5777
5778 /*
5779  * Special action we need if the xattr value is refcounted.
5780  *
5781  * 1. If the xattr is refcounted, lock the tree.
5782  * 2. CoW the xattr if we are setting the new value and the value
5783  *    will be stored outside.
5784  * 3. In other case, decrease_refcount will work for us, so just
5785  *    lock the refcount tree, calculate the meta and credits is OK.
5786  *
5787  * We have to do CoW before ocfs2_init_xattr_set_ctxt since
5788  * currently CoW is a completed transaction, while this function
5789  * will also lock the allocators and let us deadlock. So we will
5790  * CoW the whole xattr value.
5791  */
5792 static int ocfs2_prepare_refcount_xattr(struct inode *inode,
5793                                         struct ocfs2_dinode *di,
5794                                         struct ocfs2_xattr_info *xi,
5795                                         struct ocfs2_xattr_search *xis,
5796                                         struct ocfs2_xattr_search *xbs,
5797                                         struct ocfs2_refcount_tree **ref_tree,
5798                                         int *meta_add,
5799                                         int *credits)
5800 {
5801         int ret = 0;
5802         struct ocfs2_xattr_block *xb;
5803         struct ocfs2_xattr_entry *xe;
5804         char *base;
5805         u32 p_cluster, num_clusters;
5806         unsigned int ext_flags;
5807         int name_offset, name_len;
5808         struct ocfs2_xattr_value_buf vb;
5809         struct ocfs2_xattr_bucket *bucket = NULL;
5810         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
5811         struct ocfs2_post_refcount refcount;
5812         struct ocfs2_post_refcount *p = NULL;
5813         struct buffer_head *ref_root_bh = NULL;
5814
5815         if (!xis->not_found) {
5816                 xe = xis->here;
5817                 name_offset = le16_to_cpu(xe->xe_name_offset);
5818                 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
5819                 base = xis->base;
5820                 vb.vb_bh = xis->inode_bh;
5821                 vb.vb_access = ocfs2_journal_access_di;
5822         } else {
5823                 int i, block_off = 0;
5824                 xb = (struct ocfs2_xattr_block *)xbs->xattr_bh->b_data;
5825                 xe = xbs->here;
5826                 name_offset = le16_to_cpu(xe->xe_name_offset);
5827                 name_len = OCFS2_XATTR_SIZE(xe->xe_name_len);
5828                 i = xbs->here - xbs->header->xh_entries;
5829
5830                 if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED) {
5831                         ret = ocfs2_xattr_bucket_get_name_value(inode->i_sb,
5832                                                         bucket_xh(xbs->bucket),
5833                                                         i, &block_off,
5834                                                         &name_offset);
5835                         if (ret) {
5836                                 mlog_errno(ret);
5837                                 goto out;
5838                         }
5839                         base = bucket_block(xbs->bucket, block_off);
5840                         vb.vb_bh = xbs->bucket->bu_bhs[block_off];
5841                         vb.vb_access = ocfs2_journal_access;
5842
5843                         if (ocfs2_meta_ecc(osb)) {
5844                                 /*create parameters for ocfs2_post_refcount. */
5845                                 bucket = xbs->bucket;
5846                                 refcount.credits = bucket->bu_blocks;
5847                                 refcount.para = bucket;
5848                                 refcount.func =
5849                                         ocfs2_xattr_bucket_post_refcount;
5850                                 p = &refcount;
5851                         }
5852                 } else {
5853                         base = xbs->base;
5854                         vb.vb_bh = xbs->xattr_bh;
5855                         vb.vb_access = ocfs2_journal_access_xb;
5856                 }
5857         }
5858
5859         if (ocfs2_xattr_is_local(xe))
5860                 goto out;
5861
5862         vb.vb_xv = (struct ocfs2_xattr_value_root *)
5863                                 (base + name_offset + name_len);
5864
5865         ret = ocfs2_xattr_get_clusters(inode, 0, &p_cluster,
5866                                        &num_clusters, &vb.vb_xv->xr_list,
5867                                        &ext_flags);
5868         if (ret) {
5869                 mlog_errno(ret);
5870                 goto out;
5871         }
5872
5873         /*
5874          * We just need to check the 1st extent record, since we always
5875          * CoW the whole xattr. So there shouldn't be a xattr with
5876          * some REFCOUNT extent recs after the 1st one.
5877          */
5878         if (!(ext_flags & OCFS2_EXT_REFCOUNTED))
5879                 goto out;
5880
5881         ret = ocfs2_lock_refcount_tree(osb, le64_to_cpu(di->i_refcount_loc),
5882                                        1, ref_tree, &ref_root_bh);
5883         if (ret) {
5884                 mlog_errno(ret);
5885                 goto out;
5886         }
5887
5888         /*
5889          * If we are deleting the xattr or the new size will be stored inside,
5890          * cool, leave it there, the xattr truncate process will remove them
5891          * for us(it still needs the refcount tree lock and the meta, credits).
5892          * And the worse case is that every cluster truncate will split the
5893          * refcount tree, and make the original extent become 3. So we will need
5894          * 2 * cluster more extent recs at most.
5895          */
5896         if (!xi->xi_value || xi->xi_value_len <= OCFS2_XATTR_INLINE_SIZE) {
5897
5898                 ret = ocfs2_refcounted_xattr_delete_need(inode,
5899                                                          &(*ref_tree)->rf_ci,
5900                                                          ref_root_bh, vb.vb_xv,
5901                                                          meta_add, credits);
5902                 if (ret)
5903                         mlog_errno(ret);
5904                 goto out;
5905         }
5906
5907         ret = ocfs2_refcount_cow_xattr(inode, di, &vb,
5908                                        *ref_tree, ref_root_bh, 0,
5909                                        le32_to_cpu(vb.vb_xv->xr_clusters), p);
5910         if (ret)
5911                 mlog_errno(ret);
5912
5913 out:
5914         brelse(ref_root_bh);
5915         return ret;
5916 }
5917
5918 /*
5919  * Add the REFCOUNTED flags for all the extent rec in ocfs2_xattr_value_root.
5920  * The physical clusters will be added to refcount tree.
5921  */
5922 static int ocfs2_xattr_value_attach_refcount(struct inode *inode,
5923                                 struct ocfs2_xattr_value_root *xv,
5924                                 struct ocfs2_extent_tree *value_et,
5925                                 struct ocfs2_caching_info *ref_ci,
5926                                 struct buffer_head *ref_root_bh,
5927                                 struct ocfs2_cached_dealloc_ctxt *dealloc,
5928                                 struct ocfs2_post_refcount *refcount)
5929 {
5930         int ret = 0;
5931         u32 clusters = le32_to_cpu(xv->xr_clusters);
5932         u32 cpos, p_cluster, num_clusters;
5933         struct ocfs2_extent_list *el = &xv->xr_list;
5934         unsigned int ext_flags;
5935
5936         cpos = 0;
5937         while (cpos < clusters) {
5938                 ret = ocfs2_xattr_get_clusters(inode, cpos, &p_cluster,
5939                                                &num_clusters, el, &ext_flags);
5940
5941                 cpos += num_clusters;
5942                 if ((ext_flags & OCFS2_EXT_REFCOUNTED))
5943                         continue;
5944
5945                 BUG_ON(!p_cluster);
5946
5947                 ret = ocfs2_add_refcount_flag(inode, value_et,
5948                                               ref_ci, ref_root_bh,
5949                                               cpos - num_clusters,
5950                                               p_cluster, num_clusters,
5951                                               dealloc, refcount);
5952                 if (ret) {
5953                         mlog_errno(ret);
5954                         break;
5955                 }
5956         }
5957
5958         return ret;
5959 }
5960
5961 /*
5962  * Given a normal ocfs2_xattr_header, refcount all the entries which
5963  * have value stored outside.
5964  * Used for xattrs stored in inode and ocfs2_xattr_block.
5965  */
5966 static int ocfs2_xattr_attach_refcount_normal(struct inode *inode,
5967                                 struct ocfs2_xattr_value_buf *vb,
5968                                 struct ocfs2_xattr_header *header,
5969                                 struct ocfs2_caching_info *ref_ci,
5970                                 struct buffer_head *ref_root_bh,
5971                                 struct ocfs2_cached_dealloc_ctxt *dealloc)
5972 {
5973
5974         struct ocfs2_xattr_entry *xe;
5975         struct ocfs2_xattr_value_root *xv;
5976         struct ocfs2_extent_tree et;
5977         int i, ret = 0;
5978
5979         for (i = 0; i < le16_to_cpu(header->xh_count); i++) {
5980                 xe = &header->xh_entries[i];
5981
5982                 if (ocfs2_xattr_is_local(xe))
5983                         continue;
5984
5985                 xv = (struct ocfs2_xattr_value_root *)((void *)header +
5986                         le16_to_cpu(xe->xe_name_offset) +
5987                         OCFS2_XATTR_SIZE(xe->xe_name_len));
5988
5989                 vb->vb_xv = xv;
5990                 ocfs2_init_xattr_value_extent_tree(&et, INODE_CACHE(inode), vb);
5991
5992                 ret = ocfs2_xattr_value_attach_refcount(inode, xv, &et,
5993                                                         ref_ci, ref_root_bh,
5994                                                         dealloc, NULL);
5995                 if (ret) {
5996                         mlog_errno(ret);
5997                         break;
5998                 }
5999         }
6000
6001         return ret;
6002 }
6003
6004 static int ocfs2_xattr_inline_attach_refcount(struct inode *inode,
6005                                 struct buffer_head *fe_bh,
6006                                 struct ocfs2_caching_info *ref_ci,
6007                                 struct buffer_head *ref_root_bh,
6008                                 struct ocfs2_cached_dealloc_ctxt *dealloc)
6009 {
6010         struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data;
6011         struct ocfs2_xattr_header *header = (struct ocfs2_xattr_header *)
6012                                 (fe_bh->b_data + inode->i_sb->s_blocksize -
6013                                 le16_to_cpu(di->i_xattr_inline_size));
6014         struct ocfs2_xattr_value_buf vb = {
6015                 .vb_bh = fe_bh,
6016                 .vb_access = ocfs2_journal_access_di,
6017         };
6018
6019         return ocfs2_xattr_attach_refcount_normal(inode, &vb, header,
6020                                                   ref_ci, ref_root_bh, dealloc);
6021 }
6022
/*
 * Refcount arguments bundled so they can travel through the void *para
 * parameter of the xattr tree iteration down to
 * ocfs2_xattr_bucket_value_refcount().
 */
6023 struct ocfs2_xattr_tree_value_refcount_para {
6024         struct ocfs2_caching_info *ref_ci;	/* passed on as ref_ci */
6025         struct buffer_head *ref_root_bh;	/* passed on as ref_root_bh */
6026         struct ocfs2_cached_dealloc_ctxt *dealloc;	/* passed on as dealloc */
6027 };
6028
6029 static int ocfs2_get_xattr_tree_value_root(struct super_block *sb,
6030                                            struct ocfs2_xattr_bucket *bucket,
6031                                            int offset,
6032                                            struct ocfs2_xattr_value_root **xv,
6033                                            struct buffer_head **bh)
6034 {
6035         int ret, block_off, name_offset;
6036         struct ocfs2_xattr_header *xh = bucket_xh(bucket);
6037         struct ocfs2_xattr_entry *xe = &xh->xh_entries[offset];
6038         void *base;
6039
6040         ret = ocfs2_xattr_bucket_get_name_value(sb,
6041                                                 bucket_xh(bucket),
6042                                                 offset,
6043                                                 &block_off,
6044                                                 &name_offset);
6045         if (ret) {
6046                 mlog_errno(ret);
6047                 goto out;
6048         }
6049
6050         base = bucket_block(bucket, block_off);
6051
6052         *xv = (struct ocfs2_xattr_value_root *)(base + name_offset +
6053                          OCFS2_XATTR_SIZE(xe->xe_name_len));
6054
6055         if (bh)
6056                 *bh = bucket->bu_bhs[block_off];
6057 out:
6058         return ret;
6059 }
6060
6061 /*
6062  * For a given xattr bucket, refcount all the entries which
6063  * have value stored outside.
6064  */
6065 static int ocfs2_xattr_bucket_value_refcount(struct inode *inode,
6066                                              struct ocfs2_xattr_bucket *bucket,
6067                                              void *para)
6068 {
6069         int i, ret = 0;
6070         struct ocfs2_extent_tree et;
6071         struct ocfs2_xattr_tree_value_refcount_para *ref =
6072                         (struct ocfs2_xattr_tree_value_refcount_para *)para;
6073         struct ocfs2_xattr_header *xh =
6074                         (struct ocfs2_xattr_header *)bucket->bu_bhs[0]->b_data;
6075         struct ocfs2_xattr_entry *xe;
6076         struct ocfs2_xattr_value_buf vb = {
6077                 .vb_access = ocfs2_journal_access,
6078         };
6079         struct ocfs2_post_refcount refcount = {
6080                 .credits = bucket->bu_blocks,
6081                 .para = bucket,
6082                 .func = ocfs2_xattr_bucket_post_refcount,
6083         };
6084         struct ocfs2_post_refcount *p = NULL;
6085
6086         /* We only need post_refcount if we support metaecc. */
6087         if (ocfs2_meta_ecc(OCFS2_SB(inode->i_sb)))
6088                 p = &refcount;
6089
6090         mlog(0, "refcount bucket %llu, count = %u\n",
6091              (unsigned long long)bucket_blkno(bucket),
6092              le16_to_cpu(xh->xh_count));
6093         for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
6094                 xe = &xh->xh_entries[i];
6095
6096                 if (ocfs2_xattr_is_local(xe))
6097                         continue;
6098
6099                 ret = ocfs2_get_xattr_tree_value_root(inode->i_sb, bucket, i,
6100                                                       &vb.vb_xv, &vb.vb_bh);
6101                 if (ret) {
6102                         mlog_errno(ret);
6103                         break;
6104                 }
6105
6106                 ocfs2_init_xattr_value_extent_tree(&et,
6107                                                    INODE_CACHE(inode), &vb);
6108
6109                 ret = ocfs2_xattr_value_attach_refcount(inode, vb.vb_xv,
6110                                                         &et, ref->ref_ci,
6111                                                         ref->ref_root_bh,
6112                                                         ref->dealloc, p);
6113                 if (ret) {
6114                         mlog_errno(ret);
6115                         break;
6116                 }
6117         }
6118
6119         return ret;
6120
6121 }
6122
6123 static int ocfs2_refcount_xattr_tree_rec(struct inode *inode,
6124                                      struct buffer_head *root_bh,
6125                                      u64 blkno, u32 cpos, u32 len, void *para)
6126 {
6127         return ocfs2_iterate_xattr_buckets(inode, blkno, len,
6128                                            ocfs2_xattr_bucket_value_refcount,
6129                                            para);
6130 }
6131
6132 static int ocfs2_xattr_block_attach_refcount(struct inode *inode,
6133                                 struct buffer_head *blk_bh,
6134                                 struct ocfs2_caching_info *ref_ci,
6135                                 struct buffer_head *ref_root_bh,
6136                                 struct ocfs2_cached_dealloc_ctxt *dealloc)
6137 {
6138         int ret = 0;
6139         struct ocfs2_xattr_block *xb =
6140                                 (struct ocfs2_xattr_block *)blk_bh->b_data;
6141
6142         if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)) {
6143                 struct ocfs2_xattr_header *header = &xb->xb_attrs.xb_header;
6144                 struct ocfs2_xattr_value_buf vb = {
6145                         .vb_bh = blk_bh,
6146                         .vb_access = ocfs2_journal_access_xb,
6147                 };
6148
6149                 ret = ocfs2_xattr_attach_refcount_normal(inode, &vb, header,
6150                                                          ref_ci, ref_root_bh,
6151                                                          dealloc);
6152         } else {
6153                 struct ocfs2_xattr_tree_value_refcount_para para = {
6154                         .ref_ci = ref_ci,
6155                         .ref_root_bh = ref_root_bh,
6156                         .dealloc = dealloc,
6157                 };
6158
6159                 ret = ocfs2_iterate_xattr_index_block(inode, blk_bh,
6160                                                 ocfs2_refcount_xattr_tree_rec,
6161                                                 &para);
6162         }
6163
6164         return ret;
6165 }
6166
6167 int ocfs2_xattr_attach_refcount_tree(struct inode *inode,
6168                                      struct buffer_head *fe_bh,
6169                                      struct ocfs2_caching_info *ref_ci,
6170                                      struct buffer_head *ref_root_bh,
6171                                      struct ocfs2_cached_dealloc_ctxt *dealloc)
6172 {
6173         int ret = 0;
6174         struct ocfs2_inode_info *oi = OCFS2_I(inode);
6175         struct ocfs2_dinode *di = (struct ocfs2_dinode *)fe_bh->b_data;
6176         struct buffer_head *blk_bh = NULL;
6177
6178         if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
6179                 ret = ocfs2_xattr_inline_attach_refcount(inode, fe_bh,
6180                                                          ref_ci, ref_root_bh,
6181                                                          dealloc);
6182                 if (ret) {
6183                         mlog_errno(ret);
6184                         goto out;
6185                 }
6186         }
6187
6188         if (!di->i_xattr_loc)
6189                 goto out;
6190
6191         ret = ocfs2_read_xattr_block(inode, le64_to_cpu(di->i_xattr_loc),
6192                                      &blk_bh);
6193         if (ret < 0) {
6194                 mlog_errno(ret);
6195                 goto out;
6196         }
6197
6198         ret = ocfs2_xattr_block_attach_refcount(inode, blk_bh, ref_ci,
6199                                                 ref_root_bh, dealloc);
6200         if (ret)
6201                 mlog_errno(ret);
6202
6203         brelse(blk_bh);
6204 out:
6205
6206         return ret;
6207 }
6208
/*
 * Predicate applied to an entry of the old xattr header during reflink.
 * A zero return makes ocfs2_reflink_xattr_header() drop the entry from
 * the new header; a non-zero return keeps it.
 */
6209 typedef int (should_xattr_reflinked)(struct ocfs2_xattr_entry *xe);
6210 /*
6211  * Store the information we need in xattr reflink.
6212  * old_bh and new_bh are inode bh for the old and new inode.
6213  */
6214 struct ocfs2_xattr_reflink {
6215         struct inode *old_inode;	/* inode the xattrs are read from */
6216         struct inode *new_inode;	/* inode the xattrs are reflinked into */
6217         struct buffer_head *old_bh;
6218         struct buffer_head *new_bh;
6219         struct ocfs2_caching_info *ref_ci;	/* for ocfs2_increase_refcount() */
6220         struct buffer_head *ref_root_bh;	/* refcount tree root block */
6221         struct ocfs2_cached_dealloc_ctxt *dealloc;
6222         should_xattr_reflinked *xattr_reflinked;	/* optional filter, may be NULL */
6223 };
6224
6225 /*
6226  * Given a xattr header and xe offset,
6227  * return the proper xv and the corresponding bh.
6228  * xattr in inode, block and xattr tree have different implementations.
6229  *
     * offset is the index of the entry inside xh->xh_entries.  ret_bh may
     * be NULL when the caller does not need the buffer head.  para is an
     * opaque pointer whose meaning is defined by each implementation.
     * Returns 0 on success or a non-zero error code.
     */
6230 typedef int (get_xattr_value_root)(struct super_block *sb,
6231                                    struct buffer_head *bh,
6232                                    struct ocfs2_xattr_header *xh,
6233                                    int offset,
6234                                    struct ocfs2_xattr_value_root **xv,
6235                                    struct buffer_head **ret_bh,
6236                                    void *para);
6237
6238 /*
6239  * Calculate all the xattr value root metadata stored in this xattr header and
6240  * credits we need if we create them from the scratch.
6241  * We use get_xattr_value_root so that all types of xattr container can use it.
6242  */
6243 static int ocfs2_value_metas_in_xattr_header(struct super_block *sb,
6244                                              struct buffer_head *bh,
6245                                              struct ocfs2_xattr_header *xh,
6246                                              int *metas, int *credits,
6247                                              int *num_recs,
6248                                              get_xattr_value_root *func,
6249                                              void *para)
6250 {
6251         int i, ret = 0;
6252         struct ocfs2_xattr_value_root *xv;
6253         struct ocfs2_xattr_entry *xe;
6254
6255         for (i = 0; i < le16_to_cpu(xh->xh_count); i++) {
6256                 xe = &xh->xh_entries[i];
6257                 if (ocfs2_xattr_is_local(xe))
6258                         continue;
6259
6260                 ret = func(sb, bh, xh, i, &xv, NULL, para);
6261                 if (ret) {
6262                         mlog_errno(ret);
6263                         break;
6264                 }
6265
6266                 *metas += le16_to_cpu(xv->xr_list.l_tree_depth) *
6267                           le16_to_cpu(xv->xr_list.l_next_free_rec);
6268
6269                 *credits += ocfs2_calc_extend_credits(sb,
6270                                                 &def_xv.xv.xr_list,
6271                                                 le32_to_cpu(xv->xr_clusters));
6272
6273                 /*
6274                  * If the value is a tree with depth > 1, We don't go deep
6275                  * to the extent block, so just calculate a maximum record num.
6276                  */
6277                 if (!xv->xr_list.l_tree_depth)
6278                         *num_recs += le16_to_cpu(xv->xr_list.l_next_free_rec);
6279                 else
6280                         *num_recs += ocfs2_clusters_for_bytes(sb,
6281                                                               XATTR_SIZE_MAX);
6282         }
6283
6284         return ret;
6285 }
6286
6287 /* Used by xattr inode and block to return the right xv and buffer_head. */
6288 static int ocfs2_get_xattr_value_root(struct super_block *sb,
6289                                       struct buffer_head *bh,
6290                                       struct ocfs2_xattr_header *xh,
6291                                       int offset,
6292                                       struct ocfs2_xattr_value_root **xv,
6293                                       struct buffer_head **ret_bh,
6294                                       void *para)
6295 {
6296         struct ocfs2_xattr_entry *xe = &xh->xh_entries[offset];
6297
6298         *xv = (struct ocfs2_xattr_value_root *)((void *)xh +
6299                 le16_to_cpu(xe->xe_name_offset) +
6300                 OCFS2_XATTR_SIZE(xe->xe_name_len));
6301
6302         if (ret_bh)
6303                 *ret_bh = bh;
6304
6305         return 0;
6306 }
6307
6308 /*
6309  * Lock the meta_ac and caculate how much credits we need for reflink xattrs.
6310  * It is only used for inline xattr and xattr block.
6311  */
6312 static int ocfs2_reflink_lock_xattr_allocators(struct ocfs2_super *osb,
6313                                         struct ocfs2_xattr_header *xh,
6314                                         struct buffer_head *ref_root_bh,
6315                                         int *credits,
6316                                         struct ocfs2_alloc_context **meta_ac)
6317 {
6318         int ret, meta_add = 0, num_recs = 0;
6319         struct ocfs2_refcount_block *rb =
6320                         (struct ocfs2_refcount_block *)ref_root_bh->b_data;
6321
6322         *credits = 0;
6323
6324         ret = ocfs2_value_metas_in_xattr_header(osb->sb, NULL, xh,
6325                                                 &meta_add, credits, &num_recs,
6326                                                 ocfs2_get_xattr_value_root,
6327                                                 NULL);
6328         if (ret) {
6329                 mlog_errno(ret);
6330                 goto out;
6331         }
6332
6333         /*
6334          * We need to add/modify num_recs in refcount tree, so just calculate
6335          * an approximate number we need for refcount tree change.
6336          * Sometimes we need to split the tree, and after split,  half recs
6337          * will be moved to the new block, and a new block can only provide
6338          * half number of recs. So we multiple new blocks by 2.
6339          */
6340         num_recs = num_recs / ocfs2_refcount_recs_per_rb(osb->sb) * 2;
6341         meta_add += num_recs;
6342         *credits += num_recs + num_recs * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS;
6343         if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL)
6344                 *credits += le16_to_cpu(rb->rf_list.l_tree_depth) *
6345                             le16_to_cpu(rb->rf_list.l_next_free_rec) + 1;
6346         else
6347                 *credits += 1;
6348
6349         ret = ocfs2_reserve_new_metadata_blocks(osb, meta_add, meta_ac);
6350         if (ret)
6351                 mlog_errno(ret);
6352
6353 out:
6354         return ret;
6355 }
6356
6357 /*
6358  * Given a xattr header, reflink all the xattrs in this container.
6359  * It can be used for inode, block and bucket.
6360  *
6361  * NOTE:
6362  * Before we call this function, the caller has memcpy the xattr in
6363  * old_xh to the new_xh.
6364  *
6365  * If args.xattr_reflinked is set, call it to decide whether the xe should
6366  * be reflinked or not. If not, remove it from the new xattr header.
6367  */
6368 static int ocfs2_reflink_xattr_header(handle_t *handle,
6369                                       struct ocfs2_xattr_reflink *args,
6370                                       struct buffer_head *old_bh,
6371                                       struct ocfs2_xattr_header *xh,
6372                                       struct buffer_head *new_bh,
6373                                       struct ocfs2_xattr_header *new_xh,
6374                                       struct ocfs2_xattr_value_buf *vb,
6375                                       struct ocfs2_alloc_context *meta_ac,
6376                                       get_xattr_value_root *func,
6377                                       void *para)
6378 {
6379         int ret = 0, i, j;
6380         struct super_block *sb = args->old_inode->i_sb;
6381         struct buffer_head *value_bh;
6382         struct ocfs2_xattr_entry *xe, *last;
6383         struct ocfs2_xattr_value_root *xv, *new_xv;
6384         struct ocfs2_extent_tree data_et;
6385         u32 clusters, cpos, p_cluster, num_clusters;
6386         unsigned int ext_flags = 0;
6387
6388         mlog(0, "reflink xattr in container %llu, count = %u\n",
6389              (unsigned long long)old_bh->b_blocknr, le16_to_cpu(xh->xh_count));
6390
6391         last = &new_xh->xh_entries[le16_to_cpu(new_xh->xh_count)];
6392         for (i = 0, j = 0; i < le16_to_cpu(xh->xh_count); i++, j++) {
6393                 xe = &xh->xh_entries[i];
6394
6395                 if (args->xattr_reflinked && !args->xattr_reflinked(xe)) {
6396                         xe = &new_xh->xh_entries[j];
6397
6398                         le16_add_cpu(&new_xh->xh_count, -1);
6399                         if (new_xh->xh_count) {
6400                                 memmove(xe, xe + 1,
6401                                         (void *)last - (void *)xe);
6402                                 memset(last, 0,
6403                                        sizeof(struct ocfs2_xattr_entry));
6404                         }
6405
6406                         /*
6407                          * We don't want j to increase in the next round since
6408                          * it is already moved ahead.
6409                          */
6410                         j--;
6411                         continue;
6412                 }
6413
6414                 if (ocfs2_xattr_is_local(xe))
6415                         continue;
6416
6417                 ret = func(sb, old_bh, xh, i, &xv, NULL, para);
6418                 if (ret) {
6419                         mlog_errno(ret);
6420                         break;
6421                 }
6422
6423                 ret = func(sb, new_bh, new_xh, j, &new_xv, &value_bh, para);
6424                 if (ret) {
6425                         mlog_errno(ret);
6426                         break;
6427                 }
6428
6429                 /*
6430                  * For the xattr which has l_tree_depth = 0, all the extent
6431                  * recs have already be copied to the new xh with the
6432                  * propriate OCFS2_EXT_REFCOUNTED flag we just need to
6433                  * increase the refount count int the refcount tree.
6434                  *
6435                  * For the xattr which has l_tree_depth > 0, we need
6436                  * to initialize it to the empty default value root,
6437                  * and then insert the extents one by one.
6438                  */
6439                 if (xv->xr_list.l_tree_depth) {
6440                         memcpy(new_xv, &def_xv, sizeof(def_xv));
6441                         vb->vb_xv = new_xv;
6442                         vb->vb_bh = value_bh;
6443                         ocfs2_init_xattr_value_extent_tree(&data_et,
6444                                         INODE_CACHE(args->new_inode), vb);
6445                 }
6446
6447                 clusters = le32_to_cpu(xv->xr_clusters);
6448                 cpos = 0;
6449                 while (cpos < clusters) {
6450                         ret = ocfs2_xattr_get_clusters(args->old_inode,
6451                                                        cpos,
6452                                                        &p_cluster,
6453                                                        &num_clusters,
6454                                                        &xv->xr_list,
6455                                                        &ext_flags);
6456                         if (ret) {
6457                                 mlog_errno(ret);
6458                                 goto out;
6459                         }
6460
6461                         BUG_ON(!p_cluster);
6462
6463                         if (xv->xr_list.l_tree_depth) {
6464                                 ret = ocfs2_insert_extent(handle,
6465                                                 &data_et, cpos,
6466                                                 ocfs2_clusters_to_blocks(
6467                                                         args->old_inode->i_sb,
6468                                                         p_cluster),
6469                                                 num_clusters, ext_flags,
6470                                                 meta_ac);
6471                                 if (ret) {
6472                                         mlog_errno(ret);
6473                                         goto out;
6474                                 }
6475                         }
6476
6477                         ret = ocfs2_increase_refcount(handle, args->ref_ci,
6478                                                       args->ref_root_bh,
6479                                                       p_cluster, num_clusters,
6480                                                       meta_ac, args->dealloc);
6481                         if (ret) {
6482                                 mlog_errno(ret);
6483                                 goto out;
6484                         }
6485
6486                         cpos += num_clusters;
6487                 }
6488         }
6489
6490 out:
6491         return ret;
6492 }
6493
6494 static int ocfs2_reflink_xattr_inline(struct ocfs2_xattr_reflink *args)
6495 {
6496         int ret = 0, credits = 0;
6497         handle_t *handle;
6498         struct ocfs2_super *osb = OCFS2_SB(args->old_inode->i_sb);
6499         struct ocfs2_dinode *di = (struct ocfs2_dinode *)args->old_bh->b_data;
6500         int inline_size = le16_to_cpu(di->i_xattr_inline_size);
6501         int header_off = osb->sb->s_blocksize - inline_size;
6502         struct ocfs2_xattr_header *xh = (struct ocfs2_xattr_header *)
6503                                         (args->old_bh->b_data + header_off);
6504         struct ocfs2_xattr_header *new_xh = (struct ocfs2_xattr_header *)
6505                                         (args->new_bh->b_data + header_off);
6506         struct ocfs2_alloc_context *meta_ac = NULL;
6507         struct ocfs2_inode_info *new_oi;
6508         struct ocfs2_dinode *new_di;
6509         struct ocfs2_xattr_value_buf vb = {
6510                 .vb_bh = args->new_bh,
6511                 .vb_access = ocfs2_journal_access_di,
6512         };
6513
6514         ret = ocfs2_reflink_lock_xattr_allocators(osb, xh, args->ref_root_bh,
6515                                                   &credits, &meta_ac);
6516         if (ret) {
6517                 mlog_errno(ret);
6518                 goto out;
6519         }
6520
6521         handle = ocfs2_start_trans(osb, credits);
6522         if (IS_ERR(handle)) {
6523                 ret = PTR_ERR(handle);
6524                 mlog_errno(ret);
6525                 goto out;
6526         }
6527
6528         ret = ocfs2_journal_access_di(handle, INODE_CACHE(args->new_inode),
6529                                       args->new_bh, OCFS2_JOURNAL_ACCESS_WRITE);
6530         if (ret) {
6531                 mlog_errno(ret);
6532                 goto out_commit;
6533         }
6534
6535         memcpy(args->new_bh->b_data + header_off,
6536                args->old_bh->b_data + header_off, inline_size);
6537
6538         new_di = (struct ocfs2_dinode *)args->new_bh->b_data;
6539         new_di->i_xattr_inline_size = cpu_to_le16(inline_size);
6540
6541         ret = ocfs2_reflink_xattr_header(handle, args, args->old_bh, xh,
6542                                          args->new_bh, new_xh, &vb, meta_ac,
6543                                          ocfs2_get_xattr_value_root, NULL);
6544         if (ret) {
6545                 mlog_errno(ret);
6546                 goto out_commit;
6547         }
6548
6549         new_oi = OCFS2_I(args->new_inode);
6550         spin_lock(&new_oi->ip_lock);
6551         new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL | OCFS2_INLINE_XATTR_FL;
6552         new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features);
6553         spin_unlock(&new_oi->ip_lock);
6554
6555         ocfs2_journal_dirty(handle, args->new_bh);
6556
6557 out_commit:
6558         ocfs2_commit_trans(osb, handle);
6559
6560 out:
6561         if (meta_ac)
6562                 ocfs2_free_alloc_context(meta_ac);
6563         return ret;
6564 }
6565
6566 static int ocfs2_create_empty_xattr_block(struct inode *inode,
6567                                           struct buffer_head *fe_bh,
6568                                           struct buffer_head **ret_bh,
6569                                           int indexed)
6570 {
6571         int ret;
6572         handle_t *handle;
6573         struct ocfs2_alloc_context *meta_ac;
6574         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
6575
6576         ret = ocfs2_reserve_new_metadata_blocks(osb, 1, &meta_ac);
6577         if (ret < 0) {
6578                 mlog_errno(ret);
6579                 return ret;
6580         }
6581
6582         handle = ocfs2_start_trans(osb, OCFS2_XATTR_BLOCK_CREATE_CREDITS);
6583         if (IS_ERR(handle)) {
6584                 ret = PTR_ERR(handle);
6585                 mlog_errno(ret);
6586                 goto out;
6587         }
6588
6589         mlog(0, "create new xattr block for inode %llu, index = %d\n",
6590              (unsigned long long)fe_bh->b_blocknr, indexed);
6591         ret = ocfs2_create_xattr_block(handle, inode, fe_bh,
6592                                        meta_ac, ret_bh, indexed);
6593         if (ret)
6594                 mlog_errno(ret);
6595
6596         ocfs2_commit_trans(osb, handle);
6597 out:
6598         ocfs2_free_alloc_context(meta_ac);
6599         return ret;
6600 }
6601
6602 static int ocfs2_reflink_xattr_block(struct ocfs2_xattr_reflink *args,
6603                                      struct buffer_head *blk_bh,
6604                                      struct buffer_head *new_blk_bh)
6605 {
6606         int ret = 0, credits = 0;
6607         handle_t *handle;
6608         struct ocfs2_inode_info *new_oi = OCFS2_I(args->new_inode);
6609         struct ocfs2_dinode *new_di;
6610         struct ocfs2_super *osb = OCFS2_SB(args->new_inode->i_sb);
6611         int header_off = offsetof(struct ocfs2_xattr_block, xb_attrs.xb_header);
6612         struct ocfs2_xattr_block *xb =
6613                         (struct ocfs2_xattr_block *)blk_bh->b_data;
6614         struct ocfs2_xattr_header *xh = &xb->xb_attrs.xb_header;
6615         struct ocfs2_xattr_block *new_xb =
6616                         (struct ocfs2_xattr_block *)new_blk_bh->b_data;
6617         struct ocfs2_xattr_header *new_xh = &new_xb->xb_attrs.xb_header;
6618         struct ocfs2_alloc_context *meta_ac;
6619         struct ocfs2_xattr_value_buf vb = {
6620                 .vb_bh = new_blk_bh,
6621                 .vb_access = ocfs2_journal_access_xb,
6622         };
6623
6624         ret = ocfs2_reflink_lock_xattr_allocators(osb, xh, args->ref_root_bh,
6625                                                   &credits, &meta_ac);
6626         if (ret) {
6627                 mlog_errno(ret);
6628                 return ret;
6629         }
6630
6631         /* One more credits in case we need to add xattr flags in new inode. */
6632         handle = ocfs2_start_trans(osb, credits + 1);
6633         if (IS_ERR(handle)) {
6634                 ret = PTR_ERR(handle);
6635                 mlog_errno(ret);
6636                 goto out;
6637         }
6638
6639         if (!(new_oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) {
6640                 ret = ocfs2_journal_access_di(handle,
6641                                               INODE_CACHE(args->new_inode),
6642                                               args->new_bh,
6643                                               OCFS2_JOURNAL_ACCESS_WRITE);
6644                 if (ret) {
6645                         mlog_errno(ret);
6646                         goto out_commit;
6647                 }
6648         }
6649
6650         ret = ocfs2_journal_access_xb(handle, INODE_CACHE(args->new_inode),
6651                                       new_blk_bh, OCFS2_JOURNAL_ACCESS_WRITE);
6652         if (ret) {
6653                 mlog_errno(ret);
6654                 goto out_commit;
6655         }
6656
6657         memcpy(new_blk_bh->b_data + header_off, blk_bh->b_data + header_off,
6658                osb->sb->s_blocksize - header_off);
6659
6660         ret = ocfs2_reflink_xattr_header(handle, args, blk_bh, xh,
6661                                          new_blk_bh, new_xh, &vb, meta_ac,
6662                                          ocfs2_get_xattr_value_root, NULL);
6663         if (ret) {
6664                 mlog_errno(ret);
6665                 goto out_commit;
6666         }
6667
6668         ocfs2_journal_dirty(handle, new_blk_bh);
6669
6670         if (!(new_oi->ip_dyn_features & OCFS2_HAS_XATTR_FL)) {
6671                 new_di = (struct ocfs2_dinode *)args->new_bh->b_data;
6672                 spin_lock(&new_oi->ip_lock);
6673                 new_oi->ip_dyn_features |= OCFS2_HAS_XATTR_FL;
6674                 new_di->i_dyn_features = cpu_to_le16(new_oi->ip_dyn_features);
6675                 spin_unlock(&new_oi->ip_lock);
6676
6677                 ocfs2_journal_dirty(handle, args->new_bh);
6678         }
6679
6680 out_commit:
6681         ocfs2_commit_trans(osb, handle);
6682
6683 out:
6684         ocfs2_free_alloc_context(meta_ac);
6685         return ret;
6686 }
6687
/*
 * State shared while reflinking an xattr tree.  It is passed as the
 * void *para argument to ocfs2_get_reflink_xattr_value_root(), which
 * uses old_bucket and new_bucket to pick the bucket matching a bh.
 */
6688 struct ocfs2_reflink_xattr_tree_args {
6689         struct ocfs2_xattr_reflink *reflink;	/* overall reflink context */
6690         struct buffer_head *old_blk_bh;	/* presumably the old xattr block */
6691         struct buffer_head *new_blk_bh;	/* presumably the new xattr block */
6692         struct ocfs2_xattr_bucket *old_bucket;	/* bucket being read */
6693         struct ocfs2_xattr_bucket *new_bucket;	/* bucket being filled */
6694 };
6695
6696 /*
6697  * NOTE:
6698  * We have to handle the case that both old bucket and new bucket
6699  * will call this function to get the right ret_bh.
6700  * So The caller must give us the right bh.
6701  */
6702 static int ocfs2_get_reflink_xattr_value_root(struct super_block *sb,
6703                                         struct buffer_head *bh,
6704                                         struct ocfs2_xattr_header *xh,
6705                                         int offset,
6706                                         struct ocfs2_xattr_value_root **xv,
6707                                         struct buffer_head **ret_bh,
6708                                         void *para)
6709 {
6710         struct ocfs2_reflink_xattr_tree_args *args =
6711                         (struct ocfs2_reflink_xattr_tree_args *)para;
6712         struct ocfs2_xattr_bucket *bucket;
6713
6714         if (bh == args->old_bucket->bu_bhs[0])
6715                 bucket = args->old_bucket;
6716         else
6717                 bucket = args->new_bucket;
6718
6719         return ocfs2_get_xattr_tree_value_root(sb, bucket, offset,
6720                                                xv, ret_bh);
6721 }
6722
/*
 * Totals accumulated while walking xattr buckets before a reflink.
 */
struct ocfs2_value_tree_metas {
	/* Number of metadata blocks that will have to be reserved. */
	int num_metas;
	/* Journal credits needed so far. */
	int credits;
	/* Number of refcount records that will be added or modified. */
	int num_recs;
};
6728
/*
 * Value-root lookup callback for the metadata estimation pass.
 * @para is the bucket being examined; @bh and @xh are not used here.
 */
static int ocfs2_value_tree_metas_in_bucket(struct super_block *sb,
					struct buffer_head *bh,
					struct ocfs2_xattr_header *xh,
					int offset,
					struct ocfs2_xattr_value_root **xv,
					struct buffer_head **ret_bh,
					void *para)
{
	struct ocfs2_xattr_bucket *cur_bucket = para;

	return ocfs2_get_xattr_tree_value_root(sb, cur_bucket, offset,
					       xv, ret_bh);
}
6743
6744 static int ocfs2_calc_value_tree_metas(struct inode *inode,
6745                                       struct ocfs2_xattr_bucket *bucket,
6746                                       void *para)
6747 {
6748         struct ocfs2_value_tree_metas *metas =
6749                         (struct ocfs2_value_tree_metas *)para;
6750         struct ocfs2_xattr_header *xh =
6751                         (struct ocfs2_xattr_header *)bucket->bu_bhs[0]->b_data;
6752
6753         /* Add the credits for this bucket first. */
6754         metas->credits += bucket->bu_blocks;
6755         return ocfs2_value_metas_in_xattr_header(inode->i_sb, bucket->bu_bhs[0],
6756                                         xh, &metas->num_metas,
6757                                         &metas->credits, &metas->num_recs,
6758                                         ocfs2_value_tree_metas_in_bucket,
6759                                         bucket);
6760 }
6761
6762 /*
6763  * Given a xattr extent rec starting from blkno and having len clusters,
6764  * iterate all the buckets calculate how much metadata we need for reflinking
6765  * all the ocfs2_xattr_value_root and lock the allocators accordingly.
6766  */
6767 static int ocfs2_lock_reflink_xattr_rec_allocators(
6768                                 struct ocfs2_reflink_xattr_tree_args *args,
6769                                 struct ocfs2_extent_tree *xt_et,
6770                                 u64 blkno, u32 len, int *credits,
6771                                 struct ocfs2_alloc_context **meta_ac,
6772                                 struct ocfs2_alloc_context **data_ac)
6773 {
6774         int ret, num_free_extents;
6775         struct ocfs2_value_tree_metas metas;
6776         struct ocfs2_super *osb = OCFS2_SB(args->reflink->old_inode->i_sb);
6777         struct ocfs2_refcount_block *rb;
6778
6779         memset(&metas, 0, sizeof(metas));
6780
6781         ret = ocfs2_iterate_xattr_buckets(args->reflink->old_inode, blkno, len,
6782                                           ocfs2_calc_value_tree_metas, &metas);
6783         if (ret) {
6784                 mlog_errno(ret);
6785                 goto out;
6786         }
6787
6788         *credits = metas.credits;
6789
6790         /*
6791          * Calculate we need for refcount tree change.
6792          *
6793          * We need to add/modify num_recs in refcount tree, so just calculate
6794          * an approximate number we need for refcount tree change.
6795          * Sometimes we need to split the tree, and after split,  half recs
6796          * will be moved to the new block, and a new block can only provide
6797          * half number of recs. So we multiple new blocks by 2.
6798          * In the end, we have to add credits for modifying the already
6799          * existed refcount block.
6800          */
6801         rb = (struct ocfs2_refcount_block *)args->reflink->ref_root_bh->b_data;
6802         metas.num_recs =
6803                 (metas.num_recs + ocfs2_refcount_recs_per_rb(osb->sb) - 1) /
6804                  ocfs2_refcount_recs_per_rb(osb->sb) * 2;
6805         metas.num_metas += metas.num_recs;
6806         *credits += metas.num_recs +
6807                     metas.num_recs * OCFS2_EXPAND_REFCOUNT_TREE_CREDITS;
6808         if (le32_to_cpu(rb->rf_flags) & OCFS2_REFCOUNT_TREE_FL)
6809                 *credits += le16_to_cpu(rb->rf_list.l_tree_depth) *
6810                             le16_to_cpu(rb->rf_list.l_next_free_rec) + 1;
6811         else
6812                 *credits += 1;
6813
6814         /* count in the xattr tree change. */
6815         num_free_extents = ocfs2_num_free_extents(osb, xt_et);
6816         if (num_free_extents < 0) {
6817                 ret = num_free_extents;
6818                 mlog_errno(ret);
6819                 goto out;
6820         }
6821
6822         if (num_free_extents < len)
6823                 metas.num_metas += ocfs2_extend_meta_needed(xt_et->et_root_el);
6824
6825         *credits += ocfs2_calc_extend_credits(osb->sb,
6826                                               xt_et->et_root_el, len);
6827
6828         if (metas.num_metas) {
6829                 ret = ocfs2_reserve_new_metadata_blocks(osb, metas.num_metas,
6830                                                         meta_ac);
6831                 if (ret) {
6832                         mlog_errno(ret);
6833                         goto out;
6834                 }
6835         }
6836
6837         if (len) {
6838                 ret = ocfs2_reserve_clusters(osb, len, data_ac);
6839                 if (ret)
6840                         mlog_errno(ret);
6841         }
6842 out:
6843         if (ret) {
6844                 if (*meta_ac) {
6845                         ocfs2_free_alloc_context(*meta_ac);
6846                         meta_ac = NULL;
6847                 }
6848         }
6849
6850         return ret;
6851 }
6852
6853 static int ocfs2_reflink_xattr_buckets(handle_t *handle,
6854                                 u64 blkno, u64 new_blkno, u32 clusters,
6855                                 struct ocfs2_alloc_context *meta_ac,
6856                                 struct ocfs2_alloc_context *data_ac,
6857                                 struct ocfs2_reflink_xattr_tree_args *args)
6858 {
6859         int i, j, ret = 0;
6860         struct super_block *sb = args->reflink->old_inode->i_sb;
6861         u32 bpc = ocfs2_xattr_buckets_per_cluster(OCFS2_SB(sb));
6862         u32 num_buckets = clusters * bpc;
6863         int bpb = args->old_bucket->bu_blocks;
6864         struct ocfs2_xattr_value_buf vb = {
6865                 .vb_access = ocfs2_journal_access,
6866         };
6867
6868         for (i = 0; i < num_buckets; i++, blkno += bpb, new_blkno += bpb) {
6869                 ret = ocfs2_read_xattr_bucket(args->old_bucket, blkno);
6870                 if (ret) {
6871                         mlog_errno(ret);
6872                         break;
6873                 }
6874
6875                 ret = ocfs2_init_xattr_bucket(args->new_bucket, new_blkno);
6876                 if (ret) {
6877                         mlog_errno(ret);
6878                         break;
6879                 }
6880
6881                 /*
6882                  * The real bucket num in this series of blocks is stored
6883                  * in the 1st bucket.
6884                  */
6885                 if (i == 0)
6886                         num_buckets = le16_to_cpu(
6887                                 bucket_xh(args->old_bucket)->xh_num_buckets);
6888
6889                 ret = ocfs2_xattr_bucket_journal_access(handle,
6890                                                 args->new_bucket,
6891                                                 OCFS2_JOURNAL_ACCESS_CREATE);
6892                 if (ret) {
6893                         mlog_errno(ret);
6894                         break;
6895                 }
6896
6897                 for (j = 0; j < bpb; j++)
6898                         memcpy(bucket_block(args->new_bucket, j),
6899                                bucket_block(args->old_bucket, j),
6900                                sb->s_blocksize);
6901
6902                 ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket);
6903
6904                 ret = ocfs2_reflink_xattr_header(handle, args->reflink,
6905                                         args->old_bucket->bu_bhs[0],
6906                                         bucket_xh(args->old_bucket),
6907                                         args->new_bucket->bu_bhs[0],
6908                                         bucket_xh(args->new_bucket),
6909                                         &vb, meta_ac,
6910                                         ocfs2_get_reflink_xattr_value_root,
6911                                         args);
6912                 if (ret) {
6913                         mlog_errno(ret);
6914                         break;
6915                 }
6916
6917                 /*
6918                  * Re-access and dirty the bucket to calculate metaecc.
6919                  * Because we may extend the transaction in reflink_xattr_header
6920                  * which will let the already accessed block gone.
6921                  */
6922                 ret = ocfs2_xattr_bucket_journal_access(handle,
6923                                                 args->new_bucket,
6924                                                 OCFS2_JOURNAL_ACCESS_WRITE);
6925                 if (ret) {
6926                         mlog_errno(ret);
6927                         break;
6928                 }
6929
6930                 ocfs2_xattr_bucket_journal_dirty(handle, args->new_bucket);
6931                 ocfs2_xattr_bucket_relse(args->old_bucket);
6932                 ocfs2_xattr_bucket_relse(args->new_bucket);
6933         }
6934
6935         ocfs2_xattr_bucket_relse(args->old_bucket);
6936         ocfs2_xattr_bucket_relse(args->new_bucket);
6937         return ret;
6938 }
6939 /*
6940  * Create the same xattr extent record in the new inode's xattr tree.
6941  */
6942 static int ocfs2_reflink_xattr_rec(struct inode *inode,
6943                                    struct buffer_head *root_bh,
6944                                    u64 blkno,
6945                                    u32 cpos,
6946                                    u32 len,
6947                                    void *para)
6948 {
6949         int ret, credits = 0;
6950         u32 p_cluster, num_clusters;
6951         u64 new_blkno;
6952         handle_t *handle;
6953         struct ocfs2_reflink_xattr_tree_args *args =
6954                         (struct ocfs2_reflink_xattr_tree_args *)para;
6955         struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
6956         struct ocfs2_alloc_context *meta_ac = NULL;
6957         struct ocfs2_alloc_context *data_ac = NULL;
6958         struct ocfs2_extent_tree et;
6959
6960         ocfs2_init_xattr_tree_extent_tree(&et,
6961                                           INODE_CACHE(args->reflink->new_inode),
6962                                           args->new_blk_bh);
6963
6964         ret = ocfs2_lock_reflink_xattr_rec_allocators(args, &et, blkno,
6965                                                       len, &credits,
6966                                                       &meta_ac, &data_ac);
6967         if (ret) {
6968                 mlog_errno(ret);
6969                 goto out;
6970         }
6971
6972         handle = ocfs2_start_trans(osb, credits);
6973         if (IS_ERR(handle)) {
6974                 ret = PTR_ERR(handle);
6975                 mlog_errno(ret);
6976                 goto out;
6977         }
6978
6979         ret = ocfs2_claim_clusters(osb, handle, data_ac,
6980                                    len, &p_cluster, &num_clusters);
6981         if (ret) {
6982                 mlog_errno(ret);
6983                 goto out_commit;
6984         }
6985
6986         new_blkno = ocfs2_clusters_to_blocks(osb->sb, p_cluster);
6987
6988         mlog(0, "reflink xattr buckets %llu to %llu, len %u\n",
6989              (unsigned long long)blkno, (unsigned long long)new_blkno, len);
6990         ret = ocfs2_reflink_xattr_buckets(handle, blkno, new_blkno, len,
6991                                           meta_ac, data_ac, args);
6992         if (ret) {
6993                 mlog_errno(ret);
6994                 goto out_commit;
6995         }
6996
6997         mlog(0, "insert new xattr extent rec start %llu len %u to %u\n",
6998              (unsigned long long)new_blkno, len, cpos);
6999         ret = ocfs2_insert_extent(handle, &et, cpos, new_blkno,
7000                                   len, 0, meta_ac);
7001         if (ret)
7002                 mlog_errno(ret);
7003
7004 out_commit:
7005         ocfs2_commit_trans(osb, handle);
7006
7007 out:
7008         if (meta_ac)
7009                 ocfs2_free_alloc_context(meta_ac);
7010         if (data_ac)
7011                 ocfs2_free_alloc_context(data_ac);
7012         return ret;
7013 }
7014
7015 /*
7016  * Create reflinked xattr buckets.
7017  * We will add bucket one by one, and refcount all the xattrs in the bucket
7018  * if they are stored outside.
7019  */
7020 static int ocfs2_reflink_xattr_tree(struct ocfs2_xattr_reflink *args,
7021                                     struct buffer_head *blk_bh,
7022                                     struct buffer_head *new_blk_bh)
7023 {
7024         int ret;
7025         struct ocfs2_reflink_xattr_tree_args para;
7026
7027         memset(&para, 0, sizeof(para));
7028         para.reflink = args;
7029         para.old_blk_bh = blk_bh;
7030         para.new_blk_bh = new_blk_bh;
7031
7032         para.old_bucket = ocfs2_xattr_bucket_new(args->old_inode);
7033         if (!para.old_bucket) {
7034                 mlog_errno(-ENOMEM);
7035                 return -ENOMEM;
7036         }
7037
7038         para.new_bucket = ocfs2_xattr_bucket_new(args->new_inode);
7039         if (!para.new_bucket) {
7040                 ret = -ENOMEM;
7041                 mlog_errno(ret);
7042                 goto out;
7043         }
7044
7045         ret = ocfs2_iterate_xattr_index_block(args->old_inode, blk_bh,
7046                                               ocfs2_reflink_xattr_rec,
7047                                               &para);
7048         if (ret)
7049                 mlog_errno(ret);
7050
7051 out:
7052         ocfs2_xattr_bucket_free(para.old_bucket);
7053         ocfs2_xattr_bucket_free(para.new_bucket);
7054         return ret;
7055 }
7056
7057 static int ocfs2_reflink_xattr_in_block(struct ocfs2_xattr_reflink *args,
7058                                         struct buffer_head *blk_bh)
7059 {
7060         int ret, indexed = 0;
7061         struct buffer_head *new_blk_bh = NULL;
7062         struct ocfs2_xattr_block *xb =
7063                         (struct ocfs2_xattr_block *)blk_bh->b_data;
7064
7065
7066         if (le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED)
7067                 indexed = 1;
7068
7069         ret = ocfs2_create_empty_xattr_block(args->new_inode, args->new_bh,
7070                                              &new_blk_bh, indexed);
7071         if (ret) {
7072                 mlog_errno(ret);
7073                 goto out;
7074         }
7075
7076         if (!(le16_to_cpu(xb->xb_flags) & OCFS2_XATTR_INDEXED))
7077                 ret = ocfs2_reflink_xattr_block(args, blk_bh, new_blk_bh);
7078         else
7079                 ret = ocfs2_reflink_xattr_tree(args, blk_bh, new_blk_bh);
7080         if (ret)
7081                 mlog_errno(ret);
7082
7083 out:
7084         brelse(new_blk_bh);
7085         return ret;
7086 }
7087
7088 static int ocfs2_reflink_xattr_no_security(struct ocfs2_xattr_entry *xe)
7089 {
7090         int type = ocfs2_xattr_get_type(xe);
7091
7092         return type != OCFS2_XATTR_INDEX_SECURITY &&
7093                type != OCFS2_XATTR_INDEX_POSIX_ACL_ACCESS &&
7094                type != OCFS2_XATTR_INDEX_POSIX_ACL_DEFAULT;
7095 }
7096
7097 int ocfs2_reflink_xattrs(struct inode *old_inode,
7098                          struct buffer_head *old_bh,
7099                          struct inode *new_inode,
7100                          struct buffer_head *new_bh,
7101                          bool preserve_security)
7102 {
7103         int ret;
7104         struct ocfs2_xattr_reflink args;
7105         struct ocfs2_inode_info *oi = OCFS2_I(old_inode);
7106         struct ocfs2_dinode *di = (struct ocfs2_dinode *)old_bh->b_data;
7107         struct buffer_head *blk_bh = NULL;
7108         struct ocfs2_cached_dealloc_ctxt dealloc;
7109         struct ocfs2_refcount_tree *ref_tree;
7110         struct buffer_head *ref_root_bh = NULL;
7111
7112         ret = ocfs2_lock_refcount_tree(OCFS2_SB(old_inode->i_sb),
7113                                        le64_to_cpu(di->i_refcount_loc),
7114                                        1, &ref_tree, &ref_root_bh);
7115         if (ret) {
7116                 mlog_errno(ret);
7117                 goto out;
7118         }
7119
7120         ocfs2_init_dealloc_ctxt(&dealloc);
7121
7122         args.old_inode = old_inode;
7123         args.new_inode = new_inode;
7124         args.old_bh = old_bh;
7125         args.new_bh = new_bh;
7126         args.ref_ci = &ref_tree->rf_ci;
7127         args.ref_root_bh = ref_root_bh;
7128         args.dealloc = &dealloc;
7129         if (preserve_security)
7130                 args.xattr_reflinked = NULL;
7131         else
7132                 args.xattr_reflinked = ocfs2_reflink_xattr_no_security;
7133
7134         if (oi->ip_dyn_features & OCFS2_INLINE_XATTR_FL) {
7135                 ret = ocfs2_reflink_xattr_inline(&args);
7136                 if (ret) {
7137                         mlog_errno(ret);
7138                         goto out_unlock;
7139                 }
7140         }
7141
7142         if (!di->i_xattr_loc)
7143                 goto out_unlock;
7144
7145         ret = ocfs2_read_xattr_block(old_inode, le64_to_cpu(di->i_xattr_loc),
7146                                      &blk_bh);
7147         if (ret < 0) {
7148                 mlog_errno(ret);
7149                 goto out_unlock;
7150         }
7151
7152         ret = ocfs2_reflink_xattr_in_block(&args, blk_bh);
7153         if (ret)
7154                 mlog_errno(ret);
7155
7156         brelse(blk_bh);
7157
7158 out_unlock:
7159         ocfs2_unlock_refcount_tree(OCFS2_SB(old_inode->i_sb),
7160                                    ref_tree, 1);
7161         brelse(ref_root_bh);
7162
7163         if (ocfs2_dealloc_has_cluster(&dealloc)) {
7164                 ocfs2_schedule_truncate_log_flush(OCFS2_SB(old_inode->i_sb), 1);
7165                 ocfs2_run_deallocs(OCFS2_SB(old_inode->i_sb), &dealloc);
7166         }
7167
7168 out:
7169         return ret;
7170 }
7171
7172 /*
7173  * Initialize security and acl for a already created inode.
7174  * Used for reflink a non-preserve-security file.
7175  *
7176  * It uses common api like ocfs2_xattr_set, so the caller
7177  * must not hold any lock expect i_mutex.
7178  */
7179 int ocfs2_init_security_and_acl(struct inode *dir,
7180                                 struct inode *inode)
7181 {
7182         int ret = 0;
7183         struct buffer_head *dir_bh = NULL;
7184         struct ocfs2_security_xattr_info si = {
7185                 .enable = 1,
7186         };
7187
7188         ret = ocfs2_init_security_get(inode, dir, &si);
7189         if (!ret) {
7190                 ret = ocfs2_xattr_set(inode, OCFS2_XATTR_INDEX_SECURITY,
7191                                       si.name, si.value, si.value_len,
7192                                       XATTR_CREATE);
7193                 if (ret) {
7194                         mlog_errno(ret);
7195                         goto leave;
7196                 }
7197         } else if (ret != -EOPNOTSUPP) {
7198                 mlog_errno(ret);
7199                 goto leave;
7200         }
7201
7202         ret = ocfs2_inode_lock(dir, &dir_bh, 0);
7203         if (ret) {
7204                 mlog_errno(ret);
7205                 goto leave;
7206         }
7207
7208         ret = ocfs2_init_acl(NULL, inode, dir, NULL, dir_bh, NULL, NULL);
7209         if (ret)
7210                 mlog_errno(ret);
7211
7212         ocfs2_inode_unlock(dir, 0);
7213         brelse(dir_bh);
7214 leave:
7215         return ret;
7216 }
7217 /*
7218  * 'security' attributes support
7219  */
7220 static size_t ocfs2_xattr_security_list(struct dentry *dentry, char *list,
7221                                         size_t list_size, const char *name,
7222                                         size_t name_len, int type)
7223 {
7224         const size_t prefix_len = XATTR_SECURITY_PREFIX_LEN;
7225         const size_t total_len = prefix_len + name_len + 1;
7226
7227         if (list && total_len <= list_size) {
7228                 memcpy(list, XATTR_SECURITY_PREFIX, prefix_len);
7229                 memcpy(list + prefix_len, name, name_len);
7230                 list[prefix_len + name_len] = '\0';
7231         }
7232         return total_len;
7233 }
7234
7235 static int ocfs2_xattr_security_get(struct dentry *dentry, const char *name,
7236                                     void *buffer, size_t size, int type)
7237 {
7238         if (strcmp(name, "") == 0)
7239                 return -EINVAL;
7240         return ocfs2_xattr_get(dentry->d_inode, OCFS2_XATTR_INDEX_SECURITY,
7241                                name, buffer, size);
7242 }
7243
7244 static int ocfs2_xattr_security_set(struct dentry *dentry, const char *name,
7245                 const void *value, size_t size, int flags, int type)
7246 {
7247         if (strcmp(name, "") == 0)
7248                 return -EINVAL;
7249
7250         return ocfs2_xattr_set(dentry->d_inode, OCFS2_XATTR_INDEX_SECURITY,
7251                                name, value, size, flags);
7252 }
7253
7254 int ocfs2_init_security_get(struct inode *inode,
7255                             struct inode *dir,
7256                             struct ocfs2_security_xattr_info *si)
7257 {
7258         /* check whether ocfs2 support feature xattr */
7259         if (!ocfs2_supports_xattr(OCFS2_SB(dir->i_sb)))
7260                 return -EOPNOTSUPP;
7261         return security_inode_init_security(inode, dir, &si->name, &si->value,
7262                                             &si->value_len);
7263 }
7264
7265 int ocfs2_init_security_set(handle_t *handle,
7266                             struct inode *inode,
7267                             struct buffer_head *di_bh,
7268                             struct ocfs2_security_xattr_info *si,
7269                             struct ocfs2_alloc_context *xattr_ac,
7270                             struct ocfs2_alloc_context *data_ac)
7271 {
7272         return ocfs2_xattr_set_handle(handle, inode, di_bh,
7273                                      OCFS2_XATTR_INDEX_SECURITY,
7274                                      si->name, si->value, si->value_len, 0,
7275                                      xattr_ac, data_ac);
7276 }
7277
7278 struct xattr_handler ocfs2_xattr_security_handler = {
7279         .prefix = XATTR_SECURITY_PREFIX,
7280         .list   = ocfs2_xattr_security_list,
7281         .get    = ocfs2_xattr_security_get,
7282         .set    = ocfs2_xattr_security_set,
7283 };
7284
7285 /*
7286  * 'trusted' attributes support
7287  */
7288 static size_t ocfs2_xattr_trusted_list(struct dentry *dentry, char *list,
7289                                        size_t list_size, const char *name,
7290                                        size_t name_len, int type)
7291 {
7292         const size_t prefix_len = XATTR_TRUSTED_PREFIX_LEN;
7293         const size_t total_len = prefix_len + name_len + 1;
7294
7295         if (list && total_len <= list_size) {
7296                 memcpy(list, XATTR_TRUSTED_PREFIX, prefix_len);
7297                 memcpy(list + prefix_len, name, name_len);
7298                 list[prefix_len + name_len] = '\0';
7299         }
7300         return total_len;
7301 }
7302
7303 static int ocfs2_xattr_trusted_get(struct dentry *dentry, const char *name,
7304                 void *buffer, size_t size, int type)
7305 {
7306         if (strcmp(name, "") == 0)
7307                 return -EINVAL;
7308         return ocfs2_xattr_get(dentry->d_inode, OCFS2_XATTR_INDEX_TRUSTED,
7309                                name, buffer, size);
7310 }
7311
7312 static int ocfs2_xattr_trusted_set(struct dentry *dentry, const char *name,
7313                 const void *value, size_t size, int flags, int type)
7314 {
7315         if (strcmp(name, "") == 0)
7316                 return -EINVAL;
7317
7318         return ocfs2_xattr_set(dentry->d_inode, OCFS2_XATTR_INDEX_TRUSTED,
7319                                name, value, size, flags);
7320 }
7321
7322 struct xattr_handler ocfs2_xattr_trusted_handler = {
7323         .prefix = XATTR_TRUSTED_PREFIX,
7324         .list   = ocfs2_xattr_trusted_list,
7325         .get    = ocfs2_xattr_trusted_get,
7326         .set    = ocfs2_xattr_trusted_set,
7327 };
7328
7329 /*
7330  * 'user' attributes support
7331  */
7332 static size_t ocfs2_xattr_user_list(struct dentry *dentry, char *list,
7333                                     size_t list_size, const char *name,
7334                                     size_t name_len, int type)
7335 {
7336         const size_t prefix_len = XATTR_USER_PREFIX_LEN;
7337         const size_t total_len = prefix_len + name_len + 1;
7338         struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
7339
7340         if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
7341                 return 0;
7342
7343         if (list && total_len <= list_size) {
7344                 memcpy(list, XATTR_USER_PREFIX, prefix_len);
7345                 memcpy(list + prefix_len, name, name_len);
7346                 list[prefix_len + name_len] = '\0';
7347         }
7348         return total_len;
7349 }
7350
7351 static int ocfs2_xattr_user_get(struct dentry *dentry, const char *name,
7352                 void *buffer, size_t size, int type)
7353 {
7354         struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
7355
7356         if (strcmp(name, "") == 0)
7357                 return -EINVAL;
7358         if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
7359                 return -EOPNOTSUPP;
7360         return ocfs2_xattr_get(dentry->d_inode, OCFS2_XATTR_INDEX_USER, name,
7361                                buffer, size);
7362 }
7363
7364 static int ocfs2_xattr_user_set(struct dentry *dentry, const char *name,
7365                 const void *value, size_t size, int flags, int type)
7366 {
7367         struct ocfs2_super *osb = OCFS2_SB(dentry->d_sb);
7368
7369         if (strcmp(name, "") == 0)
7370                 return -EINVAL;
7371         if (osb->s_mount_opt & OCFS2_MOUNT_NOUSERXATTR)
7372                 return -EOPNOTSUPP;
7373
7374         return ocfs2_xattr_set(dentry->d_inode, OCFS2_XATTR_INDEX_USER,
7375                                name, value, size, flags);
7376 }
7377
7378 struct xattr_handler ocfs2_xattr_user_handler = {
7379         .prefix = XATTR_USER_PREFIX,
7380         .list   = ocfs2_xattr_user_list,
7381         .get    = ocfs2_xattr_user_get,
7382         .set    = ocfs2_xattr_user_set,
7383 };