]> git.karo-electronics.de Git - mv-sheeva.git/blob - fs/btrfs/inode.c
7869c0157a1f1dae098d3e3ff170f8cc06e5601a
[mv-sheeva.git] / fs / btrfs / inode.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <linux/bio.h>
20 #include <linux/buffer_head.h>
21 #include <linux/fs.h>
22 #include <linux/pagemap.h>
23 #include <linux/highmem.h>
24 #include <linux/time.h>
25 #include <linux/init.h>
26 #include <linux/string.h>
27 #include <linux/smp_lock.h>
28 #include <linux/backing-dev.h>
29 #include <linux/mpage.h>
30 #include <linux/swap.h>
31 #include <linux/writeback.h>
32 #include <linux/statfs.h>
33 #include <linux/compat.h>
34 #include <linux/bit_spinlock.h>
35 #include <linux/version.h>
36 #include <linux/xattr.h>
37 #include "ctree.h"
38 #include "disk-io.h"
39 #include "transaction.h"
40 #include "btrfs_inode.h"
41 #include "ioctl.h"
42 #include "print-tree.h"
43 #include "volumes.h"
44
45 struct btrfs_iget_args {
46         u64 ino;
47         struct btrfs_root *root;
48 };
49
50 static struct inode_operations btrfs_dir_inode_operations;
51 static struct inode_operations btrfs_symlink_inode_operations;
52 static struct inode_operations btrfs_dir_ro_inode_operations;
53 static struct inode_operations btrfs_special_inode_operations;
54 static struct inode_operations btrfs_file_inode_operations;
55 static struct address_space_operations btrfs_aops;
56 static struct address_space_operations btrfs_symlink_aops;
57 static struct file_operations btrfs_dir_file_operations;
58 static struct extent_io_ops btrfs_extent_io_ops;
59
60 static struct kmem_cache *btrfs_inode_cachep;
61 struct kmem_cache *btrfs_trans_handle_cachep;
62 struct kmem_cache *btrfs_transaction_cachep;
63 struct kmem_cache *btrfs_bit_radix_cachep;
64 struct kmem_cache *btrfs_path_cachep;
65
66 #define S_SHIFT 12
67 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
68         [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
69         [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
70         [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
71         [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
72         [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
73         [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
74         [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
75 };
76
77 int btrfs_check_free_space(struct btrfs_root *root, u64 num_required,
78                            int for_del)
79 {
80         u64 total = btrfs_super_total_bytes(&root->fs_info->super_copy);
81         u64 used = btrfs_super_bytes_used(&root->fs_info->super_copy);
82         u64 thresh;
83         int ret = 0;
84
85         if (for_del)
86                 thresh = total * 90;
87         else
88                 thresh = total * 85;
89
90         do_div(thresh, 100);
91
92         spin_lock(&root->fs_info->delalloc_lock);
93         if (used + root->fs_info->delalloc_bytes + num_required > thresh)
94                 ret = -ENOSPC;
95         spin_unlock(&root->fs_info->delalloc_lock);
96         return ret;
97 }
98
99 static int cow_file_range(struct inode *inode, u64 start, u64 end)
100 {
101         struct btrfs_root *root = BTRFS_I(inode)->root;
102         struct btrfs_trans_handle *trans;
103         u64 alloc_hint = 0;
104         u64 num_bytes;
105         u64 cur_alloc_size;
106         u64 blocksize = root->sectorsize;
107         u64 orig_start = start;
108         u64 orig_num_bytes;
109         struct btrfs_key ins;
110         int ret;
111
112         trans = btrfs_start_transaction(root, 1);
113         BUG_ON(!trans);
114         btrfs_set_trans_block_group(trans, inode);
115
116         num_bytes = (end - start + blocksize) & ~(blocksize - 1);
117         num_bytes = max(blocksize,  num_bytes);
118         ret = btrfs_drop_extents(trans, root, inode,
119                                  start, start + num_bytes, start, &alloc_hint);
120         orig_num_bytes = num_bytes;
121
122         if (alloc_hint == EXTENT_MAP_INLINE)
123                 goto out;
124
125         while(num_bytes > 0) {
126                 cur_alloc_size = min(num_bytes, root->fs_info->max_extent);
127                 ret = btrfs_alloc_extent(trans, root, cur_alloc_size,
128                                          root->sectorsize,
129                                          root->root_key.objectid,
130                                          trans->transid,
131                                          inode->i_ino, start, 0,
132                                          alloc_hint, (u64)-1, &ins, 1);
133                 if (ret) {
134                         WARN_ON(1);
135                         goto out;
136                 }
137                 cur_alloc_size = ins.offset;
138                 ret = btrfs_insert_file_extent(trans, root, inode->i_ino,
139                                                start, ins.objectid, ins.offset,
140                                                ins.offset);
141                 inode->i_blocks += ins.offset >> 9;
142                 btrfs_check_file(root, inode);
143                 num_bytes -= cur_alloc_size;
144                 alloc_hint = ins.objectid + ins.offset;
145                 start += cur_alloc_size;
146         }
147         btrfs_drop_extent_cache(inode, orig_start,
148                                 orig_start + orig_num_bytes - 1);
149         btrfs_add_ordered_inode(inode);
150         btrfs_update_inode(trans, root, inode);
151 out:
152         btrfs_end_transaction(trans, root);
153         return ret;
154 }
155
156 static int run_delalloc_nocow(struct inode *inode, u64 start, u64 end)
157 {
158         u64 extent_start;
159         u64 extent_end;
160         u64 bytenr;
161         u64 cow_end;
162         u64 loops = 0;
163         u64 total_fs_bytes;
164         struct btrfs_root *root = BTRFS_I(inode)->root;
165         struct extent_buffer *leaf;
166         int found_type;
167         struct btrfs_path *path;
168         struct btrfs_file_extent_item *item;
169         int ret;
170         int err;
171         struct btrfs_key found_key;
172
173         total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
174         path = btrfs_alloc_path();
175         BUG_ON(!path);
176 again:
177         ret = btrfs_lookup_file_extent(NULL, root, path,
178                                        inode->i_ino, start, 0);
179         if (ret < 0) {
180                 btrfs_free_path(path);
181                 return ret;
182         }
183
184         cow_end = end;
185         if (ret != 0) {
186                 if (path->slots[0] == 0)
187                         goto not_found;
188                 path->slots[0]--;
189         }
190
191         leaf = path->nodes[0];
192         item = btrfs_item_ptr(leaf, path->slots[0],
193                               struct btrfs_file_extent_item);
194
195         /* are we inside the extent that was found? */
196         btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
197         found_type = btrfs_key_type(&found_key);
198         if (found_key.objectid != inode->i_ino ||
199             found_type != BTRFS_EXTENT_DATA_KEY) {
200                 goto not_found;
201         }
202
203         found_type = btrfs_file_extent_type(leaf, item);
204         extent_start = found_key.offset;
205         if (found_type == BTRFS_FILE_EXTENT_REG) {
206                 u64 extent_num_bytes;
207
208                 extent_num_bytes = btrfs_file_extent_num_bytes(leaf, item);
209                 extent_end = extent_start + extent_num_bytes;
210                 err = 0;
211
212                 if (loops && start != extent_start)
213                         goto not_found;
214
215                 if (start < extent_start || start >= extent_end)
216                         goto not_found;
217
218                 cow_end = min(end, extent_end - 1);
219                 bytenr = btrfs_file_extent_disk_bytenr(leaf, item);
220                 if (bytenr == 0)
221                         goto not_found;
222
223                 /*
224                  * we may be called by the resizer, make sure we're inside
225                  * the limits of the FS
226                  */
227                 if (bytenr + extent_num_bytes > total_fs_bytes)
228                         goto not_found;
229
230                 if (btrfs_count_snapshots_in_path(root, path, bytenr) != 1) {
231                         goto not_found;
232                 }
233
234                 start = extent_end;
235         } else {
236                 goto not_found;
237         }
238 loop:
239         if (start > end) {
240                 btrfs_free_path(path);
241                 return 0;
242         }
243         btrfs_release_path(root, path);
244         loops++;
245         goto again;
246
247 not_found:
248         cow_file_range(inode, start, cow_end);
249         start = cow_end + 1;
250         goto loop;
251 }
252
253 static int run_delalloc_range(struct inode *inode, u64 start, u64 end)
254 {
255         struct btrfs_root *root = BTRFS_I(inode)->root;
256         int ret;
257         mutex_lock(&root->fs_info->fs_mutex);
258         if (btrfs_test_opt(root, NODATACOW) ||
259             btrfs_test_flag(inode, NODATACOW))
260                 ret = run_delalloc_nocow(inode, start, end);
261         else
262                 ret = cow_file_range(inode, start, end);
263
264         mutex_unlock(&root->fs_info->fs_mutex);
265         return ret;
266 }
267
268 int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end,
269                        unsigned long old, unsigned long bits)
270 {
271         if (!(old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) {
272                 struct btrfs_root *root = BTRFS_I(inode)->root;
273                 spin_lock(&root->fs_info->delalloc_lock);
274                 BTRFS_I(inode)->delalloc_bytes += end - start + 1;
275                 root->fs_info->delalloc_bytes += end - start + 1;
276                 spin_unlock(&root->fs_info->delalloc_lock);
277         }
278         return 0;
279 }
280
281 int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end,
282                          unsigned long old, unsigned long bits)
283 {
284         if ((old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) {
285                 struct btrfs_root *root = BTRFS_I(inode)->root;
286                 spin_lock(&root->fs_info->delalloc_lock);
287                 if (end - start + 1 > root->fs_info->delalloc_bytes) {
288                         printk("warning: delalloc account %Lu %Lu\n",
289                                end - start + 1, root->fs_info->delalloc_bytes);
290                         root->fs_info->delalloc_bytes = 0;
291                         BTRFS_I(inode)->delalloc_bytes = 0;
292                 } else {
293                         root->fs_info->delalloc_bytes -= end - start + 1;
294                         BTRFS_I(inode)->delalloc_bytes -= end - start + 1;
295                 }
296                 spin_unlock(&root->fs_info->delalloc_lock);
297         }
298         return 0;
299 }
300
301 int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
302                          size_t size, struct bio *bio)
303 {
304         struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
305         struct btrfs_mapping_tree *map_tree;
306         u64 logical = bio->bi_sector << 9;
307         u64 length = 0;
308         u64 map_length;
309         struct bio_vec *bvec;
310         int i;
311         int ret;
312
313         bio_for_each_segment(bvec, bio, i) {
314                 length += bvec->bv_len;
315         }
316         map_tree = &root->fs_info->mapping_tree;
317         map_length = length;
318         ret = btrfs_map_block(map_tree, READ, logical,
319                               &map_length, NULL, 0);
320
321         if (map_length < length + size) {
322                 return 1;
323         }
324         return 0;
325 }
326
327 int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
328                           int mirror_num)
329 {
330         struct btrfs_root *root = BTRFS_I(inode)->root;
331         struct btrfs_trans_handle *trans;
332         int ret = 0;
333
334         if (!(rw & (1 << BIO_RW))) {
335                 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
336                 BUG_ON(ret);
337                 goto mapit;
338         }
339
340         if (btrfs_test_opt(root, NODATASUM) ||
341             btrfs_test_flag(inode, NODATASUM)) {
342                 goto mapit;
343         }
344
345         mutex_lock(&root->fs_info->fs_mutex);
346         trans = btrfs_start_transaction(root, 1);
347         btrfs_set_trans_block_group(trans, inode);
348         btrfs_csum_file_blocks(trans, root, inode, bio);
349         ret = btrfs_end_transaction(trans, root);
350         BUG_ON(ret);
351         mutex_unlock(&root->fs_info->fs_mutex);
352 mapit:
353         return btrfs_map_bio(root, rw, bio, mirror_num);
354 }
355
356 int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end)
357 {
358         int ret = 0;
359         struct inode *inode = page->mapping->host;
360         struct btrfs_root *root = BTRFS_I(inode)->root;
361         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
362         struct btrfs_csum_item *item;
363         struct btrfs_path *path = NULL;
364         u32 csum;
365         if (btrfs_test_opt(root, NODATASUM) ||
366             btrfs_test_flag(inode, NODATASUM))
367                 return 0;
368         mutex_lock(&root->fs_info->fs_mutex);
369         path = btrfs_alloc_path();
370         item = btrfs_lookup_csum(NULL, root, path, inode->i_ino, start, 0);
371         if (IS_ERR(item)) {
372                 ret = PTR_ERR(item);
373                 /* a csum that isn't present is a preallocated region. */
374                 if (ret == -ENOENT || ret == -EFBIG)
375                         ret = 0;
376                 csum = 0;
377                 printk("no csum found for inode %lu start %Lu\n", inode->i_ino, start);
378                 goto out;
379         }
380         read_extent_buffer(path->nodes[0], &csum, (unsigned long)item,
381                            BTRFS_CRC32_SIZE);
382         set_state_private(io_tree, start, csum);
383 out:
384         if (path)
385                 btrfs_free_path(path);
386         mutex_unlock(&root->fs_info->fs_mutex);
387         return ret;
388 }
389
390 struct io_failure_record {
391         struct page *page;
392         u64 start;
393         u64 len;
394         u64 logical;
395         int last_mirror;
396 };
397
398 int btrfs_readpage_io_failed_hook(struct bio *failed_bio,
399                                   struct page *page, u64 start, u64 end,
400                                   struct extent_state *state)
401 {
402         struct io_failure_record *failrec = NULL;
403         u64 private;
404         struct extent_map *em;
405         struct inode *inode = page->mapping->host;
406         struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
407         struct bio *bio;
408         int num_copies;
409         int ret;
410         u64 logical;
411
412         ret = get_state_private(failure_tree, start, &private);
413         if (ret) {
414                 size_t pg_offset = start - page_offset(page);
415                 failrec = kmalloc(sizeof(*failrec), GFP_NOFS);
416                 if (!failrec)
417                         return -ENOMEM;
418                 failrec->start = start;
419                 failrec->len = end - start + 1;
420                 failrec->last_mirror = 0;
421
422                 em = btrfs_get_extent(inode, NULL, pg_offset, start,
423                                       failrec->len, 0);
424
425                 if (!em || IS_ERR(em)) {
426                         kfree(failrec);
427                         return -EIO;
428                 }
429                 logical = start - em->start;
430                 logical = em->block_start + logical;
431                 failrec->logical = logical;
432                 free_extent_map(em);
433                 set_extent_bits(failure_tree, start, end, EXTENT_LOCKED |
434                                 EXTENT_DIRTY, GFP_NOFS);
435                 set_state_private(failure_tree, start,
436                                  (u64)(unsigned long)failrec);
437         } else {
438                 failrec = (struct io_failure_record *)(unsigned long)private;
439         }
440         num_copies = btrfs_num_copies(
441                               &BTRFS_I(inode)->root->fs_info->mapping_tree,
442                               failrec->logical, failrec->len);
443         failrec->last_mirror++;
444         if (!state) {
445                 spin_lock_irq(&BTRFS_I(inode)->io_tree.lock);
446                 state = find_first_extent_bit_state(&BTRFS_I(inode)->io_tree,
447                                                     failrec->start,
448                                                     EXTENT_LOCKED);
449                 if (state && state->start != failrec->start)
450                         state = NULL;
451                 spin_unlock_irq(&BTRFS_I(inode)->io_tree.lock);
452         }
453         if (!state || failrec->last_mirror > num_copies) {
454                 set_state_private(failure_tree, failrec->start, 0);
455                 clear_extent_bits(failure_tree, failrec->start,
456                                   failrec->start + failrec->len - 1,
457                                   EXTENT_LOCKED | EXTENT_DIRTY, GFP_NOFS);
458                 kfree(failrec);
459                 return -EIO;
460         }
461         bio = bio_alloc(GFP_NOFS, 1);
462         bio->bi_private = state;
463         bio->bi_end_io = failed_bio->bi_end_io;
464         bio->bi_sector = failrec->logical >> 9;
465         bio->bi_bdev = failed_bio->bi_bdev;
466         bio_add_page(bio, page, failrec->len, start - page_offset(page));
467         btrfs_submit_bio_hook(inode, READ, bio, failrec->last_mirror);
468         return 0;
469 }
470
471 int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
472                                struct extent_state *state)
473 {
474         size_t offset = start - ((u64)page->index << PAGE_CACHE_SHIFT);
475         struct inode *inode = page->mapping->host;
476         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
477         char *kaddr;
478         u64 private = ~(u32)0;
479         int ret;
480         struct btrfs_root *root = BTRFS_I(inode)->root;
481         u32 csum = ~(u32)0;
482         unsigned long flags;
483
484         if (btrfs_test_opt(root, NODATASUM) ||
485             btrfs_test_flag(inode, NODATASUM))
486                 return 0;
487         if (state && state->start == start) {
488                 private = state->private;
489                 ret = 0;
490         } else {
491                 ret = get_state_private(io_tree, start, &private);
492         }
493         local_irq_save(flags);
494         kaddr = kmap_atomic(page, KM_IRQ0);
495         if (ret) {
496                 goto zeroit;
497         }
498         csum = btrfs_csum_data(root, kaddr + offset, csum,  end - start + 1);
499         btrfs_csum_final(csum, (char *)&csum);
500         if (csum != private) {
501                 goto zeroit;
502         }
503         kunmap_atomic(kaddr, KM_IRQ0);
504         local_irq_restore(flags);
505
506         /* if the io failure tree for this inode is non-empty,
507          * check to see if we've recovered from a failed IO
508          */
509         private = 0;
510         if (count_range_bits(&BTRFS_I(inode)->io_failure_tree, &private,
511                              (u64)-1, 1, EXTENT_DIRTY)) {
512                 u64 private_failure;
513                 struct io_failure_record *failure;
514                 ret = get_state_private(&BTRFS_I(inode)->io_failure_tree,
515                                         start, &private_failure);
516                 if (ret == 0) {
517                         failure = (struct io_failure_record *)(unsigned long)
518                                    private_failure;
519                         set_state_private(&BTRFS_I(inode)->io_failure_tree,
520                                           failure->start, 0);
521                         clear_extent_bits(&BTRFS_I(inode)->io_failure_tree,
522                                           failure->start,
523                                           failure->start + failure->len - 1,
524                                           EXTENT_DIRTY | EXTENT_LOCKED,
525                                           GFP_NOFS);
526                         kfree(failure);
527                 }
528         }
529         return 0;
530
531 zeroit:
532         printk("btrfs csum failed ino %lu off %llu csum %u private %Lu\n",
533                page->mapping->host->i_ino, (unsigned long long)start, csum,
534                private);
535         memset(kaddr + offset, 1, end - start + 1);
536         flush_dcache_page(page);
537         kunmap_atomic(kaddr, KM_IRQ0);
538         local_irq_restore(flags);
539         return -EIO;
540 }
541
542 void btrfs_read_locked_inode(struct inode *inode)
543 {
544         struct btrfs_path *path;
545         struct extent_buffer *leaf;
546         struct btrfs_inode_item *inode_item;
547         struct btrfs_timespec *tspec;
548         struct btrfs_root *root = BTRFS_I(inode)->root;
549         struct btrfs_key location;
550         u64 alloc_group_block;
551         u32 rdev;
552         int ret;
553
554         path = btrfs_alloc_path();
555         BUG_ON(!path);
556         mutex_lock(&root->fs_info->fs_mutex);
557         memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
558
559         ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
560         if (ret)
561                 goto make_bad;
562
563         leaf = path->nodes[0];
564         inode_item = btrfs_item_ptr(leaf, path->slots[0],
565                                     struct btrfs_inode_item);
566
567         inode->i_mode = btrfs_inode_mode(leaf, inode_item);
568         inode->i_nlink = btrfs_inode_nlink(leaf, inode_item);
569         inode->i_uid = btrfs_inode_uid(leaf, inode_item);
570         inode->i_gid = btrfs_inode_gid(leaf, inode_item);
571         inode->i_size = btrfs_inode_size(leaf, inode_item);
572
573         tspec = btrfs_inode_atime(inode_item);
574         inode->i_atime.tv_sec = btrfs_timespec_sec(leaf, tspec);
575         inode->i_atime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
576
577         tspec = btrfs_inode_mtime(inode_item);
578         inode->i_mtime.tv_sec = btrfs_timespec_sec(leaf, tspec);
579         inode->i_mtime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
580
581         tspec = btrfs_inode_ctime(inode_item);
582         inode->i_ctime.tv_sec = btrfs_timespec_sec(leaf, tspec);
583         inode->i_ctime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
584
585         inode->i_blocks = btrfs_inode_nblocks(leaf, inode_item);
586         inode->i_generation = btrfs_inode_generation(leaf, inode_item);
587         inode->i_rdev = 0;
588         rdev = btrfs_inode_rdev(leaf, inode_item);
589
590         alloc_group_block = btrfs_inode_block_group(leaf, inode_item);
591         BTRFS_I(inode)->block_group = btrfs_lookup_block_group(root->fs_info,
592                                                        alloc_group_block);
593         BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);
594         if (!BTRFS_I(inode)->block_group) {
595                 BTRFS_I(inode)->block_group = btrfs_find_block_group(root,
596                                                  NULL, 0,
597                                                  BTRFS_BLOCK_GROUP_METADATA, 0);
598         }
599         btrfs_free_path(path);
600         inode_item = NULL;
601
602         mutex_unlock(&root->fs_info->fs_mutex);
603
604         switch (inode->i_mode & S_IFMT) {
605         case S_IFREG:
606                 inode->i_mapping->a_ops = &btrfs_aops;
607                 inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
608                 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
609                 inode->i_fop = &btrfs_file_operations;
610                 inode->i_op = &btrfs_file_inode_operations;
611                 break;
612         case S_IFDIR:
613                 inode->i_fop = &btrfs_dir_file_operations;
614                 if (root == root->fs_info->tree_root)
615                         inode->i_op = &btrfs_dir_ro_inode_operations;
616                 else
617                         inode->i_op = &btrfs_dir_inode_operations;
618                 break;
619         case S_IFLNK:
620                 inode->i_op = &btrfs_symlink_inode_operations;
621                 inode->i_mapping->a_ops = &btrfs_symlink_aops;
622                 inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
623                 break;
624         default:
625                 init_special_inode(inode, inode->i_mode, rdev);
626                 break;
627         }
628         return;
629
630 make_bad:
631         btrfs_release_path(root, path);
632         btrfs_free_path(path);
633         mutex_unlock(&root->fs_info->fs_mutex);
634         make_bad_inode(inode);
635 }
636
637 static void fill_inode_item(struct extent_buffer *leaf,
638                             struct btrfs_inode_item *item,
639                             struct inode *inode)
640 {
641         btrfs_set_inode_uid(leaf, item, inode->i_uid);
642         btrfs_set_inode_gid(leaf, item, inode->i_gid);
643         btrfs_set_inode_size(leaf, item, inode->i_size);
644         btrfs_set_inode_mode(leaf, item, inode->i_mode);
645         btrfs_set_inode_nlink(leaf, item, inode->i_nlink);
646
647         btrfs_set_timespec_sec(leaf, btrfs_inode_atime(item),
648                                inode->i_atime.tv_sec);
649         btrfs_set_timespec_nsec(leaf, btrfs_inode_atime(item),
650                                 inode->i_atime.tv_nsec);
651
652         btrfs_set_timespec_sec(leaf, btrfs_inode_mtime(item),
653                                inode->i_mtime.tv_sec);
654         btrfs_set_timespec_nsec(leaf, btrfs_inode_mtime(item),
655                                 inode->i_mtime.tv_nsec);
656
657         btrfs_set_timespec_sec(leaf, btrfs_inode_ctime(item),
658                                inode->i_ctime.tv_sec);
659         btrfs_set_timespec_nsec(leaf, btrfs_inode_ctime(item),
660                                 inode->i_ctime.tv_nsec);
661
662         btrfs_set_inode_nblocks(leaf, item, inode->i_blocks);
663         btrfs_set_inode_generation(leaf, item, inode->i_generation);
664         btrfs_set_inode_rdev(leaf, item, inode->i_rdev);
665         btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags);
666         btrfs_set_inode_block_group(leaf, item,
667                                     BTRFS_I(inode)->block_group->key.objectid);
668 }
669
670 int btrfs_update_inode(struct btrfs_trans_handle *trans,
671                               struct btrfs_root *root,
672                               struct inode *inode)
673 {
674         struct btrfs_inode_item *inode_item;
675         struct btrfs_path *path;
676         struct extent_buffer *leaf;
677         int ret;
678
679         path = btrfs_alloc_path();
680         BUG_ON(!path);
681         ret = btrfs_lookup_inode(trans, root, path,
682                                  &BTRFS_I(inode)->location, 1);
683         if (ret) {
684                 if (ret > 0)
685                         ret = -ENOENT;
686                 goto failed;
687         }
688
689         leaf = path->nodes[0];
690         inode_item = btrfs_item_ptr(leaf, path->slots[0],
691                                   struct btrfs_inode_item);
692
693         fill_inode_item(leaf, inode_item, inode);
694         btrfs_mark_buffer_dirty(leaf);
695         btrfs_set_inode_last_trans(trans, inode);
696         ret = 0;
697 failed:
698         btrfs_release_path(root, path);
699         btrfs_free_path(path);
700         return ret;
701 }
702
703
704 static int btrfs_unlink_trans(struct btrfs_trans_handle *trans,
705                               struct btrfs_root *root,
706                               struct inode *dir,
707                               struct dentry *dentry)
708 {
709         struct btrfs_path *path;
710         const char *name = dentry->d_name.name;
711         int name_len = dentry->d_name.len;
712         int ret = 0;
713         struct extent_buffer *leaf;
714         struct btrfs_dir_item *di;
715         struct btrfs_key key;
716
717         path = btrfs_alloc_path();
718         if (!path) {
719                 ret = -ENOMEM;
720                 goto err;
721         }
722
723         di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino,
724                                     name, name_len, -1);
725         if (IS_ERR(di)) {
726                 ret = PTR_ERR(di);
727                 goto err;
728         }
729         if (!di) {
730                 ret = -ENOENT;
731                 goto err;
732         }
733         leaf = path->nodes[0];
734         btrfs_dir_item_key_to_cpu(leaf, di, &key);
735         ret = btrfs_delete_one_dir_name(trans, root, path, di);
736         if (ret)
737                 goto err;
738         btrfs_release_path(root, path);
739
740         di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino,
741                                          key.objectid, name, name_len, -1);
742         if (IS_ERR(di)) {
743                 ret = PTR_ERR(di);
744                 goto err;
745         }
746         if (!di) {
747                 ret = -ENOENT;
748                 goto err;
749         }
750         ret = btrfs_delete_one_dir_name(trans, root, path, di);
751
752         dentry->d_inode->i_ctime = dir->i_ctime;
753         ret = btrfs_del_inode_ref(trans, root, name, name_len,
754                                   dentry->d_inode->i_ino,
755                                   dentry->d_parent->d_inode->i_ino);
756         if (ret) {
757                 printk("failed to delete reference to %.*s, "
758                        "inode %lu parent %lu\n", name_len, name,
759                        dentry->d_inode->i_ino,
760                        dentry->d_parent->d_inode->i_ino);
761         }
762 err:
763         btrfs_free_path(path);
764         if (!ret) {
765                 dir->i_size -= name_len * 2;
766                 dir->i_mtime = dir->i_ctime = CURRENT_TIME;
767                 btrfs_update_inode(trans, root, dir);
768 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
769                 dentry->d_inode->i_nlink--;
770 #else
771                 drop_nlink(dentry->d_inode);
772 #endif
773                 ret = btrfs_update_inode(trans, root, dentry->d_inode);
774                 dir->i_sb->s_dirt = 1;
775         }
776         return ret;
777 }
778
779 static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
780 {
781         struct btrfs_root *root;
782         struct btrfs_trans_handle *trans;
783         struct inode *inode = dentry->d_inode;
784         int ret;
785         unsigned long nr = 0;
786
787         root = BTRFS_I(dir)->root;
788         mutex_lock(&root->fs_info->fs_mutex);
789
790         ret = btrfs_check_free_space(root, 1, 1);
791         if (ret)
792                 goto fail;
793
794         trans = btrfs_start_transaction(root, 1);
795
796         btrfs_set_trans_block_group(trans, dir);
797         ret = btrfs_unlink_trans(trans, root, dir, dentry);
798         nr = trans->blocks_used;
799
800         if (inode->i_nlink == 0) {
801                 int found;
802                 /* if the inode isn't linked anywhere,
803                  * we don't need to worry about
804                  * data=ordered
805                  */
806                 found = btrfs_del_ordered_inode(inode);
807                 if (found == 1) {
808                         atomic_dec(&inode->i_count);
809                 }
810         }
811
812         btrfs_end_transaction(trans, root);
813 fail:
814         mutex_unlock(&root->fs_info->fs_mutex);
815         btrfs_btree_balance_dirty(root, nr);
816         btrfs_throttle(root);
817         return ret;
818 }
819
820 static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
821 {
822         struct inode *inode = dentry->d_inode;
823         int err = 0;
824         int ret;
825         struct btrfs_root *root = BTRFS_I(dir)->root;
826         struct btrfs_trans_handle *trans;
827         unsigned long nr = 0;
828
829         if (inode->i_size > BTRFS_EMPTY_DIR_SIZE)
830                 return -ENOTEMPTY;
831
832         mutex_lock(&root->fs_info->fs_mutex);
833         ret = btrfs_check_free_space(root, 1, 1);
834         if (ret)
835                 goto fail;
836
837         trans = btrfs_start_transaction(root, 1);
838         btrfs_set_trans_block_group(trans, dir);
839
840         /* now the directory is empty */
841         err = btrfs_unlink_trans(trans, root, dir, dentry);
842         if (!err) {
843                 inode->i_size = 0;
844         }
845
846         nr = trans->blocks_used;
847         ret = btrfs_end_transaction(trans, root);
848 fail:
849         mutex_unlock(&root->fs_info->fs_mutex);
850         btrfs_btree_balance_dirty(root, nr);
851         btrfs_throttle(root);
852
853         if (ret && !err)
854                 err = ret;
855         return err;
856 }
857
858 /*
859  * this can truncate away extent items, csum items and directory items.
860  * It starts at a high offset and removes keys until it can't find
861  * any higher than i_size.
862  *
863  * csum items that cross the new i_size are truncated to the new size
864  * as well.
865  */
866 static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans,
867                                    struct btrfs_root *root,
868                                    struct inode *inode,
869                                    u32 min_type)
870 {
871         int ret;
872         struct btrfs_path *path;
873         struct btrfs_key key;
874         struct btrfs_key found_key;
875         u32 found_type;
876         struct extent_buffer *leaf;
877         struct btrfs_file_extent_item *fi;
878         u64 extent_start = 0;
879         u64 extent_num_bytes = 0;
880         u64 item_end = 0;
881         u64 root_gen = 0;
882         u64 root_owner = 0;
883         int found_extent;
884         int del_item;
885         int pending_del_nr = 0;
886         int pending_del_slot = 0;
887         int extent_type = -1;
888
889         btrfs_drop_extent_cache(inode, inode->i_size, (u64)-1);
890         path = btrfs_alloc_path();
891         path->reada = -1;
892         BUG_ON(!path);
893
894         /* FIXME, add redo link to tree so we don't leak on crash */
895         key.objectid = inode->i_ino;
896         key.offset = (u64)-1;
897         key.type = (u8)-1;
898
899         btrfs_init_path(path);
900 search_again:
901         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
902         if (ret < 0) {
903                 goto error;
904         }
905         if (ret > 0) {
906                 BUG_ON(path->slots[0] == 0);
907                 path->slots[0]--;
908         }
909
910         while(1) {
911                 fi = NULL;
912                 leaf = path->nodes[0];
913                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
914                 found_type = btrfs_key_type(&found_key);
915
916                 if (found_key.objectid != inode->i_ino)
917                         break;
918
919                 if (found_type < min_type)
920                         break;
921
922                 item_end = found_key.offset;
923                 if (found_type == BTRFS_EXTENT_DATA_KEY) {
924                         fi = btrfs_item_ptr(leaf, path->slots[0],
925                                             struct btrfs_file_extent_item);
926                         extent_type = btrfs_file_extent_type(leaf, fi);
927                         if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
928                                 item_end +=
929                                     btrfs_file_extent_num_bytes(leaf, fi);
930                         } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
931                                 struct btrfs_item *item = btrfs_item_nr(leaf,
932                                                                 path->slots[0]);
933                                 item_end += btrfs_file_extent_inline_len(leaf,
934                                                                          item);
935                         }
936                         item_end--;
937                 }
938                 if (found_type == BTRFS_CSUM_ITEM_KEY) {
939                         ret = btrfs_csum_truncate(trans, root, path,
940                                                   inode->i_size);
941                         BUG_ON(ret);
942                 }
943                 if (item_end < inode->i_size) {
944                         if (found_type == BTRFS_DIR_ITEM_KEY) {
945                                 found_type = BTRFS_INODE_ITEM_KEY;
946                         } else if (found_type == BTRFS_EXTENT_ITEM_KEY) {
947                                 found_type = BTRFS_CSUM_ITEM_KEY;
948                         } else if (found_type == BTRFS_EXTENT_DATA_KEY) {
949                                 found_type = BTRFS_XATTR_ITEM_KEY;
950                         } else if (found_type == BTRFS_XATTR_ITEM_KEY) {
951                                 found_type = BTRFS_INODE_REF_KEY;
952                         } else if (found_type) {
953                                 found_type--;
954                         } else {
955                                 break;
956                         }
957                         btrfs_set_key_type(&key, found_type);
958                         goto next;
959                 }
960                 if (found_key.offset >= inode->i_size)
961                         del_item = 1;
962                 else
963                         del_item = 0;
964                 found_extent = 0;
965
966                 /* FIXME, shrink the extent if the ref count is only 1 */
967                 if (found_type != BTRFS_EXTENT_DATA_KEY)
968                         goto delete;
969
970                 if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
971                         u64 num_dec;
972                         extent_start = btrfs_file_extent_disk_bytenr(leaf, fi);
973                         if (!del_item) {
974                                 u64 orig_num_bytes =
975                                         btrfs_file_extent_num_bytes(leaf, fi);
976                                 extent_num_bytes = inode->i_size -
977                                         found_key.offset + root->sectorsize - 1;
978                                 extent_num_bytes = extent_num_bytes &
979                                         ~((u64)root->sectorsize - 1);
980                                 btrfs_set_file_extent_num_bytes(leaf, fi,
981                                                          extent_num_bytes);
982                                 num_dec = (orig_num_bytes -
983                                            extent_num_bytes);
984                                 if (extent_start != 0)
985                                         dec_i_blocks(inode, num_dec);
986                                 btrfs_mark_buffer_dirty(leaf);
987                         } else {
988                                 extent_num_bytes =
989                                         btrfs_file_extent_disk_num_bytes(leaf,
990                                                                          fi);
991                                 /* FIXME blocksize != 4096 */
992                                 num_dec = btrfs_file_extent_num_bytes(leaf, fi);
993                                 if (extent_start != 0) {
994                                         found_extent = 1;
995                                         dec_i_blocks(inode, num_dec);
996                                 }
997                                 root_gen = btrfs_header_generation(leaf);
998                                 root_owner = btrfs_header_owner(leaf);
999                         }
1000                 } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
1001                         if (!del_item) {
1002                                 u32 newsize = inode->i_size - found_key.offset;
1003                                 dec_i_blocks(inode, item_end + 1 -
1004                                             found_key.offset - newsize);
1005                                 newsize =
1006                                     btrfs_file_extent_calc_inline_size(newsize);
1007                                 ret = btrfs_truncate_item(trans, root, path,
1008                                                           newsize, 1);
1009                                 BUG_ON(ret);
1010                         } else {
1011                                 dec_i_blocks(inode, item_end + 1 -
1012                                              found_key.offset);
1013                         }
1014                 }
1015 delete:
1016                 if (del_item) {
1017                         if (!pending_del_nr) {
1018                                 /* no pending yet, add ourselves */
1019                                 pending_del_slot = path->slots[0];
1020                                 pending_del_nr = 1;
1021                         } else if (pending_del_nr &&
1022                                    path->slots[0] + 1 == pending_del_slot) {
1023                                 /* hop on the pending chunk */
1024                                 pending_del_nr++;
1025                                 pending_del_slot = path->slots[0];
1026                         } else {
1027                                 printk("bad pending slot %d pending_del_nr %d pending_del_slot %d\n", path->slots[0], pending_del_nr, pending_del_slot);
1028                         }
1029                 } else {
1030                         break;
1031                 }
1032                 if (found_extent) {
1033                         ret = btrfs_free_extent(trans, root, extent_start,
1034                                                 extent_num_bytes,
1035                                                 root_owner,
1036                                                 root_gen, inode->i_ino,
1037                                                 found_key.offset, 0);
1038                         BUG_ON(ret);
1039                 }
1040 next:
1041                 if (path->slots[0] == 0) {
1042                         if (pending_del_nr)
1043                                 goto del_pending;
1044                         btrfs_release_path(root, path);
1045                         goto search_again;
1046                 }
1047
1048                 path->slots[0]--;
1049                 if (pending_del_nr &&
1050                     path->slots[0] + 1 != pending_del_slot) {
1051                         struct btrfs_key debug;
1052 del_pending:
1053                         btrfs_item_key_to_cpu(path->nodes[0], &debug,
1054                                               pending_del_slot);
1055                         ret = btrfs_del_items(trans, root, path,
1056                                               pending_del_slot,
1057                                               pending_del_nr);
1058                         BUG_ON(ret);
1059                         pending_del_nr = 0;
1060                         btrfs_release_path(root, path);
1061                         goto search_again;
1062                 }
1063         }
1064         ret = 0;
1065 error:
1066         if (pending_del_nr) {
1067                 ret = btrfs_del_items(trans, root, path, pending_del_slot,
1068                                       pending_del_nr);
1069         }
1070         btrfs_release_path(root, path);
1071         btrfs_free_path(path);
1072         inode->i_sb->s_dirt = 1;
1073         return ret;
1074 }
1075
1076 static int btrfs_cow_one_page(struct inode *inode, struct page *page,
1077                               size_t zero_start)
1078 {
1079         char *kaddr;
1080         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
1081         u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
1082         u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
1083         int ret = 0;
1084
1085         WARN_ON(!PageLocked(page));
1086         set_page_extent_mapped(page);
1087
1088         lock_extent(io_tree, page_start, page_end, GFP_NOFS);
1089         set_extent_delalloc(&BTRFS_I(inode)->io_tree, page_start,
1090                             page_end, GFP_NOFS);
1091
1092         if (zero_start != PAGE_CACHE_SIZE) {
1093                 kaddr = kmap(page);
1094                 memset(kaddr + zero_start, 0, PAGE_CACHE_SIZE - zero_start);
1095                 flush_dcache_page(page);
1096                 kunmap(page);
1097         }
1098         set_page_dirty(page);
1099         unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
1100
1101         return ret;
1102 }
1103
1104 /*
1105  * taken from block_truncate_page, but does cow as it zeros out
1106  * any bytes left in the last page in the file.
1107  */
1108 static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
1109 {
1110         struct inode *inode = mapping->host;
1111         struct btrfs_root *root = BTRFS_I(inode)->root;
1112         u32 blocksize = root->sectorsize;
1113         pgoff_t index = from >> PAGE_CACHE_SHIFT;
1114         unsigned offset = from & (PAGE_CACHE_SIZE-1);
1115         struct page *page;
1116         int ret = 0;
1117         u64 page_start;
1118
1119         if ((offset & (blocksize - 1)) == 0)
1120                 goto out;
1121
1122         ret = -ENOMEM;
1123         page = grab_cache_page(mapping, index);
1124         if (!page)
1125                 goto out;
1126         if (!PageUptodate(page)) {
1127                 ret = btrfs_readpage(NULL, page);
1128                 lock_page(page);
1129                 if (!PageUptodate(page)) {
1130                         ret = -EIO;
1131                         goto out;
1132                 }
1133         }
1134         page_start = (u64)page->index << PAGE_CACHE_SHIFT;
1135
1136         ret = btrfs_cow_one_page(inode, page, offset);
1137
1138         unlock_page(page);
1139         page_cache_release(page);
1140 out:
1141         return ret;
1142 }
1143
1144 static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
1145 {
1146         struct inode *inode = dentry->d_inode;
1147         int err;
1148
1149         err = inode_change_ok(inode, attr);
1150         if (err)
1151                 return err;
1152
1153         if (S_ISREG(inode->i_mode) &&
1154             attr->ia_valid & ATTR_SIZE && attr->ia_size > inode->i_size) {
1155                 struct btrfs_trans_handle *trans;
1156                 struct btrfs_root *root = BTRFS_I(inode)->root;
1157                 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
1158
1159                 u64 mask = root->sectorsize - 1;
1160                 u64 hole_start = (inode->i_size + mask) & ~mask;
1161                 u64 block_end = (attr->ia_size + mask) & ~mask;
1162                 u64 hole_size;
1163                 u64 alloc_hint = 0;
1164
1165                 if (attr->ia_size <= hole_start)
1166                         goto out;
1167
1168                 mutex_lock(&root->fs_info->fs_mutex);
1169                 err = btrfs_check_free_space(root, 1, 0);
1170                 mutex_unlock(&root->fs_info->fs_mutex);
1171                 if (err)
1172                         goto fail;
1173
1174                 btrfs_truncate_page(inode->i_mapping, inode->i_size);
1175
1176                 lock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS);
1177                 hole_size = block_end - hole_start;
1178
1179                 mutex_lock(&root->fs_info->fs_mutex);
1180                 trans = btrfs_start_transaction(root, 1);
1181                 btrfs_set_trans_block_group(trans, inode);
1182                 err = btrfs_drop_extents(trans, root, inode,
1183                                          hole_start, block_end, hole_start,
1184                                          &alloc_hint);
1185
1186                 if (alloc_hint != EXTENT_MAP_INLINE) {
1187                         err = btrfs_insert_file_extent(trans, root,
1188                                                        inode->i_ino,
1189                                                        hole_start, 0, 0,
1190                                                        hole_size);
1191                         btrfs_drop_extent_cache(inode, hole_start,
1192                                                 hole_size - 1);
1193                         btrfs_check_file(root, inode);
1194                 }
1195                 btrfs_end_transaction(trans, root);
1196                 mutex_unlock(&root->fs_info->fs_mutex);
1197                 unlock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS);
1198                 if (err)
1199                         return err;
1200         }
1201 out:
1202         err = inode_setattr(inode, attr);
1203 fail:
1204         return err;
1205 }
1206
1207 void btrfs_put_inode(struct inode *inode)
1208 {
1209         int ret;
1210
1211         if (!BTRFS_I(inode)->ordered_trans) {
1212                 return;
1213         }
1214
1215         if (mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY) ||
1216             mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK))
1217                 return;
1218
1219         ret = btrfs_del_ordered_inode(inode);
1220         if (ret == 1) {
1221                 atomic_dec(&inode->i_count);
1222         }
1223 }
1224
1225 void btrfs_delete_inode(struct inode *inode)
1226 {
1227         struct btrfs_trans_handle *trans;
1228         struct btrfs_root *root = BTRFS_I(inode)->root;
1229         unsigned long nr;
1230         int ret;
1231
1232         truncate_inode_pages(&inode->i_data, 0);
1233         if (is_bad_inode(inode)) {
1234                 goto no_delete;
1235         }
1236
1237         inode->i_size = 0;
1238         mutex_lock(&root->fs_info->fs_mutex);
1239         trans = btrfs_start_transaction(root, 1);
1240
1241         btrfs_set_trans_block_group(trans, inode);
1242         ret = btrfs_truncate_in_trans(trans, root, inode, 0);
1243         if (ret)
1244                 goto no_delete_lock;
1245
1246         nr = trans->blocks_used;
1247         clear_inode(inode);
1248
1249         btrfs_end_transaction(trans, root);
1250         mutex_unlock(&root->fs_info->fs_mutex);
1251         btrfs_btree_balance_dirty(root, nr);
1252         btrfs_throttle(root);
1253         return;
1254
1255 no_delete_lock:
1256         nr = trans->blocks_used;
1257         btrfs_end_transaction(trans, root);
1258         mutex_unlock(&root->fs_info->fs_mutex);
1259         btrfs_btree_balance_dirty(root, nr);
1260         btrfs_throttle(root);
1261 no_delete:
1262         clear_inode(inode);
1263 }
1264
1265 /*
1266  * this returns the key found in the dir entry in the location pointer.
1267  * If no dir entries were found, location->objectid is 0.
1268  */
1269 static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
1270                                struct btrfs_key *location)
1271 {
1272         const char *name = dentry->d_name.name;
1273         int namelen = dentry->d_name.len;
1274         struct btrfs_dir_item *di;
1275         struct btrfs_path *path;
1276         struct btrfs_root *root = BTRFS_I(dir)->root;
1277         int ret = 0;
1278
1279         if (namelen == 1 && strcmp(name, ".") == 0) {
1280                 location->objectid = dir->i_ino;
1281                 location->type = BTRFS_INODE_ITEM_KEY;
1282                 location->offset = 0;
1283                 return 0;
1284         }
1285         path = btrfs_alloc_path();
1286         BUG_ON(!path);
1287
1288         if (namelen == 2 && strcmp(name, "..") == 0) {
1289                 struct btrfs_key key;
1290                 struct extent_buffer *leaf;
1291                 u32 nritems;
1292                 int slot;
1293
1294                 key.objectid = dir->i_ino;
1295                 btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY);
1296                 key.offset = 0;
1297                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1298                 BUG_ON(ret == 0);
1299                 ret = 0;
1300
1301                 leaf = path->nodes[0];
1302                 slot = path->slots[0];
1303                 nritems = btrfs_header_nritems(leaf);
1304                 if (slot >= nritems)
1305                         goto out_err;
1306
1307                 btrfs_item_key_to_cpu(leaf, &key, slot);
1308                 if (key.objectid != dir->i_ino ||
1309                     key.type != BTRFS_INODE_REF_KEY) {
1310                         goto out_err;
1311                 }
1312                 location->objectid = key.offset;
1313                 location->type = BTRFS_INODE_ITEM_KEY;
1314                 location->offset = 0;
1315                 goto out;
1316         }
1317
1318         di = btrfs_lookup_dir_item(NULL, root, path, dir->i_ino, name,
1319                                     namelen, 0);
1320         if (IS_ERR(di))
1321                 ret = PTR_ERR(di);
1322         if (!di || IS_ERR(di)) {
1323                 goto out_err;
1324         }
1325         btrfs_dir_item_key_to_cpu(path->nodes[0], di, location);
1326 out:
1327         btrfs_free_path(path);
1328         return ret;
1329 out_err:
1330         location->objectid = 0;
1331         goto out;
1332 }
1333
1334 /*
1335  * when we hit a tree root in a directory, the btrfs part of the inode
1336  * needs to be changed to reflect the root directory of the tree root.  This
1337  * is kind of like crossing a mount point.
1338  */
1339 static int fixup_tree_root_location(struct btrfs_root *root,
1340                              struct btrfs_key *location,
1341                              struct btrfs_root **sub_root,
1342                              struct dentry *dentry)
1343 {
1344         struct btrfs_path *path;
1345         struct btrfs_root_item *ri;
1346
1347         if (btrfs_key_type(location) != BTRFS_ROOT_ITEM_KEY)
1348                 return 0;
1349         if (location->objectid == BTRFS_ROOT_TREE_OBJECTID)
1350                 return 0;
1351
1352         path = btrfs_alloc_path();
1353         BUG_ON(!path);
1354         mutex_lock(&root->fs_info->fs_mutex);
1355
1356         *sub_root = btrfs_read_fs_root(root->fs_info, location,
1357                                         dentry->d_name.name,
1358                                         dentry->d_name.len);
1359         if (IS_ERR(*sub_root))
1360                 return PTR_ERR(*sub_root);
1361
1362         ri = &(*sub_root)->root_item;
1363         location->objectid = btrfs_root_dirid(ri);
1364         btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
1365         location->offset = 0;
1366
1367         btrfs_free_path(path);
1368         mutex_unlock(&root->fs_info->fs_mutex);
1369         return 0;
1370 }
1371
1372 static int btrfs_init_locked_inode(struct inode *inode, void *p)
1373 {
1374         struct btrfs_iget_args *args = p;
1375         inode->i_ino = args->ino;
1376         BTRFS_I(inode)->root = args->root;
1377         BTRFS_I(inode)->delalloc_bytes = 0;
1378         extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS);
1379         extent_io_tree_init(&BTRFS_I(inode)->io_tree,
1380                              inode->i_mapping, GFP_NOFS);
1381         extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree,
1382                              inode->i_mapping, GFP_NOFS);
1383         return 0;
1384 }
1385
1386 static int btrfs_find_actor(struct inode *inode, void *opaque)
1387 {
1388         struct btrfs_iget_args *args = opaque;
1389         return (args->ino == inode->i_ino &&
1390                 args->root == BTRFS_I(inode)->root);
1391 }
1392
1393 struct inode *btrfs_ilookup(struct super_block *s, u64 objectid,
1394                             u64 root_objectid)
1395 {
1396         struct btrfs_iget_args args;
1397         args.ino = objectid;
1398         args.root = btrfs_lookup_fs_root(btrfs_sb(s)->fs_info, root_objectid);
1399
1400         if (!args.root)
1401                 return NULL;
1402
1403         return ilookup5(s, objectid, btrfs_find_actor, (void *)&args);
1404 }
1405
1406 struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid,
1407                                 struct btrfs_root *root)
1408 {
1409         struct inode *inode;
1410         struct btrfs_iget_args args;
1411         args.ino = objectid;
1412         args.root = root;
1413
1414         inode = iget5_locked(s, objectid, btrfs_find_actor,
1415                              btrfs_init_locked_inode,
1416                              (void *)&args);
1417         return inode;
1418 }
1419
1420 static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
1421                                    struct nameidata *nd)
1422 {
1423         struct inode * inode;
1424         struct btrfs_inode *bi = BTRFS_I(dir);
1425         struct btrfs_root *root = bi->root;
1426         struct btrfs_root *sub_root = root;
1427         struct btrfs_key location;
1428         int ret;
1429
1430         if (dentry->d_name.len > BTRFS_NAME_LEN)
1431                 return ERR_PTR(-ENAMETOOLONG);
1432
1433         mutex_lock(&root->fs_info->fs_mutex);
1434         ret = btrfs_inode_by_name(dir, dentry, &location);
1435         mutex_unlock(&root->fs_info->fs_mutex);
1436
1437         if (ret < 0)
1438                 return ERR_PTR(ret);
1439
1440         inode = NULL;
1441         if (location.objectid) {
1442                 ret = fixup_tree_root_location(root, &location, &sub_root,
1443                                                 dentry);
1444                 if (ret < 0)
1445                         return ERR_PTR(ret);
1446                 if (ret > 0)
1447                         return ERR_PTR(-ENOENT);
1448                 inode = btrfs_iget_locked(dir->i_sb, location.objectid,
1449                                           sub_root);
1450                 if (!inode)
1451                         return ERR_PTR(-EACCES);
1452                 if (inode->i_state & I_NEW) {
1453                         /* the inode and parent dir are two different roots */
1454                         if (sub_root != root) {
1455                                 igrab(inode);
1456                                 sub_root->inode = inode;
1457                         }
1458                         BTRFS_I(inode)->root = sub_root;
1459                         memcpy(&BTRFS_I(inode)->location, &location,
1460                                sizeof(location));
1461                         btrfs_read_locked_inode(inode);
1462                         unlock_new_inode(inode);
1463                 }
1464         }
1465         return d_splice_alias(inode, dentry);
1466 }
1467
1468 static unsigned char btrfs_filetype_table[] = {
1469         DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
1470 };
1471
1472 static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
1473 {
1474         struct inode *inode = filp->f_dentry->d_inode;
1475         struct btrfs_root *root = BTRFS_I(inode)->root;
1476         struct btrfs_item *item;
1477         struct btrfs_dir_item *di;
1478         struct btrfs_key key;
1479         struct btrfs_key found_key;
1480         struct btrfs_path *path;
1481         int ret;
1482         u32 nritems;
1483         struct extent_buffer *leaf;
1484         int slot;
1485         int advance;
1486         unsigned char d_type;
1487         int over = 0;
1488         u32 di_cur;
1489         u32 di_total;
1490         u32 di_len;
1491         int key_type = BTRFS_DIR_INDEX_KEY;
1492         char tmp_name[32];
1493         char *name_ptr;
1494         int name_len;
1495
1496         /* FIXME, use a real flag for deciding about the key type */
1497         if (root->fs_info->tree_root == root)
1498                 key_type = BTRFS_DIR_ITEM_KEY;
1499
1500         /* special case for "." */
1501         if (filp->f_pos == 0) {
1502                 over = filldir(dirent, ".", 1,
1503                                1, inode->i_ino,
1504                                DT_DIR);
1505                 if (over)
1506                         return 0;
1507                 filp->f_pos = 1;
1508         }
1509
1510         mutex_lock(&root->fs_info->fs_mutex);
1511         key.objectid = inode->i_ino;
1512         path = btrfs_alloc_path();
1513         path->reada = 2;
1514
1515         /* special case for .., just use the back ref */
1516         if (filp->f_pos == 1) {
1517                 btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY);
1518                 key.offset = 0;
1519                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1520                 BUG_ON(ret == 0);
1521                 leaf = path->nodes[0];
1522                 slot = path->slots[0];
1523                 nritems = btrfs_header_nritems(leaf);
1524                 if (slot >= nritems) {
1525                         btrfs_release_path(root, path);
1526                         goto read_dir_items;
1527                 }
1528                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
1529                 btrfs_release_path(root, path);
1530                 if (found_key.objectid != key.objectid ||
1531                     found_key.type != BTRFS_INODE_REF_KEY)
1532                         goto read_dir_items;
1533                 over = filldir(dirent, "..", 2,
1534                                2, found_key.offset, DT_DIR);
1535                 if (over)
1536                         goto nopos;
1537                 filp->f_pos = 2;
1538         }
1539
1540 read_dir_items:
1541         btrfs_set_key_type(&key, key_type);
1542         key.offset = filp->f_pos;
1543
1544         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1545         if (ret < 0)
1546                 goto err;
1547         advance = 0;
1548         while(1) {
1549                 leaf = path->nodes[0];
1550                 nritems = btrfs_header_nritems(leaf);
1551                 slot = path->slots[0];
1552                 if (advance || slot >= nritems) {
1553                         if (slot >= nritems -1) {
1554                                 ret = btrfs_next_leaf(root, path);
1555                                 if (ret)
1556                                         break;
1557                                 leaf = path->nodes[0];
1558                                 nritems = btrfs_header_nritems(leaf);
1559                                 slot = path->slots[0];
1560                         } else {
1561                                 slot++;
1562                                 path->slots[0]++;
1563                         }
1564                 }
1565                 advance = 1;
1566                 item = btrfs_item_nr(leaf, slot);
1567                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
1568
1569                 if (found_key.objectid != key.objectid)
1570                         break;
1571                 if (btrfs_key_type(&found_key) != key_type)
1572                         break;
1573                 if (found_key.offset < filp->f_pos)
1574                         continue;
1575
1576                 filp->f_pos = found_key.offset;
1577                 advance = 1;
1578                 di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
1579                 di_cur = 0;
1580                 di_total = btrfs_item_size(leaf, item);
1581                 while(di_cur < di_total) {
1582                         struct btrfs_key location;
1583
1584                         name_len = btrfs_dir_name_len(leaf, di);
1585                         if (name_len < 32) {
1586                                 name_ptr = tmp_name;
1587                         } else {
1588                                 name_ptr = kmalloc(name_len, GFP_NOFS);
1589                                 BUG_ON(!name_ptr);
1590                         }
1591                         read_extent_buffer(leaf, name_ptr,
1592                                            (unsigned long)(di + 1), name_len);
1593
1594                         d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)];
1595                         btrfs_dir_item_key_to_cpu(leaf, di, &location);
1596                         over = filldir(dirent, name_ptr, name_len,
1597                                        found_key.offset,
1598                                        location.objectid,
1599                                        d_type);
1600
1601                         if (name_ptr != tmp_name)
1602                                 kfree(name_ptr);
1603
1604                         if (over)
1605                                 goto nopos;
1606                         di_len = btrfs_dir_name_len(leaf, di) +
1607                                 btrfs_dir_data_len(leaf, di) +sizeof(*di);
1608                         di_cur += di_len;
1609                         di = (struct btrfs_dir_item *)((char *)di + di_len);
1610                 }
1611         }
1612         if (key_type == BTRFS_DIR_INDEX_KEY)
1613                 filp->f_pos = INT_LIMIT(typeof(filp->f_pos));
1614         else
1615                 filp->f_pos++;
1616 nopos:
1617         ret = 0;
1618 err:
1619         btrfs_release_path(root, path);
1620         btrfs_free_path(path);
1621         mutex_unlock(&root->fs_info->fs_mutex);
1622         return ret;
1623 }
1624
1625 int btrfs_write_inode(struct inode *inode, int wait)
1626 {
1627         struct btrfs_root *root = BTRFS_I(inode)->root;
1628         struct btrfs_trans_handle *trans;
1629         int ret = 0;
1630
1631         if (wait) {
1632                 mutex_lock(&root->fs_info->fs_mutex);
1633                 trans = btrfs_start_transaction(root, 1);
1634                 btrfs_set_trans_block_group(trans, inode);
1635                 ret = btrfs_commit_transaction(trans, root);
1636                 mutex_unlock(&root->fs_info->fs_mutex);
1637         }
1638         return ret;
1639 }
1640
1641 /*
1642  * This is somewhat expensive, updating the tree every time the
1643  * inode changes.  But, it is most likely to find the inode in cache.
1644  * FIXME, needs more benchmarking...there are no reasons other than performance
1645  * to keep or drop this code.
1646  */
1647 void btrfs_dirty_inode(struct inode *inode)
1648 {
1649         struct btrfs_root *root = BTRFS_I(inode)->root;
1650         struct btrfs_trans_handle *trans;
1651
1652         mutex_lock(&root->fs_info->fs_mutex);
1653         trans = btrfs_start_transaction(root, 1);
1654         btrfs_set_trans_block_group(trans, inode);
1655         btrfs_update_inode(trans, root, inode);
1656         btrfs_end_transaction(trans, root);
1657         mutex_unlock(&root->fs_info->fs_mutex);
1658 }
1659
1660 static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
1661                                      struct btrfs_root *root,
1662                                      const char *name, int name_len,
1663                                      u64 ref_objectid,
1664                                      u64 objectid,
1665                                      struct btrfs_block_group_cache *group,
1666                                      int mode)
1667 {
1668         struct inode *inode;
1669         struct btrfs_inode_item *inode_item;
1670         struct btrfs_block_group_cache *new_inode_group;
1671         struct btrfs_key *location;
1672         struct btrfs_path *path;
1673         struct btrfs_inode_ref *ref;
1674         struct btrfs_key key[2];
1675         u32 sizes[2];
1676         unsigned long ptr;
1677         int ret;
1678         int owner;
1679
1680         path = btrfs_alloc_path();
1681         BUG_ON(!path);
1682
1683         inode = new_inode(root->fs_info->sb);
1684         if (!inode)
1685                 return ERR_PTR(-ENOMEM);
1686
1687         extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS);
1688         extent_io_tree_init(&BTRFS_I(inode)->io_tree,
1689                              inode->i_mapping, GFP_NOFS);
1690         extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree,
1691                              inode->i_mapping, GFP_NOFS);
1692         BTRFS_I(inode)->delalloc_bytes = 0;
1693         BTRFS_I(inode)->root = root;
1694
1695         if (mode & S_IFDIR)
1696                 owner = 0;
1697         else
1698                 owner = 1;
1699         new_inode_group = btrfs_find_block_group(root, group, 0,
1700                                        BTRFS_BLOCK_GROUP_METADATA, owner);
1701         if (!new_inode_group) {
1702                 printk("find_block group failed\n");
1703                 new_inode_group = group;
1704         }
1705         BTRFS_I(inode)->block_group = new_inode_group;
1706         BTRFS_I(inode)->flags = 0;
1707
1708         key[0].objectid = objectid;
1709         btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY);
1710         key[0].offset = 0;
1711
1712         key[1].objectid = objectid;
1713         btrfs_set_key_type(&key[1], BTRFS_INODE_REF_KEY);
1714         key[1].offset = ref_objectid;
1715
1716         sizes[0] = sizeof(struct btrfs_inode_item);
1717         sizes[1] = name_len + sizeof(*ref);
1718
1719         ret = btrfs_insert_empty_items(trans, root, path, key, sizes, 2);
1720         if (ret != 0)
1721                 goto fail;
1722
1723         if (objectid > root->highest_inode)
1724                 root->highest_inode = objectid;
1725
1726         inode->i_uid = current->fsuid;
1727         inode->i_gid = current->fsgid;
1728         inode->i_mode = mode;
1729         inode->i_ino = objectid;
1730         inode->i_blocks = 0;
1731         inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
1732         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
1733                                   struct btrfs_inode_item);
1734         fill_inode_item(path->nodes[0], inode_item, inode);
1735
1736         ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
1737                              struct btrfs_inode_ref);
1738         btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len);
1739         ptr = (unsigned long)(ref + 1);
1740         write_extent_buffer(path->nodes[0], name, ptr, name_len);
1741
1742         btrfs_mark_buffer_dirty(path->nodes[0]);
1743         btrfs_free_path(path);
1744
1745         location = &BTRFS_I(inode)->location;
1746         location->objectid = objectid;
1747         location->offset = 0;
1748         btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
1749
1750         insert_inode_hash(inode);
1751         return inode;
1752 fail:
1753         btrfs_free_path(path);
1754         return ERR_PTR(ret);
1755 }
1756
1757 static inline u8 btrfs_inode_type(struct inode *inode)
1758 {
1759         return btrfs_type_by_mode[(inode->i_mode & S_IFMT) >> S_SHIFT];
1760 }
1761
1762 static int btrfs_add_link(struct btrfs_trans_handle *trans,
1763                             struct dentry *dentry, struct inode *inode,
1764                             int add_backref)
1765 {
1766         int ret;
1767         struct btrfs_key key;
1768         struct btrfs_root *root = BTRFS_I(dentry->d_parent->d_inode)->root;
1769         struct inode *parent_inode;
1770
1771         key.objectid = inode->i_ino;
1772         btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
1773         key.offset = 0;
1774
1775         ret = btrfs_insert_dir_item(trans, root,
1776                                     dentry->d_name.name, dentry->d_name.len,
1777                                     dentry->d_parent->d_inode->i_ino,
1778                                     &key, btrfs_inode_type(inode));
1779         if (ret == 0) {
1780                 if (add_backref) {
1781                         ret = btrfs_insert_inode_ref(trans, root,
1782                                              dentry->d_name.name,
1783                                              dentry->d_name.len,
1784                                              inode->i_ino,
1785                                              dentry->d_parent->d_inode->i_ino);
1786                 }
1787                 parent_inode = dentry->d_parent->d_inode;
1788                 parent_inode->i_size += dentry->d_name.len * 2;
1789                 parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME;
1790                 ret = btrfs_update_inode(trans, root,
1791                                          dentry->d_parent->d_inode);
1792         }
1793         return ret;
1794 }
1795
1796 static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
1797                             struct dentry *dentry, struct inode *inode,
1798                             int backref)
1799 {
1800         int err = btrfs_add_link(trans, dentry, inode, backref);
1801         if (!err) {
1802                 d_instantiate(dentry, inode);
1803                 return 0;
1804         }
1805         if (err > 0)
1806                 err = -EEXIST;
1807         return err;
1808 }
1809
1810 static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
1811                         int mode, dev_t rdev)
1812 {
1813         struct btrfs_trans_handle *trans;
1814         struct btrfs_root *root = BTRFS_I(dir)->root;
1815         struct inode *inode = NULL;
1816         int err;
1817         int drop_inode = 0;
1818         u64 objectid;
1819         unsigned long nr = 0;
1820
1821         if (!new_valid_dev(rdev))
1822                 return -EINVAL;
1823
1824         mutex_lock(&root->fs_info->fs_mutex);
1825         err = btrfs_check_free_space(root, 1, 0);
1826         if (err)
1827                 goto fail;
1828
1829         trans = btrfs_start_transaction(root, 1);
1830         btrfs_set_trans_block_group(trans, dir);
1831
1832         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
1833         if (err) {
1834                 err = -ENOSPC;
1835                 goto out_unlock;
1836         }
1837
1838         inode = btrfs_new_inode(trans, root, dentry->d_name.name,
1839                                 dentry->d_name.len,
1840                                 dentry->d_parent->d_inode->i_ino, objectid,
1841                                 BTRFS_I(dir)->block_group, mode);
1842         err = PTR_ERR(inode);
1843         if (IS_ERR(inode))
1844                 goto out_unlock;
1845
1846         btrfs_set_trans_block_group(trans, inode);
1847         err = btrfs_add_nondir(trans, dentry, inode, 0);
1848         if (err)
1849                 drop_inode = 1;
1850         else {
1851                 inode->i_op = &btrfs_special_inode_operations;
1852                 init_special_inode(inode, inode->i_mode, rdev);
1853                 btrfs_update_inode(trans, root, inode);
1854         }
1855         dir->i_sb->s_dirt = 1;
1856         btrfs_update_inode_block_group(trans, inode);
1857         btrfs_update_inode_block_group(trans, dir);
1858 out_unlock:
1859         nr = trans->blocks_used;
1860         btrfs_end_transaction(trans, root);
1861 fail:
1862         mutex_unlock(&root->fs_info->fs_mutex);
1863
1864         if (drop_inode) {
1865                 inode_dec_link_count(inode);
1866                 iput(inode);
1867         }
1868         btrfs_btree_balance_dirty(root, nr);
1869         btrfs_throttle(root);
1870         return err;
1871 }
1872
1873 static int btrfs_create(struct inode *dir, struct dentry *dentry,
1874                         int mode, struct nameidata *nd)
1875 {
1876         struct btrfs_trans_handle *trans;
1877         struct btrfs_root *root = BTRFS_I(dir)->root;
1878         struct inode *inode = NULL;
1879         int err;
1880         int drop_inode = 0;
1881         unsigned long nr = 0;
1882         u64 objectid;
1883
1884         mutex_lock(&root->fs_info->fs_mutex);
1885         err = btrfs_check_free_space(root, 1, 0);
1886         if (err)
1887                 goto fail;
1888         trans = btrfs_start_transaction(root, 1);
1889         btrfs_set_trans_block_group(trans, dir);
1890
1891         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
1892         if (err) {
1893                 err = -ENOSPC;
1894                 goto out_unlock;
1895         }
1896
1897         inode = btrfs_new_inode(trans, root, dentry->d_name.name,
1898                                 dentry->d_name.len,
1899                                 dentry->d_parent->d_inode->i_ino,
1900                                 objectid, BTRFS_I(dir)->block_group, mode);
1901         err = PTR_ERR(inode);
1902         if (IS_ERR(inode))
1903                 goto out_unlock;
1904
1905         btrfs_set_trans_block_group(trans, inode);
1906         err = btrfs_add_nondir(trans, dentry, inode, 0);
1907         if (err)
1908                 drop_inode = 1;
1909         else {
1910                 inode->i_mapping->a_ops = &btrfs_aops;
1911                 inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
1912                 inode->i_fop = &btrfs_file_operations;
1913                 inode->i_op = &btrfs_file_inode_operations;
1914                 extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS);
1915                 extent_io_tree_init(&BTRFS_I(inode)->io_tree,
1916                                      inode->i_mapping, GFP_NOFS);
1917                 extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree,
1918                                      inode->i_mapping, GFP_NOFS);
1919                 BTRFS_I(inode)->delalloc_bytes = 0;
1920                 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
1921         }
1922         dir->i_sb->s_dirt = 1;
1923         btrfs_update_inode_block_group(trans, inode);
1924         btrfs_update_inode_block_group(trans, dir);
1925 out_unlock:
1926         nr = trans->blocks_used;
1927         btrfs_end_transaction(trans, root);
1928 fail:
1929         mutex_unlock(&root->fs_info->fs_mutex);
1930
1931         if (drop_inode) {
1932                 inode_dec_link_count(inode);
1933                 iput(inode);
1934         }
1935         btrfs_btree_balance_dirty(root, nr);
1936         btrfs_throttle(root);
1937         return err;
1938 }
1939
1940 static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
1941                       struct dentry *dentry)
1942 {
1943         struct btrfs_trans_handle *trans;
1944         struct btrfs_root *root = BTRFS_I(dir)->root;
1945         struct inode *inode = old_dentry->d_inode;
1946         unsigned long nr = 0;
1947         int err;
1948         int drop_inode = 0;
1949
1950         if (inode->i_nlink == 0)
1951                 return -ENOENT;
1952
1953 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
1954         inode->i_nlink++;
1955 #else
1956         inc_nlink(inode);
1957 #endif
1958         mutex_lock(&root->fs_info->fs_mutex);
1959         err = btrfs_check_free_space(root, 1, 0);
1960         if (err)
1961                 goto fail;
1962         trans = btrfs_start_transaction(root, 1);
1963
1964         btrfs_set_trans_block_group(trans, dir);
1965         atomic_inc(&inode->i_count);
1966         err = btrfs_add_nondir(trans, dentry, inode, 1);
1967
1968         if (err)
1969                 drop_inode = 1;
1970
1971         dir->i_sb->s_dirt = 1;
1972         btrfs_update_inode_block_group(trans, dir);
1973         err = btrfs_update_inode(trans, root, inode);
1974
1975         if (err)
1976                 drop_inode = 1;
1977
1978         nr = trans->blocks_used;
1979         btrfs_end_transaction(trans, root);
1980 fail:
1981         mutex_unlock(&root->fs_info->fs_mutex);
1982
1983         if (drop_inode) {
1984                 inode_dec_link_count(inode);
1985                 iput(inode);
1986         }
1987         btrfs_btree_balance_dirty(root, nr);
1988         btrfs_throttle(root);
1989         return err;
1990 }
1991
1992 static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1993 {
1994         struct inode *inode;
1995         struct btrfs_trans_handle *trans;
1996         struct btrfs_root *root = BTRFS_I(dir)->root;
1997         int err = 0;
1998         int drop_on_err = 0;
1999         u64 objectid;
2000         unsigned long nr = 1;
2001
2002         mutex_lock(&root->fs_info->fs_mutex);
2003         err = btrfs_check_free_space(root, 1, 0);
2004         if (err)
2005                 goto out_unlock;
2006
2007         trans = btrfs_start_transaction(root, 1);
2008         btrfs_set_trans_block_group(trans, dir);
2009
2010         if (IS_ERR(trans)) {
2011                 err = PTR_ERR(trans);
2012                 goto out_unlock;
2013         }
2014
2015         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
2016         if (err) {
2017                 err = -ENOSPC;
2018                 goto out_unlock;
2019         }
2020
2021         inode = btrfs_new_inode(trans, root, dentry->d_name.name,
2022                                 dentry->d_name.len,
2023                                 dentry->d_parent->d_inode->i_ino, objectid,
2024                                 BTRFS_I(dir)->block_group, S_IFDIR | mode);
2025         if (IS_ERR(inode)) {
2026                 err = PTR_ERR(inode);
2027                 goto out_fail;
2028         }
2029
2030         drop_on_err = 1;
2031         inode->i_op = &btrfs_dir_inode_operations;
2032         inode->i_fop = &btrfs_dir_file_operations;
2033         btrfs_set_trans_block_group(trans, inode);
2034
2035         inode->i_size = 0;
2036         err = btrfs_update_inode(trans, root, inode);
2037         if (err)
2038                 goto out_fail;
2039
2040         err = btrfs_add_link(trans, dentry, inode, 0);
2041         if (err)
2042                 goto out_fail;
2043
2044         d_instantiate(dentry, inode);
2045         drop_on_err = 0;
2046         dir->i_sb->s_dirt = 1;
2047         btrfs_update_inode_block_group(trans, inode);
2048         btrfs_update_inode_block_group(trans, dir);
2049
2050 out_fail:
2051         nr = trans->blocks_used;
2052         btrfs_end_transaction(trans, root);
2053
2054 out_unlock:
2055         mutex_unlock(&root->fs_info->fs_mutex);
2056         if (drop_on_err)
2057                 iput(inode);
2058         btrfs_btree_balance_dirty(root, nr);
2059         btrfs_throttle(root);
2060         return err;
2061 }
2062
/*
 * Look up (or build) the extent_map that covers file offset @start of
 * @inode.
 *
 * The per-inode extent map tree is checked first; a cached mapping is
 * returned directly unless it is an inline extent and a @page was supplied.
 * Otherwise the file extent item is searched for in the btree and a new
 * mapping is filled in as a regular extent, an inline extent or a hole
 * (EXTENT_MAP_HOLE), and then added to the extent map tree.
 *
 * For an inline extent with a @page, data is copied from the leaf into the
 * page (create == 0 and the page is not uptodate) or from the page into
 * the leaf (create != 0 and the page is uptodate).  The latter needs a
 * transaction, so the lookup is restarted from "again" with one started.
 *
 * fs_mutex is taken and released here.  Returns the mapping, or
 * ERR_PTR(err) on failure.
 */
2063 struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
2064                                     size_t pg_offset, u64 start, u64 len,
2065                                     int create)
2066 {
2067         int ret;
2068         int err = 0;
2069         u64 bytenr;
2070         u64 extent_start = 0;
2071         u64 extent_end = 0;
2072         u64 objectid = inode->i_ino;
2073         u32 found_type;
2074         struct btrfs_path *path;
2075         struct btrfs_root *root = BTRFS_I(inode)->root;
2076         struct btrfs_file_extent_item *item;
2077         struct extent_buffer *leaf;
2078         struct btrfs_key found_key;
2079         struct extent_map *em = NULL;
2080         struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
2081         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
2082         struct btrfs_trans_handle *trans = NULL;
2083
2084         path = btrfs_alloc_path();
2085         BUG_ON(!path);
2086         mutex_lock(&root->fs_info->fs_mutex);
2087
2088 again:
             /* first try the cached mappings of this inode */
2089         spin_lock(&em_tree->lock);
2090         em = lookup_extent_mapping(em_tree, start, len);
2091         spin_unlock(&em_tree->lock);
2092
2093         if (em) {
2094                 if (em->start > start) {
2095                         printk("get_extent lookup [%Lu %Lu] em [%Lu %Lu]\n",
2096                                start, len, em->start, em->len);
2097                         WARN_ON(1);
2098                 }
                     /*
                      * A cached inline mapping is dropped when a page was
                      * passed in, so the inline branch below runs again and
                      * does the copy; any other hit is returned as is.
                      */
2099                 if (em->block_start == EXTENT_MAP_INLINE && page)
2100                         free_extent_map(em);
2101                 else
2102                         goto out;
2103         }
2104         em = alloc_extent_map(GFP_NOFS);
2105         if (!em) {
2106                 err = -ENOMEM;
2107                 goto out;
2108         }
2109
             /* placeholder values; every path below overwrites start and len */
2110         em->start = EXTENT_MAP_HOLE;
2111         em->len = (u64)-1;
2112         em->bdev = inode->i_sb->s_bdev;
2113         ret = btrfs_lookup_file_extent(trans, root, path,
2114                                        objectid, start, trans != NULL);
2115         if (ret < 0) {
2116                 err = ret;
2117                 goto out;
2118         }
2119
             /*
              * Non-zero return: step back one slot.  Presumably the search
              * stopped just past the item that could contain start -- TODO
              * confirm against btrfs_lookup_file_extent().
              */
2120         if (ret != 0) {
2121                 if (path->slots[0] == 0)
2122                         goto not_found;
2123                 path->slots[0]--;
2124         }
2125
2126         leaf = path->nodes[0];
2127         item = btrfs_item_ptr(leaf, path->slots[0],
2128                               struct btrfs_file_extent_item);
2129         /* are we inside the extent that was found? */
2130         btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2131         found_type = btrfs_key_type(&found_key);
2132         if (found_key.objectid != objectid ||
2133             found_type != BTRFS_EXTENT_DATA_KEY) {
2134                 goto not_found;
2135         }
2136
             /* found_type is reused: from here on it is the file extent type */
2137         found_type = btrfs_file_extent_type(leaf, item);
2138         extent_start = found_key.offset;
2139         if (found_type == BTRFS_FILE_EXTENT_REG) {
2140                 extent_end = extent_start +
2141                        btrfs_file_extent_num_bytes(leaf, item);
2142                 err = 0;
2143                 if (start < extent_start || start >= extent_end) {
2144                         em->start = start;
2145                         if (start < extent_start) {
2146                                 if (start + len <= extent_start)
2147                                         goto not_found;
                                     /*
                                      * NOTE(review): the hole starts at start,
                                      * yet its length is the extent's length
                                      * rather than extent_start - start --
                                      * looks wrong, confirm.
                                      */
2148                                 em->len = extent_end - extent_start;
2149                         } else {
2150                                 em->len = len;
2151                         }
2152                         goto not_found_em;
2153                 }
2154                 bytenr = btrfs_file_extent_disk_bytenr(leaf, item);
                     /* a disk bytenr of 0 is mapped as a hole */
2155                 if (bytenr == 0) {
2156                         em->start = extent_start;
2157                         em->len = extent_end - extent_start;
2158                         em->block_start = EXTENT_MAP_HOLE;
2159                         goto insert;
2160                 }
2161                 bytenr += btrfs_file_extent_offset(leaf, item);
2162                 em->block_start = bytenr;
2163                 em->start = extent_start;
2164                 em->len = extent_end - extent_start;
2165                 goto insert;
2166         } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
2167                 u64 page_start;
2168                 unsigned long ptr;
2169                 char *map;
2170                 size_t size;
2171                 size_t extent_offset;
2172                 size_t copy_size;
2173
2174                 size = btrfs_file_extent_inline_len(leaf, btrfs_item_nr(leaf,
2175                                                     path->slots[0]));
                     /* round the end of the inline data up to a sector boundary */
2176                 extent_end = (extent_start + size + root->sectorsize - 1) &
2177                         ~((u64)root->sectorsize - 1);
2178                 if (start < extent_start || start >= extent_end) {
2179                         em->start = start;
2180                         if (start < extent_start) {
2181                                 if (start + len <= extent_start)
2182                                         goto not_found;
                                     /* NOTE(review): same length question as in the regular-extent branch */
2183                                 em->len = extent_end - extent_start;
2184                         } else {
2185                                 em->len = len;
2186                         }
2187                         goto not_found_em;
2188                 }
2189                 em->block_start = EXTENT_MAP_INLINE;
2190
                     /*
                      * Without a page there is nothing to copy: the mapping is
                      * returned directly and is not added to the tree.
                      */
2191                 if (!page) {
2192                         em->start = extent_start;
2193                         em->len = size;
2194                         goto out;
2195                 }
2196
2197                 page_start = page_offset(page) + pg_offset;
2198                 extent_offset = page_start - extent_start;
2199                 copy_size = min_t(u64, PAGE_CACHE_SIZE - pg_offset,
2200                                 size - extent_offset);
2201                 em->start = extent_start + extent_offset;
2202                 em->len = (copy_size + root->sectorsize - 1) &
2203                         ~((u64)root->sectorsize - 1);
2204                 map = kmap(page);
2205                 ptr = btrfs_file_extent_inline_start(item) + extent_offset;
2206                 if (create == 0 && !PageUptodate(page)) {
                             /* read: fill the page from the inline data in the leaf */
2207                         read_extent_buffer(leaf, map + pg_offset, ptr,
2208                                            copy_size);
2209                         flush_dcache_page(page);
2210                 } else if (create && PageUptodate(page)) {
                             /*
                              * write: the leaf is about to be modified, which
                              * needs a transaction.  If none is running, undo
                              * the local state, start one and redo the lookup.
                              */
2211                         if (!trans) {
2212                                 kunmap(page);
2213                                 free_extent_map(em);
2214                                 em = NULL;
2215                                 btrfs_release_path(root, path);
2216                                 trans = btrfs_start_transaction(root, 1);
2217                                 goto again;
2218                         }
2219                         write_extent_buffer(leaf, map + pg_offset, ptr,
2220                                             copy_size);
2221                         btrfs_mark_buffer_dirty(leaf);
2222                 }
2223                 kunmap(page);
2224                 set_extent_uptodate(io_tree, em->start,
2225                                     extent_map_end(em) - 1, GFP_NOFS);
2226                 goto insert;
2227         } else {
2228                 printk("unkknown found_type %d\n", found_type);
2229                 WARN_ON(1);
2230         }
             /* nothing usable was found: describe the requested range as a hole */
2231 not_found:
2232         em->start = start;
2233         em->len = len;
2234 not_found_em:
2235         em->block_start = EXTENT_MAP_HOLE;
2236 insert:
2237         btrfs_release_path(root, path);
             /* the mapping about to be inserted must contain start */
2238         if (em->start > start || extent_map_end(em) <= start) {
2239                 printk("bad extent! em: [%Lu %Lu] passed [%Lu %Lu]\n", em->start, em->len, start, len);
2240                 err = -EIO;
2241                 goto out;
2242         }
2243
2244         err = 0;
2245         spin_lock(&em_tree->lock);
2246         ret = add_extent_mapping(em_tree, em);
             /*
              * A mapping already exists in the tree: drop ours and return the
              * one the tree holds for this range.
              */
2247         if (ret == -EEXIST) {
2248                 free_extent_map(em);
2249                 em = lookup_extent_mapping(em_tree, start, len);
2250                 if (!em) {
2251                         err = -EIO;
2252                         printk("failing to insert %Lu %Lu\n", start, len);
2253                 }
2254         }
2255         spin_unlock(&em_tree->lock);
2256 out:
2257         btrfs_free_path(path);
             /* a transaction exists only if the inline write path started one */
2258         if (trans) {
2259                 ret = btrfs_end_transaction(trans, root);
2260                 if (!err)
2261                         err = ret;
2262         }
2263         mutex_unlock(&root->fs_info->fs_mutex);
2264         if (err) {
2265                 free_extent_map(em);
2266                 WARN_ON(1);
2267                 return ERR_PTR(err);
2268         }
2269         return em;
2270 }
2271
2272 static int btrfs_get_block(struct inode *inode, sector_t iblock,
2273                         struct buffer_head *bh_result, int create)
2274 {
2275         struct extent_map *em;
2276         u64 start = (u64)iblock << inode->i_blkbits;
2277         struct btrfs_multi_bio *multi = NULL;
2278         struct btrfs_root *root = BTRFS_I(inode)->root;
2279         u64 len;
2280         u64 logical;
2281         u64 map_length;
2282         int ret = 0;
2283
2284         em = btrfs_get_extent(inode, NULL, 0, start, bh_result->b_size, 0);
2285
2286         if (!em || IS_ERR(em))
2287                 goto out;
2288
2289         if (em->start > start || em->start + em->len <= start)
2290             goto out;
2291
2292         if (em->block_start == EXTENT_MAP_INLINE) {
2293                 ret = -EINVAL;
2294                 goto out;
2295         }
2296
2297         if (em->block_start == EXTENT_MAP_HOLE ||
2298             em->block_start == EXTENT_MAP_DELALLOC) {
2299                 goto out;
2300         }
2301
2302         len = em->start + em->len - start;
2303         len = min_t(u64, len, INT_LIMIT(typeof(bh_result->b_size)));
2304
2305         logical = start - em->start;
2306         logical = em->block_start + logical;
2307
2308         map_length = len;
2309         ret = btrfs_map_block(&root->fs_info->mapping_tree, READ,
2310                               logical, &map_length, &multi, 0);
2311         BUG_ON(ret);
2312         bh_result->b_blocknr = multi->stripes[0].physical >> inode->i_blkbits;
2313         bh_result->b_size = min(map_length, len);
2314         bh_result->b_bdev = multi->stripes[0].dev->bdev;
2315         set_buffer_mapped(bh_result);
2316         kfree(multi);
2317 out:
2318         free_extent_map(em);
2319         return ret;
2320 }
2321
2322 static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
2323                         const struct iovec *iov, loff_t offset,
2324                         unsigned long nr_segs)
2325 {
2326         struct file *file = iocb->ki_filp;
2327         struct inode *inode = file->f_mapping->host;
2328
2329         if (rw == WRITE)
2330                 return -EINVAL;
2331
2332         return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
2333                                   offset, nr_segs, btrfs_get_block, NULL);
2334 }
2335
2336 static sector_t btrfs_bmap(struct address_space *mapping, sector_t iblock)
2337 {
2338         return extent_bmap(mapping, iblock, btrfs_get_extent);
2339 }
2340
2341 int btrfs_readpage(struct file *file, struct page *page)
2342 {
2343         struct extent_io_tree *tree;
2344         tree = &BTRFS_I(page->mapping->host)->io_tree;
2345         return extent_read_full_page(tree, page, btrfs_get_extent);
2346 }
2347
2348 static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
2349 {
2350         struct extent_io_tree *tree;
2351
2352
2353         if (current->flags & PF_MEMALLOC) {
2354                 redirty_page_for_writepage(wbc, page);
2355                 unlock_page(page);
2356                 return 0;
2357         }
2358         tree = &BTRFS_I(page->mapping->host)->io_tree;
2359         return extent_write_full_page(tree, page, btrfs_get_extent, wbc);
2360 }
2361
2362 static int btrfs_writepages(struct address_space *mapping,
2363                             struct writeback_control *wbc)
2364 {
2365         struct extent_io_tree *tree;
2366         tree = &BTRFS_I(mapping->host)->io_tree;
2367         return extent_writepages(tree, mapping, btrfs_get_extent, wbc);
2368 }
2369
2370 static int
2371 btrfs_readpages(struct file *file, struct address_space *mapping,
2372                 struct list_head *pages, unsigned nr_pages)
2373 {
2374         struct extent_io_tree *tree;
2375         tree = &BTRFS_I(mapping->host)->io_tree;
2376         return extent_readpages(tree, mapping, pages, nr_pages,
2377                                 btrfs_get_extent);
2378 }
2379
2380 static int btrfs_releasepage(struct page *page, gfp_t gfp_flags)
2381 {
2382         struct extent_io_tree *tree;
2383         struct extent_map_tree *map;
2384         int ret;
2385
2386         tree = &BTRFS_I(page->mapping->host)->io_tree;
2387         map = &BTRFS_I(page->mapping->host)->extent_tree;
2388         ret = try_release_extent_mapping(map, tree, page, gfp_flags);
2389         if (ret == 1) {
2390                 ClearPagePrivate(page);
2391                 set_page_private(page, 0);
2392                 page_cache_release(page);
2393         }
2394         return ret;
2395 }
2396
2397 static void btrfs_invalidatepage(struct page *page, unsigned long offset)
2398 {
2399         struct extent_io_tree *tree;
2400
2401         tree = &BTRFS_I(page->mapping->host)->io_tree;
2402         extent_invalidatepage(tree, page, offset);
2403         btrfs_releasepage(page, GFP_NOFS);
2404 }
2405
2406 /*
2407  * btrfs_page_mkwrite() is not allowed to change the file size as it gets
2408  * called from a page fault handler when a page is first dirtied. Hence we must
2409  * be careful to check for EOF conditions here. We set the page up correctly
2410  * for a written page which means we get ENOSPC checking when writing into
2411  * holes and correct delalloc and unwritten extent mapping on filesystems that
2412  * support these features.
2413  *
2414  * We are not allowed to take the i_mutex here so we have to play games to
2415  * protect against truncate races as the page could now be beyond EOF.  Because
2416  * vmtruncate() writes the inode size before removing pages, once we have the
2417  * page lock we can determine safely if the page is beyond EOF. If it is not
2418  * beyond EOF, then the page is guaranteed safe against truncation until we
2419  * unlock the page.
2420  */
2421 int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page)
2422 {
2423         struct inode *inode = fdentry(vma->vm_file)->d_inode;
2424         struct btrfs_root *root = BTRFS_I(inode)->root;
2425         unsigned long end;
2426         loff_t size;
2427         int ret;
2428         u64 page_start;
2429
2430         mutex_lock(&root->fs_info->fs_mutex);
2431         ret = btrfs_check_free_space(root, PAGE_CACHE_SIZE, 0);
2432         mutex_unlock(&root->fs_info->fs_mutex);
2433         if (ret)
2434                 goto out;
2435
2436         ret = -EINVAL;
2437
2438         lock_page(page);
2439         wait_on_page_writeback(page);
2440         size = i_size_read(inode);
2441         page_start = (u64)page->index << PAGE_CACHE_SHIFT;
2442
2443         if ((page->mapping != inode->i_mapping) ||
2444             (page_start > size)) {
2445                 /* page got truncated out from underneath us */
2446                 goto out_unlock;
2447         }
2448
2449         /* page is wholly or partially inside EOF */
2450         if (page_start + PAGE_CACHE_SIZE > size)
2451                 end = size & ~PAGE_CACHE_MASK;
2452         else
2453                 end = PAGE_CACHE_SIZE;
2454
2455         ret = btrfs_cow_one_page(inode, page, end);
2456
2457 out_unlock:
2458         unlock_page(page);
2459 out:
2460         return ret;
2461 }
2462
2463 static void btrfs_truncate(struct inode *inode)
2464 {
2465         struct btrfs_root *root = BTRFS_I(inode)->root;
2466         int ret;
2467         struct btrfs_trans_handle *trans;
2468         unsigned long nr;
2469
2470         if (!S_ISREG(inode->i_mode))
2471                 return;
2472         if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
2473                 return;
2474
2475         btrfs_truncate_page(inode->i_mapping, inode->i_size);
2476
2477         mutex_lock(&root->fs_info->fs_mutex);
2478         trans = btrfs_start_transaction(root, 1);
2479         btrfs_set_trans_block_group(trans, inode);
2480
2481         /* FIXME, add redo link to tree so we don't leak on crash */
2482         ret = btrfs_truncate_in_trans(trans, root, inode,
2483                                       BTRFS_EXTENT_DATA_KEY);
2484         btrfs_update_inode(trans, root, inode);
2485         nr = trans->blocks_used;
2486
2487         ret = btrfs_end_transaction(trans, root);
2488         BUG_ON(ret);
2489         mutex_unlock(&root->fs_info->fs_mutex);
2490         btrfs_btree_balance_dirty(root, nr);
2491         btrfs_throttle(root);
2492 }
2493
2494 static int noinline create_subvol(struct btrfs_root *root, char *name,
2495                                   int namelen)
2496 {
2497         struct btrfs_trans_handle *trans;
2498         struct btrfs_key key;
2499         struct btrfs_root_item root_item;
2500         struct btrfs_inode_item *inode_item;
2501         struct extent_buffer *leaf;
2502         struct btrfs_root *new_root = root;
2503         struct inode *inode;
2504         struct inode *dir;
2505         int ret;
2506         int err;
2507         u64 objectid;
2508         u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
2509         unsigned long nr = 1;
2510
2511         mutex_lock(&root->fs_info->fs_mutex);
2512         ret = btrfs_check_free_space(root, 1, 0);
2513         if (ret)
2514                 goto fail_commit;
2515
2516         trans = btrfs_start_transaction(root, 1);
2517         BUG_ON(!trans);
2518
2519         ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root,
2520                                        0, &objectid);
2521         if (ret)
2522                 goto fail;
2523
2524         leaf = __btrfs_alloc_free_block(trans, root, root->leafsize,
2525                                         objectid, trans->transid, 0, 0,
2526                                         0, 0);
2527         if (IS_ERR(leaf))
2528                 return PTR_ERR(leaf);
2529
2530         btrfs_set_header_nritems(leaf, 0);
2531         btrfs_set_header_level(leaf, 0);
2532         btrfs_set_header_bytenr(leaf, leaf->start);
2533         btrfs_set_header_generation(leaf, trans->transid);
2534         btrfs_set_header_owner(leaf, objectid);
2535
2536         write_extent_buffer(leaf, root->fs_info->fsid,
2537                             (unsigned long)btrfs_header_fsid(leaf),
2538                             BTRFS_FSID_SIZE);
2539         btrfs_mark_buffer_dirty(leaf);
2540
2541         inode_item = &root_item.inode;
2542         memset(inode_item, 0, sizeof(*inode_item));
2543         inode_item->generation = cpu_to_le64(1);
2544         inode_item->size = cpu_to_le64(3);
2545         inode_item->nlink = cpu_to_le32(1);
2546         inode_item->nblocks = cpu_to_le64(1);
2547         inode_item->mode = cpu_to_le32(S_IFDIR | 0755);
2548
2549         btrfs_set_root_bytenr(&root_item, leaf->start);
2550         btrfs_set_root_level(&root_item, 0);
2551         btrfs_set_root_refs(&root_item, 1);
2552         btrfs_set_root_used(&root_item, 0);
2553
2554         memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress));
2555         root_item.drop_level = 0;
2556
2557         free_extent_buffer(leaf);
2558         leaf = NULL;
2559
2560         btrfs_set_root_dirid(&root_item, new_dirid);
2561
2562         key.objectid = objectid;
2563         key.offset = 1;
2564         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
2565         ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
2566                                 &root_item);
2567         if (ret)
2568                 goto fail;
2569
2570         /*
2571          * insert the directory item
2572          */
2573         key.offset = (u64)-1;
2574         dir = root->fs_info->sb->s_root->d_inode;
2575         ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root,
2576                                     name, namelen, dir->i_ino, &key,
2577                                     BTRFS_FT_DIR);
2578         if (ret)
2579                 goto fail;
2580
2581         ret = btrfs_insert_inode_ref(trans, root->fs_info->tree_root,
2582                              name, namelen, objectid,
2583                              root->fs_info->sb->s_root->d_inode->i_ino);
2584         if (ret)
2585                 goto fail;
2586
2587         ret = btrfs_commit_transaction(trans, root);
2588         if (ret)
2589                 goto fail_commit;
2590
2591         new_root = btrfs_read_fs_root(root->fs_info, &key, name, namelen);
2592         BUG_ON(!new_root);
2593
2594         trans = btrfs_start_transaction(new_root, 1);
2595         BUG_ON(!trans);
2596
2597         inode = btrfs_new_inode(trans, new_root, "..", 2, new_dirid,
2598                                 new_dirid,
2599                                 BTRFS_I(dir)->block_group, S_IFDIR | 0700);
2600         if (IS_ERR(inode))
2601                 goto fail;
2602         inode->i_op = &btrfs_dir_inode_operations;
2603         inode->i_fop = &btrfs_dir_file_operations;
2604         new_root->inode = inode;
2605
2606         ret = btrfs_insert_inode_ref(trans, new_root, "..", 2, new_dirid,
2607                                      new_dirid);
2608         inode->i_nlink = 1;
2609         inode->i_size = 0;
2610         ret = btrfs_update_inode(trans, new_root, inode);
2611         if (ret)
2612                 goto fail;
2613 fail:
2614         nr = trans->blocks_used;
2615         err = btrfs_commit_transaction(trans, new_root);
2616         if (err && !ret)
2617                 ret = err;
2618 fail_commit:
2619         mutex_unlock(&root->fs_info->fs_mutex);
2620         btrfs_btree_balance_dirty(root, nr);
2621         btrfs_throttle(root);
2622         return ret;
2623 }
2624
2625 static int create_snapshot(struct btrfs_root *root, char *name, int namelen)
2626 {
2627         struct btrfs_pending_snapshot *pending_snapshot;
2628         struct btrfs_trans_handle *trans;
2629         int ret;
2630         int err;
2631         unsigned long nr = 0;
2632
2633         if (!root->ref_cows)
2634                 return -EINVAL;
2635
2636         mutex_lock(&root->fs_info->fs_mutex);
2637         ret = btrfs_check_free_space(root, 1, 0);
2638         if (ret)
2639                 goto fail_unlock;
2640
2641         pending_snapshot = kmalloc(sizeof(*pending_snapshot), GFP_NOFS);
2642         if (!pending_snapshot) {
2643                 ret = -ENOMEM;
2644                 goto fail_unlock;
2645         }
2646         pending_snapshot->name = kmalloc(namelen + 1, GFP_NOFS);
2647         if (!pending_snapshot->name) {
2648                 ret = -ENOMEM;
2649                 kfree(pending_snapshot);
2650                 goto fail_unlock;
2651         }
2652         memcpy(pending_snapshot->name, name, namelen);
2653         pending_snapshot->name[namelen] = '\0';
2654         trans = btrfs_start_transaction(root, 1);
2655         BUG_ON(!trans);
2656         pending_snapshot->root = root;
2657         list_add(&pending_snapshot->list,
2658                  &trans->transaction->pending_snapshots);
2659         ret = btrfs_update_inode(trans, root, root->inode);
2660         err = btrfs_commit_transaction(trans, root);
2661
2662 fail_unlock:
2663         mutex_unlock(&root->fs_info->fs_mutex);
2664         btrfs_btree_balance_dirty(root, nr);
2665         btrfs_throttle(root);
2666         return ret;
2667 }
2668
2669 unsigned long btrfs_force_ra(struct address_space *mapping,
2670                               struct file_ra_state *ra, struct file *file,
2671                               pgoff_t offset, pgoff_t last_index)
2672 {
2673         pgoff_t req_size;
2674
2675 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)
2676         req_size = last_index - offset + 1;
2677         offset = page_cache_readahead(mapping, ra, file, offset, req_size);
2678         return offset;
2679 #else
2680         req_size = min(last_index - offset + 1, (pgoff_t)128);
2681         page_cache_sync_readahead(mapping, ra, file, offset, req_size);
2682         return offset + req_size;
2683 #endif
2684 }
2685
2686 int btrfs_defrag_file(struct file *file) {
2687         struct inode *inode = fdentry(file)->d_inode;
2688         struct btrfs_root *root = BTRFS_I(inode)->root;
2689         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
2690         struct page *page;
2691         unsigned long last_index;
2692         unsigned long ra_index = 0;
2693         u64 page_start;
2694         u64 page_end;
2695         unsigned long i;
2696         int ret;
2697
2698         mutex_lock(&root->fs_info->fs_mutex);
2699         ret = btrfs_check_free_space(root, inode->i_size, 0);
2700         mutex_unlock(&root->fs_info->fs_mutex);
2701         if (ret)
2702                 return -ENOSPC;
2703
2704         mutex_lock(&inode->i_mutex);
2705         last_index = inode->i_size >> PAGE_CACHE_SHIFT;
2706         for (i = 0; i <= last_index; i++) {
2707                 if (i == ra_index) {
2708                         ra_index = btrfs_force_ra(inode->i_mapping,
2709                                                   &file->f_ra,
2710                                                   file, ra_index, last_index);
2711                 }
2712                 page = grab_cache_page(inode->i_mapping, i);
2713                 if (!page)
2714                         goto out_unlock;
2715                 if (!PageUptodate(page)) {
2716                         btrfs_readpage(NULL, page);
2717                         lock_page(page);
2718                         if (!PageUptodate(page)) {
2719                                 unlock_page(page);
2720                                 page_cache_release(page);
2721                                 goto out_unlock;
2722                         }
2723                 }
2724                 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
2725                 page_end = page_start + PAGE_CACHE_SIZE - 1;
2726
2727                 lock_extent(io_tree, page_start, page_end, GFP_NOFS);
2728                 set_extent_delalloc(io_tree, page_start,
2729                                     page_end, GFP_NOFS);
2730
2731                 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
2732                 set_page_dirty(page);
2733                 unlock_page(page);
2734                 page_cache_release(page);
2735                 balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1);
2736         }
2737
2738 out_unlock:
2739         mutex_unlock(&inode->i_mutex);
2740         return 0;
2741 }
2742
2743 static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg)
2744 {
2745         u64 new_size;
2746         u64 old_size;
2747         struct btrfs_ioctl_vol_args *vol_args;
2748         struct btrfs_trans_handle *trans;
2749         char *sizestr;
2750         int ret = 0;
2751         int namelen;
2752         int mod = 0;
2753
2754         vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS);
2755
2756         if (!vol_args)
2757                 return -ENOMEM;
2758
2759         if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
2760                 ret = -EFAULT;
2761                 goto out;
2762         }
2763         namelen = strlen(vol_args->name);
2764         if (namelen > BTRFS_VOL_NAME_MAX) {
2765                 ret = -EINVAL;
2766                 goto out;
2767         }
2768
2769         sizestr = vol_args->name;
2770         if (!strcmp(sizestr, "max"))
2771                 new_size = root->fs_info->sb->s_bdev->bd_inode->i_size;
2772         else {
2773                 if (sizestr[0] == '-') {
2774                         mod = -1;
2775                         sizestr++;
2776                 } else if (sizestr[0] == '+') {
2777                         mod = 1;
2778                         sizestr++;
2779                 }
2780                 new_size = btrfs_parse_size(sizestr);
2781                 if (new_size == 0) {
2782                         ret = -EINVAL;
2783                         goto out;
2784                 }
2785         }
2786
2787         mutex_lock(&root->fs_info->fs_mutex);
2788         old_size = btrfs_super_total_bytes(&root->fs_info->super_copy);
2789
2790         if (mod < 0) {
2791                 if (new_size > old_size) {
2792                         ret = -EINVAL;
2793                         goto out_unlock;
2794                 }
2795                 new_size = old_size - new_size;
2796         } else if (mod > 0) {
2797                 new_size = old_size + new_size;
2798         }
2799
2800         if (new_size < 256 * 1024 * 1024) {
2801                 ret = -EINVAL;
2802                 goto out_unlock;
2803         }
2804         if (new_size > root->fs_info->sb->s_bdev->bd_inode->i_size) {
2805                 ret = -EFBIG;
2806                 goto out_unlock;
2807         }
2808
2809         do_div(new_size, root->sectorsize);
2810         new_size *= root->sectorsize;
2811
2812 printk("new size is %Lu\n", new_size);
2813         if (new_size > old_size) {
2814                 trans = btrfs_start_transaction(root, 1);
2815                 ret = btrfs_grow_extent_tree(trans, root, new_size);
2816                 btrfs_commit_transaction(trans, root);
2817         } else {
2818                 ret = btrfs_shrink_extent_tree(root, new_size);
2819         }
2820
2821 out_unlock:
2822         mutex_unlock(&root->fs_info->fs_mutex);
2823 out:
2824         kfree(vol_args);
2825         return ret;
2826 }
2827
2828 static int noinline btrfs_ioctl_snap_create(struct btrfs_root *root,
2829                                             void __user *arg)
2830 {
2831         struct btrfs_ioctl_vol_args *vol_args;
2832         struct btrfs_dir_item *di;
2833         struct btrfs_path *path;
2834         u64 root_dirid;
2835         int namelen;
2836         int ret;
2837
2838         vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS);
2839
2840         if (!vol_args)
2841                 return -ENOMEM;
2842
2843         if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
2844                 ret = -EFAULT;
2845                 goto out;
2846         }
2847
2848         namelen = strlen(vol_args->name);
2849         if (namelen > BTRFS_VOL_NAME_MAX) {
2850                 ret = -EINVAL;
2851                 goto out;
2852         }
2853         if (strchr(vol_args->name, '/')) {
2854                 ret = -EINVAL;
2855                 goto out;
2856         }
2857
2858         path = btrfs_alloc_path();
2859         if (!path) {
2860                 ret = -ENOMEM;
2861                 goto out;
2862         }
2863
2864         root_dirid = root->fs_info->sb->s_root->d_inode->i_ino,
2865         mutex_lock(&root->fs_info->fs_mutex);
2866         di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root,
2867                             path, root_dirid,
2868                             vol_args->name, namelen, 0);
2869         mutex_unlock(&root->fs_info->fs_mutex);
2870         btrfs_free_path(path);
2871
2872         if (di && !IS_ERR(di)) {
2873                 ret = -EEXIST;
2874                 goto out;
2875         }
2876
2877         if (IS_ERR(di)) {
2878                 ret = PTR_ERR(di);
2879                 goto out;
2880         }
2881
2882         if (root == root->fs_info->tree_root)
2883                 ret = create_subvol(root, vol_args->name, namelen);
2884         else
2885                 ret = create_snapshot(root, vol_args->name, namelen);
2886 out:
2887         kfree(vol_args);
2888         return ret;
2889 }
2890
2891 static int btrfs_ioctl_defrag(struct file *file)
2892 {
2893         struct inode *inode = fdentry(file)->d_inode;
2894         struct btrfs_root *root = BTRFS_I(inode)->root;
2895
2896         switch (inode->i_mode & S_IFMT) {
2897         case S_IFDIR:
2898                 mutex_lock(&root->fs_info->fs_mutex);
2899                 btrfs_defrag_root(root, 0);
2900                 btrfs_defrag_root(root->fs_info->extent_root, 0);
2901                 mutex_unlock(&root->fs_info->fs_mutex);
2902                 break;
2903         case S_IFREG:
2904                 btrfs_defrag_file(file);
2905                 break;
2906         }
2907
2908         return 0;
2909 }
2910
2911 long btrfs_ioctl(struct file *file, unsigned int
2912                 cmd, unsigned long arg)
2913 {
2914         struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
2915
2916         switch (cmd) {
2917         case BTRFS_IOC_SNAP_CREATE:
2918                 return btrfs_ioctl_snap_create(root, (void __user *)arg);
2919         case BTRFS_IOC_DEFRAG:
2920                 return btrfs_ioctl_defrag(file);
2921         case BTRFS_IOC_RESIZE:
2922                 return btrfs_ioctl_resize(root, (void __user *)arg);
2923         }
2924
2925         return -ENOTTY;
2926 }
2927
2928 /*
2929  * Called inside transaction, so use GFP_NOFS
2930  */
2931 struct inode *btrfs_alloc_inode(struct super_block *sb)
2932 {
2933         struct btrfs_inode *ei;
2934
2935         ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS);
2936         if (!ei)
2937                 return NULL;
2938         ei->last_trans = 0;
2939         ei->ordered_trans = 0;
2940         return &ei->vfs_inode;
2941 }
2942
2943 void btrfs_destroy_inode(struct inode *inode)
2944 {
2945         WARN_ON(!list_empty(&inode->i_dentry));
2946         WARN_ON(inode->i_data.nrpages);
2947
2948         btrfs_drop_extent_cache(inode, 0, (u64)-1);
2949         kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
2950 }
2951
2952 #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
2953 static void init_once(struct kmem_cache * cachep, void *foo)
2954 #else
2955 static void init_once(void * foo, struct kmem_cache * cachep,
2956                       unsigned long flags)
2957 #endif
2958 {
2959         struct btrfs_inode *ei = (struct btrfs_inode *) foo;
2960
2961         inode_init_once(&ei->vfs_inode);
2962 }
2963
2964 void btrfs_destroy_cachep(void)
2965 {
2966         if (btrfs_inode_cachep)
2967                 kmem_cache_destroy(btrfs_inode_cachep);
2968         if (btrfs_trans_handle_cachep)
2969                 kmem_cache_destroy(btrfs_trans_handle_cachep);
2970         if (btrfs_transaction_cachep)
2971                 kmem_cache_destroy(btrfs_transaction_cachep);
2972         if (btrfs_bit_radix_cachep)
2973                 kmem_cache_destroy(btrfs_bit_radix_cachep);
2974         if (btrfs_path_cachep)
2975                 kmem_cache_destroy(btrfs_path_cachep);
2976 }
2977
2978 struct kmem_cache *btrfs_cache_create(const char *name, size_t size,
2979                                        unsigned long extra_flags,
2980 #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
2981                                        void (*ctor)(struct kmem_cache *, void *)
2982 #else
2983                                        void (*ctor)(void *, struct kmem_cache *,
2984                                                     unsigned long)
2985 #endif
2986                                      )
2987 {
2988         return kmem_cache_create(name, size, 0, (SLAB_RECLAIM_ACCOUNT |
2989                                  SLAB_MEM_SPREAD | extra_flags), ctor
2990 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)
2991                                  ,NULL
2992 #endif
2993                                 );
2994 }
2995
2996 int btrfs_init_cachep(void)
2997 {
2998         btrfs_inode_cachep = btrfs_cache_create("btrfs_inode_cache",
2999                                           sizeof(struct btrfs_inode),
3000                                           0, init_once);
3001         if (!btrfs_inode_cachep)
3002                 goto fail;
3003         btrfs_trans_handle_cachep =
3004                         btrfs_cache_create("btrfs_trans_handle_cache",
3005                                            sizeof(struct btrfs_trans_handle),
3006                                            0, NULL);
3007         if (!btrfs_trans_handle_cachep)
3008                 goto fail;
3009         btrfs_transaction_cachep = btrfs_cache_create("btrfs_transaction_cache",
3010                                              sizeof(struct btrfs_transaction),
3011                                              0, NULL);
3012         if (!btrfs_transaction_cachep)
3013                 goto fail;
3014         btrfs_path_cachep = btrfs_cache_create("btrfs_path_cache",
3015                                          sizeof(struct btrfs_path),
3016                                          0, NULL);
3017         if (!btrfs_path_cachep)
3018                 goto fail;
3019         btrfs_bit_radix_cachep = btrfs_cache_create("btrfs_radix", 256,
3020                                               SLAB_DESTROY_BY_RCU, NULL);
3021         if (!btrfs_bit_radix_cachep)
3022                 goto fail;
3023         return 0;
3024 fail:
3025         btrfs_destroy_cachep();
3026         return -ENOMEM;
3027 }
3028
3029 static int btrfs_getattr(struct vfsmount *mnt,
3030                          struct dentry *dentry, struct kstat *stat)
3031 {
3032         struct inode *inode = dentry->d_inode;
3033         generic_fillattr(inode, stat);
3034         stat->blksize = PAGE_CACHE_SIZE;
3035         stat->blocks = inode->i_blocks + (BTRFS_I(inode)->delalloc_bytes >> 9);
3036         return 0;
3037 }
3038
3039 static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry,
3040                            struct inode * new_dir,struct dentry *new_dentry)
3041 {
3042         struct btrfs_trans_handle *trans;
3043         struct btrfs_root *root = BTRFS_I(old_dir)->root;
3044         struct inode *new_inode = new_dentry->d_inode;
3045         struct inode *old_inode = old_dentry->d_inode;
3046         struct timespec ctime = CURRENT_TIME;
3047         struct btrfs_path *path;
3048         int ret;
3049
3050         if (S_ISDIR(old_inode->i_mode) && new_inode &&
3051             new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) {
3052                 return -ENOTEMPTY;
3053         }
3054
3055         mutex_lock(&root->fs_info->fs_mutex);
3056         ret = btrfs_check_free_space(root, 1, 0);
3057         if (ret)
3058                 goto out_unlock;
3059
3060         trans = btrfs_start_transaction(root, 1);
3061
3062         btrfs_set_trans_block_group(trans, new_dir);
3063         path = btrfs_alloc_path();
3064         if (!path) {
3065                 ret = -ENOMEM;
3066                 goto out_fail;
3067         }
3068
3069         old_dentry->d_inode->i_nlink++;
3070         old_dir->i_ctime = old_dir->i_mtime = ctime;
3071         new_dir->i_ctime = new_dir->i_mtime = ctime;
3072         old_inode->i_ctime = ctime;
3073
3074         ret = btrfs_unlink_trans(trans, root, old_dir, old_dentry);
3075         if (ret)
3076                 goto out_fail;
3077
3078         if (new_inode) {
3079                 new_inode->i_ctime = CURRENT_TIME;
3080                 ret = btrfs_unlink_trans(trans, root, new_dir, new_dentry);
3081                 if (ret)
3082                         goto out_fail;
3083         }
3084         ret = btrfs_add_link(trans, new_dentry, old_inode, 1);
3085         if (ret)
3086                 goto out_fail;
3087
3088 out_fail:
3089         btrfs_free_path(path);
3090         btrfs_end_transaction(trans, root);
3091 out_unlock:
3092         mutex_unlock(&root->fs_info->fs_mutex);
3093         return ret;
3094 }
3095
3096 static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
3097                          const char *symname)
3098 {
3099         struct btrfs_trans_handle *trans;
3100         struct btrfs_root *root = BTRFS_I(dir)->root;
3101         struct btrfs_path *path;
3102         struct btrfs_key key;
3103         struct inode *inode = NULL;
3104         int err;
3105         int drop_inode = 0;
3106         u64 objectid;
3107         int name_len;
3108         int datasize;
3109         unsigned long ptr;
3110         struct btrfs_file_extent_item *ei;
3111         struct extent_buffer *leaf;
3112         unsigned long nr = 0;
3113
3114         name_len = strlen(symname) + 1;
3115         if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root))
3116                 return -ENAMETOOLONG;
3117
3118         mutex_lock(&root->fs_info->fs_mutex);
3119         err = btrfs_check_free_space(root, 1, 0);
3120         if (err)
3121                 goto out_fail;
3122
3123         trans = btrfs_start_transaction(root, 1);
3124         btrfs_set_trans_block_group(trans, dir);
3125
3126         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
3127         if (err) {
3128                 err = -ENOSPC;
3129                 goto out_unlock;
3130         }
3131
3132         inode = btrfs_new_inode(trans, root, dentry->d_name.name,
3133                                 dentry->d_name.len,
3134                                 dentry->d_parent->d_inode->i_ino, objectid,
3135                                 BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO);
3136         err = PTR_ERR(inode);
3137         if (IS_ERR(inode))
3138                 goto out_unlock;
3139
3140         btrfs_set_trans_block_group(trans, inode);
3141         err = btrfs_add_nondir(trans, dentry, inode, 0);
3142         if (err)
3143                 drop_inode = 1;
3144         else {
3145                 inode->i_mapping->a_ops = &btrfs_aops;
3146                 inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
3147                 inode->i_fop = &btrfs_file_operations;
3148                 inode->i_op = &btrfs_file_inode_operations;
3149                 extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS);
3150                 extent_io_tree_init(&BTRFS_I(inode)->io_tree,
3151                                      inode->i_mapping, GFP_NOFS);
3152                 extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree,
3153                                      inode->i_mapping, GFP_NOFS);
3154                 BTRFS_I(inode)->delalloc_bytes = 0;
3155                 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
3156         }
3157         dir->i_sb->s_dirt = 1;
3158         btrfs_update_inode_block_group(trans, inode);
3159         btrfs_update_inode_block_group(trans, dir);
3160         if (drop_inode)
3161                 goto out_unlock;
3162
3163         path = btrfs_alloc_path();
3164         BUG_ON(!path);
3165         key.objectid = inode->i_ino;
3166         key.offset = 0;
3167         btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
3168         datasize = btrfs_file_extent_calc_inline_size(name_len);
3169         err = btrfs_insert_empty_item(trans, root, path, &key,
3170                                       datasize);
3171         if (err) {
3172                 drop_inode = 1;
3173                 goto out_unlock;
3174         }
3175         leaf = path->nodes[0];
3176         ei = btrfs_item_ptr(leaf, path->slots[0],
3177                             struct btrfs_file_extent_item);
3178         btrfs_set_file_extent_generation(leaf, ei, trans->transid);
3179         btrfs_set_file_extent_type(leaf, ei,
3180                                    BTRFS_FILE_EXTENT_INLINE);
3181         ptr = btrfs_file_extent_inline_start(ei);
3182         write_extent_buffer(leaf, symname, ptr, name_len);
3183         btrfs_mark_buffer_dirty(leaf);
3184         btrfs_free_path(path);
3185
3186         inode->i_op = &btrfs_symlink_inode_operations;
3187         inode->i_mapping->a_ops = &btrfs_symlink_aops;
3188         inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
3189         inode->i_size = name_len - 1;
3190         err = btrfs_update_inode(trans, root, inode);
3191         if (err)
3192                 drop_inode = 1;
3193
3194 out_unlock:
3195         nr = trans->blocks_used;
3196         btrfs_end_transaction(trans, root);
3197 out_fail:
3198         mutex_unlock(&root->fs_info->fs_mutex);
3199         if (drop_inode) {
3200                 inode_dec_link_count(inode);
3201                 iput(inode);
3202         }
3203         btrfs_btree_balance_dirty(root, nr);
3204         btrfs_throttle(root);
3205         return err;
3206 }
3207
3208 static int btrfs_permission(struct inode *inode, int mask,
3209                             struct nameidata *nd)
3210 {
3211         if (btrfs_test_flag(inode, READONLY) && (mask & MAY_WRITE))
3212                 return -EACCES;
3213         return generic_permission(inode, mask, NULL);
3214 }
3215
3216 static struct inode_operations btrfs_dir_inode_operations = {
3217         .lookup         = btrfs_lookup,
3218         .create         = btrfs_create,
3219         .unlink         = btrfs_unlink,
3220         .link           = btrfs_link,
3221         .mkdir          = btrfs_mkdir,
3222         .rmdir          = btrfs_rmdir,
3223         .rename         = btrfs_rename,
3224         .symlink        = btrfs_symlink,
3225         .setattr        = btrfs_setattr,
3226         .mknod          = btrfs_mknod,
3227         .setxattr       = generic_setxattr,
3228         .getxattr       = generic_getxattr,
3229         .listxattr      = btrfs_listxattr,
3230         .removexattr    = generic_removexattr,
3231         .permission     = btrfs_permission,
3232 };
3233 static struct inode_operations btrfs_dir_ro_inode_operations = {
3234         .lookup         = btrfs_lookup,
3235         .permission     = btrfs_permission,
3236 };
3237 static struct file_operations btrfs_dir_file_operations = {
3238         .llseek         = generic_file_llseek,
3239         .read           = generic_read_dir,
3240         .readdir        = btrfs_readdir,
3241         .unlocked_ioctl = btrfs_ioctl,
3242 #ifdef CONFIG_COMPAT
3243         .compat_ioctl   = btrfs_ioctl,
3244 #endif
3245 };
3246
3247 static struct extent_io_ops btrfs_extent_io_ops = {
3248         .fill_delalloc = run_delalloc_range,
3249         .submit_bio_hook = btrfs_submit_bio_hook,
3250         .merge_bio_hook = btrfs_merge_bio_hook,
3251         .readpage_io_hook = btrfs_readpage_io_hook,
3252         .readpage_end_io_hook = btrfs_readpage_end_io_hook,
3253         .readpage_io_failed_hook = btrfs_readpage_io_failed_hook,
3254         .set_bit_hook = btrfs_set_bit_hook,
3255         .clear_bit_hook = btrfs_clear_bit_hook,
3256 };
3257
3258 static struct address_space_operations btrfs_aops = {
3259         .readpage       = btrfs_readpage,
3260         .writepage      = btrfs_writepage,
3261         .writepages     = btrfs_writepages,
3262         .readpages      = btrfs_readpages,
3263         .sync_page      = block_sync_page,
3264         .bmap           = btrfs_bmap,
3265         .direct_IO      = btrfs_direct_IO,
3266         .invalidatepage = btrfs_invalidatepage,
3267         .releasepage    = btrfs_releasepage,
3268         .set_page_dirty = __set_page_dirty_nobuffers,
3269 };
3270
3271 static struct address_space_operations btrfs_symlink_aops = {
3272         .readpage       = btrfs_readpage,
3273         .writepage      = btrfs_writepage,
3274         .invalidatepage = btrfs_invalidatepage,
3275         .releasepage    = btrfs_releasepage,
3276 };
3277
3278 static struct inode_operations btrfs_file_inode_operations = {
3279         .truncate       = btrfs_truncate,
3280         .getattr        = btrfs_getattr,
3281         .setattr        = btrfs_setattr,
3282         .setxattr       = generic_setxattr,
3283         .getxattr       = generic_getxattr,
3284         .listxattr      = btrfs_listxattr,
3285         .removexattr    = generic_removexattr,
3286         .permission     = btrfs_permission,
3287 };
3288 static struct inode_operations btrfs_special_inode_operations = {
3289         .getattr        = btrfs_getattr,
3290         .setattr        = btrfs_setattr,
3291         .permission     = btrfs_permission,
3292 };
3293 static struct inode_operations btrfs_symlink_inode_operations = {
3294         .readlink       = generic_readlink,
3295         .follow_link    = page_follow_link_light,
3296         .put_link       = page_put_link,
3297         .permission     = btrfs_permission,
3298 };