]> git.karo-electronics.de Git - mv-sheeva.git/blob - fs/btrfs/inode.c
Btrfs: Retry metadata reads in the face of checksum failures
[mv-sheeva.git] / fs / btrfs / inode.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <linux/bio.h>
20 #include <linux/buffer_head.h>
21 #include <linux/fs.h>
22 #include <linux/pagemap.h>
23 #include <linux/highmem.h>
24 #include <linux/time.h>
25 #include <linux/init.h>
26 #include <linux/string.h>
27 #include <linux/smp_lock.h>
28 #include <linux/backing-dev.h>
29 #include <linux/mpage.h>
30 #include <linux/swap.h>
31 #include <linux/writeback.h>
32 #include <linux/statfs.h>
33 #include <linux/compat.h>
34 #include <linux/bit_spinlock.h>
35 #include <linux/version.h>
36 #include <linux/xattr.h>
37 #include "ctree.h"
38 #include "disk-io.h"
39 #include "transaction.h"
40 #include "btrfs_inode.h"
41 #include "ioctl.h"
42 #include "print-tree.h"
43 #include "volumes.h"
44
/*
 * Pair of values that identifies one inode: an inode number and the
 * btrfs root it belongs to.
 * NOTE(review): the name suggests this is the argument bundle handed to an
 * iget-style lookup; the user is outside this part of the file -- confirm.
 */
struct btrfs_iget_args {
        /* inode number */
        u64 ino;
        /* root the inode number is relative to */
        struct btrfs_root *root;
};
49
50 static struct inode_operations btrfs_dir_inode_operations;
51 static struct inode_operations btrfs_symlink_inode_operations;
52 static struct inode_operations btrfs_dir_ro_inode_operations;
53 static struct inode_operations btrfs_special_inode_operations;
54 static struct inode_operations btrfs_file_inode_operations;
55 static struct address_space_operations btrfs_aops;
56 static struct address_space_operations btrfs_symlink_aops;
57 static struct file_operations btrfs_dir_file_operations;
58 static struct extent_io_ops btrfs_extent_io_ops;
59
60 static struct kmem_cache *btrfs_inode_cachep;
61 struct kmem_cache *btrfs_trans_handle_cachep;
62 struct kmem_cache *btrfs_transaction_cachep;
63 struct kmem_cache *btrfs_bit_radix_cachep;
64 struct kmem_cache *btrfs_path_cachep;
65
66 #define S_SHIFT 12
67 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
68         [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
69         [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
70         [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
71         [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
72         [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
73         [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
74         [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
75 };
76
77 int btrfs_check_free_space(struct btrfs_root *root, u64 num_required,
78                            int for_del)
79 {
80         u64 total = btrfs_super_total_bytes(&root->fs_info->super_copy);
81         u64 used = btrfs_super_bytes_used(&root->fs_info->super_copy);
82         u64 thresh;
83         int ret = 0;
84
85         if (for_del)
86                 thresh = total * 90;
87         else
88                 thresh = total * 85;
89
90         do_div(thresh, 100);
91
92         spin_lock(&root->fs_info->delalloc_lock);
93         if (used + root->fs_info->delalloc_bytes + num_required > thresh)
94                 ret = -ENOSPC;
95         spin_unlock(&root->fs_info->delalloc_lock);
96         return ret;
97 }
98
99 static int cow_file_range(struct inode *inode, u64 start, u64 end)
100 {
101         struct btrfs_root *root = BTRFS_I(inode)->root;
102         struct btrfs_trans_handle *trans;
103         u64 alloc_hint = 0;
104         u64 num_bytes;
105         u64 cur_alloc_size;
106         u64 blocksize = root->sectorsize;
107         u64 orig_start = start;
108         u64 orig_num_bytes;
109         struct btrfs_key ins;
110         int ret;
111
112         trans = btrfs_start_transaction(root, 1);
113         BUG_ON(!trans);
114         btrfs_set_trans_block_group(trans, inode);
115
116         num_bytes = (end - start + blocksize) & ~(blocksize - 1);
117         num_bytes = max(blocksize,  num_bytes);
118         ret = btrfs_drop_extents(trans, root, inode,
119                                  start, start + num_bytes, start, &alloc_hint);
120         orig_num_bytes = num_bytes;
121
122         if (alloc_hint == EXTENT_MAP_INLINE)
123                 goto out;
124
125         while(num_bytes > 0) {
126                 cur_alloc_size = min(num_bytes, root->fs_info->max_extent);
127                 ret = btrfs_alloc_extent(trans, root, cur_alloc_size,
128                                          root->root_key.objectid,
129                                          trans->transid,
130                                          inode->i_ino, start, 0,
131                                          alloc_hint, (u64)-1, &ins, 1);
132                 if (ret) {
133                         WARN_ON(1);
134                         goto out;
135                 }
136                 ret = btrfs_insert_file_extent(trans, root, inode->i_ino,
137                                                start, ins.objectid, ins.offset,
138                                                ins.offset);
139                 inode->i_blocks += ins.offset >> 9;
140                 btrfs_check_file(root, inode);
141                 num_bytes -= cur_alloc_size;
142                 alloc_hint = ins.objectid + ins.offset;
143                 start += cur_alloc_size;
144         }
145         btrfs_drop_extent_cache(inode, orig_start,
146                                 orig_start + orig_num_bytes - 1);
147         btrfs_add_ordered_inode(inode);
148         btrfs_update_inode(trans, root, inode);
149 out:
150         btrfs_end_transaction(trans, root);
151         return ret;
152 }
153
154 static int run_delalloc_nocow(struct inode *inode, u64 start, u64 end)
155 {
156         u64 extent_start;
157         u64 extent_end;
158         u64 bytenr;
159         u64 cow_end;
160         u64 loops = 0;
161         u64 total_fs_bytes;
162         struct btrfs_root *root = BTRFS_I(inode)->root;
163         struct extent_buffer *leaf;
164         int found_type;
165         struct btrfs_path *path;
166         struct btrfs_file_extent_item *item;
167         int ret;
168         int err;
169         struct btrfs_key found_key;
170
171         total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
172         path = btrfs_alloc_path();
173         BUG_ON(!path);
174 again:
175         ret = btrfs_lookup_file_extent(NULL, root, path,
176                                        inode->i_ino, start, 0);
177         if (ret < 0) {
178                 btrfs_free_path(path);
179                 return ret;
180         }
181
182         cow_end = end;
183         if (ret != 0) {
184                 if (path->slots[0] == 0)
185                         goto not_found;
186                 path->slots[0]--;
187         }
188
189         leaf = path->nodes[0];
190         item = btrfs_item_ptr(leaf, path->slots[0],
191                               struct btrfs_file_extent_item);
192
193         /* are we inside the extent that was found? */
194         btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
195         found_type = btrfs_key_type(&found_key);
196         if (found_key.objectid != inode->i_ino ||
197             found_type != BTRFS_EXTENT_DATA_KEY) {
198                 goto not_found;
199         }
200
201         found_type = btrfs_file_extent_type(leaf, item);
202         extent_start = found_key.offset;
203         if (found_type == BTRFS_FILE_EXTENT_REG) {
204                 u64 extent_num_bytes;
205
206                 extent_num_bytes = btrfs_file_extent_num_bytes(leaf, item);
207                 extent_end = extent_start + extent_num_bytes;
208                 err = 0;
209
210                 if (loops && start != extent_start)
211                         goto not_found;
212
213                 if (start < extent_start || start >= extent_end)
214                         goto not_found;
215
216                 cow_end = min(end, extent_end - 1);
217                 bytenr = btrfs_file_extent_disk_bytenr(leaf, item);
218                 if (bytenr == 0)
219                         goto not_found;
220
221                 /*
222                  * we may be called by the resizer, make sure we're inside
223                  * the limits of the FS
224                  */
225                 if (bytenr + extent_num_bytes > total_fs_bytes)
226                         goto not_found;
227
228                 if (btrfs_count_snapshots_in_path(root, path, bytenr) != 1) {
229                         goto not_found;
230                 }
231
232                 start = extent_end;
233         } else {
234                 goto not_found;
235         }
236 loop:
237         if (start > end) {
238                 btrfs_free_path(path);
239                 return 0;
240         }
241         btrfs_release_path(root, path);
242         loops++;
243         goto again;
244
245 not_found:
246         cow_file_range(inode, start, cow_end);
247         start = cow_end + 1;
248         goto loop;
249 }
250
251 static int run_delalloc_range(struct inode *inode, u64 start, u64 end)
252 {
253         struct btrfs_root *root = BTRFS_I(inode)->root;
254         int ret;
255         mutex_lock(&root->fs_info->fs_mutex);
256         if (btrfs_test_opt(root, NODATACOW) ||
257             btrfs_test_flag(inode, NODATACOW))
258                 ret = run_delalloc_nocow(inode, start, end);
259         else
260                 ret = cow_file_range(inode, start, end);
261
262         mutex_unlock(&root->fs_info->fs_mutex);
263         return ret;
264 }
265
266 int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end,
267                        unsigned long old, unsigned long bits)
268 {
269         if (!(old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) {
270                 struct btrfs_root *root = BTRFS_I(inode)->root;
271                 spin_lock(&root->fs_info->delalloc_lock);
272                 BTRFS_I(inode)->delalloc_bytes += end - start + 1;
273                 root->fs_info->delalloc_bytes += end - start + 1;
274                 spin_unlock(&root->fs_info->delalloc_lock);
275         }
276         return 0;
277 }
278
279 int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end,
280                          unsigned long old, unsigned long bits)
281 {
282         if ((old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) {
283                 struct btrfs_root *root = BTRFS_I(inode)->root;
284                 spin_lock(&root->fs_info->delalloc_lock);
285                 if (end - start + 1 > root->fs_info->delalloc_bytes) {
286                         printk("warning: delalloc account %Lu %Lu\n",
287                                end - start + 1, root->fs_info->delalloc_bytes);
288                         root->fs_info->delalloc_bytes = 0;
289                         BTRFS_I(inode)->delalloc_bytes = 0;
290                 } else {
291                         root->fs_info->delalloc_bytes -= end - start + 1;
292                         BTRFS_I(inode)->delalloc_bytes -= end - start + 1;
293                 }
294                 spin_unlock(&root->fs_info->delalloc_lock);
295         }
296         return 0;
297 }
298
299 int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
300                          size_t size, struct bio *bio)
301 {
302         struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
303         struct btrfs_mapping_tree *map_tree;
304         u64 logical = bio->bi_sector << 9;
305         u64 length = 0;
306         u64 map_length;
307         struct bio_vec *bvec;
308         int i;
309         int ret;
310
311         bio_for_each_segment(bvec, bio, i) {
312                 length += bvec->bv_len;
313         }
314         map_tree = &root->fs_info->mapping_tree;
315         map_length = length;
316         ret = btrfs_map_block(map_tree, READ, logical,
317                               &map_length, NULL, 0);
318
319         if (map_length < length + size) {
320                 return 1;
321         }
322         return 0;
323 }
324
325 int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio,
326                           int mirror_num)
327 {
328         struct btrfs_root *root = BTRFS_I(inode)->root;
329         struct btrfs_trans_handle *trans;
330         int ret = 0;
331
332         if (!(rw & (1 << BIO_RW))) {
333                 ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0);
334                 BUG_ON(ret);
335                 goto mapit;
336         }
337
338         if (btrfs_test_opt(root, NODATASUM) ||
339             btrfs_test_flag(inode, NODATASUM)) {
340                 goto mapit;
341         }
342
343         mutex_lock(&root->fs_info->fs_mutex);
344         trans = btrfs_start_transaction(root, 1);
345         btrfs_set_trans_block_group(trans, inode);
346         btrfs_csum_file_blocks(trans, root, inode, bio);
347         ret = btrfs_end_transaction(trans, root);
348         BUG_ON(ret);
349         mutex_unlock(&root->fs_info->fs_mutex);
350 mapit:
351         return btrfs_map_bio(root, rw, bio, mirror_num);
352 }
353
354 int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end)
355 {
356         int ret = 0;
357         struct inode *inode = page->mapping->host;
358         struct btrfs_root *root = BTRFS_I(inode)->root;
359         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
360         struct btrfs_csum_item *item;
361         struct btrfs_path *path = NULL;
362         u32 csum;
363         if (btrfs_test_opt(root, NODATASUM) ||
364             btrfs_test_flag(inode, NODATASUM))
365                 return 0;
366         mutex_lock(&root->fs_info->fs_mutex);
367         path = btrfs_alloc_path();
368         item = btrfs_lookup_csum(NULL, root, path, inode->i_ino, start, 0);
369         if (IS_ERR(item)) {
370                 ret = PTR_ERR(item);
371                 /* a csum that isn't present is a preallocated region. */
372                 if (ret == -ENOENT || ret == -EFBIG)
373                         ret = 0;
374                 csum = 0;
375                 printk("no csum found for inode %lu start %Lu\n", inode->i_ino, start);
376                 goto out;
377         }
378         read_extent_buffer(path->nodes[0], &csum, (unsigned long)item,
379                            BTRFS_CRC32_SIZE);
380         set_state_private(io_tree, start, csum);
381 out:
382         if (path)
383                 btrfs_free_path(path);
384         mutex_unlock(&root->fs_info->fs_mutex);
385         return ret;
386 }
387
388 int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
389                                struct extent_state *state)
390 {
391         size_t offset = start - ((u64)page->index << PAGE_CACHE_SHIFT);
392         struct inode *inode = page->mapping->host;
393         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
394         char *kaddr;
395         u64 private = ~(u32)0;
396         int ret;
397         struct btrfs_root *root = BTRFS_I(inode)->root;
398         u32 csum = ~(u32)0;
399         unsigned long flags;
400
401         if (btrfs_test_opt(root, NODATASUM) ||
402             btrfs_test_flag(inode, NODATASUM))
403                 return 0;
404         if (state && state->start == start) {
405                 private = state->private;
406                 ret = 0;
407         } else {
408                 ret = get_state_private(io_tree, start, &private);
409         }
410         local_irq_save(flags);
411         kaddr = kmap_atomic(page, KM_IRQ0);
412         if (ret) {
413                 goto zeroit;
414         }
415         csum = btrfs_csum_data(root, kaddr + offset, csum,  end - start + 1);
416         btrfs_csum_final(csum, (char *)&csum);
417         if (csum != private) {
418                 goto zeroit;
419         }
420         kunmap_atomic(kaddr, KM_IRQ0);
421         local_irq_restore(flags);
422         return 0;
423
424 zeroit:
425         printk("btrfs csum failed ino %lu off %llu csum %u private %Lu\n",
426                page->mapping->host->i_ino, (unsigned long long)start, csum,
427                private);
428         memset(kaddr + offset, 1, end - start + 1);
429         flush_dcache_page(page);
430         kunmap_atomic(kaddr, KM_IRQ0);
431         local_irq_restore(flags);
432         return 0;
433 }
434
435 void btrfs_read_locked_inode(struct inode *inode)
436 {
437         struct btrfs_path *path;
438         struct extent_buffer *leaf;
439         struct btrfs_inode_item *inode_item;
440         struct btrfs_timespec *tspec;
441         struct btrfs_root *root = BTRFS_I(inode)->root;
442         struct btrfs_key location;
443         u64 alloc_group_block;
444         u32 rdev;
445         int ret;
446
447         path = btrfs_alloc_path();
448         BUG_ON(!path);
449         mutex_lock(&root->fs_info->fs_mutex);
450         memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
451
452         ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
453         if (ret)
454                 goto make_bad;
455
456         leaf = path->nodes[0];
457         inode_item = btrfs_item_ptr(leaf, path->slots[0],
458                                     struct btrfs_inode_item);
459
460         inode->i_mode = btrfs_inode_mode(leaf, inode_item);
461         inode->i_nlink = btrfs_inode_nlink(leaf, inode_item);
462         inode->i_uid = btrfs_inode_uid(leaf, inode_item);
463         inode->i_gid = btrfs_inode_gid(leaf, inode_item);
464         inode->i_size = btrfs_inode_size(leaf, inode_item);
465
466         tspec = btrfs_inode_atime(inode_item);
467         inode->i_atime.tv_sec = btrfs_timespec_sec(leaf, tspec);
468         inode->i_atime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
469
470         tspec = btrfs_inode_mtime(inode_item);
471         inode->i_mtime.tv_sec = btrfs_timespec_sec(leaf, tspec);
472         inode->i_mtime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
473
474         tspec = btrfs_inode_ctime(inode_item);
475         inode->i_ctime.tv_sec = btrfs_timespec_sec(leaf, tspec);
476         inode->i_ctime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
477
478         inode->i_blocks = btrfs_inode_nblocks(leaf, inode_item);
479         inode->i_generation = btrfs_inode_generation(leaf, inode_item);
480         inode->i_rdev = 0;
481         rdev = btrfs_inode_rdev(leaf, inode_item);
482
483         alloc_group_block = btrfs_inode_block_group(leaf, inode_item);
484         BTRFS_I(inode)->block_group = btrfs_lookup_block_group(root->fs_info,
485                                                        alloc_group_block);
486         BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);
487         if (!BTRFS_I(inode)->block_group) {
488                 BTRFS_I(inode)->block_group = btrfs_find_block_group(root,
489                                                  NULL, 0,
490                                                  BTRFS_BLOCK_GROUP_METADATA, 0);
491         }
492         btrfs_free_path(path);
493         inode_item = NULL;
494
495         mutex_unlock(&root->fs_info->fs_mutex);
496
497         switch (inode->i_mode & S_IFMT) {
498         case S_IFREG:
499                 inode->i_mapping->a_ops = &btrfs_aops;
500                 inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
501                 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
502                 inode->i_fop = &btrfs_file_operations;
503                 inode->i_op = &btrfs_file_inode_operations;
504                 break;
505         case S_IFDIR:
506                 inode->i_fop = &btrfs_dir_file_operations;
507                 if (root == root->fs_info->tree_root)
508                         inode->i_op = &btrfs_dir_ro_inode_operations;
509                 else
510                         inode->i_op = &btrfs_dir_inode_operations;
511                 break;
512         case S_IFLNK:
513                 inode->i_op = &btrfs_symlink_inode_operations;
514                 inode->i_mapping->a_ops = &btrfs_symlink_aops;
515                 inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
516                 break;
517         default:
518                 init_special_inode(inode, inode->i_mode, rdev);
519                 break;
520         }
521         return;
522
523 make_bad:
524         btrfs_release_path(root, path);
525         btrfs_free_path(path);
526         mutex_unlock(&root->fs_info->fs_mutex);
527         make_bad_inode(inode);
528 }
529
530 static void fill_inode_item(struct extent_buffer *leaf,
531                             struct btrfs_inode_item *item,
532                             struct inode *inode)
533 {
534         btrfs_set_inode_uid(leaf, item, inode->i_uid);
535         btrfs_set_inode_gid(leaf, item, inode->i_gid);
536         btrfs_set_inode_size(leaf, item, inode->i_size);
537         btrfs_set_inode_mode(leaf, item, inode->i_mode);
538         btrfs_set_inode_nlink(leaf, item, inode->i_nlink);
539
540         btrfs_set_timespec_sec(leaf, btrfs_inode_atime(item),
541                                inode->i_atime.tv_sec);
542         btrfs_set_timespec_nsec(leaf, btrfs_inode_atime(item),
543                                 inode->i_atime.tv_nsec);
544
545         btrfs_set_timespec_sec(leaf, btrfs_inode_mtime(item),
546                                inode->i_mtime.tv_sec);
547         btrfs_set_timespec_nsec(leaf, btrfs_inode_mtime(item),
548                                 inode->i_mtime.tv_nsec);
549
550         btrfs_set_timespec_sec(leaf, btrfs_inode_ctime(item),
551                                inode->i_ctime.tv_sec);
552         btrfs_set_timespec_nsec(leaf, btrfs_inode_ctime(item),
553                                 inode->i_ctime.tv_nsec);
554
555         btrfs_set_inode_nblocks(leaf, item, inode->i_blocks);
556         btrfs_set_inode_generation(leaf, item, inode->i_generation);
557         btrfs_set_inode_rdev(leaf, item, inode->i_rdev);
558         btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags);
559         btrfs_set_inode_block_group(leaf, item,
560                                     BTRFS_I(inode)->block_group->key.objectid);
561 }
562
563 int btrfs_update_inode(struct btrfs_trans_handle *trans,
564                               struct btrfs_root *root,
565                               struct inode *inode)
566 {
567         struct btrfs_inode_item *inode_item;
568         struct btrfs_path *path;
569         struct extent_buffer *leaf;
570         int ret;
571
572         path = btrfs_alloc_path();
573         BUG_ON(!path);
574         ret = btrfs_lookup_inode(trans, root, path,
575                                  &BTRFS_I(inode)->location, 1);
576         if (ret) {
577                 if (ret > 0)
578                         ret = -ENOENT;
579                 goto failed;
580         }
581
582         leaf = path->nodes[0];
583         inode_item = btrfs_item_ptr(leaf, path->slots[0],
584                                   struct btrfs_inode_item);
585
586         fill_inode_item(leaf, inode_item, inode);
587         btrfs_mark_buffer_dirty(leaf);
588         btrfs_set_inode_last_trans(trans, inode);
589         ret = 0;
590 failed:
591         btrfs_release_path(root, path);
592         btrfs_free_path(path);
593         return ret;
594 }
595
596
597 static int btrfs_unlink_trans(struct btrfs_trans_handle *trans,
598                               struct btrfs_root *root,
599                               struct inode *dir,
600                               struct dentry *dentry)
601 {
602         struct btrfs_path *path;
603         const char *name = dentry->d_name.name;
604         int name_len = dentry->d_name.len;
605         int ret = 0;
606         struct extent_buffer *leaf;
607         struct btrfs_dir_item *di;
608         struct btrfs_key key;
609
610         path = btrfs_alloc_path();
611         if (!path) {
612                 ret = -ENOMEM;
613                 goto err;
614         }
615
616         di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino,
617                                     name, name_len, -1);
618         if (IS_ERR(di)) {
619                 ret = PTR_ERR(di);
620                 goto err;
621         }
622         if (!di) {
623                 ret = -ENOENT;
624                 goto err;
625         }
626         leaf = path->nodes[0];
627         btrfs_dir_item_key_to_cpu(leaf, di, &key);
628         ret = btrfs_delete_one_dir_name(trans, root, path, di);
629         if (ret)
630                 goto err;
631         btrfs_release_path(root, path);
632
633         di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino,
634                                          key.objectid, name, name_len, -1);
635         if (IS_ERR(di)) {
636                 ret = PTR_ERR(di);
637                 goto err;
638         }
639         if (!di) {
640                 ret = -ENOENT;
641                 goto err;
642         }
643         ret = btrfs_delete_one_dir_name(trans, root, path, di);
644
645         dentry->d_inode->i_ctime = dir->i_ctime;
646         ret = btrfs_del_inode_ref(trans, root, name, name_len,
647                                   dentry->d_inode->i_ino,
648                                   dentry->d_parent->d_inode->i_ino);
649         if (ret) {
650                 printk("failed to delete reference to %.*s, "
651                        "inode %lu parent %lu\n", name_len, name,
652                        dentry->d_inode->i_ino,
653                        dentry->d_parent->d_inode->i_ino);
654         }
655 err:
656         btrfs_free_path(path);
657         if (!ret) {
658                 dir->i_size -= name_len * 2;
659                 dir->i_mtime = dir->i_ctime = CURRENT_TIME;
660                 btrfs_update_inode(trans, root, dir);
661 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
662                 dentry->d_inode->i_nlink--;
663 #else
664                 drop_nlink(dentry->d_inode);
665 #endif
666                 ret = btrfs_update_inode(trans, root, dentry->d_inode);
667                 dir->i_sb->s_dirt = 1;
668         }
669         return ret;
670 }
671
672 static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
673 {
674         struct btrfs_root *root;
675         struct btrfs_trans_handle *trans;
676         struct inode *inode = dentry->d_inode;
677         int ret;
678         unsigned long nr = 0;
679
680         root = BTRFS_I(dir)->root;
681         mutex_lock(&root->fs_info->fs_mutex);
682
683         ret = btrfs_check_free_space(root, 1, 1);
684         if (ret)
685                 goto fail;
686
687         trans = btrfs_start_transaction(root, 1);
688
689         btrfs_set_trans_block_group(trans, dir);
690         ret = btrfs_unlink_trans(trans, root, dir, dentry);
691         nr = trans->blocks_used;
692
693         if (inode->i_nlink == 0) {
694                 int found;
695                 /* if the inode isn't linked anywhere,
696                  * we don't need to worry about
697                  * data=ordered
698                  */
699                 found = btrfs_del_ordered_inode(inode);
700                 if (found == 1) {
701                         atomic_dec(&inode->i_count);
702                 }
703         }
704
705         btrfs_end_transaction(trans, root);
706 fail:
707         mutex_unlock(&root->fs_info->fs_mutex);
708         btrfs_btree_balance_dirty(root, nr);
709         btrfs_throttle(root);
710         return ret;
711 }
712
713 static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
714 {
715         struct inode *inode = dentry->d_inode;
716         int err = 0;
717         int ret;
718         struct btrfs_root *root = BTRFS_I(dir)->root;
719         struct btrfs_trans_handle *trans;
720         unsigned long nr = 0;
721
722         if (inode->i_size > BTRFS_EMPTY_DIR_SIZE)
723                 return -ENOTEMPTY;
724
725         mutex_lock(&root->fs_info->fs_mutex);
726         ret = btrfs_check_free_space(root, 1, 1);
727         if (ret)
728                 goto fail;
729
730         trans = btrfs_start_transaction(root, 1);
731         btrfs_set_trans_block_group(trans, dir);
732
733         /* now the directory is empty */
734         err = btrfs_unlink_trans(trans, root, dir, dentry);
735         if (!err) {
736                 inode->i_size = 0;
737         }
738
739         nr = trans->blocks_used;
740         ret = btrfs_end_transaction(trans, root);
741 fail:
742         mutex_unlock(&root->fs_info->fs_mutex);
743         btrfs_btree_balance_dirty(root, nr);
744         btrfs_throttle(root);
745
746         if (ret && !err)
747                 err = ret;
748         return err;
749 }
750
751 /*
752  * this can truncate away extent items, csum items and directory items.
753  * It starts at a high offset and removes keys until it can't find
754  * any higher than i_size.
755  *
756  * csum items that cross the new i_size are truncated to the new size
757  * as well.
758  */
759 static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans,
760                                    struct btrfs_root *root,
761                                    struct inode *inode,
762                                    u32 min_type)
763 {
764         int ret;
765         struct btrfs_path *path;
766         struct btrfs_key key;
767         struct btrfs_key found_key;
768         u32 found_type;
769         struct extent_buffer *leaf;
770         struct btrfs_file_extent_item *fi;
771         u64 extent_start = 0;
772         u64 extent_num_bytes = 0;
773         u64 item_end = 0;
774         u64 root_gen = 0;
775         u64 root_owner = 0;
776         int found_extent;
777         int del_item;
778         int pending_del_nr = 0;
779         int pending_del_slot = 0;
780         int extent_type = -1;
781
782         btrfs_drop_extent_cache(inode, inode->i_size, (u64)-1);
783         path = btrfs_alloc_path();
784         path->reada = -1;
785         BUG_ON(!path);
786
787         /* FIXME, add redo link to tree so we don't leak on crash */
788         key.objectid = inode->i_ino;
789         key.offset = (u64)-1;
790         key.type = (u8)-1;
791
792         btrfs_init_path(path);
793 search_again:
794         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
795         if (ret < 0) {
796                 goto error;
797         }
798         if (ret > 0) {
799                 BUG_ON(path->slots[0] == 0);
800                 path->slots[0]--;
801         }
802
803         while(1) {
804                 fi = NULL;
805                 leaf = path->nodes[0];
806                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
807                 found_type = btrfs_key_type(&found_key);
808
809                 if (found_key.objectid != inode->i_ino)
810                         break;
811
812                 if (found_type < min_type)
813                         break;
814
815                 item_end = found_key.offset;
816                 if (found_type == BTRFS_EXTENT_DATA_KEY) {
817                         fi = btrfs_item_ptr(leaf, path->slots[0],
818                                             struct btrfs_file_extent_item);
819                         extent_type = btrfs_file_extent_type(leaf, fi);
820                         if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
821                                 item_end +=
822                                     btrfs_file_extent_num_bytes(leaf, fi);
823                         } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
824                                 struct btrfs_item *item = btrfs_item_nr(leaf,
825                                                                 path->slots[0]);
826                                 item_end += btrfs_file_extent_inline_len(leaf,
827                                                                          item);
828                         }
829                         item_end--;
830                 }
831                 if (found_type == BTRFS_CSUM_ITEM_KEY) {
832                         ret = btrfs_csum_truncate(trans, root, path,
833                                                   inode->i_size);
834                         BUG_ON(ret);
835                 }
836                 if (item_end < inode->i_size) {
837                         if (found_type == BTRFS_DIR_ITEM_KEY) {
838                                 found_type = BTRFS_INODE_ITEM_KEY;
839                         } else if (found_type == BTRFS_EXTENT_ITEM_KEY) {
840                                 found_type = BTRFS_CSUM_ITEM_KEY;
841                         } else if (found_type == BTRFS_EXTENT_DATA_KEY) {
842                                 found_type = BTRFS_XATTR_ITEM_KEY;
843                         } else if (found_type == BTRFS_XATTR_ITEM_KEY) {
844                                 found_type = BTRFS_INODE_REF_KEY;
845                         } else if (found_type) {
846                                 found_type--;
847                         } else {
848                                 break;
849                         }
850                         btrfs_set_key_type(&key, found_type);
851                         goto next;
852                 }
853                 if (found_key.offset >= inode->i_size)
854                         del_item = 1;
855                 else
856                         del_item = 0;
857                 found_extent = 0;
858
859                 /* FIXME, shrink the extent if the ref count is only 1 */
860                 if (found_type != BTRFS_EXTENT_DATA_KEY)
861                         goto delete;
862
863                 if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
864                         u64 num_dec;
865                         extent_start = btrfs_file_extent_disk_bytenr(leaf, fi);
866                         if (!del_item) {
867                                 u64 orig_num_bytes =
868                                         btrfs_file_extent_num_bytes(leaf, fi);
869                                 extent_num_bytes = inode->i_size -
870                                         found_key.offset + root->sectorsize - 1;
871                                 extent_num_bytes = extent_num_bytes &
872                                         ~((u64)root->sectorsize - 1);
873                                 btrfs_set_file_extent_num_bytes(leaf, fi,
874                                                          extent_num_bytes);
875                                 num_dec = (orig_num_bytes -
876                                            extent_num_bytes);
877                                 if (extent_start != 0)
878                                         dec_i_blocks(inode, num_dec);
879                                 btrfs_mark_buffer_dirty(leaf);
880                         } else {
881                                 extent_num_bytes =
882                                         btrfs_file_extent_disk_num_bytes(leaf,
883                                                                          fi);
884                                 /* FIXME blocksize != 4096 */
885                                 num_dec = btrfs_file_extent_num_bytes(leaf, fi);
886                                 if (extent_start != 0) {
887                                         found_extent = 1;
888                                         dec_i_blocks(inode, num_dec);
889                                 }
890                                 root_gen = btrfs_header_generation(leaf);
891                                 root_owner = btrfs_header_owner(leaf);
892                         }
893                 } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
894                         if (!del_item) {
895                                 u32 newsize = inode->i_size - found_key.offset;
896                                 dec_i_blocks(inode, item_end + 1 -
897                                             found_key.offset - newsize);
898                                 newsize =
899                                     btrfs_file_extent_calc_inline_size(newsize);
900                                 ret = btrfs_truncate_item(trans, root, path,
901                                                           newsize, 1);
902                                 BUG_ON(ret);
903                         } else {
904                                 dec_i_blocks(inode, item_end + 1 -
905                                              found_key.offset);
906                         }
907                 }
908 delete:
909                 if (del_item) {
910                         if (!pending_del_nr) {
911                                 /* no pending yet, add ourselves */
912                                 pending_del_slot = path->slots[0];
913                                 pending_del_nr = 1;
914                         } else if (pending_del_nr &&
915                                    path->slots[0] + 1 == pending_del_slot) {
916                                 /* hop on the pending chunk */
917                                 pending_del_nr++;
918                                 pending_del_slot = path->slots[0];
919                         } else {
920                                 printk("bad pending slot %d pending_del_nr %d pending_del_slot %d\n", path->slots[0], pending_del_nr, pending_del_slot);
921                         }
922                 } else {
923                         break;
924                 }
925                 if (found_extent) {
926                         ret = btrfs_free_extent(trans, root, extent_start,
927                                                 extent_num_bytes,
928                                                 root_owner,
929                                                 root_gen, inode->i_ino,
930                                                 found_key.offset, 0);
931                         BUG_ON(ret);
932                 }
933 next:
934                 if (path->slots[0] == 0) {
935                         if (pending_del_nr)
936                                 goto del_pending;
937                         btrfs_release_path(root, path);
938                         goto search_again;
939                 }
940
941                 path->slots[0]--;
942                 if (pending_del_nr &&
943                     path->slots[0] + 1 != pending_del_slot) {
944                         struct btrfs_key debug;
945 del_pending:
946                         btrfs_item_key_to_cpu(path->nodes[0], &debug,
947                                               pending_del_slot);
948                         ret = btrfs_del_items(trans, root, path,
949                                               pending_del_slot,
950                                               pending_del_nr);
951                         BUG_ON(ret);
952                         pending_del_nr = 0;
953                         btrfs_release_path(root, path);
954                         goto search_again;
955                 }
956         }
957         ret = 0;
958 error:
959         if (pending_del_nr) {
960                 ret = btrfs_del_items(trans, root, path, pending_del_slot,
961                                       pending_del_nr);
962         }
963         btrfs_release_path(root, path);
964         btrfs_free_path(path);
965         inode->i_sb->s_dirt = 1;
966         return ret;
967 }
968
969 static int btrfs_cow_one_page(struct inode *inode, struct page *page,
970                               size_t zero_start)
971 {
972         char *kaddr;
973         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
974         u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
975         u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
976         int ret = 0;
977
978         WARN_ON(!PageLocked(page));
979         set_page_extent_mapped(page);
980
981         lock_extent(io_tree, page_start, page_end, GFP_NOFS);
982         set_extent_delalloc(&BTRFS_I(inode)->io_tree, page_start,
983                             page_end, GFP_NOFS);
984
985         if (zero_start != PAGE_CACHE_SIZE) {
986                 kaddr = kmap(page);
987                 memset(kaddr + zero_start, 0, PAGE_CACHE_SIZE - zero_start);
988                 flush_dcache_page(page);
989                 kunmap(page);
990         }
991         set_page_dirty(page);
992         unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
993
994         return ret;
995 }
996
997 /*
998  * taken from block_truncate_page, but does cow as it zeros out
999  * any bytes left in the last page in the file.
1000  */
1001 static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
1002 {
1003         struct inode *inode = mapping->host;
1004         struct btrfs_root *root = BTRFS_I(inode)->root;
1005         u32 blocksize = root->sectorsize;
1006         pgoff_t index = from >> PAGE_CACHE_SHIFT;
1007         unsigned offset = from & (PAGE_CACHE_SIZE-1);
1008         struct page *page;
1009         int ret = 0;
1010         u64 page_start;
1011
1012         if ((offset & (blocksize - 1)) == 0)
1013                 goto out;
1014
1015         ret = -ENOMEM;
1016         page = grab_cache_page(mapping, index);
1017         if (!page)
1018                 goto out;
1019         if (!PageUptodate(page)) {
1020                 ret = btrfs_readpage(NULL, page);
1021                 lock_page(page);
1022                 if (!PageUptodate(page)) {
1023                         ret = -EIO;
1024                         goto out;
1025                 }
1026         }
1027         page_start = (u64)page->index << PAGE_CACHE_SHIFT;
1028
1029         ret = btrfs_cow_one_page(inode, page, offset);
1030
1031         unlock_page(page);
1032         page_cache_release(page);
1033 out:
1034         return ret;
1035 }
1036
1037 static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
1038 {
1039         struct inode *inode = dentry->d_inode;
1040         int err;
1041
1042         err = inode_change_ok(inode, attr);
1043         if (err)
1044                 return err;
1045
1046         if (S_ISREG(inode->i_mode) &&
1047             attr->ia_valid & ATTR_SIZE && attr->ia_size > inode->i_size) {
1048                 struct btrfs_trans_handle *trans;
1049                 struct btrfs_root *root = BTRFS_I(inode)->root;
1050                 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
1051
1052                 u64 mask = root->sectorsize - 1;
1053                 u64 hole_start = (inode->i_size + mask) & ~mask;
1054                 u64 block_end = (attr->ia_size + mask) & ~mask;
1055                 u64 hole_size;
1056                 u64 alloc_hint = 0;
1057
1058                 if (attr->ia_size <= hole_start)
1059                         goto out;
1060
1061                 mutex_lock(&root->fs_info->fs_mutex);
1062                 err = btrfs_check_free_space(root, 1, 0);
1063                 mutex_unlock(&root->fs_info->fs_mutex);
1064                 if (err)
1065                         goto fail;
1066
1067                 btrfs_truncate_page(inode->i_mapping, inode->i_size);
1068
1069                 lock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS);
1070                 hole_size = block_end - hole_start;
1071
1072                 mutex_lock(&root->fs_info->fs_mutex);
1073                 trans = btrfs_start_transaction(root, 1);
1074                 btrfs_set_trans_block_group(trans, inode);
1075                 err = btrfs_drop_extents(trans, root, inode,
1076                                          hole_start, block_end, hole_start,
1077                                          &alloc_hint);
1078
1079                 if (alloc_hint != EXTENT_MAP_INLINE) {
1080                         err = btrfs_insert_file_extent(trans, root,
1081                                                        inode->i_ino,
1082                                                        hole_start, 0, 0,
1083                                                        hole_size);
1084                         btrfs_drop_extent_cache(inode, hole_start,
1085                                                 hole_size - 1);
1086                         btrfs_check_file(root, inode);
1087                 }
1088                 btrfs_end_transaction(trans, root);
1089                 mutex_unlock(&root->fs_info->fs_mutex);
1090                 unlock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS);
1091                 if (err)
1092                         return err;
1093         }
1094 out:
1095         err = inode_setattr(inode, attr);
1096 fail:
1097         return err;
1098 }
1099
1100 void btrfs_put_inode(struct inode *inode)
1101 {
1102         int ret;
1103
1104         if (!BTRFS_I(inode)->ordered_trans) {
1105                 return;
1106         }
1107
1108         if (mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY) ||
1109             mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK))
1110                 return;
1111
1112         ret = btrfs_del_ordered_inode(inode);
1113         if (ret == 1) {
1114                 atomic_dec(&inode->i_count);
1115         }
1116 }
1117
1118 void btrfs_delete_inode(struct inode *inode)
1119 {
1120         struct btrfs_trans_handle *trans;
1121         struct btrfs_root *root = BTRFS_I(inode)->root;
1122         unsigned long nr;
1123         int ret;
1124
1125         truncate_inode_pages(&inode->i_data, 0);
1126         if (is_bad_inode(inode)) {
1127                 goto no_delete;
1128         }
1129
1130         inode->i_size = 0;
1131         mutex_lock(&root->fs_info->fs_mutex);
1132         trans = btrfs_start_transaction(root, 1);
1133
1134         btrfs_set_trans_block_group(trans, inode);
1135         ret = btrfs_truncate_in_trans(trans, root, inode, 0);
1136         if (ret)
1137                 goto no_delete_lock;
1138
1139         nr = trans->blocks_used;
1140         clear_inode(inode);
1141
1142         btrfs_end_transaction(trans, root);
1143         mutex_unlock(&root->fs_info->fs_mutex);
1144         btrfs_btree_balance_dirty(root, nr);
1145         btrfs_throttle(root);
1146         return;
1147
1148 no_delete_lock:
1149         nr = trans->blocks_used;
1150         btrfs_end_transaction(trans, root);
1151         mutex_unlock(&root->fs_info->fs_mutex);
1152         btrfs_btree_balance_dirty(root, nr);
1153         btrfs_throttle(root);
1154 no_delete:
1155         clear_inode(inode);
1156 }
1157
1158 /*
1159  * this returns the key found in the dir entry in the location pointer.
1160  * If no dir entries were found, location->objectid is 0.
1161  */
1162 static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
1163                                struct btrfs_key *location)
1164 {
1165         const char *name = dentry->d_name.name;
1166         int namelen = dentry->d_name.len;
1167         struct btrfs_dir_item *di;
1168         struct btrfs_path *path;
1169         struct btrfs_root *root = BTRFS_I(dir)->root;
1170         int ret = 0;
1171
1172         if (namelen == 1 && strcmp(name, ".") == 0) {
1173                 location->objectid = dir->i_ino;
1174                 location->type = BTRFS_INODE_ITEM_KEY;
1175                 location->offset = 0;
1176                 return 0;
1177         }
1178         path = btrfs_alloc_path();
1179         BUG_ON(!path);
1180
1181         if (namelen == 2 && strcmp(name, "..") == 0) {
1182                 struct btrfs_key key;
1183                 struct extent_buffer *leaf;
1184                 u32 nritems;
1185                 int slot;
1186
1187                 key.objectid = dir->i_ino;
1188                 btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY);
1189                 key.offset = 0;
1190                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1191                 BUG_ON(ret == 0);
1192                 ret = 0;
1193
1194                 leaf = path->nodes[0];
1195                 slot = path->slots[0];
1196                 nritems = btrfs_header_nritems(leaf);
1197                 if (slot >= nritems)
1198                         goto out_err;
1199
1200                 btrfs_item_key_to_cpu(leaf, &key, slot);
1201                 if (key.objectid != dir->i_ino ||
1202                     key.type != BTRFS_INODE_REF_KEY) {
1203                         goto out_err;
1204                 }
1205                 location->objectid = key.offset;
1206                 location->type = BTRFS_INODE_ITEM_KEY;
1207                 location->offset = 0;
1208                 goto out;
1209         }
1210
1211         di = btrfs_lookup_dir_item(NULL, root, path, dir->i_ino, name,
1212                                     namelen, 0);
1213         if (IS_ERR(di))
1214                 ret = PTR_ERR(di);
1215         if (!di || IS_ERR(di)) {
1216                 goto out_err;
1217         }
1218         btrfs_dir_item_key_to_cpu(path->nodes[0], di, location);
1219 out:
1220         btrfs_free_path(path);
1221         return ret;
1222 out_err:
1223         location->objectid = 0;
1224         goto out;
1225 }
1226
1227 /*
1228  * when we hit a tree root in a directory, the btrfs part of the inode
1229  * needs to be changed to reflect the root directory of the tree root.  This
1230  * is kind of like crossing a mount point.
1231  */
1232 static int fixup_tree_root_location(struct btrfs_root *root,
1233                              struct btrfs_key *location,
1234                              struct btrfs_root **sub_root,
1235                              struct dentry *dentry)
1236 {
1237         struct btrfs_path *path;
1238         struct btrfs_root_item *ri;
1239
1240         if (btrfs_key_type(location) != BTRFS_ROOT_ITEM_KEY)
1241                 return 0;
1242         if (location->objectid == BTRFS_ROOT_TREE_OBJECTID)
1243                 return 0;
1244
1245         path = btrfs_alloc_path();
1246         BUG_ON(!path);
1247         mutex_lock(&root->fs_info->fs_mutex);
1248
1249         *sub_root = btrfs_read_fs_root(root->fs_info, location,
1250                                         dentry->d_name.name,
1251                                         dentry->d_name.len);
1252         if (IS_ERR(*sub_root))
1253                 return PTR_ERR(*sub_root);
1254
1255         ri = &(*sub_root)->root_item;
1256         location->objectid = btrfs_root_dirid(ri);
1257         btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
1258         location->offset = 0;
1259
1260         btrfs_free_path(path);
1261         mutex_unlock(&root->fs_info->fs_mutex);
1262         return 0;
1263 }
1264
1265 static int btrfs_init_locked_inode(struct inode *inode, void *p)
1266 {
1267         struct btrfs_iget_args *args = p;
1268         inode->i_ino = args->ino;
1269         BTRFS_I(inode)->root = args->root;
1270         BTRFS_I(inode)->delalloc_bytes = 0;
1271         extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS);
1272         extent_io_tree_init(&BTRFS_I(inode)->io_tree,
1273                              inode->i_mapping, GFP_NOFS);
1274         return 0;
1275 }
1276
1277 static int btrfs_find_actor(struct inode *inode, void *opaque)
1278 {
1279         struct btrfs_iget_args *args = opaque;
1280         return (args->ino == inode->i_ino &&
1281                 args->root == BTRFS_I(inode)->root);
1282 }
1283
1284 struct inode *btrfs_ilookup(struct super_block *s, u64 objectid,
1285                             u64 root_objectid)
1286 {
1287         struct btrfs_iget_args args;
1288         args.ino = objectid;
1289         args.root = btrfs_lookup_fs_root(btrfs_sb(s)->fs_info, root_objectid);
1290
1291         if (!args.root)
1292                 return NULL;
1293
1294         return ilookup5(s, objectid, btrfs_find_actor, (void *)&args);
1295 }
1296
1297 struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid,
1298                                 struct btrfs_root *root)
1299 {
1300         struct inode *inode;
1301         struct btrfs_iget_args args;
1302         args.ino = objectid;
1303         args.root = root;
1304
1305         inode = iget5_locked(s, objectid, btrfs_find_actor,
1306                              btrfs_init_locked_inode,
1307                              (void *)&args);
1308         return inode;
1309 }
1310
1311 static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
1312                                    struct nameidata *nd)
1313 {
1314         struct inode * inode;
1315         struct btrfs_inode *bi = BTRFS_I(dir);
1316         struct btrfs_root *root = bi->root;
1317         struct btrfs_root *sub_root = root;
1318         struct btrfs_key location;
1319         int ret;
1320
1321         if (dentry->d_name.len > BTRFS_NAME_LEN)
1322                 return ERR_PTR(-ENAMETOOLONG);
1323
1324         mutex_lock(&root->fs_info->fs_mutex);
1325         ret = btrfs_inode_by_name(dir, dentry, &location);
1326         mutex_unlock(&root->fs_info->fs_mutex);
1327
1328         if (ret < 0)
1329                 return ERR_PTR(ret);
1330
1331         inode = NULL;
1332         if (location.objectid) {
1333                 ret = fixup_tree_root_location(root, &location, &sub_root,
1334                                                 dentry);
1335                 if (ret < 0)
1336                         return ERR_PTR(ret);
1337                 if (ret > 0)
1338                         return ERR_PTR(-ENOENT);
1339                 inode = btrfs_iget_locked(dir->i_sb, location.objectid,
1340                                           sub_root);
1341                 if (!inode)
1342                         return ERR_PTR(-EACCES);
1343                 if (inode->i_state & I_NEW) {
1344                         /* the inode and parent dir are two different roots */
1345                         if (sub_root != root) {
1346                                 igrab(inode);
1347                                 sub_root->inode = inode;
1348                         }
1349                         BTRFS_I(inode)->root = sub_root;
1350                         memcpy(&BTRFS_I(inode)->location, &location,
1351                                sizeof(location));
1352                         btrfs_read_locked_inode(inode);
1353                         unlock_new_inode(inode);
1354                 }
1355         }
1356         return d_splice_alias(inode, dentry);
1357 }
1358
1359 static unsigned char btrfs_filetype_table[] = {
1360         DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
1361 };
1362
1363 static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
1364 {
1365         struct inode *inode = filp->f_dentry->d_inode;
1366         struct btrfs_root *root = BTRFS_I(inode)->root;
1367         struct btrfs_item *item;
1368         struct btrfs_dir_item *di;
1369         struct btrfs_key key;
1370         struct btrfs_key found_key;
1371         struct btrfs_path *path;
1372         int ret;
1373         u32 nritems;
1374         struct extent_buffer *leaf;
1375         int slot;
1376         int advance;
1377         unsigned char d_type;
1378         int over = 0;
1379         u32 di_cur;
1380         u32 di_total;
1381         u32 di_len;
1382         int key_type = BTRFS_DIR_INDEX_KEY;
1383         char tmp_name[32];
1384         char *name_ptr;
1385         int name_len;
1386
1387         /* FIXME, use a real flag for deciding about the key type */
1388         if (root->fs_info->tree_root == root)
1389                 key_type = BTRFS_DIR_ITEM_KEY;
1390
1391         /* special case for "." */
1392         if (filp->f_pos == 0) {
1393                 over = filldir(dirent, ".", 1,
1394                                1, inode->i_ino,
1395                                DT_DIR);
1396                 if (over)
1397                         return 0;
1398                 filp->f_pos = 1;
1399         }
1400
1401         mutex_lock(&root->fs_info->fs_mutex);
1402         key.objectid = inode->i_ino;
1403         path = btrfs_alloc_path();
1404         path->reada = 2;
1405
1406         /* special case for .., just use the back ref */
1407         if (filp->f_pos == 1) {
1408                 btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY);
1409                 key.offset = 0;
1410                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1411                 BUG_ON(ret == 0);
1412                 leaf = path->nodes[0];
1413                 slot = path->slots[0];
1414                 nritems = btrfs_header_nritems(leaf);
1415                 if (slot >= nritems) {
1416                         btrfs_release_path(root, path);
1417                         goto read_dir_items;
1418                 }
1419                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
1420                 btrfs_release_path(root, path);
1421                 if (found_key.objectid != key.objectid ||
1422                     found_key.type != BTRFS_INODE_REF_KEY)
1423                         goto read_dir_items;
1424                 over = filldir(dirent, "..", 2,
1425                                2, found_key.offset, DT_DIR);
1426                 if (over)
1427                         goto nopos;
1428                 filp->f_pos = 2;
1429         }
1430
1431 read_dir_items:
1432         btrfs_set_key_type(&key, key_type);
1433         key.offset = filp->f_pos;
1434
1435         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1436         if (ret < 0)
1437                 goto err;
1438         advance = 0;
1439         while(1) {
1440                 leaf = path->nodes[0];
1441                 nritems = btrfs_header_nritems(leaf);
1442                 slot = path->slots[0];
1443                 if (advance || slot >= nritems) {
1444                         if (slot >= nritems -1) {
1445                                 ret = btrfs_next_leaf(root, path);
1446                                 if (ret)
1447                                         break;
1448                                 leaf = path->nodes[0];
1449                                 nritems = btrfs_header_nritems(leaf);
1450                                 slot = path->slots[0];
1451                         } else {
1452                                 slot++;
1453                                 path->slots[0]++;
1454                         }
1455                 }
1456                 advance = 1;
1457                 item = btrfs_item_nr(leaf, slot);
1458                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
1459
1460                 if (found_key.objectid != key.objectid)
1461                         break;
1462                 if (btrfs_key_type(&found_key) != key_type)
1463                         break;
1464                 if (found_key.offset < filp->f_pos)
1465                         continue;
1466
1467                 filp->f_pos = found_key.offset;
1468                 advance = 1;
1469                 di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
1470                 di_cur = 0;
1471                 di_total = btrfs_item_size(leaf, item);
1472                 while(di_cur < di_total) {
1473                         struct btrfs_key location;
1474
1475                         name_len = btrfs_dir_name_len(leaf, di);
1476                         if (name_len < 32) {
1477                                 name_ptr = tmp_name;
1478                         } else {
1479                                 name_ptr = kmalloc(name_len, GFP_NOFS);
1480                                 BUG_ON(!name_ptr);
1481                         }
1482                         read_extent_buffer(leaf, name_ptr,
1483                                            (unsigned long)(di + 1), name_len);
1484
1485                         d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)];
1486                         btrfs_dir_item_key_to_cpu(leaf, di, &location);
1487                         over = filldir(dirent, name_ptr, name_len,
1488                                        found_key.offset,
1489                                        location.objectid,
1490                                        d_type);
1491
1492                         if (name_ptr != tmp_name)
1493                                 kfree(name_ptr);
1494
1495                         if (over)
1496                                 goto nopos;
1497                         di_len = btrfs_dir_name_len(leaf, di) +
1498                                 btrfs_dir_data_len(leaf, di) +sizeof(*di);
1499                         di_cur += di_len;
1500                         di = (struct btrfs_dir_item *)((char *)di + di_len);
1501                 }
1502         }
1503         if (key_type == BTRFS_DIR_INDEX_KEY)
1504                 filp->f_pos = INT_LIMIT(typeof(filp->f_pos));
1505         else
1506                 filp->f_pos++;
1507 nopos:
1508         ret = 0;
1509 err:
1510         btrfs_release_path(root, path);
1511         btrfs_free_path(path);
1512         mutex_unlock(&root->fs_info->fs_mutex);
1513         return ret;
1514 }
1515
1516 int btrfs_write_inode(struct inode *inode, int wait)
1517 {
1518         struct btrfs_root *root = BTRFS_I(inode)->root;
1519         struct btrfs_trans_handle *trans;
1520         int ret = 0;
1521
1522         if (wait) {
1523                 mutex_lock(&root->fs_info->fs_mutex);
1524                 trans = btrfs_start_transaction(root, 1);
1525                 btrfs_set_trans_block_group(trans, inode);
1526                 ret = btrfs_commit_transaction(trans, root);
1527                 mutex_unlock(&root->fs_info->fs_mutex);
1528         }
1529         return ret;
1530 }
1531
1532 /*
1533  * This is somewhat expensive, updating the tree every time the
1534  * inode changes.  But, it is most likely to find the inode in cache.
1535  * FIXME, needs more benchmarking...there are no reasons other than performance
1536  * to keep or drop this code.
1537  */
1538 void btrfs_dirty_inode(struct inode *inode)
1539 {
1540         struct btrfs_root *root = BTRFS_I(inode)->root;
1541         struct btrfs_trans_handle *trans;
1542
1543         mutex_lock(&root->fs_info->fs_mutex);
1544         trans = btrfs_start_transaction(root, 1);
1545         btrfs_set_trans_block_group(trans, inode);
1546         btrfs_update_inode(trans, root, inode);
1547         btrfs_end_transaction(trans, root);
1548         mutex_unlock(&root->fs_info->fs_mutex);
1549 }
1550
1551 static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
1552                                      struct btrfs_root *root,
1553                                      const char *name, int name_len,
1554                                      u64 ref_objectid,
1555                                      u64 objectid,
1556                                      struct btrfs_block_group_cache *group,
1557                                      int mode)
1558 {
1559         struct inode *inode;
1560         struct btrfs_inode_item *inode_item;
1561         struct btrfs_block_group_cache *new_inode_group;
1562         struct btrfs_key *location;
1563         struct btrfs_path *path;
1564         struct btrfs_inode_ref *ref;
1565         struct btrfs_key key[2];
1566         u32 sizes[2];
1567         unsigned long ptr;
1568         int ret;
1569         int owner;
1570
1571         path = btrfs_alloc_path();
1572         BUG_ON(!path);
1573
1574         inode = new_inode(root->fs_info->sb);
1575         if (!inode)
1576                 return ERR_PTR(-ENOMEM);
1577
1578         extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS);
1579         extent_io_tree_init(&BTRFS_I(inode)->io_tree,
1580                              inode->i_mapping, GFP_NOFS);
1581         BTRFS_I(inode)->delalloc_bytes = 0;
1582         BTRFS_I(inode)->root = root;
1583
1584         if (mode & S_IFDIR)
1585                 owner = 0;
1586         else
1587                 owner = 1;
1588         new_inode_group = btrfs_find_block_group(root, group, 0,
1589                                        BTRFS_BLOCK_GROUP_METADATA, owner);
1590         if (!new_inode_group) {
1591                 printk("find_block group failed\n");
1592                 new_inode_group = group;
1593         }
1594         BTRFS_I(inode)->block_group = new_inode_group;
1595         BTRFS_I(inode)->flags = 0;
1596
1597         key[0].objectid = objectid;
1598         btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY);
1599         key[0].offset = 0;
1600
1601         key[1].objectid = objectid;
1602         btrfs_set_key_type(&key[1], BTRFS_INODE_REF_KEY);
1603         key[1].offset = ref_objectid;
1604
1605         sizes[0] = sizeof(struct btrfs_inode_item);
1606         sizes[1] = name_len + sizeof(*ref);
1607
1608         ret = btrfs_insert_empty_items(trans, root, path, key, sizes, 2);
1609         if (ret != 0)
1610                 goto fail;
1611
1612         if (objectid > root->highest_inode)
1613                 root->highest_inode = objectid;
1614
1615         inode->i_uid = current->fsuid;
1616         inode->i_gid = current->fsgid;
1617         inode->i_mode = mode;
1618         inode->i_ino = objectid;
1619         inode->i_blocks = 0;
1620         inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
1621         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
1622                                   struct btrfs_inode_item);
1623         fill_inode_item(path->nodes[0], inode_item, inode);
1624
1625         ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
1626                              struct btrfs_inode_ref);
1627         btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len);
1628         ptr = (unsigned long)(ref + 1);
1629         write_extent_buffer(path->nodes[0], name, ptr, name_len);
1630
1631         btrfs_mark_buffer_dirty(path->nodes[0]);
1632         btrfs_free_path(path);
1633
1634         location = &BTRFS_I(inode)->location;
1635         location->objectid = objectid;
1636         location->offset = 0;
1637         btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
1638
1639         insert_inode_hash(inode);
1640         return inode;
1641 fail:
1642         btrfs_free_path(path);
1643         return ERR_PTR(ret);
1644 }
1645
1646 static inline u8 btrfs_inode_type(struct inode *inode)
1647 {
1648         return btrfs_type_by_mode[(inode->i_mode & S_IFMT) >> S_SHIFT];
1649 }
1650
1651 static int btrfs_add_link(struct btrfs_trans_handle *trans,
1652                             struct dentry *dentry, struct inode *inode,
1653                             int add_backref)
1654 {
1655         int ret;
1656         struct btrfs_key key;
1657         struct btrfs_root *root = BTRFS_I(dentry->d_parent->d_inode)->root;
1658         struct inode *parent_inode;
1659
1660         key.objectid = inode->i_ino;
1661         btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
1662         key.offset = 0;
1663
1664         ret = btrfs_insert_dir_item(trans, root,
1665                                     dentry->d_name.name, dentry->d_name.len,
1666                                     dentry->d_parent->d_inode->i_ino,
1667                                     &key, btrfs_inode_type(inode));
1668         if (ret == 0) {
1669                 if (add_backref) {
1670                         ret = btrfs_insert_inode_ref(trans, root,
1671                                              dentry->d_name.name,
1672                                              dentry->d_name.len,
1673                                              inode->i_ino,
1674                                              dentry->d_parent->d_inode->i_ino);
1675                 }
1676                 parent_inode = dentry->d_parent->d_inode;
1677                 parent_inode->i_size += dentry->d_name.len * 2;
1678                 parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME;
1679                 ret = btrfs_update_inode(trans, root,
1680                                          dentry->d_parent->d_inode);
1681         }
1682         return ret;
1683 }
1684
1685 static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
1686                             struct dentry *dentry, struct inode *inode,
1687                             int backref)
1688 {
1689         int err = btrfs_add_link(trans, dentry, inode, backref);
1690         if (!err) {
1691                 d_instantiate(dentry, inode);
1692                 return 0;
1693         }
1694         if (err > 0)
1695                 err = -EEXIST;
1696         return err;
1697 }
1698
1699 static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
1700                         int mode, dev_t rdev)
1701 {
1702         struct btrfs_trans_handle *trans;
1703         struct btrfs_root *root = BTRFS_I(dir)->root;
1704         struct inode *inode = NULL;
1705         int err;
1706         int drop_inode = 0;
1707         u64 objectid;
1708         unsigned long nr = 0;
1709
1710         if (!new_valid_dev(rdev))
1711                 return -EINVAL;
1712
1713         mutex_lock(&root->fs_info->fs_mutex);
1714         err = btrfs_check_free_space(root, 1, 0);
1715         if (err)
1716                 goto fail;
1717
1718         trans = btrfs_start_transaction(root, 1);
1719         btrfs_set_trans_block_group(trans, dir);
1720
1721         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
1722         if (err) {
1723                 err = -ENOSPC;
1724                 goto out_unlock;
1725         }
1726
1727         inode = btrfs_new_inode(trans, root, dentry->d_name.name,
1728                                 dentry->d_name.len,
1729                                 dentry->d_parent->d_inode->i_ino, objectid,
1730                                 BTRFS_I(dir)->block_group, mode);
1731         err = PTR_ERR(inode);
1732         if (IS_ERR(inode))
1733                 goto out_unlock;
1734
1735         btrfs_set_trans_block_group(trans, inode);
1736         err = btrfs_add_nondir(trans, dentry, inode, 0);
1737         if (err)
1738                 drop_inode = 1;
1739         else {
1740                 inode->i_op = &btrfs_special_inode_operations;
1741                 init_special_inode(inode, inode->i_mode, rdev);
1742                 btrfs_update_inode(trans, root, inode);
1743         }
1744         dir->i_sb->s_dirt = 1;
1745         btrfs_update_inode_block_group(trans, inode);
1746         btrfs_update_inode_block_group(trans, dir);
1747 out_unlock:
1748         nr = trans->blocks_used;
1749         btrfs_end_transaction(trans, root);
1750 fail:
1751         mutex_unlock(&root->fs_info->fs_mutex);
1752
1753         if (drop_inode) {
1754                 inode_dec_link_count(inode);
1755                 iput(inode);
1756         }
1757         btrfs_btree_balance_dirty(root, nr);
1758         btrfs_throttle(root);
1759         return err;
1760 }
1761
1762 static int btrfs_create(struct inode *dir, struct dentry *dentry,
1763                         int mode, struct nameidata *nd)
1764 {
1765         struct btrfs_trans_handle *trans;
1766         struct btrfs_root *root = BTRFS_I(dir)->root;
1767         struct inode *inode = NULL;
1768         int err;
1769         int drop_inode = 0;
1770         unsigned long nr = 0;
1771         u64 objectid;
1772
1773         mutex_lock(&root->fs_info->fs_mutex);
1774         err = btrfs_check_free_space(root, 1, 0);
1775         if (err)
1776                 goto fail;
1777         trans = btrfs_start_transaction(root, 1);
1778         btrfs_set_trans_block_group(trans, dir);
1779
1780         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
1781         if (err) {
1782                 err = -ENOSPC;
1783                 goto out_unlock;
1784         }
1785
1786         inode = btrfs_new_inode(trans, root, dentry->d_name.name,
1787                                 dentry->d_name.len,
1788                                 dentry->d_parent->d_inode->i_ino,
1789                                 objectid, BTRFS_I(dir)->block_group, mode);
1790         err = PTR_ERR(inode);
1791         if (IS_ERR(inode))
1792                 goto out_unlock;
1793
1794         btrfs_set_trans_block_group(trans, inode);
1795         err = btrfs_add_nondir(trans, dentry, inode, 0);
1796         if (err)
1797                 drop_inode = 1;
1798         else {
1799                 inode->i_mapping->a_ops = &btrfs_aops;
1800                 inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
1801                 inode->i_fop = &btrfs_file_operations;
1802                 inode->i_op = &btrfs_file_inode_operations;
1803                 extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS);
1804                 extent_io_tree_init(&BTRFS_I(inode)->io_tree,
1805                                      inode->i_mapping, GFP_NOFS);
1806                 BTRFS_I(inode)->delalloc_bytes = 0;
1807                 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
1808         }
1809         dir->i_sb->s_dirt = 1;
1810         btrfs_update_inode_block_group(trans, inode);
1811         btrfs_update_inode_block_group(trans, dir);
1812 out_unlock:
1813         nr = trans->blocks_used;
1814         btrfs_end_transaction(trans, root);
1815 fail:
1816         mutex_unlock(&root->fs_info->fs_mutex);
1817
1818         if (drop_inode) {
1819                 inode_dec_link_count(inode);
1820                 iput(inode);
1821         }
1822         btrfs_btree_balance_dirty(root, nr);
1823         btrfs_throttle(root);
1824         return err;
1825 }
1826
1827 static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
1828                       struct dentry *dentry)
1829 {
1830         struct btrfs_trans_handle *trans;
1831         struct btrfs_root *root = BTRFS_I(dir)->root;
1832         struct inode *inode = old_dentry->d_inode;
1833         unsigned long nr = 0;
1834         int err;
1835         int drop_inode = 0;
1836
1837         if (inode->i_nlink == 0)
1838                 return -ENOENT;
1839
1840 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
1841         inode->i_nlink++;
1842 #else
1843         inc_nlink(inode);
1844 #endif
1845         mutex_lock(&root->fs_info->fs_mutex);
1846         err = btrfs_check_free_space(root, 1, 0);
1847         if (err)
1848                 goto fail;
1849         trans = btrfs_start_transaction(root, 1);
1850
1851         btrfs_set_trans_block_group(trans, dir);
1852         atomic_inc(&inode->i_count);
1853         err = btrfs_add_nondir(trans, dentry, inode, 1);
1854
1855         if (err)
1856                 drop_inode = 1;
1857
1858         dir->i_sb->s_dirt = 1;
1859         btrfs_update_inode_block_group(trans, dir);
1860         err = btrfs_update_inode(trans, root, inode);
1861
1862         if (err)
1863                 drop_inode = 1;
1864
1865         nr = trans->blocks_used;
1866         btrfs_end_transaction(trans, root);
1867 fail:
1868         mutex_unlock(&root->fs_info->fs_mutex);
1869
1870         if (drop_inode) {
1871                 inode_dec_link_count(inode);
1872                 iput(inode);
1873         }
1874         btrfs_btree_balance_dirty(root, nr);
1875         btrfs_throttle(root);
1876         return err;
1877 }
1878
1879 static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1880 {
1881         struct inode *inode;
1882         struct btrfs_trans_handle *trans;
1883         struct btrfs_root *root = BTRFS_I(dir)->root;
1884         int err = 0;
1885         int drop_on_err = 0;
1886         u64 objectid;
1887         unsigned long nr = 1;
1888
1889         mutex_lock(&root->fs_info->fs_mutex);
1890         err = btrfs_check_free_space(root, 1, 0);
1891         if (err)
1892                 goto out_unlock;
1893
1894         trans = btrfs_start_transaction(root, 1);
1895         btrfs_set_trans_block_group(trans, dir);
1896
1897         if (IS_ERR(trans)) {
1898                 err = PTR_ERR(trans);
1899                 goto out_unlock;
1900         }
1901
1902         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
1903         if (err) {
1904                 err = -ENOSPC;
1905                 goto out_unlock;
1906         }
1907
1908         inode = btrfs_new_inode(trans, root, dentry->d_name.name,
1909                                 dentry->d_name.len,
1910                                 dentry->d_parent->d_inode->i_ino, objectid,
1911                                 BTRFS_I(dir)->block_group, S_IFDIR | mode);
1912         if (IS_ERR(inode)) {
1913                 err = PTR_ERR(inode);
1914                 goto out_fail;
1915         }
1916
1917         drop_on_err = 1;
1918         inode->i_op = &btrfs_dir_inode_operations;
1919         inode->i_fop = &btrfs_dir_file_operations;
1920         btrfs_set_trans_block_group(trans, inode);
1921
1922         inode->i_size = 0;
1923         err = btrfs_update_inode(trans, root, inode);
1924         if (err)
1925                 goto out_fail;
1926
1927         err = btrfs_add_link(trans, dentry, inode, 0);
1928         if (err)
1929                 goto out_fail;
1930
1931         d_instantiate(dentry, inode);
1932         drop_on_err = 0;
1933         dir->i_sb->s_dirt = 1;
1934         btrfs_update_inode_block_group(trans, inode);
1935         btrfs_update_inode_block_group(trans, dir);
1936
1937 out_fail:
1938         nr = trans->blocks_used;
1939         btrfs_end_transaction(trans, root);
1940
1941 out_unlock:
1942         mutex_unlock(&root->fs_info->fs_mutex);
1943         if (drop_on_err)
1944                 iput(inode);
1945         btrfs_btree_balance_dirty(root, nr);
1946         btrfs_throttle(root);
1947         return err;
1948 }
1949
1950 struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
1951                                     size_t pg_offset, u64 start, u64 len,
1952                                     int create)
1953 {
1954         int ret;
1955         int err = 0;
1956         u64 bytenr;
1957         u64 extent_start = 0;
1958         u64 extent_end = 0;
1959         u64 objectid = inode->i_ino;
1960         u32 found_type;
1961         struct btrfs_path *path;
1962         struct btrfs_root *root = BTRFS_I(inode)->root;
1963         struct btrfs_file_extent_item *item;
1964         struct extent_buffer *leaf;
1965         struct btrfs_key found_key;
1966         struct extent_map *em = NULL;
1967         struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
1968         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
1969         struct btrfs_trans_handle *trans = NULL;
1970
1971         path = btrfs_alloc_path();
1972         BUG_ON(!path);
1973         mutex_lock(&root->fs_info->fs_mutex);
1974
1975 again:
1976         spin_lock(&em_tree->lock);
1977         em = lookup_extent_mapping(em_tree, start, len);
1978         spin_unlock(&em_tree->lock);
1979
1980         if (em) {
1981                 if (em->start > start) {
1982                         printk("get_extent lookup [%Lu %Lu] em [%Lu %Lu]\n",
1983                                start, len, em->start, em->len);
1984                         WARN_ON(1);
1985                 }
1986                 if (em->block_start == EXTENT_MAP_INLINE && page)
1987                         free_extent_map(em);
1988                 else
1989                         goto out;
1990         }
1991         em = alloc_extent_map(GFP_NOFS);
1992         if (!em) {
1993                 err = -ENOMEM;
1994                 goto out;
1995         }
1996
1997         em->start = EXTENT_MAP_HOLE;
1998         em->len = (u64)-1;
1999         em->bdev = inode->i_sb->s_bdev;
2000         ret = btrfs_lookup_file_extent(trans, root, path,
2001                                        objectid, start, trans != NULL);
2002         if (ret < 0) {
2003                 err = ret;
2004                 goto out;
2005         }
2006
2007         if (ret != 0) {
2008                 if (path->slots[0] == 0)
2009                         goto not_found;
2010                 path->slots[0]--;
2011         }
2012
2013         leaf = path->nodes[0];
2014         item = btrfs_item_ptr(leaf, path->slots[0],
2015                               struct btrfs_file_extent_item);
2016         /* are we inside the extent that was found? */
2017         btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
2018         found_type = btrfs_key_type(&found_key);
2019         if (found_key.objectid != objectid ||
2020             found_type != BTRFS_EXTENT_DATA_KEY) {
2021                 goto not_found;
2022         }
2023
2024         found_type = btrfs_file_extent_type(leaf, item);
2025         extent_start = found_key.offset;
2026         if (found_type == BTRFS_FILE_EXTENT_REG) {
2027                 extent_end = extent_start +
2028                        btrfs_file_extent_num_bytes(leaf, item);
2029                 err = 0;
2030                 if (start < extent_start || start >= extent_end) {
2031                         em->start = start;
2032                         if (start < extent_start) {
2033                                 if (start + len <= extent_start)
2034                                         goto not_found;
2035                                 em->len = extent_end - extent_start;
2036                         } else {
2037                                 em->len = len;
2038                         }
2039                         goto not_found_em;
2040                 }
2041                 bytenr = btrfs_file_extent_disk_bytenr(leaf, item);
2042                 if (bytenr == 0) {
2043                         em->start = extent_start;
2044                         em->len = extent_end - extent_start;
2045                         em->block_start = EXTENT_MAP_HOLE;
2046                         goto insert;
2047                 }
2048                 bytenr += btrfs_file_extent_offset(leaf, item);
2049                 em->block_start = bytenr;
2050                 em->start = extent_start;
2051                 em->len = extent_end - extent_start;
2052                 goto insert;
2053         } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
2054                 u64 page_start;
2055                 unsigned long ptr;
2056                 char *map;
2057                 size_t size;
2058                 size_t extent_offset;
2059                 size_t copy_size;
2060
2061                 size = btrfs_file_extent_inline_len(leaf, btrfs_item_nr(leaf,
2062                                                     path->slots[0]));
2063                 extent_end = (extent_start + size + root->sectorsize - 1) &
2064                         ~((u64)root->sectorsize - 1);
2065                 if (start < extent_start || start >= extent_end) {
2066                         em->start = start;
2067                         if (start < extent_start) {
2068                                 if (start + len <= extent_start)
2069                                         goto not_found;
2070                                 em->len = extent_end - extent_start;
2071                         } else {
2072                                 em->len = len;
2073                         }
2074                         goto not_found_em;
2075                 }
2076                 em->block_start = EXTENT_MAP_INLINE;
2077
2078                 if (!page) {
2079                         em->start = extent_start;
2080                         em->len = size;
2081                         goto out;
2082                 }
2083
2084                 page_start = page_offset(page) + pg_offset;
2085                 extent_offset = page_start - extent_start;
2086                 copy_size = min_t(u64, PAGE_CACHE_SIZE - pg_offset,
2087                                 size - extent_offset);
2088                 em->start = extent_start + extent_offset;
2089                 em->len = (copy_size + root->sectorsize - 1) &
2090                         ~((u64)root->sectorsize - 1);
2091                 map = kmap(page);
2092                 ptr = btrfs_file_extent_inline_start(item) + extent_offset;
2093                 if (create == 0 && !PageUptodate(page)) {
2094                         read_extent_buffer(leaf, map + pg_offset, ptr,
2095                                            copy_size);
2096                         flush_dcache_page(page);
2097                 } else if (create && PageUptodate(page)) {
2098                         if (!trans) {
2099                                 kunmap(page);
2100                                 free_extent_map(em);
2101                                 em = NULL;
2102                                 btrfs_release_path(root, path);
2103                                 trans = btrfs_start_transaction(root, 1);
2104                                 goto again;
2105                         }
2106                         write_extent_buffer(leaf, map + pg_offset, ptr,
2107                                             copy_size);
2108                         btrfs_mark_buffer_dirty(leaf);
2109                 }
2110                 kunmap(page);
2111                 set_extent_uptodate(io_tree, em->start,
2112                                     extent_map_end(em) - 1, GFP_NOFS);
2113                 goto insert;
2114         } else {
2115                 printk("unkknown found_type %d\n", found_type);
2116                 WARN_ON(1);
2117         }
2118 not_found:
2119         em->start = start;
2120         em->len = len;
2121 not_found_em:
2122         em->block_start = EXTENT_MAP_HOLE;
2123 insert:
2124         btrfs_release_path(root, path);
2125         if (em->start > start || extent_map_end(em) <= start) {
2126                 printk("bad extent! em: [%Lu %Lu] passed [%Lu %Lu]\n", em->start, em->len, start, len);
2127                 err = -EIO;
2128                 goto out;
2129         }
2130
2131         err = 0;
2132         spin_lock(&em_tree->lock);
2133         ret = add_extent_mapping(em_tree, em);
2134         if (ret == -EEXIST) {
2135                 free_extent_map(em);
2136                 em = lookup_extent_mapping(em_tree, start, len);
2137                 if (!em) {
2138                         err = -EIO;
2139                         printk("failing to insert %Lu %Lu\n", start, len);
2140                 }
2141         }
2142         spin_unlock(&em_tree->lock);
2143 out:
2144         btrfs_free_path(path);
2145         if (trans) {
2146                 ret = btrfs_end_transaction(trans, root);
2147                 if (!err)
2148                         err = ret;
2149         }
2150         mutex_unlock(&root->fs_info->fs_mutex);
2151         if (err) {
2152                 free_extent_map(em);
2153                 WARN_ON(1);
2154                 return ERR_PTR(err);
2155         }
2156         return em;
2157 }
2158
2159 static sector_t btrfs_bmap(struct address_space *mapping, sector_t iblock)
2160 {
2161         return extent_bmap(mapping, iblock, btrfs_get_extent);
2162 }
2163
2164 int btrfs_readpage(struct file *file, struct page *page)
2165 {
2166         struct extent_io_tree *tree;
2167         tree = &BTRFS_I(page->mapping->host)->io_tree;
2168         return extent_read_full_page(tree, page, btrfs_get_extent);
2169 }
2170
2171 static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
2172 {
2173         struct extent_io_tree *tree;
2174
2175
2176         if (current->flags & PF_MEMALLOC) {
2177                 redirty_page_for_writepage(wbc, page);
2178                 unlock_page(page);
2179                 return 0;
2180         }
2181         tree = &BTRFS_I(page->mapping->host)->io_tree;
2182         return extent_write_full_page(tree, page, btrfs_get_extent, wbc);
2183 }
2184
2185 static int btrfs_writepages(struct address_space *mapping,
2186                             struct writeback_control *wbc)
2187 {
2188         struct extent_io_tree *tree;
2189         tree = &BTRFS_I(mapping->host)->io_tree;
2190         return extent_writepages(tree, mapping, btrfs_get_extent, wbc);
2191 }
2192
2193 static int
2194 btrfs_readpages(struct file *file, struct address_space *mapping,
2195                 struct list_head *pages, unsigned nr_pages)
2196 {
2197         struct extent_io_tree *tree;
2198         tree = &BTRFS_I(mapping->host)->io_tree;
2199         return extent_readpages(tree, mapping, pages, nr_pages,
2200                                 btrfs_get_extent);
2201 }
2202
2203 static int btrfs_releasepage(struct page *page, gfp_t gfp_flags)
2204 {
2205         struct extent_io_tree *tree;
2206         struct extent_map_tree *map;
2207         int ret;
2208
2209         tree = &BTRFS_I(page->mapping->host)->io_tree;
2210         map = &BTRFS_I(page->mapping->host)->extent_tree;
2211         ret = try_release_extent_mapping(map, tree, page, gfp_flags);
2212         if (ret == 1) {
2213                 ClearPagePrivate(page);
2214                 set_page_private(page, 0);
2215                 page_cache_release(page);
2216         }
2217         return ret;
2218 }
2219
2220 static void btrfs_invalidatepage(struct page *page, unsigned long offset)
2221 {
2222         struct extent_io_tree *tree;
2223
2224         tree = &BTRFS_I(page->mapping->host)->io_tree;
2225         extent_invalidatepage(tree, page, offset);
2226         btrfs_releasepage(page, GFP_NOFS);
2227 }
2228
2229 /*
2230  * btrfs_page_mkwrite() is not allowed to change the file size as it gets
2231  * called from a page fault handler when a page is first dirtied. Hence we must
2232  * be careful to check for EOF conditions here. We set the page up correctly
2233  * for a written page which means we get ENOSPC checking when writing into
2234  * holes and correct delalloc and unwritten extent mapping on filesystems that
2235  * support these features.
2236  *
2237  * We are not allowed to take the i_mutex here so we have to play games to
2238  * protect against truncate races as the page could now be beyond EOF.  Because
2239  * vmtruncate() writes the inode size before removing pages, once we have the
2240  * page lock we can determine safely if the page is beyond EOF. If it is not
2241  * beyond EOF, then the page is guaranteed safe against truncation until we
2242  * unlock the page.
2243  */
2244 int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page)
2245 {
2246         struct inode *inode = fdentry(vma->vm_file)->d_inode;
2247         struct btrfs_root *root = BTRFS_I(inode)->root;
2248         unsigned long end;
2249         loff_t size;
2250         int ret;
2251         u64 page_start;
2252
2253         mutex_lock(&root->fs_info->fs_mutex);
2254         ret = btrfs_check_free_space(root, PAGE_CACHE_SIZE, 0);
2255         mutex_unlock(&root->fs_info->fs_mutex);
2256         if (ret)
2257                 goto out;
2258
2259         ret = -EINVAL;
2260
2261         lock_page(page);
2262         wait_on_page_writeback(page);
2263         size = i_size_read(inode);
2264         page_start = (u64)page->index << PAGE_CACHE_SHIFT;
2265
2266         if ((page->mapping != inode->i_mapping) ||
2267             (page_start > size)) {
2268                 /* page got truncated out from underneath us */
2269                 goto out_unlock;
2270         }
2271
2272         /* page is wholly or partially inside EOF */
2273         if (page_start + PAGE_CACHE_SIZE > size)
2274                 end = size & ~PAGE_CACHE_MASK;
2275         else
2276                 end = PAGE_CACHE_SIZE;
2277
2278         ret = btrfs_cow_one_page(inode, page, end);
2279
2280 out_unlock:
2281         unlock_page(page);
2282 out:
2283         return ret;
2284 }
2285
2286 static void btrfs_truncate(struct inode *inode)
2287 {
2288         struct btrfs_root *root = BTRFS_I(inode)->root;
2289         int ret;
2290         struct btrfs_trans_handle *trans;
2291         unsigned long nr;
2292
2293         if (!S_ISREG(inode->i_mode))
2294                 return;
2295         if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
2296                 return;
2297
2298         btrfs_truncate_page(inode->i_mapping, inode->i_size);
2299
2300         mutex_lock(&root->fs_info->fs_mutex);
2301         trans = btrfs_start_transaction(root, 1);
2302         btrfs_set_trans_block_group(trans, inode);
2303
2304         /* FIXME, add redo link to tree so we don't leak on crash */
2305         ret = btrfs_truncate_in_trans(trans, root, inode,
2306                                       BTRFS_EXTENT_DATA_KEY);
2307         btrfs_update_inode(trans, root, inode);
2308         nr = trans->blocks_used;
2309
2310         ret = btrfs_end_transaction(trans, root);
2311         BUG_ON(ret);
2312         mutex_unlock(&root->fs_info->fs_mutex);
2313         btrfs_btree_balance_dirty(root, nr);
2314         btrfs_throttle(root);
2315 }
2316
2317 static int noinline create_subvol(struct btrfs_root *root, char *name,
2318                                   int namelen)
2319 {
2320         struct btrfs_trans_handle *trans;
2321         struct btrfs_key key;
2322         struct btrfs_root_item root_item;
2323         struct btrfs_inode_item *inode_item;
2324         struct extent_buffer *leaf;
2325         struct btrfs_root *new_root = root;
2326         struct inode *inode;
2327         struct inode *dir;
2328         int ret;
2329         int err;
2330         u64 objectid;
2331         u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
2332         unsigned long nr = 1;
2333
2334         mutex_lock(&root->fs_info->fs_mutex);
2335         ret = btrfs_check_free_space(root, 1, 0);
2336         if (ret)
2337                 goto fail_commit;
2338
2339         trans = btrfs_start_transaction(root, 1);
2340         BUG_ON(!trans);
2341
2342         ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root,
2343                                        0, &objectid);
2344         if (ret)
2345                 goto fail;
2346
2347         leaf = __btrfs_alloc_free_block(trans, root, root->leafsize,
2348                                         objectid, trans->transid, 0, 0,
2349                                         0, 0);
2350         if (IS_ERR(leaf))
2351                 return PTR_ERR(leaf);
2352
2353         btrfs_set_header_nritems(leaf, 0);
2354         btrfs_set_header_level(leaf, 0);
2355         btrfs_set_header_bytenr(leaf, leaf->start);
2356         btrfs_set_header_generation(leaf, trans->transid);
2357         btrfs_set_header_owner(leaf, objectid);
2358
2359         write_extent_buffer(leaf, root->fs_info->fsid,
2360                             (unsigned long)btrfs_header_fsid(leaf),
2361                             BTRFS_FSID_SIZE);
2362         btrfs_mark_buffer_dirty(leaf);
2363
2364         inode_item = &root_item.inode;
2365         memset(inode_item, 0, sizeof(*inode_item));
2366         inode_item->generation = cpu_to_le64(1);
2367         inode_item->size = cpu_to_le64(3);
2368         inode_item->nlink = cpu_to_le32(1);
2369         inode_item->nblocks = cpu_to_le64(1);
2370         inode_item->mode = cpu_to_le32(S_IFDIR | 0755);
2371
2372         btrfs_set_root_bytenr(&root_item, leaf->start);
2373         btrfs_set_root_level(&root_item, 0);
2374         btrfs_set_root_refs(&root_item, 1);
2375         btrfs_set_root_used(&root_item, 0);
2376
2377         memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress));
2378         root_item.drop_level = 0;
2379
2380         free_extent_buffer(leaf);
2381         leaf = NULL;
2382
2383         btrfs_set_root_dirid(&root_item, new_dirid);
2384
2385         key.objectid = objectid;
2386         key.offset = 1;
2387         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
2388         ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
2389                                 &root_item);
2390         if (ret)
2391                 goto fail;
2392
2393         /*
2394          * insert the directory item
2395          */
2396         key.offset = (u64)-1;
2397         dir = root->fs_info->sb->s_root->d_inode;
2398         ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root,
2399                                     name, namelen, dir->i_ino, &key,
2400                                     BTRFS_FT_DIR);
2401         if (ret)
2402                 goto fail;
2403
2404         ret = btrfs_insert_inode_ref(trans, root->fs_info->tree_root,
2405                              name, namelen, objectid,
2406                              root->fs_info->sb->s_root->d_inode->i_ino);
2407         if (ret)
2408                 goto fail;
2409
2410         ret = btrfs_commit_transaction(trans, root);
2411         if (ret)
2412                 goto fail_commit;
2413
2414         new_root = btrfs_read_fs_root(root->fs_info, &key, name, namelen);
2415         BUG_ON(!new_root);
2416
2417         trans = btrfs_start_transaction(new_root, 1);
2418         BUG_ON(!trans);
2419
2420         inode = btrfs_new_inode(trans, new_root, "..", 2, new_dirid,
2421                                 new_dirid,
2422                                 BTRFS_I(dir)->block_group, S_IFDIR | 0700);
2423         if (IS_ERR(inode))
2424                 goto fail;
2425         inode->i_op = &btrfs_dir_inode_operations;
2426         inode->i_fop = &btrfs_dir_file_operations;
2427         new_root->inode = inode;
2428
2429         ret = btrfs_insert_inode_ref(trans, new_root, "..", 2, new_dirid,
2430                                      new_dirid);
2431         inode->i_nlink = 1;
2432         inode->i_size = 0;
2433         ret = btrfs_update_inode(trans, new_root, inode);
2434         if (ret)
2435                 goto fail;
2436 fail:
2437         nr = trans->blocks_used;
2438         err = btrfs_commit_transaction(trans, new_root);
2439         if (err && !ret)
2440                 ret = err;
2441 fail_commit:
2442         mutex_unlock(&root->fs_info->fs_mutex);
2443         btrfs_btree_balance_dirty(root, nr);
2444         btrfs_throttle(root);
2445         return ret;
2446 }
2447
2448 static int create_snapshot(struct btrfs_root *root, char *name, int namelen)
2449 {
2450         struct btrfs_pending_snapshot *pending_snapshot;
2451         struct btrfs_trans_handle *trans;
2452         int ret;
2453         int err;
2454         unsigned long nr = 0;
2455
2456         if (!root->ref_cows)
2457                 return -EINVAL;
2458
2459         mutex_lock(&root->fs_info->fs_mutex);
2460         ret = btrfs_check_free_space(root, 1, 0);
2461         if (ret)
2462                 goto fail_unlock;
2463
2464         pending_snapshot = kmalloc(sizeof(*pending_snapshot), GFP_NOFS);
2465         if (!pending_snapshot) {
2466                 ret = -ENOMEM;
2467                 goto fail_unlock;
2468         }
2469         pending_snapshot->name = kmalloc(namelen + 1, GFP_NOFS);
2470         if (!pending_snapshot->name) {
2471                 ret = -ENOMEM;
2472                 kfree(pending_snapshot);
2473                 goto fail_unlock;
2474         }
2475         memcpy(pending_snapshot->name, name, namelen);
2476         pending_snapshot->name[namelen] = '\0';
2477         trans = btrfs_start_transaction(root, 1);
2478         BUG_ON(!trans);
2479         pending_snapshot->root = root;
2480         list_add(&pending_snapshot->list,
2481                  &trans->transaction->pending_snapshots);
2482         ret = btrfs_update_inode(trans, root, root->inode);
2483         err = btrfs_commit_transaction(trans, root);
2484
2485 fail_unlock:
2486         mutex_unlock(&root->fs_info->fs_mutex);
2487         btrfs_btree_balance_dirty(root, nr);
2488         btrfs_throttle(root);
2489         return ret;
2490 }
2491
2492 unsigned long btrfs_force_ra(struct address_space *mapping,
2493                               struct file_ra_state *ra, struct file *file,
2494                               pgoff_t offset, pgoff_t last_index)
2495 {
2496         pgoff_t req_size;
2497
2498 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)
2499         req_size = last_index - offset + 1;
2500         offset = page_cache_readahead(mapping, ra, file, offset, req_size);
2501         return offset;
2502 #else
2503         req_size = min(last_index - offset + 1, (pgoff_t)128);
2504         page_cache_sync_readahead(mapping, ra, file, offset, req_size);
2505         return offset + req_size;
2506 #endif
2507 }
2508
2509 int btrfs_defrag_file(struct file *file) {
2510         struct inode *inode = fdentry(file)->d_inode;
2511         struct btrfs_root *root = BTRFS_I(inode)->root;
2512         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
2513         struct page *page;
2514         unsigned long last_index;
2515         unsigned long ra_index = 0;
2516         u64 page_start;
2517         u64 page_end;
2518         unsigned long i;
2519         int ret;
2520
2521         mutex_lock(&root->fs_info->fs_mutex);
2522         ret = btrfs_check_free_space(root, inode->i_size, 0);
2523         mutex_unlock(&root->fs_info->fs_mutex);
2524         if (ret)
2525                 return -ENOSPC;
2526
2527         mutex_lock(&inode->i_mutex);
2528         last_index = inode->i_size >> PAGE_CACHE_SHIFT;
2529         for (i = 0; i <= last_index; i++) {
2530                 if (i == ra_index) {
2531                         ra_index = btrfs_force_ra(inode->i_mapping,
2532                                                   &file->f_ra,
2533                                                   file, ra_index, last_index);
2534                 }
2535                 page = grab_cache_page(inode->i_mapping, i);
2536                 if (!page)
2537                         goto out_unlock;
2538                 if (!PageUptodate(page)) {
2539                         btrfs_readpage(NULL, page);
2540                         lock_page(page);
2541                         if (!PageUptodate(page)) {
2542                                 unlock_page(page);
2543                                 page_cache_release(page);
2544                                 goto out_unlock;
2545                         }
2546                 }
2547                 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
2548                 page_end = page_start + PAGE_CACHE_SIZE - 1;
2549
2550                 lock_extent(io_tree, page_start, page_end, GFP_NOFS);
2551                 set_extent_delalloc(io_tree, page_start,
2552                                     page_end, GFP_NOFS);
2553
2554                 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
2555                 set_page_dirty(page);
2556                 unlock_page(page);
2557                 page_cache_release(page);
2558                 balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1);
2559         }
2560
2561 out_unlock:
2562         mutex_unlock(&inode->i_mutex);
2563         return 0;
2564 }
2565
2566 static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg)
2567 {
2568         u64 new_size;
2569         u64 old_size;
2570         struct btrfs_ioctl_vol_args *vol_args;
2571         struct btrfs_trans_handle *trans;
2572         char *sizestr;
2573         int ret = 0;
2574         int namelen;
2575         int mod = 0;
2576
2577         vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS);
2578
2579         if (!vol_args)
2580                 return -ENOMEM;
2581
2582         if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
2583                 ret = -EFAULT;
2584                 goto out;
2585         }
2586         namelen = strlen(vol_args->name);
2587         if (namelen > BTRFS_VOL_NAME_MAX) {
2588                 ret = -EINVAL;
2589                 goto out;
2590         }
2591
2592         sizestr = vol_args->name;
2593         if (!strcmp(sizestr, "max"))
2594                 new_size = root->fs_info->sb->s_bdev->bd_inode->i_size;
2595         else {
2596                 if (sizestr[0] == '-') {
2597                         mod = -1;
2598                         sizestr++;
2599                 } else if (sizestr[0] == '+') {
2600                         mod = 1;
2601                         sizestr++;
2602                 }
2603                 new_size = btrfs_parse_size(sizestr);
2604                 if (new_size == 0) {
2605                         ret = -EINVAL;
2606                         goto out;
2607                 }
2608         }
2609
2610         mutex_lock(&root->fs_info->fs_mutex);
2611         old_size = btrfs_super_total_bytes(&root->fs_info->super_copy);
2612
2613         if (mod < 0) {
2614                 if (new_size > old_size) {
2615                         ret = -EINVAL;
2616                         goto out_unlock;
2617                 }
2618                 new_size = old_size - new_size;
2619         } else if (mod > 0) {
2620                 new_size = old_size + new_size;
2621         }
2622
2623         if (new_size < 256 * 1024 * 1024) {
2624                 ret = -EINVAL;
2625                 goto out_unlock;
2626         }
2627         if (new_size > root->fs_info->sb->s_bdev->bd_inode->i_size) {
2628                 ret = -EFBIG;
2629                 goto out_unlock;
2630         }
2631
2632         do_div(new_size, root->sectorsize);
2633         new_size *= root->sectorsize;
2634
2635 printk("new size is %Lu\n", new_size);
2636         if (new_size > old_size) {
2637                 trans = btrfs_start_transaction(root, 1);
2638                 ret = btrfs_grow_extent_tree(trans, root, new_size);
2639                 btrfs_commit_transaction(trans, root);
2640         } else {
2641                 ret = btrfs_shrink_extent_tree(root, new_size);
2642         }
2643
2644 out_unlock:
2645         mutex_unlock(&root->fs_info->fs_mutex);
2646 out:
2647         kfree(vol_args);
2648         return ret;
2649 }
2650
2651 static int noinline btrfs_ioctl_snap_create(struct btrfs_root *root,
2652                                             void __user *arg)
2653 {
2654         struct btrfs_ioctl_vol_args *vol_args;
2655         struct btrfs_dir_item *di;
2656         struct btrfs_path *path;
2657         u64 root_dirid;
2658         int namelen;
2659         int ret;
2660
2661         vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS);
2662
2663         if (!vol_args)
2664                 return -ENOMEM;
2665
2666         if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
2667                 ret = -EFAULT;
2668                 goto out;
2669         }
2670
2671         namelen = strlen(vol_args->name);
2672         if (namelen > BTRFS_VOL_NAME_MAX) {
2673                 ret = -EINVAL;
2674                 goto out;
2675         }
2676         if (strchr(vol_args->name, '/')) {
2677                 ret = -EINVAL;
2678                 goto out;
2679         }
2680
2681         path = btrfs_alloc_path();
2682         if (!path) {
2683                 ret = -ENOMEM;
2684                 goto out;
2685         }
2686
2687         root_dirid = root->fs_info->sb->s_root->d_inode->i_ino,
2688         mutex_lock(&root->fs_info->fs_mutex);
2689         di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root,
2690                             path, root_dirid,
2691                             vol_args->name, namelen, 0);
2692         mutex_unlock(&root->fs_info->fs_mutex);
2693         btrfs_free_path(path);
2694
2695         if (di && !IS_ERR(di)) {
2696                 ret = -EEXIST;
2697                 goto out;
2698         }
2699
2700         if (IS_ERR(di)) {
2701                 ret = PTR_ERR(di);
2702                 goto out;
2703         }
2704
2705         if (root == root->fs_info->tree_root)
2706                 ret = create_subvol(root, vol_args->name, namelen);
2707         else
2708                 ret = create_snapshot(root, vol_args->name, namelen);
2709 out:
2710         kfree(vol_args);
2711         return ret;
2712 }
2713
2714 static int btrfs_ioctl_defrag(struct file *file)
2715 {
2716         struct inode *inode = fdentry(file)->d_inode;
2717         struct btrfs_root *root = BTRFS_I(inode)->root;
2718
2719         switch (inode->i_mode & S_IFMT) {
2720         case S_IFDIR:
2721                 mutex_lock(&root->fs_info->fs_mutex);
2722                 btrfs_defrag_root(root, 0);
2723                 btrfs_defrag_root(root->fs_info->extent_root, 0);
2724                 mutex_unlock(&root->fs_info->fs_mutex);
2725                 break;
2726         case S_IFREG:
2727                 btrfs_defrag_file(file);
2728                 break;
2729         }
2730
2731         return 0;
2732 }
2733
2734 long btrfs_ioctl(struct file *file, unsigned int
2735                 cmd, unsigned long arg)
2736 {
2737         struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
2738
2739         switch (cmd) {
2740         case BTRFS_IOC_SNAP_CREATE:
2741                 return btrfs_ioctl_snap_create(root, (void __user *)arg);
2742         case BTRFS_IOC_DEFRAG:
2743                 return btrfs_ioctl_defrag(file);
2744         case BTRFS_IOC_RESIZE:
2745                 return btrfs_ioctl_resize(root, (void __user *)arg);
2746         }
2747
2748         return -ENOTTY;
2749 }
2750
2751 /*
2752  * Called inside transaction, so use GFP_NOFS
2753  */
2754 struct inode *btrfs_alloc_inode(struct super_block *sb)
2755 {
2756         struct btrfs_inode *ei;
2757
2758         ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS);
2759         if (!ei)
2760                 return NULL;
2761         ei->last_trans = 0;
2762         ei->ordered_trans = 0;
2763         return &ei->vfs_inode;
2764 }
2765
2766 void btrfs_destroy_inode(struct inode *inode)
2767 {
2768         WARN_ON(!list_empty(&inode->i_dentry));
2769         WARN_ON(inode->i_data.nrpages);
2770
2771         btrfs_drop_extent_cache(inode, 0, (u64)-1);
2772         kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
2773 }
2774
2775 #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
2776 static void init_once(struct kmem_cache * cachep, void *foo)
2777 #else
2778 static void init_once(void * foo, struct kmem_cache * cachep,
2779                       unsigned long flags)
2780 #endif
2781 {
2782         struct btrfs_inode *ei = (struct btrfs_inode *) foo;
2783
2784         inode_init_once(&ei->vfs_inode);
2785 }
2786
2787 void btrfs_destroy_cachep(void)
2788 {
2789         if (btrfs_inode_cachep)
2790                 kmem_cache_destroy(btrfs_inode_cachep);
2791         if (btrfs_trans_handle_cachep)
2792                 kmem_cache_destroy(btrfs_trans_handle_cachep);
2793         if (btrfs_transaction_cachep)
2794                 kmem_cache_destroy(btrfs_transaction_cachep);
2795         if (btrfs_bit_radix_cachep)
2796                 kmem_cache_destroy(btrfs_bit_radix_cachep);
2797         if (btrfs_path_cachep)
2798                 kmem_cache_destroy(btrfs_path_cachep);
2799 }
2800
2801 struct kmem_cache *btrfs_cache_create(const char *name, size_t size,
2802                                        unsigned long extra_flags,
2803 #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
2804                                        void (*ctor)(struct kmem_cache *, void *)
2805 #else
2806                                        void (*ctor)(void *, struct kmem_cache *,
2807                                                     unsigned long)
2808 #endif
2809                                      )
2810 {
2811         return kmem_cache_create(name, size, 0, (SLAB_RECLAIM_ACCOUNT |
2812                                  SLAB_MEM_SPREAD | extra_flags), ctor
2813 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)
2814                                  ,NULL
2815 #endif
2816                                 );
2817 }
2818
2819 int btrfs_init_cachep(void)
2820 {
2821         btrfs_inode_cachep = btrfs_cache_create("btrfs_inode_cache",
2822                                           sizeof(struct btrfs_inode),
2823                                           0, init_once);
2824         if (!btrfs_inode_cachep)
2825                 goto fail;
2826         btrfs_trans_handle_cachep =
2827                         btrfs_cache_create("btrfs_trans_handle_cache",
2828                                            sizeof(struct btrfs_trans_handle),
2829                                            0, NULL);
2830         if (!btrfs_trans_handle_cachep)
2831                 goto fail;
2832         btrfs_transaction_cachep = btrfs_cache_create("btrfs_transaction_cache",
2833                                              sizeof(struct btrfs_transaction),
2834                                              0, NULL);
2835         if (!btrfs_transaction_cachep)
2836                 goto fail;
2837         btrfs_path_cachep = btrfs_cache_create("btrfs_path_cache",
2838                                          sizeof(struct btrfs_path),
2839                                          0, NULL);
2840         if (!btrfs_path_cachep)
2841                 goto fail;
2842         btrfs_bit_radix_cachep = btrfs_cache_create("btrfs_radix", 256,
2843                                               SLAB_DESTROY_BY_RCU, NULL);
2844         if (!btrfs_bit_radix_cachep)
2845                 goto fail;
2846         return 0;
2847 fail:
2848         btrfs_destroy_cachep();
2849         return -ENOMEM;
2850 }
2851
2852 static int btrfs_getattr(struct vfsmount *mnt,
2853                          struct dentry *dentry, struct kstat *stat)
2854 {
2855         struct inode *inode = dentry->d_inode;
2856         generic_fillattr(inode, stat);
2857         stat->blksize = PAGE_CACHE_SIZE;
2858         stat->blocks = inode->i_blocks + (BTRFS_I(inode)->delalloc_bytes >> 9);
2859         return 0;
2860 }
2861
2862 static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry,
2863                            struct inode * new_dir,struct dentry *new_dentry)
2864 {
2865         struct btrfs_trans_handle *trans;
2866         struct btrfs_root *root = BTRFS_I(old_dir)->root;
2867         struct inode *new_inode = new_dentry->d_inode;
2868         struct inode *old_inode = old_dentry->d_inode;
2869         struct timespec ctime = CURRENT_TIME;
2870         struct btrfs_path *path;
2871         int ret;
2872
2873         if (S_ISDIR(old_inode->i_mode) && new_inode &&
2874             new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) {
2875                 return -ENOTEMPTY;
2876         }
2877
2878         mutex_lock(&root->fs_info->fs_mutex);
2879         ret = btrfs_check_free_space(root, 1, 0);
2880         if (ret)
2881                 goto out_unlock;
2882
2883         trans = btrfs_start_transaction(root, 1);
2884
2885         btrfs_set_trans_block_group(trans, new_dir);
2886         path = btrfs_alloc_path();
2887         if (!path) {
2888                 ret = -ENOMEM;
2889                 goto out_fail;
2890         }
2891
2892         old_dentry->d_inode->i_nlink++;
2893         old_dir->i_ctime = old_dir->i_mtime = ctime;
2894         new_dir->i_ctime = new_dir->i_mtime = ctime;
2895         old_inode->i_ctime = ctime;
2896
2897         ret = btrfs_unlink_trans(trans, root, old_dir, old_dentry);
2898         if (ret)
2899                 goto out_fail;
2900
2901         if (new_inode) {
2902                 new_inode->i_ctime = CURRENT_TIME;
2903                 ret = btrfs_unlink_trans(trans, root, new_dir, new_dentry);
2904                 if (ret)
2905                         goto out_fail;
2906         }
2907         ret = btrfs_add_link(trans, new_dentry, old_inode, 1);
2908         if (ret)
2909                 goto out_fail;
2910
2911 out_fail:
2912         btrfs_free_path(path);
2913         btrfs_end_transaction(trans, root);
2914 out_unlock:
2915         mutex_unlock(&root->fs_info->fs_mutex);
2916         return ret;
2917 }
2918
2919 static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
2920                          const char *symname)
2921 {
2922         struct btrfs_trans_handle *trans;
2923         struct btrfs_root *root = BTRFS_I(dir)->root;
2924         struct btrfs_path *path;
2925         struct btrfs_key key;
2926         struct inode *inode = NULL;
2927         int err;
2928         int drop_inode = 0;
2929         u64 objectid;
2930         int name_len;
2931         int datasize;
2932         unsigned long ptr;
2933         struct btrfs_file_extent_item *ei;
2934         struct extent_buffer *leaf;
2935         unsigned long nr = 0;
2936
2937         name_len = strlen(symname) + 1;
2938         if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root))
2939                 return -ENAMETOOLONG;
2940
2941         mutex_lock(&root->fs_info->fs_mutex);
2942         err = btrfs_check_free_space(root, 1, 0);
2943         if (err)
2944                 goto out_fail;
2945
2946         trans = btrfs_start_transaction(root, 1);
2947         btrfs_set_trans_block_group(trans, dir);
2948
2949         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
2950         if (err) {
2951                 err = -ENOSPC;
2952                 goto out_unlock;
2953         }
2954
2955         inode = btrfs_new_inode(trans, root, dentry->d_name.name,
2956                                 dentry->d_name.len,
2957                                 dentry->d_parent->d_inode->i_ino, objectid,
2958                                 BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO);
2959         err = PTR_ERR(inode);
2960         if (IS_ERR(inode))
2961                 goto out_unlock;
2962
2963         btrfs_set_trans_block_group(trans, inode);
2964         err = btrfs_add_nondir(trans, dentry, inode, 0);
2965         if (err)
2966                 drop_inode = 1;
2967         else {
2968                 inode->i_mapping->a_ops = &btrfs_aops;
2969                 inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
2970                 inode->i_fop = &btrfs_file_operations;
2971                 inode->i_op = &btrfs_file_inode_operations;
2972                 extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS);
2973                 extent_io_tree_init(&BTRFS_I(inode)->io_tree,
2974                                      inode->i_mapping, GFP_NOFS);
2975                 BTRFS_I(inode)->delalloc_bytes = 0;
2976                 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
2977         }
2978         dir->i_sb->s_dirt = 1;
2979         btrfs_update_inode_block_group(trans, inode);
2980         btrfs_update_inode_block_group(trans, dir);
2981         if (drop_inode)
2982                 goto out_unlock;
2983
2984         path = btrfs_alloc_path();
2985         BUG_ON(!path);
2986         key.objectid = inode->i_ino;
2987         key.offset = 0;
2988         btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
2989         datasize = btrfs_file_extent_calc_inline_size(name_len);
2990         err = btrfs_insert_empty_item(trans, root, path, &key,
2991                                       datasize);
2992         if (err) {
2993                 drop_inode = 1;
2994                 goto out_unlock;
2995         }
2996         leaf = path->nodes[0];
2997         ei = btrfs_item_ptr(leaf, path->slots[0],
2998                             struct btrfs_file_extent_item);
2999         btrfs_set_file_extent_generation(leaf, ei, trans->transid);
3000         btrfs_set_file_extent_type(leaf, ei,
3001                                    BTRFS_FILE_EXTENT_INLINE);
3002         ptr = btrfs_file_extent_inline_start(ei);
3003         write_extent_buffer(leaf, symname, ptr, name_len);
3004         btrfs_mark_buffer_dirty(leaf);
3005         btrfs_free_path(path);
3006
3007         inode->i_op = &btrfs_symlink_inode_operations;
3008         inode->i_mapping->a_ops = &btrfs_symlink_aops;
3009         inode->i_mapping->backing_dev_info = &root->fs_info->bdi;
3010         inode->i_size = name_len - 1;
3011         err = btrfs_update_inode(trans, root, inode);
3012         if (err)
3013                 drop_inode = 1;
3014
3015 out_unlock:
3016         nr = trans->blocks_used;
3017         btrfs_end_transaction(trans, root);
3018 out_fail:
3019         mutex_unlock(&root->fs_info->fs_mutex);
3020         if (drop_inode) {
3021                 inode_dec_link_count(inode);
3022                 iput(inode);
3023         }
3024         btrfs_btree_balance_dirty(root, nr);
3025         btrfs_throttle(root);
3026         return err;
3027 }
3028 static int btrfs_permission(struct inode *inode, int mask,
3029                             struct nameidata *nd)
3030 {
3031         if (btrfs_test_flag(inode, READONLY) && (mask & MAY_WRITE))
3032                 return -EACCES;
3033         return generic_permission(inode, mask, NULL);
3034 }
3035
3036 static struct inode_operations btrfs_dir_inode_operations = {
3037         .lookup         = btrfs_lookup,
3038         .create         = btrfs_create,
3039         .unlink         = btrfs_unlink,
3040         .link           = btrfs_link,
3041         .mkdir          = btrfs_mkdir,
3042         .rmdir          = btrfs_rmdir,
3043         .rename         = btrfs_rename,
3044         .symlink        = btrfs_symlink,
3045         .setattr        = btrfs_setattr,
3046         .mknod          = btrfs_mknod,
3047         .setxattr       = generic_setxattr,
3048         .getxattr       = generic_getxattr,
3049         .listxattr      = btrfs_listxattr,
3050         .removexattr    = generic_removexattr,
3051         .permission     = btrfs_permission,
3052 };
3053 static struct inode_operations btrfs_dir_ro_inode_operations = {
3054         .lookup         = btrfs_lookup,
3055         .permission     = btrfs_permission,
3056 };
3057 static struct file_operations btrfs_dir_file_operations = {
3058         .llseek         = generic_file_llseek,
3059         .read           = generic_read_dir,
3060         .readdir        = btrfs_readdir,
3061         .unlocked_ioctl = btrfs_ioctl,
3062 #ifdef CONFIG_COMPAT
3063         .compat_ioctl   = btrfs_ioctl,
3064 #endif
3065 };
3066
3067 static struct extent_io_ops btrfs_extent_io_ops = {
3068         .fill_delalloc = run_delalloc_range,
3069         .submit_bio_hook = btrfs_submit_bio_hook,
3070         .merge_bio_hook = btrfs_merge_bio_hook,
3071         .readpage_io_hook = btrfs_readpage_io_hook,
3072         .readpage_end_io_hook = btrfs_readpage_end_io_hook,
3073         .set_bit_hook = btrfs_set_bit_hook,
3074         .clear_bit_hook = btrfs_clear_bit_hook,
3075 };
3076
3077 static struct address_space_operations btrfs_aops = {
3078         .readpage       = btrfs_readpage,
3079         .writepage      = btrfs_writepage,
3080         .writepages     = btrfs_writepages,
3081         .readpages      = btrfs_readpages,
3082         .sync_page      = block_sync_page,
3083         .bmap           = btrfs_bmap,
3084         .invalidatepage = btrfs_invalidatepage,
3085         .releasepage    = btrfs_releasepage,
3086         .set_page_dirty = __set_page_dirty_nobuffers,
3087 };
3088
3089 static struct address_space_operations btrfs_symlink_aops = {
3090         .readpage       = btrfs_readpage,
3091         .writepage      = btrfs_writepage,
3092         .invalidatepage = btrfs_invalidatepage,
3093         .releasepage    = btrfs_releasepage,
3094 };
3095
3096 static struct inode_operations btrfs_file_inode_operations = {
3097         .truncate       = btrfs_truncate,
3098         .getattr        = btrfs_getattr,
3099         .setattr        = btrfs_setattr,
3100         .setxattr       = generic_setxattr,
3101         .getxattr       = generic_getxattr,
3102         .listxattr      = btrfs_listxattr,
3103         .removexattr    = generic_removexattr,
3104         .permission     = btrfs_permission,
3105 };
3106 static struct inode_operations btrfs_special_inode_operations = {
3107         .getattr        = btrfs_getattr,
3108         .setattr        = btrfs_setattr,
3109         .permission     = btrfs_permission,
3110 };
3111 static struct inode_operations btrfs_symlink_inode_operations = {
3112         .readlink       = generic_readlink,
3113         .follow_link    = page_follow_link_light,
3114         .put_link       = page_put_link,
3115         .permission     = btrfs_permission,
3116 };