]> git.karo-electronics.de Git - karo-tx-linux.git/blob - fs/btrfs/inode.c
Btrfs: Fix typo in extent_io.c
[karo-tx-linux.git] / fs / btrfs / inode.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <linux/buffer_head.h>
20 #include <linux/fs.h>
21 #include <linux/pagemap.h>
22 #include <linux/highmem.h>
23 #include <linux/time.h>
24 #include <linux/init.h>
25 #include <linux/string.h>
26 #include <linux/smp_lock.h>
27 #include <linux/backing-dev.h>
28 #include <linux/mpage.h>
29 #include <linux/swap.h>
30 #include <linux/writeback.h>
31 #include <linux/statfs.h>
32 #include <linux/compat.h>
33 #include <linux/bit_spinlock.h>
34 #include <linux/version.h>
35 #include <linux/xattr.h>
36 #include "ctree.h"
37 #include "disk-io.h"
38 #include "transaction.h"
39 #include "btrfs_inode.h"
40 #include "ioctl.h"
41 #include "print-tree.h"
42
43 struct btrfs_iget_args {
44         u64 ino;
45         struct btrfs_root *root;
46 };
47
48 static struct inode_operations btrfs_dir_inode_operations;
49 static struct inode_operations btrfs_symlink_inode_operations;
50 static struct inode_operations btrfs_dir_ro_inode_operations;
51 static struct inode_operations btrfs_special_inode_operations;
52 static struct inode_operations btrfs_file_inode_operations;
53 static struct address_space_operations btrfs_aops;
54 static struct address_space_operations btrfs_symlink_aops;
55 static struct file_operations btrfs_dir_file_operations;
56 static struct extent_io_ops btrfs_extent_io_ops;
57
58 static struct kmem_cache *btrfs_inode_cachep;
59 struct kmem_cache *btrfs_trans_handle_cachep;
60 struct kmem_cache *btrfs_transaction_cachep;
61 struct kmem_cache *btrfs_bit_radix_cachep;
62 struct kmem_cache *btrfs_path_cachep;
63
64 #define S_SHIFT 12
65 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
66         [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
67         [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
68         [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
69         [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
70         [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
71         [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
72         [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
73 };
74
75 int btrfs_check_free_space(struct btrfs_root *root, u64 num_required,
76                            int for_del)
77 {
78         u64 total = btrfs_super_total_bytes(&root->fs_info->super_copy);
79         u64 used = btrfs_super_bytes_used(&root->fs_info->super_copy);
80         u64 thresh;
81         int ret = 0;
82
83         if (for_del)
84                 thresh = total * 90;
85         else
86                 thresh = total * 85;
87
88         do_div(thresh, 100);
89
90         spin_lock(&root->fs_info->delalloc_lock);
91         if (used + root->fs_info->delalloc_bytes + num_required > thresh)
92                 ret = -ENOSPC;
93         spin_unlock(&root->fs_info->delalloc_lock);
94         return ret;
95 }
96
97 static int cow_file_range(struct inode *inode, u64 start, u64 end)
98 {
99         struct btrfs_root *root = BTRFS_I(inode)->root;
100         struct btrfs_trans_handle *trans;
101         u64 alloc_hint = 0;
102         u64 num_bytes;
103         u64 cur_alloc_size;
104         u64 blocksize = root->sectorsize;
105         u64 orig_start = start;
106         u64 orig_num_bytes;
107         struct btrfs_key ins;
108         int ret;
109
110         trans = btrfs_start_transaction(root, 1);
111         BUG_ON(!trans);
112         btrfs_set_trans_block_group(trans, inode);
113
114         num_bytes = (end - start + blocksize) & ~(blocksize - 1);
115         num_bytes = max(blocksize,  num_bytes);
116         ret = btrfs_drop_extents(trans, root, inode,
117                                  start, start + num_bytes, start, &alloc_hint);
118         orig_num_bytes = num_bytes;
119
120         if (alloc_hint == EXTENT_MAP_INLINE)
121                 goto out;
122
123         while(num_bytes > 0) {
124                 cur_alloc_size = min(num_bytes, root->fs_info->max_extent);
125                 ret = btrfs_alloc_extent(trans, root, cur_alloc_size,
126                                          root->root_key.objectid,
127                                          trans->transid,
128                                          inode->i_ino, start, 0,
129                                          alloc_hint, (u64)-1, &ins, 1);
130                 if (ret) {
131                         WARN_ON(1);
132                         goto out;
133                 }
134                 ret = btrfs_insert_file_extent(trans, root, inode->i_ino,
135                                                start, ins.objectid, ins.offset,
136                                                ins.offset);
137                 btrfs_check_file(root, inode);
138                 num_bytes -= cur_alloc_size;
139                 alloc_hint = ins.objectid + ins.offset;
140                 start += cur_alloc_size;
141         }
142         btrfs_drop_extent_cache(inode, orig_start,
143                                 orig_start + orig_num_bytes - 1);
144         btrfs_add_ordered_inode(inode);
145 out:
146         btrfs_end_transaction(trans, root);
147         return ret;
148 }
149
150 static int run_delalloc_nocow(struct inode *inode, u64 start, u64 end)
151 {
152         u64 extent_start;
153         u64 extent_end;
154         u64 bytenr;
155         u64 cow_end;
156         u64 loops = 0;
157         u64 total_fs_bytes;
158         struct btrfs_root *root = BTRFS_I(inode)->root;
159         struct extent_buffer *leaf;
160         int found_type;
161         struct btrfs_path *path;
162         struct btrfs_file_extent_item *item;
163         int ret;
164         int err;
165         struct btrfs_key found_key;
166
167         total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
168         path = btrfs_alloc_path();
169         BUG_ON(!path);
170 again:
171         ret = btrfs_lookup_file_extent(NULL, root, path,
172                                        inode->i_ino, start, 0);
173         if (ret < 0) {
174                 btrfs_free_path(path);
175                 return ret;
176         }
177
178         cow_end = end;
179         if (ret != 0) {
180                 if (path->slots[0] == 0)
181                         goto not_found;
182                 path->slots[0]--;
183         }
184
185         leaf = path->nodes[0];
186         item = btrfs_item_ptr(leaf, path->slots[0],
187                               struct btrfs_file_extent_item);
188
189         /* are we inside the extent that was found? */
190         btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
191         found_type = btrfs_key_type(&found_key);
192         if (found_key.objectid != inode->i_ino ||
193             found_type != BTRFS_EXTENT_DATA_KEY) {
194                 goto not_found;
195         }
196
197         found_type = btrfs_file_extent_type(leaf, item);
198         extent_start = found_key.offset;
199         if (found_type == BTRFS_FILE_EXTENT_REG) {
200                 u64 extent_num_bytes;
201
202                 extent_num_bytes = btrfs_file_extent_num_bytes(leaf, item);
203                 extent_end = extent_start + extent_num_bytes;
204                 err = 0;
205
206                 if (loops && start != extent_start)
207                         goto not_found;
208
209                 if (start < extent_start || start >= extent_end)
210                         goto not_found;
211
212                 cow_end = min(end, extent_end - 1);
213                 bytenr = btrfs_file_extent_disk_bytenr(leaf, item);
214                 if (bytenr == 0)
215                         goto not_found;
216
217                 /*
218                  * we may be called by the resizer, make sure we're inside
219                  * the limits of the FS
220                  */
221                 if (bytenr + extent_num_bytes > total_fs_bytes)
222                         goto not_found;
223
224                 if (btrfs_count_snapshots_in_path(root, path, bytenr) != 1) {
225                         goto not_found;
226                 }
227
228                 start = extent_end;
229         } else {
230                 goto not_found;
231         }
232 loop:
233         if (start > end) {
234                 btrfs_free_path(path);
235                 return 0;
236         }
237         btrfs_release_path(root, path);
238         loops++;
239         goto again;
240
241 not_found:
242         cow_file_range(inode, start, cow_end);
243         start = cow_end + 1;
244         goto loop;
245 }
246
247 static int run_delalloc_range(struct inode *inode, u64 start, u64 end)
248 {
249         struct btrfs_root *root = BTRFS_I(inode)->root;
250         int ret;
251         mutex_lock(&root->fs_info->fs_mutex);
252         if (btrfs_test_opt(root, NODATACOW) ||
253             btrfs_test_flag(inode, NODATACOW))
254                 ret = run_delalloc_nocow(inode, start, end);
255         else
256                 ret = cow_file_range(inode, start, end);
257
258         mutex_unlock(&root->fs_info->fs_mutex);
259         return ret;
260 }
261
262 int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end,
263                        unsigned long old, unsigned long bits)
264 {
265         if (!(old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) {
266                 struct btrfs_root *root = BTRFS_I(inode)->root;
267                 spin_lock(&root->fs_info->delalloc_lock);
268                 root->fs_info->delalloc_bytes += end - start + 1;
269                 spin_unlock(&root->fs_info->delalloc_lock);
270         }
271         return 0;
272 }
273
274 int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end,
275                          unsigned long old, unsigned long bits)
276 {
277         if ((old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) {
278                 struct btrfs_root *root = BTRFS_I(inode)->root;
279                 spin_lock(&root->fs_info->delalloc_lock);
280                 if (end - start + 1 > root->fs_info->delalloc_bytes) {
281                         printk("warning: delalloc account %Lu %Lu\n",
282                                end - start + 1, root->fs_info->delalloc_bytes);
283                         root->fs_info->delalloc_bytes = 0;
284                 } else {
285                         root->fs_info->delalloc_bytes -= end - start + 1;
286                 }
287                 spin_unlock(&root->fs_info->delalloc_lock);
288         }
289         return 0;
290 }
291
292 int btrfs_writepage_io_hook(struct page *page, u64 start, u64 end)
293 {
294         struct inode *inode = page->mapping->host;
295         struct btrfs_root *root = BTRFS_I(inode)->root;
296         struct btrfs_trans_handle *trans;
297         char *kaddr;
298         int ret = 0;
299         u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
300         size_t offset = start - page_start;
301         if (btrfs_test_opt(root, NODATASUM) ||
302             btrfs_test_flag(inode, NODATASUM))
303                 return 0;
304         mutex_lock(&root->fs_info->fs_mutex);
305         trans = btrfs_start_transaction(root, 1);
306         btrfs_set_trans_block_group(trans, inode);
307         kaddr = kmap(page);
308         btrfs_csum_file_block(trans, root, inode, inode->i_ino,
309                               start, kaddr + offset, end - start + 1);
310         kunmap(page);
311         ret = btrfs_end_transaction(trans, root);
312         BUG_ON(ret);
313         mutex_unlock(&root->fs_info->fs_mutex);
314         return ret;
315 }
316
317 int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end)
318 {
319         int ret = 0;
320         struct inode *inode = page->mapping->host;
321         struct btrfs_root *root = BTRFS_I(inode)->root;
322         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
323         struct btrfs_csum_item *item;
324         struct btrfs_path *path = NULL;
325         u32 csum;
326         if (btrfs_test_opt(root, NODATASUM) ||
327             btrfs_test_flag(inode, NODATASUM))
328                 return 0;
329         mutex_lock(&root->fs_info->fs_mutex);
330         path = btrfs_alloc_path();
331         item = btrfs_lookup_csum(NULL, root, path, inode->i_ino, start, 0);
332         if (IS_ERR(item)) {
333                 ret = PTR_ERR(item);
334                 /* a csum that isn't present is a preallocated region. */
335                 if (ret == -ENOENT || ret == -EFBIG)
336                         ret = 0;
337                 csum = 0;
338                 printk("no csum found for inode %lu start %Lu\n", inode->i_ino, start);
339                 goto out;
340         }
341         read_extent_buffer(path->nodes[0], &csum, (unsigned long)item,
342                            BTRFS_CRC32_SIZE);
343         set_state_private(io_tree, start, csum);
344 out:
345         if (path)
346                 btrfs_free_path(path);
347         mutex_unlock(&root->fs_info->fs_mutex);
348         return ret;
349 }
350
351 int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
352                                struct extent_state *state)
353 {
354         size_t offset = start - ((u64)page->index << PAGE_CACHE_SHIFT);
355         struct inode *inode = page->mapping->host;
356         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
357         char *kaddr;
358         u64 private = ~(u32)0;
359         int ret;
360         struct btrfs_root *root = BTRFS_I(inode)->root;
361         u32 csum = ~(u32)0;
362         unsigned long flags;
363
364         if (btrfs_test_opt(root, NODATASUM) ||
365             btrfs_test_flag(inode, NODATASUM))
366                 return 0;
367         if (state && state->start == start) {
368                 private = state->private;
369                 ret = 0;
370         } else {
371                 ret = get_state_private(io_tree, start, &private);
372         }
373         local_irq_save(flags);
374         kaddr = kmap_atomic(page, KM_IRQ0);
375         if (ret) {
376                 goto zeroit;
377         }
378         csum = btrfs_csum_data(root, kaddr + offset, csum,  end - start + 1);
379         btrfs_csum_final(csum, (char *)&csum);
380         if (csum != private) {
381                 goto zeroit;
382         }
383         kunmap_atomic(kaddr, KM_IRQ0);
384         local_irq_restore(flags);
385         return 0;
386
387 zeroit:
388         printk("btrfs csum failed ino %lu off %llu csum %u private %Lu\n",
389                page->mapping->host->i_ino, (unsigned long long)start, csum,
390                private);
391         memset(kaddr + offset, 1, end - start + 1);
392         flush_dcache_page(page);
393         kunmap_atomic(kaddr, KM_IRQ0);
394         local_irq_restore(flags);
395         return 0;
396 }
397
398 void btrfs_read_locked_inode(struct inode *inode)
399 {
400         struct btrfs_path *path;
401         struct extent_buffer *leaf;
402         struct btrfs_inode_item *inode_item;
403         struct btrfs_inode_timespec *tspec;
404         struct btrfs_root *root = BTRFS_I(inode)->root;
405         struct btrfs_key location;
406         u64 alloc_group_block;
407         u32 rdev;
408         int ret;
409
410         path = btrfs_alloc_path();
411         BUG_ON(!path);
412         mutex_lock(&root->fs_info->fs_mutex);
413         memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
414
415         ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
416         if (ret)
417                 goto make_bad;
418
419         leaf = path->nodes[0];
420         inode_item = btrfs_item_ptr(leaf, path->slots[0],
421                                     struct btrfs_inode_item);
422
423         inode->i_mode = btrfs_inode_mode(leaf, inode_item);
424         inode->i_nlink = btrfs_inode_nlink(leaf, inode_item);
425         inode->i_uid = btrfs_inode_uid(leaf, inode_item);
426         inode->i_gid = btrfs_inode_gid(leaf, inode_item);
427         inode->i_size = btrfs_inode_size(leaf, inode_item);
428
429         tspec = btrfs_inode_atime(inode_item);
430         inode->i_atime.tv_sec = btrfs_timespec_sec(leaf, tspec);
431         inode->i_atime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
432
433         tspec = btrfs_inode_mtime(inode_item);
434         inode->i_mtime.tv_sec = btrfs_timespec_sec(leaf, tspec);
435         inode->i_mtime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
436
437         tspec = btrfs_inode_ctime(inode_item);
438         inode->i_ctime.tv_sec = btrfs_timespec_sec(leaf, tspec);
439         inode->i_ctime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
440
441         inode->i_blocks = btrfs_inode_nblocks(leaf, inode_item);
442         inode->i_generation = btrfs_inode_generation(leaf, inode_item);
443         inode->i_rdev = 0;
444         rdev = btrfs_inode_rdev(leaf, inode_item);
445
446         alloc_group_block = btrfs_inode_block_group(leaf, inode_item);
447         BTRFS_I(inode)->block_group = btrfs_lookup_block_group(root->fs_info,
448                                                        alloc_group_block);
449         BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);
450         if (!BTRFS_I(inode)->block_group) {
451                 BTRFS_I(inode)->block_group = btrfs_find_block_group(root,
452                                                          NULL, 0, 0, 0);
453         }
454         btrfs_free_path(path);
455         inode_item = NULL;
456
457         mutex_unlock(&root->fs_info->fs_mutex);
458
459         switch (inode->i_mode & S_IFMT) {
460         case S_IFREG:
461                 inode->i_mapping->a_ops = &btrfs_aops;
462                 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
463                 inode->i_fop = &btrfs_file_operations;
464                 inode->i_op = &btrfs_file_inode_operations;
465                 break;
466         case S_IFDIR:
467                 inode->i_fop = &btrfs_dir_file_operations;
468                 if (root == root->fs_info->tree_root)
469                         inode->i_op = &btrfs_dir_ro_inode_operations;
470                 else
471                         inode->i_op = &btrfs_dir_inode_operations;
472                 break;
473         case S_IFLNK:
474                 inode->i_op = &btrfs_symlink_inode_operations;
475                 inode->i_mapping->a_ops = &btrfs_symlink_aops;
476                 break;
477         default:
478                 init_special_inode(inode, inode->i_mode, rdev);
479                 break;
480         }
481         return;
482
483 make_bad:
484         btrfs_release_path(root, path);
485         btrfs_free_path(path);
486         mutex_unlock(&root->fs_info->fs_mutex);
487         make_bad_inode(inode);
488 }
489
490 static void fill_inode_item(struct extent_buffer *leaf,
491                             struct btrfs_inode_item *item,
492                             struct inode *inode)
493 {
494         btrfs_set_inode_uid(leaf, item, inode->i_uid);
495         btrfs_set_inode_gid(leaf, item, inode->i_gid);
496         btrfs_set_inode_size(leaf, item, inode->i_size);
497         btrfs_set_inode_mode(leaf, item, inode->i_mode);
498         btrfs_set_inode_nlink(leaf, item, inode->i_nlink);
499
500         btrfs_set_timespec_sec(leaf, btrfs_inode_atime(item),
501                                inode->i_atime.tv_sec);
502         btrfs_set_timespec_nsec(leaf, btrfs_inode_atime(item),
503                                 inode->i_atime.tv_nsec);
504
505         btrfs_set_timespec_sec(leaf, btrfs_inode_mtime(item),
506                                inode->i_mtime.tv_sec);
507         btrfs_set_timespec_nsec(leaf, btrfs_inode_mtime(item),
508                                 inode->i_mtime.tv_nsec);
509
510         btrfs_set_timespec_sec(leaf, btrfs_inode_ctime(item),
511                                inode->i_ctime.tv_sec);
512         btrfs_set_timespec_nsec(leaf, btrfs_inode_ctime(item),
513                                 inode->i_ctime.tv_nsec);
514
515         btrfs_set_inode_nblocks(leaf, item, inode->i_blocks);
516         btrfs_set_inode_generation(leaf, item, inode->i_generation);
517         btrfs_set_inode_rdev(leaf, item, inode->i_rdev);
518         btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags);
519         btrfs_set_inode_block_group(leaf, item,
520                                     BTRFS_I(inode)->block_group->key.objectid);
521 }
522
523 int btrfs_update_inode(struct btrfs_trans_handle *trans,
524                               struct btrfs_root *root,
525                               struct inode *inode)
526 {
527         struct btrfs_inode_item *inode_item;
528         struct btrfs_path *path;
529         struct extent_buffer *leaf;
530         int ret;
531
532         path = btrfs_alloc_path();
533         BUG_ON(!path);
534         ret = btrfs_lookup_inode(trans, root, path,
535                                  &BTRFS_I(inode)->location, 1);
536         if (ret) {
537                 if (ret > 0)
538                         ret = -ENOENT;
539                 goto failed;
540         }
541
542         leaf = path->nodes[0];
543         inode_item = btrfs_item_ptr(leaf, path->slots[0],
544                                   struct btrfs_inode_item);
545
546         fill_inode_item(leaf, inode_item, inode);
547         btrfs_mark_buffer_dirty(leaf);
548         btrfs_set_inode_last_trans(trans, inode);
549         ret = 0;
550 failed:
551         btrfs_release_path(root, path);
552         btrfs_free_path(path);
553         return ret;
554 }
555
556
557 static int btrfs_unlink_trans(struct btrfs_trans_handle *trans,
558                               struct btrfs_root *root,
559                               struct inode *dir,
560                               struct dentry *dentry)
561 {
562         struct btrfs_path *path;
563         const char *name = dentry->d_name.name;
564         int name_len = dentry->d_name.len;
565         int ret = 0;
566         struct extent_buffer *leaf;
567         struct btrfs_dir_item *di;
568         struct btrfs_key key;
569
570         path = btrfs_alloc_path();
571         if (!path) {
572                 ret = -ENOMEM;
573                 goto err;
574         }
575
576         di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino,
577                                     name, name_len, -1);
578         if (IS_ERR(di)) {
579                 ret = PTR_ERR(di);
580                 goto err;
581         }
582         if (!di) {
583                 ret = -ENOENT;
584                 goto err;
585         }
586         leaf = path->nodes[0];
587         btrfs_dir_item_key_to_cpu(leaf, di, &key);
588         ret = btrfs_delete_one_dir_name(trans, root, path, di);
589         if (ret)
590                 goto err;
591         btrfs_release_path(root, path);
592
593         di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino,
594                                          key.objectid, name, name_len, -1);
595         if (IS_ERR(di)) {
596                 ret = PTR_ERR(di);
597                 goto err;
598         }
599         if (!di) {
600                 ret = -ENOENT;
601                 goto err;
602         }
603         ret = btrfs_delete_one_dir_name(trans, root, path, di);
604
605         dentry->d_inode->i_ctime = dir->i_ctime;
606         ret = btrfs_del_inode_ref(trans, root, name, name_len,
607                                   dentry->d_inode->i_ino,
608                                   dentry->d_parent->d_inode->i_ino);
609         if (ret) {
610                 printk("failed to delete reference to %.*s, "
611                        "inode %lu parent %lu\n", name_len, name,
612                        dentry->d_inode->i_ino,
613                        dentry->d_parent->d_inode->i_ino);
614         }
615 err:
616         btrfs_free_path(path);
617         if (!ret) {
618                 dir->i_size -= name_len * 2;
619                 dir->i_mtime = dir->i_ctime = CURRENT_TIME;
620                 btrfs_update_inode(trans, root, dir);
621 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
622                 dentry->d_inode->i_nlink--;
623 #else
624                 drop_nlink(dentry->d_inode);
625 #endif
626                 ret = btrfs_update_inode(trans, root, dentry->d_inode);
627                 dir->i_sb->s_dirt = 1;
628         }
629         return ret;
630 }
631
632 static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
633 {
634         struct btrfs_root *root;
635         struct btrfs_trans_handle *trans;
636         struct inode *inode = dentry->d_inode;
637         int ret;
638         unsigned long nr = 0;
639
640         root = BTRFS_I(dir)->root;
641         mutex_lock(&root->fs_info->fs_mutex);
642
643         ret = btrfs_check_free_space(root, 1, 1);
644         if (ret)
645                 goto fail;
646
647         trans = btrfs_start_transaction(root, 1);
648
649         btrfs_set_trans_block_group(trans, dir);
650         ret = btrfs_unlink_trans(trans, root, dir, dentry);
651         nr = trans->blocks_used;
652
653         if (inode->i_nlink == 0) {
654                 int found;
655                 /* if the inode isn't linked anywhere,
656                  * we don't need to worry about
657                  * data=ordered
658                  */
659                 found = btrfs_del_ordered_inode(inode);
660                 if (found == 1) {
661                         atomic_dec(&inode->i_count);
662                 }
663         }
664
665         btrfs_end_transaction(trans, root);
666 fail:
667         mutex_unlock(&root->fs_info->fs_mutex);
668         btrfs_btree_balance_dirty(root, nr);
669         btrfs_throttle(root);
670         return ret;
671 }
672
673 static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
674 {
675         struct inode *inode = dentry->d_inode;
676         int err = 0;
677         int ret;
678         struct btrfs_root *root = BTRFS_I(dir)->root;
679         struct btrfs_trans_handle *trans;
680         unsigned long nr = 0;
681
682         if (inode->i_size > BTRFS_EMPTY_DIR_SIZE)
683                 return -ENOTEMPTY;
684
685         mutex_lock(&root->fs_info->fs_mutex);
686         ret = btrfs_check_free_space(root, 1, 1);
687         if (ret)
688                 goto fail;
689
690         trans = btrfs_start_transaction(root, 1);
691         btrfs_set_trans_block_group(trans, dir);
692
693         /* now the directory is empty */
694         err = btrfs_unlink_trans(trans, root, dir, dentry);
695         if (!err) {
696                 inode->i_size = 0;
697         }
698
699         nr = trans->blocks_used;
700         ret = btrfs_end_transaction(trans, root);
701 fail:
702         mutex_unlock(&root->fs_info->fs_mutex);
703         btrfs_btree_balance_dirty(root, nr);
704         btrfs_throttle(root);
705
706         if (ret && !err)
707                 err = ret;
708         return err;
709 }
710
711 /*
712  * this can truncate away extent items, csum items and directory items.
713  * It starts at a high offset and removes keys until it can't find
714  * any higher than i_size.
715  *
716  * csum items that cross the new i_size are truncated to the new size
717  * as well.
718  */
719 static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans,
720                                    struct btrfs_root *root,
721                                    struct inode *inode,
722                                    u32 min_type)
723 {
724         int ret;
725         struct btrfs_path *path;
726         struct btrfs_key key;
727         struct btrfs_key found_key;
728         u32 found_type;
729         struct extent_buffer *leaf;
730         struct btrfs_file_extent_item *fi;
731         u64 extent_start = 0;
732         u64 extent_num_bytes = 0;
733         u64 item_end = 0;
734         u64 root_gen = 0;
735         u64 root_owner = 0;
736         int found_extent;
737         int del_item;
738         int pending_del_nr = 0;
739         int pending_del_slot = 0;
740         int extent_type = -1;
741
742         btrfs_drop_extent_cache(inode, inode->i_size, (u64)-1);
743         path = btrfs_alloc_path();
744         path->reada = -1;
745         BUG_ON(!path);
746
747         /* FIXME, add redo link to tree so we don't leak on crash */
748         key.objectid = inode->i_ino;
749         key.offset = (u64)-1;
750         key.type = (u8)-1;
751
752         btrfs_init_path(path);
753 search_again:
754         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
755         if (ret < 0) {
756                 goto error;
757         }
758         if (ret > 0) {
759                 BUG_ON(path->slots[0] == 0);
760                 path->slots[0]--;
761         }
762
763         while(1) {
764                 fi = NULL;
765                 leaf = path->nodes[0];
766                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
767                 found_type = btrfs_key_type(&found_key);
768
769                 if (found_key.objectid != inode->i_ino)
770                         break;
771
772                 if (found_type < min_type)
773                         break;
774
775                 item_end = found_key.offset;
776                 if (found_type == BTRFS_EXTENT_DATA_KEY) {
777                         fi = btrfs_item_ptr(leaf, path->slots[0],
778                                             struct btrfs_file_extent_item);
779                         extent_type = btrfs_file_extent_type(leaf, fi);
780                         if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
781                                 item_end +=
782                                     btrfs_file_extent_num_bytes(leaf, fi);
783                         } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
784                                 struct btrfs_item *item = btrfs_item_nr(leaf,
785                                                                 path->slots[0]);
786                                 item_end += btrfs_file_extent_inline_len(leaf,
787                                                                          item);
788                         }
789                         item_end--;
790                 }
791                 if (found_type == BTRFS_CSUM_ITEM_KEY) {
792                         ret = btrfs_csum_truncate(trans, root, path,
793                                                   inode->i_size);
794                         BUG_ON(ret);
795                 }
796                 if (item_end < inode->i_size) {
797                         if (found_type == BTRFS_DIR_ITEM_KEY) {
798                                 found_type = BTRFS_INODE_ITEM_KEY;
799                         } else if (found_type == BTRFS_EXTENT_ITEM_KEY) {
800                                 found_type = BTRFS_CSUM_ITEM_KEY;
801                         } else if (found_type == BTRFS_EXTENT_DATA_KEY) {
802                                 found_type = BTRFS_XATTR_ITEM_KEY;
803                         } else if (found_type == BTRFS_XATTR_ITEM_KEY) {
804                                 found_type = BTRFS_INODE_REF_KEY;
805                         } else if (found_type) {
806                                 found_type--;
807                         } else {
808                                 break;
809                         }
810                         btrfs_set_key_type(&key, found_type);
811                         goto next;
812                 }
813                 if (found_key.offset >= inode->i_size)
814                         del_item = 1;
815                 else
816                         del_item = 0;
817                 found_extent = 0;
818
819                 /* FIXME, shrink the extent if the ref count is only 1 */
820                 if (found_type != BTRFS_EXTENT_DATA_KEY)
821                         goto delete;
822
823                 if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
824                         u64 num_dec;
825                         extent_start = btrfs_file_extent_disk_bytenr(leaf, fi);
826                         if (!del_item) {
827                                 u64 orig_num_bytes =
828                                         btrfs_file_extent_num_bytes(leaf, fi);
829                                 extent_num_bytes = inode->i_size -
830                                         found_key.offset + root->sectorsize - 1;
831                                 extent_num_bytes = extent_num_bytes &
832                                         ~((u64)root->sectorsize - 1);
833                                 btrfs_set_file_extent_num_bytes(leaf, fi,
834                                                          extent_num_bytes);
835                                 num_dec = (orig_num_bytes -
836                                            extent_num_bytes) >> 9;
837                                 if (extent_start != 0) {
838                                         inode->i_blocks -= num_dec;
839                                 }
840                                 btrfs_mark_buffer_dirty(leaf);
841                         } else {
842                                 extent_num_bytes =
843                                         btrfs_file_extent_disk_num_bytes(leaf,
844                                                                          fi);
845                                 /* FIXME blocksize != 4096 */
846                                 num_dec = btrfs_file_extent_num_bytes(leaf,
847                                                                        fi) >> 9;
848                                 if (extent_start != 0) {
849                                         found_extent = 1;
850                                         inode->i_blocks -= num_dec;
851                                 }
852                                 root_gen = btrfs_header_generation(leaf);
853                                 root_owner = btrfs_header_owner(leaf);
854                         }
855                 } else if (extent_type == BTRFS_FILE_EXTENT_INLINE &&
856                            !del_item) {
857                         u32 newsize = inode->i_size - found_key.offset;
858                         newsize = btrfs_file_extent_calc_inline_size(newsize);
859                         ret = btrfs_truncate_item(trans, root, path,
860                                                   newsize, 1);
861                         BUG_ON(ret);
862                 }
863 delete:
864                 if (del_item) {
865                         if (!pending_del_nr) {
866                                 /* no pending yet, add ourselves */
867                                 pending_del_slot = path->slots[0];
868                                 pending_del_nr = 1;
869                         } else if (pending_del_nr &&
870                                    path->slots[0] + 1 == pending_del_slot) {
871                                 /* hop on the pending chunk */
872                                 pending_del_nr++;
873                                 pending_del_slot = path->slots[0];
874                         } else {
875                                 printk("bad pending slot %d pending_del_nr %d pending_del_slot %d\n", path->slots[0], pending_del_nr, pending_del_slot);
876                         }
877                 } else {
878                         break;
879                 }
880                 if (found_extent) {
881                         ret = btrfs_free_extent(trans, root, extent_start,
882                                                 extent_num_bytes,
883                                                 root_owner,
884                                                 root_gen, inode->i_ino,
885                                                 found_key.offset, 0);
886                         BUG_ON(ret);
887                 }
888 next:
889                 if (path->slots[0] == 0) {
890                         if (pending_del_nr)
891                                 goto del_pending;
892                         btrfs_release_path(root, path);
893                         goto search_again;
894                 }
895
896                 path->slots[0]--;
897                 if (pending_del_nr &&
898                     path->slots[0] + 1 != pending_del_slot) {
899                         struct btrfs_key debug;
900 del_pending:
901                         btrfs_item_key_to_cpu(path->nodes[0], &debug,
902                                               pending_del_slot);
903                         ret = btrfs_del_items(trans, root, path,
904                                               pending_del_slot,
905                                               pending_del_nr);
906                         BUG_ON(ret);
907                         pending_del_nr = 0;
908                         btrfs_release_path(root, path);
909                         goto search_again;
910                 }
911         }
912         ret = 0;
913 error:
914         if (pending_del_nr) {
915                 ret = btrfs_del_items(trans, root, path, pending_del_slot,
916                                       pending_del_nr);
917         }
918         btrfs_release_path(root, path);
919         btrfs_free_path(path);
920         inode->i_sb->s_dirt = 1;
921         return ret;
922 }
923
924 static int btrfs_cow_one_page(struct inode *inode, struct page *page,
925                               size_t zero_start)
926 {
927         char *kaddr;
928         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
929         u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
930         u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
931         int ret = 0;
932
933         WARN_ON(!PageLocked(page));
934         set_page_extent_mapped(page);
935
936         lock_extent(io_tree, page_start, page_end, GFP_NOFS);
937         set_extent_delalloc(&BTRFS_I(inode)->io_tree, page_start,
938                             page_end, GFP_NOFS);
939
940         if (zero_start != PAGE_CACHE_SIZE) {
941                 kaddr = kmap(page);
942                 memset(kaddr + zero_start, 0, PAGE_CACHE_SIZE - zero_start);
943                 flush_dcache_page(page);
944                 kunmap(page);
945         }
946         set_page_dirty(page);
947         unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
948
949         return ret;
950 }
951
952 /*
953  * taken from block_truncate_page, but does cow as it zeros out
954  * any bytes left in the last page in the file.
955  */
956 static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
957 {
958         struct inode *inode = mapping->host;
959         struct btrfs_root *root = BTRFS_I(inode)->root;
960         u32 blocksize = root->sectorsize;
961         pgoff_t index = from >> PAGE_CACHE_SHIFT;
962         unsigned offset = from & (PAGE_CACHE_SIZE-1);
963         struct page *page;
964         int ret = 0;
965         u64 page_start;
966
967         if ((offset & (blocksize - 1)) == 0)
968                 goto out;
969
970         ret = -ENOMEM;
971         page = grab_cache_page(mapping, index);
972         if (!page)
973                 goto out;
974         if (!PageUptodate(page)) {
975                 ret = btrfs_readpage(NULL, page);
976                 lock_page(page);
977                 if (!PageUptodate(page)) {
978                         ret = -EIO;
979                         goto out;
980                 }
981         }
982         page_start = (u64)page->index << PAGE_CACHE_SHIFT;
983
984         ret = btrfs_cow_one_page(inode, page, offset);
985
986         unlock_page(page);
987         page_cache_release(page);
988 out:
989         return ret;
990 }
991
992 static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
993 {
994         struct inode *inode = dentry->d_inode;
995         int err;
996
997         err = inode_change_ok(inode, attr);
998         if (err)
999                 return err;
1000
1001         if (S_ISREG(inode->i_mode) &&
1002             attr->ia_valid & ATTR_SIZE && attr->ia_size > inode->i_size) {
1003                 struct btrfs_trans_handle *trans;
1004                 struct btrfs_root *root = BTRFS_I(inode)->root;
1005                 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
1006
1007                 u64 mask = root->sectorsize - 1;
1008                 u64 hole_start = (inode->i_size + mask) & ~mask;
1009                 u64 block_end = (attr->ia_size + mask) & ~mask;
1010                 u64 hole_size;
1011                 u64 alloc_hint = 0;
1012
1013                 if (attr->ia_size <= hole_start)
1014                         goto out;
1015
1016                 mutex_lock(&root->fs_info->fs_mutex);
1017                 err = btrfs_check_free_space(root, 1, 0);
1018                 mutex_unlock(&root->fs_info->fs_mutex);
1019                 if (err)
1020                         goto fail;
1021
1022                 btrfs_truncate_page(inode->i_mapping, inode->i_size);
1023
1024                 lock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS);
1025                 hole_size = block_end - hole_start;
1026
1027                 mutex_lock(&root->fs_info->fs_mutex);
1028                 trans = btrfs_start_transaction(root, 1);
1029                 btrfs_set_trans_block_group(trans, inode);
1030                 err = btrfs_drop_extents(trans, root, inode,
1031                                          hole_start, block_end, hole_start,
1032                                          &alloc_hint);
1033
1034                 if (alloc_hint != EXTENT_MAP_INLINE) {
1035                         err = btrfs_insert_file_extent(trans, root,
1036                                                        inode->i_ino,
1037                                                        hole_start, 0, 0,
1038                                                        hole_size);
1039                         btrfs_drop_extent_cache(inode, hole_start,
1040                                                 hole_size - 1);
1041                         btrfs_check_file(root, inode);
1042                 }
1043                 btrfs_end_transaction(trans, root);
1044                 mutex_unlock(&root->fs_info->fs_mutex);
1045                 unlock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS);
1046                 if (err)
1047                         return err;
1048         }
1049 out:
1050         err = inode_setattr(inode, attr);
1051 fail:
1052         return err;
1053 }
1054
1055 void btrfs_put_inode(struct inode *inode)
1056 {
1057         int ret;
1058
1059         if (!BTRFS_I(inode)->ordered_trans) {
1060                 return;
1061         }
1062
1063         if (mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY) ||
1064             mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK))
1065                 return;
1066
1067         ret = btrfs_del_ordered_inode(inode);
1068         if (ret == 1) {
1069                 atomic_dec(&inode->i_count);
1070         }
1071 }
1072
1073 void btrfs_delete_inode(struct inode *inode)
1074 {
1075         struct btrfs_trans_handle *trans;
1076         struct btrfs_root *root = BTRFS_I(inode)->root;
1077         unsigned long nr;
1078         int ret;
1079
1080         truncate_inode_pages(&inode->i_data, 0);
1081         if (is_bad_inode(inode)) {
1082                 goto no_delete;
1083         }
1084
1085         inode->i_size = 0;
1086         mutex_lock(&root->fs_info->fs_mutex);
1087         trans = btrfs_start_transaction(root, 1);
1088
1089         btrfs_set_trans_block_group(trans, inode);
1090         ret = btrfs_truncate_in_trans(trans, root, inode, 0);
1091         if (ret)
1092                 goto no_delete_lock;
1093
1094         nr = trans->blocks_used;
1095         clear_inode(inode);
1096
1097         btrfs_end_transaction(trans, root);
1098         mutex_unlock(&root->fs_info->fs_mutex);
1099         btrfs_btree_balance_dirty(root, nr);
1100         btrfs_throttle(root);
1101         return;
1102
1103 no_delete_lock:
1104         nr = trans->blocks_used;
1105         btrfs_end_transaction(trans, root);
1106         mutex_unlock(&root->fs_info->fs_mutex);
1107         btrfs_btree_balance_dirty(root, nr);
1108         btrfs_throttle(root);
1109 no_delete:
1110         clear_inode(inode);
1111 }
1112
1113 /*
1114  * this returns the key found in the dir entry in the location pointer.
1115  * If no dir entries were found, location->objectid is 0.
1116  */
1117 static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
1118                                struct btrfs_key *location)
1119 {
1120         const char *name = dentry->d_name.name;
1121         int namelen = dentry->d_name.len;
1122         struct btrfs_dir_item *di;
1123         struct btrfs_path *path;
1124         struct btrfs_root *root = BTRFS_I(dir)->root;
1125         int ret = 0;
1126
1127         if (namelen == 1 && strcmp(name, ".") == 0) {
1128                 location->objectid = dir->i_ino;
1129                 location->type = BTRFS_INODE_ITEM_KEY;
1130                 location->offset = 0;
1131                 return 0;
1132         }
1133         path = btrfs_alloc_path();
1134         BUG_ON(!path);
1135
1136         if (namelen == 2 && strcmp(name, "..") == 0) {
1137                 struct btrfs_key key;
1138                 struct extent_buffer *leaf;
1139                 u32 nritems;
1140                 int slot;
1141
1142                 key.objectid = dir->i_ino;
1143                 btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY);
1144                 key.offset = 0;
1145                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1146                 BUG_ON(ret == 0);
1147                 ret = 0;
1148
1149                 leaf = path->nodes[0];
1150                 slot = path->slots[0];
1151                 nritems = btrfs_header_nritems(leaf);
1152                 if (slot >= nritems)
1153                         goto out_err;
1154
1155                 btrfs_item_key_to_cpu(leaf, &key, slot);
1156                 if (key.objectid != dir->i_ino ||
1157                     key.type != BTRFS_INODE_REF_KEY) {
1158                         goto out_err;
1159                 }
1160                 location->objectid = key.offset;
1161                 location->type = BTRFS_INODE_ITEM_KEY;
1162                 location->offset = 0;
1163                 goto out;
1164         }
1165
1166         di = btrfs_lookup_dir_item(NULL, root, path, dir->i_ino, name,
1167                                     namelen, 0);
1168         if (IS_ERR(di))
1169                 ret = PTR_ERR(di);
1170         if (!di || IS_ERR(di)) {
1171                 goto out_err;
1172         }
1173         btrfs_dir_item_key_to_cpu(path->nodes[0], di, location);
1174 out:
1175         btrfs_free_path(path);
1176         return ret;
1177 out_err:
1178         location->objectid = 0;
1179         goto out;
1180 }
1181
1182 /*
1183  * when we hit a tree root in a directory, the btrfs part of the inode
1184  * needs to be changed to reflect the root directory of the tree root.  This
1185  * is kind of like crossing a mount point.
1186  */
1187 static int fixup_tree_root_location(struct btrfs_root *root,
1188                              struct btrfs_key *location,
1189                              struct btrfs_root **sub_root,
1190                              struct dentry *dentry)
1191 {
1192         struct btrfs_path *path;
1193         struct btrfs_root_item *ri;
1194
1195         if (btrfs_key_type(location) != BTRFS_ROOT_ITEM_KEY)
1196                 return 0;
1197         if (location->objectid == BTRFS_ROOT_TREE_OBJECTID)
1198                 return 0;
1199
1200         path = btrfs_alloc_path();
1201         BUG_ON(!path);
1202         mutex_lock(&root->fs_info->fs_mutex);
1203
1204         *sub_root = btrfs_read_fs_root(root->fs_info, location,
1205                                         dentry->d_name.name,
1206                                         dentry->d_name.len);
1207         if (IS_ERR(*sub_root))
1208                 return PTR_ERR(*sub_root);
1209
1210         ri = &(*sub_root)->root_item;
1211         location->objectid = btrfs_root_dirid(ri);
1212         btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
1213         location->offset = 0;
1214
1215         btrfs_free_path(path);
1216         mutex_unlock(&root->fs_info->fs_mutex);
1217         return 0;
1218 }
1219
1220 static int btrfs_init_locked_inode(struct inode *inode, void *p)
1221 {
1222         struct btrfs_iget_args *args = p;
1223         inode->i_ino = args->ino;
1224         BTRFS_I(inode)->root = args->root;
1225         extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS);
1226         extent_io_tree_init(&BTRFS_I(inode)->io_tree,
1227                              inode->i_mapping, GFP_NOFS);
1228         return 0;
1229 }
1230
1231 static int btrfs_find_actor(struct inode *inode, void *opaque)
1232 {
1233         struct btrfs_iget_args *args = opaque;
1234         return (args->ino == inode->i_ino &&
1235                 args->root == BTRFS_I(inode)->root);
1236 }
1237
1238 struct inode *btrfs_ilookup(struct super_block *s, u64 objectid,
1239                             u64 root_objectid)
1240 {
1241         struct btrfs_iget_args args;
1242         args.ino = objectid;
1243         args.root = btrfs_lookup_fs_root(btrfs_sb(s)->fs_info, root_objectid);
1244
1245         if (!args.root)
1246                 return NULL;
1247
1248         return ilookup5(s, objectid, btrfs_find_actor, (void *)&args);
1249 }
1250
1251 struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid,
1252                                 struct btrfs_root *root)
1253 {
1254         struct inode *inode;
1255         struct btrfs_iget_args args;
1256         args.ino = objectid;
1257         args.root = root;
1258
1259         inode = iget5_locked(s, objectid, btrfs_find_actor,
1260                              btrfs_init_locked_inode,
1261                              (void *)&args);
1262         return inode;
1263 }
1264
1265 static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
1266                                    struct nameidata *nd)
1267 {
1268         struct inode * inode;
1269         struct btrfs_inode *bi = BTRFS_I(dir);
1270         struct btrfs_root *root = bi->root;
1271         struct btrfs_root *sub_root = root;
1272         struct btrfs_key location;
1273         int ret;
1274
1275         if (dentry->d_name.len > BTRFS_NAME_LEN)
1276                 return ERR_PTR(-ENAMETOOLONG);
1277
1278         mutex_lock(&root->fs_info->fs_mutex);
1279         ret = btrfs_inode_by_name(dir, dentry, &location);
1280         mutex_unlock(&root->fs_info->fs_mutex);
1281
1282         if (ret < 0)
1283                 return ERR_PTR(ret);
1284
1285         inode = NULL;
1286         if (location.objectid) {
1287                 ret = fixup_tree_root_location(root, &location, &sub_root,
1288                                                 dentry);
1289                 if (ret < 0)
1290                         return ERR_PTR(ret);
1291                 if (ret > 0)
1292                         return ERR_PTR(-ENOENT);
1293                 inode = btrfs_iget_locked(dir->i_sb, location.objectid,
1294                                           sub_root);
1295                 if (!inode)
1296                         return ERR_PTR(-EACCES);
1297                 if (inode->i_state & I_NEW) {
1298                         /* the inode and parent dir are two different roots */
1299                         if (sub_root != root) {
1300                                 igrab(inode);
1301                                 sub_root->inode = inode;
1302                         }
1303                         BTRFS_I(inode)->root = sub_root;
1304                         memcpy(&BTRFS_I(inode)->location, &location,
1305                                sizeof(location));
1306                         btrfs_read_locked_inode(inode);
1307                         unlock_new_inode(inode);
1308                 }
1309         }
1310         return d_splice_alias(inode, dentry);
1311 }
1312
1313 static unsigned char btrfs_filetype_table[] = {
1314         DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
1315 };
1316
1317 static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
1318 {
1319         struct inode *inode = filp->f_dentry->d_inode;
1320         struct btrfs_root *root = BTRFS_I(inode)->root;
1321         struct btrfs_item *item;
1322         struct btrfs_dir_item *di;
1323         struct btrfs_key key;
1324         struct btrfs_key found_key;
1325         struct btrfs_path *path;
1326         int ret;
1327         u32 nritems;
1328         struct extent_buffer *leaf;
1329         int slot;
1330         int advance;
1331         unsigned char d_type;
1332         int over = 0;
1333         u32 di_cur;
1334         u32 di_total;
1335         u32 di_len;
1336         int key_type = BTRFS_DIR_INDEX_KEY;
1337         char tmp_name[32];
1338         char *name_ptr;
1339         int name_len;
1340
1341         /* FIXME, use a real flag for deciding about the key type */
1342         if (root->fs_info->tree_root == root)
1343                 key_type = BTRFS_DIR_ITEM_KEY;
1344
1345         /* special case for "." */
1346         if (filp->f_pos == 0) {
1347                 over = filldir(dirent, ".", 1,
1348                                1, inode->i_ino,
1349                                DT_DIR);
1350                 if (over)
1351                         return 0;
1352                 filp->f_pos = 1;
1353         }
1354
1355         mutex_lock(&root->fs_info->fs_mutex);
1356         key.objectid = inode->i_ino;
1357         path = btrfs_alloc_path();
1358         path->reada = 2;
1359
1360         /* special case for .., just use the back ref */
1361         if (filp->f_pos == 1) {
1362                 btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY);
1363                 key.offset = 0;
1364                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1365                 BUG_ON(ret == 0);
1366                 leaf = path->nodes[0];
1367                 slot = path->slots[0];
1368                 nritems = btrfs_header_nritems(leaf);
1369                 if (slot >= nritems) {
1370                         btrfs_release_path(root, path);
1371                         goto read_dir_items;
1372                 }
1373                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
1374                 btrfs_release_path(root, path);
1375                 if (found_key.objectid != key.objectid ||
1376                     found_key.type != BTRFS_INODE_REF_KEY)
1377                         goto read_dir_items;
1378                 over = filldir(dirent, "..", 2,
1379                                2, found_key.offset, DT_DIR);
1380                 if (over)
1381                         goto nopos;
1382                 filp->f_pos = 2;
1383         }
1384
1385 read_dir_items:
1386         btrfs_set_key_type(&key, key_type);
1387         key.offset = filp->f_pos;
1388
1389         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1390         if (ret < 0)
1391                 goto err;
1392         advance = 0;
1393         while(1) {
1394                 leaf = path->nodes[0];
1395                 nritems = btrfs_header_nritems(leaf);
1396                 slot = path->slots[0];
1397                 if (advance || slot >= nritems) {
1398                         if (slot >= nritems -1) {
1399                                 ret = btrfs_next_leaf(root, path);
1400                                 if (ret)
1401                                         break;
1402                                 leaf = path->nodes[0];
1403                                 nritems = btrfs_header_nritems(leaf);
1404                                 slot = path->slots[0];
1405                         } else {
1406                                 slot++;
1407                                 path->slots[0]++;
1408                         }
1409                 }
1410                 advance = 1;
1411                 item = btrfs_item_nr(leaf, slot);
1412                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
1413
1414                 if (found_key.objectid != key.objectid)
1415                         break;
1416                 if (btrfs_key_type(&found_key) != key_type)
1417                         break;
1418                 if (found_key.offset < filp->f_pos)
1419                         continue;
1420
1421                 filp->f_pos = found_key.offset;
1422                 advance = 1;
1423                 di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
1424                 di_cur = 0;
1425                 di_total = btrfs_item_size(leaf, item);
1426                 while(di_cur < di_total) {
1427                         struct btrfs_key location;
1428
1429                         name_len = btrfs_dir_name_len(leaf, di);
1430                         if (name_len < 32) {
1431                                 name_ptr = tmp_name;
1432                         } else {
1433                                 name_ptr = kmalloc(name_len, GFP_NOFS);
1434                                 BUG_ON(!name_ptr);
1435                         }
1436                         read_extent_buffer(leaf, name_ptr,
1437                                            (unsigned long)(di + 1), name_len);
1438
1439                         d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)];
1440                         btrfs_dir_item_key_to_cpu(leaf, di, &location);
1441                         over = filldir(dirent, name_ptr, name_len,
1442                                        found_key.offset,
1443                                        location.objectid,
1444                                        d_type);
1445
1446                         if (name_ptr != tmp_name)
1447                                 kfree(name_ptr);
1448
1449                         if (over)
1450                                 goto nopos;
1451                         di_len = btrfs_dir_name_len(leaf, di) +
1452                                 btrfs_dir_data_len(leaf, di) +sizeof(*di);
1453                         di_cur += di_len;
1454                         di = (struct btrfs_dir_item *)((char *)di + di_len);
1455                 }
1456         }
1457         filp->f_pos = INT_LIMIT(typeof(filp->f_pos));
1458 nopos:
1459         ret = 0;
1460 err:
1461         btrfs_release_path(root, path);
1462         btrfs_free_path(path);
1463         mutex_unlock(&root->fs_info->fs_mutex);
1464         return ret;
1465 }
1466
1467 int btrfs_write_inode(struct inode *inode, int wait)
1468 {
1469         struct btrfs_root *root = BTRFS_I(inode)->root;
1470         struct btrfs_trans_handle *trans;
1471         int ret = 0;
1472
1473         if (wait) {
1474                 mutex_lock(&root->fs_info->fs_mutex);
1475                 trans = btrfs_start_transaction(root, 1);
1476                 btrfs_set_trans_block_group(trans, inode);
1477                 ret = btrfs_commit_transaction(trans, root);
1478                 mutex_unlock(&root->fs_info->fs_mutex);
1479         }
1480         return ret;
1481 }
1482
1483 /*
1484  * This is somewhat expensive, updating the tree every time the
1485  * inode changes.  But, it is most likely to find the inode in cache.
1486  * FIXME, needs more benchmarking...there are no reasons other than performance
1487  * to keep or drop this code.
1488  */
1489 void btrfs_dirty_inode(struct inode *inode)
1490 {
1491         struct btrfs_root *root = BTRFS_I(inode)->root;
1492         struct btrfs_trans_handle *trans;
1493
1494         mutex_lock(&root->fs_info->fs_mutex);
1495         trans = btrfs_start_transaction(root, 1);
1496         btrfs_set_trans_block_group(trans, inode);
1497         btrfs_update_inode(trans, root, inode);
1498         btrfs_end_transaction(trans, root);
1499         mutex_unlock(&root->fs_info->fs_mutex);
1500 }
1501
1502 static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
1503                                      struct btrfs_root *root,
1504                                      const char *name, int name_len,
1505                                      u64 ref_objectid,
1506                                      u64 objectid,
1507                                      struct btrfs_block_group_cache *group,
1508                                      int mode)
1509 {
1510         struct inode *inode;
1511         struct btrfs_inode_item *inode_item;
1512         struct btrfs_key *location;
1513         struct btrfs_path *path;
1514         struct btrfs_inode_ref *ref;
1515         struct btrfs_key key[2];
1516         u32 sizes[2];
1517         unsigned long ptr;
1518         int ret;
1519         int owner;
1520
1521         path = btrfs_alloc_path();
1522         BUG_ON(!path);
1523
1524         inode = new_inode(root->fs_info->sb);
1525         if (!inode)
1526                 return ERR_PTR(-ENOMEM);
1527
1528         extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS);
1529         extent_io_tree_init(&BTRFS_I(inode)->io_tree,
1530                              inode->i_mapping, GFP_NOFS);
1531         BTRFS_I(inode)->root = root;
1532
1533         if (mode & S_IFDIR)
1534                 owner = 0;
1535         else
1536                 owner = 1;
1537         group = btrfs_find_block_group(root, group, 0, 0, owner);
1538         BTRFS_I(inode)->block_group = group;
1539         BTRFS_I(inode)->flags = 0;
1540
1541         key[0].objectid = objectid;
1542         btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY);
1543         key[0].offset = 0;
1544
1545         key[1].objectid = objectid;
1546         btrfs_set_key_type(&key[1], BTRFS_INODE_REF_KEY);
1547         key[1].offset = ref_objectid;
1548
1549         sizes[0] = sizeof(struct btrfs_inode_item);
1550         sizes[1] = name_len + sizeof(*ref);
1551
1552         ret = btrfs_insert_empty_items(trans, root, path, key, sizes, 2);
1553         if (ret != 0)
1554                 goto fail;
1555
1556         if (objectid > root->highest_inode)
1557                 root->highest_inode = objectid;
1558
1559         inode->i_uid = current->fsuid;
1560         inode->i_gid = current->fsgid;
1561         inode->i_mode = mode;
1562         inode->i_ino = objectid;
1563         inode->i_blocks = 0;
1564         inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
1565         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
1566                                   struct btrfs_inode_item);
1567         fill_inode_item(path->nodes[0], inode_item, inode);
1568
1569         ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
1570                              struct btrfs_inode_ref);
1571         btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len);
1572         ptr = (unsigned long)(ref + 1);
1573         write_extent_buffer(path->nodes[0], name, ptr, name_len);
1574
1575         btrfs_mark_buffer_dirty(path->nodes[0]);
1576         btrfs_free_path(path);
1577
1578         location = &BTRFS_I(inode)->location;
1579         location->objectid = objectid;
1580         location->offset = 0;
1581         btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
1582
1583         insert_inode_hash(inode);
1584         return inode;
1585 fail:
1586         btrfs_free_path(path);
1587         return ERR_PTR(ret);
1588 }
1589
1590 static inline u8 btrfs_inode_type(struct inode *inode)
1591 {
1592         return btrfs_type_by_mode[(inode->i_mode & S_IFMT) >> S_SHIFT];
1593 }
1594
1595 static int btrfs_add_link(struct btrfs_trans_handle *trans,
1596                             struct dentry *dentry, struct inode *inode,
1597                             int add_backref)
1598 {
1599         int ret;
1600         struct btrfs_key key;
1601         struct btrfs_root *root = BTRFS_I(dentry->d_parent->d_inode)->root;
1602         struct inode *parent_inode;
1603
1604         key.objectid = inode->i_ino;
1605         btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
1606         key.offset = 0;
1607
1608         ret = btrfs_insert_dir_item(trans, root,
1609                                     dentry->d_name.name, dentry->d_name.len,
1610                                     dentry->d_parent->d_inode->i_ino,
1611                                     &key, btrfs_inode_type(inode));
1612         if (ret == 0) {
1613                 if (add_backref) {
1614                         ret = btrfs_insert_inode_ref(trans, root,
1615                                              dentry->d_name.name,
1616                                              dentry->d_name.len,
1617                                              inode->i_ino,
1618                                              dentry->d_parent->d_inode->i_ino);
1619                 }
1620                 parent_inode = dentry->d_parent->d_inode;
1621                 parent_inode->i_size += dentry->d_name.len * 2;
1622                 parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME;
1623                 ret = btrfs_update_inode(trans, root,
1624                                          dentry->d_parent->d_inode);
1625         }
1626         return ret;
1627 }
1628
1629 static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
1630                             struct dentry *dentry, struct inode *inode,
1631                             int backref)
1632 {
1633         int err = btrfs_add_link(trans, dentry, inode, backref);
1634         if (!err) {
1635                 d_instantiate(dentry, inode);
1636                 return 0;
1637         }
1638         if (err > 0)
1639                 err = -EEXIST;
1640         return err;
1641 }
1642
1643 static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
1644                         int mode, dev_t rdev)
1645 {
1646         struct btrfs_trans_handle *trans;
1647         struct btrfs_root *root = BTRFS_I(dir)->root;
1648         struct inode *inode = NULL;
1649         int err;
1650         int drop_inode = 0;
1651         u64 objectid;
1652         unsigned long nr = 0;
1653
1654         if (!new_valid_dev(rdev))
1655                 return -EINVAL;
1656
1657         mutex_lock(&root->fs_info->fs_mutex);
1658         err = btrfs_check_free_space(root, 1, 0);
1659         if (err)
1660                 goto fail;
1661
1662         trans = btrfs_start_transaction(root, 1);
1663         btrfs_set_trans_block_group(trans, dir);
1664
1665         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
1666         if (err) {
1667                 err = -ENOSPC;
1668                 goto out_unlock;
1669         }
1670
1671         inode = btrfs_new_inode(trans, root, dentry->d_name.name,
1672                                 dentry->d_name.len,
1673                                 dentry->d_parent->d_inode->i_ino, objectid,
1674                                 BTRFS_I(dir)->block_group, mode);
1675         err = PTR_ERR(inode);
1676         if (IS_ERR(inode))
1677                 goto out_unlock;
1678
1679         btrfs_set_trans_block_group(trans, inode);
1680         err = btrfs_add_nondir(trans, dentry, inode, 0);
1681         if (err)
1682                 drop_inode = 1;
1683         else {
1684                 inode->i_op = &btrfs_special_inode_operations;
1685                 init_special_inode(inode, inode->i_mode, rdev);
1686                 btrfs_update_inode(trans, root, inode);
1687         }
1688         dir->i_sb->s_dirt = 1;
1689         btrfs_update_inode_block_group(trans, inode);
1690         btrfs_update_inode_block_group(trans, dir);
1691 out_unlock:
1692         nr = trans->blocks_used;
1693         btrfs_end_transaction(trans, root);
1694 fail:
1695         mutex_unlock(&root->fs_info->fs_mutex);
1696
1697         if (drop_inode) {
1698                 inode_dec_link_count(inode);
1699                 iput(inode);
1700         }
1701         btrfs_btree_balance_dirty(root, nr);
1702         btrfs_throttle(root);
1703         return err;
1704 }
1705
1706 static int btrfs_create(struct inode *dir, struct dentry *dentry,
1707                         int mode, struct nameidata *nd)
1708 {
1709         struct btrfs_trans_handle *trans;
1710         struct btrfs_root *root = BTRFS_I(dir)->root;
1711         struct inode *inode = NULL;
1712         int err;
1713         int drop_inode = 0;
1714         unsigned long nr = 0;
1715         u64 objectid;
1716
1717         mutex_lock(&root->fs_info->fs_mutex);
1718         err = btrfs_check_free_space(root, 1, 0);
1719         if (err)
1720                 goto fail;
1721         trans = btrfs_start_transaction(root, 1);
1722         btrfs_set_trans_block_group(trans, dir);
1723
1724         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
1725         if (err) {
1726                 err = -ENOSPC;
1727                 goto out_unlock;
1728         }
1729
1730         inode = btrfs_new_inode(trans, root, dentry->d_name.name,
1731                                 dentry->d_name.len,
1732                                 dentry->d_parent->d_inode->i_ino,
1733                                 objectid, BTRFS_I(dir)->block_group, mode);
1734         err = PTR_ERR(inode);
1735         if (IS_ERR(inode))
1736                 goto out_unlock;
1737
1738         btrfs_set_trans_block_group(trans, inode);
1739         err = btrfs_add_nondir(trans, dentry, inode, 0);
1740         if (err)
1741                 drop_inode = 1;
1742         else {
1743                 inode->i_mapping->a_ops = &btrfs_aops;
1744                 inode->i_fop = &btrfs_file_operations;
1745                 inode->i_op = &btrfs_file_inode_operations;
1746                 extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS);
1747                 extent_io_tree_init(&BTRFS_I(inode)->io_tree,
1748                                      inode->i_mapping, GFP_NOFS);
1749                 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
1750         }
1751         dir->i_sb->s_dirt = 1;
1752         btrfs_update_inode_block_group(trans, inode);
1753         btrfs_update_inode_block_group(trans, dir);
1754 out_unlock:
1755         nr = trans->blocks_used;
1756         btrfs_end_transaction(trans, root);
1757 fail:
1758         mutex_unlock(&root->fs_info->fs_mutex);
1759
1760         if (drop_inode) {
1761                 inode_dec_link_count(inode);
1762                 iput(inode);
1763         }
1764         btrfs_btree_balance_dirty(root, nr);
1765         btrfs_throttle(root);
1766         return err;
1767 }
1768
1769 static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
1770                       struct dentry *dentry)
1771 {
1772         struct btrfs_trans_handle *trans;
1773         struct btrfs_root *root = BTRFS_I(dir)->root;
1774         struct inode *inode = old_dentry->d_inode;
1775         unsigned long nr = 0;
1776         int err;
1777         int drop_inode = 0;
1778
1779         if (inode->i_nlink == 0)
1780                 return -ENOENT;
1781
1782 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
1783         inode->i_nlink++;
1784 #else
1785         inc_nlink(inode);
1786 #endif
1787         mutex_lock(&root->fs_info->fs_mutex);
1788         err = btrfs_check_free_space(root, 1, 0);
1789         if (err)
1790                 goto fail;
1791         trans = btrfs_start_transaction(root, 1);
1792
1793         btrfs_set_trans_block_group(trans, dir);
1794         atomic_inc(&inode->i_count);
1795         err = btrfs_add_nondir(trans, dentry, inode, 1);
1796
1797         if (err)
1798                 drop_inode = 1;
1799
1800         dir->i_sb->s_dirt = 1;
1801         btrfs_update_inode_block_group(trans, dir);
1802         err = btrfs_update_inode(trans, root, inode);
1803
1804         if (err)
1805                 drop_inode = 1;
1806
1807         nr = trans->blocks_used;
1808         btrfs_end_transaction(trans, root);
1809 fail:
1810         mutex_unlock(&root->fs_info->fs_mutex);
1811
1812         if (drop_inode) {
1813                 inode_dec_link_count(inode);
1814                 iput(inode);
1815         }
1816         btrfs_btree_balance_dirty(root, nr);
1817         btrfs_throttle(root);
1818         return err;
1819 }
1820
1821 static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1822 {
1823         struct inode *inode;
1824         struct btrfs_trans_handle *trans;
1825         struct btrfs_root *root = BTRFS_I(dir)->root;
1826         int err = 0;
1827         int drop_on_err = 0;
1828         u64 objectid;
1829         unsigned long nr = 1;
1830
1831         mutex_lock(&root->fs_info->fs_mutex);
1832         err = btrfs_check_free_space(root, 1, 0);
1833         if (err)
1834                 goto out_unlock;
1835
1836         trans = btrfs_start_transaction(root, 1);
1837         btrfs_set_trans_block_group(trans, dir);
1838
1839         if (IS_ERR(trans)) {
1840                 err = PTR_ERR(trans);
1841                 goto out_unlock;
1842         }
1843
1844         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
1845         if (err) {
1846                 err = -ENOSPC;
1847                 goto out_unlock;
1848         }
1849
1850         inode = btrfs_new_inode(trans, root, dentry->d_name.name,
1851                                 dentry->d_name.len,
1852                                 dentry->d_parent->d_inode->i_ino, objectid,
1853                                 BTRFS_I(dir)->block_group, S_IFDIR | mode);
1854         if (IS_ERR(inode)) {
1855                 err = PTR_ERR(inode);
1856                 goto out_fail;
1857         }
1858
1859         drop_on_err = 1;
1860         inode->i_op = &btrfs_dir_inode_operations;
1861         inode->i_fop = &btrfs_dir_file_operations;
1862         btrfs_set_trans_block_group(trans, inode);
1863
1864         inode->i_size = 0;
1865         err = btrfs_update_inode(trans, root, inode);
1866         if (err)
1867                 goto out_fail;
1868
1869         err = btrfs_add_link(trans, dentry, inode, 0);
1870         if (err)
1871                 goto out_fail;
1872
1873         d_instantiate(dentry, inode);
1874         drop_on_err = 0;
1875         dir->i_sb->s_dirt = 1;
1876         btrfs_update_inode_block_group(trans, inode);
1877         btrfs_update_inode_block_group(trans, dir);
1878
1879 out_fail:
1880         nr = trans->blocks_used;
1881         btrfs_end_transaction(trans, root);
1882
1883 out_unlock:
1884         mutex_unlock(&root->fs_info->fs_mutex);
1885         if (drop_on_err)
1886                 iput(inode);
1887         btrfs_btree_balance_dirty(root, nr);
1888         btrfs_throttle(root);
1889         return err;
1890 }
1891
1892 struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
1893                                     size_t pg_offset, u64 start, u64 len,
1894                                     int create)
1895 {
1896         int ret;
1897         int err = 0;
1898         u64 bytenr;
1899         u64 extent_start = 0;
1900         u64 extent_end = 0;
1901         u64 objectid = inode->i_ino;
1902         u32 found_type;
1903         struct btrfs_path *path;
1904         struct btrfs_root *root = BTRFS_I(inode)->root;
1905         struct btrfs_file_extent_item *item;
1906         struct extent_buffer *leaf;
1907         struct btrfs_key found_key;
1908         struct extent_map *em = NULL;
1909         struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
1910         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
1911         struct btrfs_trans_handle *trans = NULL;
1912
1913         path = btrfs_alloc_path();
1914         BUG_ON(!path);
1915         mutex_lock(&root->fs_info->fs_mutex);
1916
1917 again:
1918         spin_lock(&em_tree->lock);
1919         em = lookup_extent_mapping(em_tree, start, len);
1920         spin_unlock(&em_tree->lock);
1921
1922         if (em) {
1923                 if (em->start > start) {
1924                         printk("get_extent lookup [%Lu %Lu] em [%Lu %Lu]\n",
1925                                start, len, em->start, em->len);
1926                         WARN_ON(1);
1927                 }
1928                 if (em->block_start == EXTENT_MAP_INLINE && page)
1929                         free_extent_map(em);
1930                 else
1931                         goto out;
1932         }
1933         em = alloc_extent_map(GFP_NOFS);
1934         if (!em) {
1935                 err = -ENOMEM;
1936                 goto out;
1937         }
1938
1939         em->start = EXTENT_MAP_HOLE;
1940         em->len = (u64)-1;
1941         em->bdev = inode->i_sb->s_bdev;
1942         ret = btrfs_lookup_file_extent(trans, root, path,
1943                                        objectid, start, trans != NULL);
1944         if (ret < 0) {
1945                 err = ret;
1946                 goto out;
1947         }
1948
1949         if (ret != 0) {
1950                 if (path->slots[0] == 0)
1951                         goto not_found;
1952                 path->slots[0]--;
1953         }
1954
1955         leaf = path->nodes[0];
1956         item = btrfs_item_ptr(leaf, path->slots[0],
1957                               struct btrfs_file_extent_item);
1958         /* are we inside the extent that was found? */
1959         btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
1960         found_type = btrfs_key_type(&found_key);
1961         if (found_key.objectid != objectid ||
1962             found_type != BTRFS_EXTENT_DATA_KEY) {
1963                 goto not_found;
1964         }
1965
1966         found_type = btrfs_file_extent_type(leaf, item);
1967         extent_start = found_key.offset;
1968         if (found_type == BTRFS_FILE_EXTENT_REG) {
1969                 extent_end = extent_start +
1970                        btrfs_file_extent_num_bytes(leaf, item);
1971                 err = 0;
1972                 if (start < extent_start || start >= extent_end) {
1973                         em->start = start;
1974                         if (start < extent_start) {
1975                                 if (start + len <= extent_start)
1976                                         goto not_found;
1977                                 em->len = extent_end - extent_start;
1978                         } else {
1979                                 em->len = len;
1980                         }
1981                         goto not_found_em;
1982                 }
1983                 bytenr = btrfs_file_extent_disk_bytenr(leaf, item);
1984                 if (bytenr == 0) {
1985                         em->start = extent_start;
1986                         em->len = extent_end - extent_start;
1987                         em->block_start = EXTENT_MAP_HOLE;
1988                         goto insert;
1989                 }
1990                 bytenr += btrfs_file_extent_offset(leaf, item);
1991                 em->block_start = bytenr;
1992                 em->start = extent_start;
1993                 em->len = extent_end - extent_start;
1994                 goto insert;
1995         } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
1996                 u64 page_start;
1997                 unsigned long ptr;
1998                 char *map;
1999                 size_t size;
2000                 size_t extent_offset;
2001                 size_t copy_size;
2002
2003                 size = btrfs_file_extent_inline_len(leaf, btrfs_item_nr(leaf,
2004                                                     path->slots[0]));
2005                 extent_end = (extent_start + size + root->sectorsize - 1) &
2006                         ~((u64)root->sectorsize - 1);
2007                 if (start < extent_start || start >= extent_end) {
2008                         em->start = start;
2009                         if (start < extent_start) {
2010                                 if (start + len <= extent_start)
2011                                         goto not_found;
2012                                 em->len = extent_end - extent_start;
2013                         } else {
2014                                 em->len = len;
2015                         }
2016                         goto not_found_em;
2017                 }
2018                 em->block_start = EXTENT_MAP_INLINE;
2019
2020                 if (!page) {
2021                         em->start = extent_start;
2022                         em->len = size;
2023                         goto out;
2024                 }
2025
2026                 page_start = page_offset(page) + pg_offset;
2027                 extent_offset = page_start - extent_start;
2028                 copy_size = min_t(u64, PAGE_CACHE_SIZE - pg_offset,
2029                                 size - extent_offset);
2030                 em->start = extent_start + extent_offset;
2031                 em->len = (copy_size + root->sectorsize - 1) &
2032                         ~((u64)root->sectorsize - 1);
2033                 map = kmap(page);
2034                 ptr = btrfs_file_extent_inline_start(item) + extent_offset;
2035                 if (create == 0 && !PageUptodate(page)) {
2036                         read_extent_buffer(leaf, map + pg_offset, ptr,
2037                                            copy_size);
2038                         flush_dcache_page(page);
2039                 } else if (create && PageUptodate(page)) {
2040                         if (!trans) {
2041                                 kunmap(page);
2042                                 free_extent_map(em);
2043                                 em = NULL;
2044                                 btrfs_release_path(root, path);
2045                                 trans = btrfs_start_transaction(root, 1);
2046                                 goto again;
2047                         }
2048                         write_extent_buffer(leaf, map + pg_offset, ptr,
2049                                             copy_size);
2050                         btrfs_mark_buffer_dirty(leaf);
2051                 }
2052                 kunmap(page);
2053                 set_extent_uptodate(io_tree, em->start,
2054                                     extent_map_end(em) - 1, GFP_NOFS);
2055                 goto insert;
2056         } else {
2057                 printk("unkknown found_type %d\n", found_type);
2058                 WARN_ON(1);
2059         }
2060 not_found:
2061         em->start = start;
2062         em->len = len;
2063 not_found_em:
2064         em->block_start = EXTENT_MAP_HOLE;
2065 insert:
2066         btrfs_release_path(root, path);
2067         if (em->start > start || extent_map_end(em) <= start) {
2068                 printk("bad extent! em: [%Lu %Lu] passed [%Lu %Lu]\n", em->start, em->len, start, len);
2069                 err = -EIO;
2070                 goto out;
2071         }
2072
2073         err = 0;
2074         spin_lock(&em_tree->lock);
2075         ret = add_extent_mapping(em_tree, em);
2076         if (ret == -EEXIST) {
2077                 free_extent_map(em);
2078                 em = lookup_extent_mapping(em_tree, start, len);
2079                 if (!em) {
2080                         err = -EIO;
2081                         printk("failing to insert %Lu %Lu\n", start, len);
2082                 }
2083         }
2084         spin_unlock(&em_tree->lock);
2085 out:
2086         btrfs_free_path(path);
2087         if (trans) {
2088                 ret = btrfs_end_transaction(trans, root);
2089                 if (!err)
2090                         err = ret;
2091         }
2092         mutex_unlock(&root->fs_info->fs_mutex);
2093         if (err) {
2094                 free_extent_map(em);
2095                 WARN_ON(1);
2096                 return ERR_PTR(err);
2097         }
2098         return em;
2099 }
2100
2101 static sector_t btrfs_bmap(struct address_space *mapping, sector_t iblock)
2102 {
2103         return extent_bmap(mapping, iblock, btrfs_get_extent);
2104 }
2105
2106 int btrfs_readpage(struct file *file, struct page *page)
2107 {
2108         struct extent_io_tree *tree;
2109         tree = &BTRFS_I(page->mapping->host)->io_tree;
2110         return extent_read_full_page(tree, page, btrfs_get_extent);
2111 }
2112
2113 static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
2114 {
2115         struct extent_io_tree *tree;
2116
2117
2118         if (current->flags & PF_MEMALLOC) {
2119                 redirty_page_for_writepage(wbc, page);
2120                 unlock_page(page);
2121                 return 0;
2122         }
2123         tree = &BTRFS_I(page->mapping->host)->io_tree;
2124         return extent_write_full_page(tree, page, btrfs_get_extent, wbc);
2125 }
2126
2127 static int btrfs_writepages(struct address_space *mapping,
2128                             struct writeback_control *wbc)
2129 {
2130         struct extent_io_tree *tree;
2131         tree = &BTRFS_I(mapping->host)->io_tree;
2132         return extent_writepages(tree, mapping, btrfs_get_extent, wbc);
2133 }
2134
2135 static int
2136 btrfs_readpages(struct file *file, struct address_space *mapping,
2137                 struct list_head *pages, unsigned nr_pages)
2138 {
2139         struct extent_io_tree *tree;
2140         tree = &BTRFS_I(mapping->host)->io_tree;
2141         return extent_readpages(tree, mapping, pages, nr_pages,
2142                                 btrfs_get_extent);
2143 }
2144
2145 static int btrfs_releasepage(struct page *page, gfp_t gfp_flags)
2146 {
2147         struct extent_io_tree *tree;
2148         struct extent_map_tree *map;
2149         int ret;
2150
2151         tree = &BTRFS_I(page->mapping->host)->io_tree;
2152         map = &BTRFS_I(page->mapping->host)->extent_tree;
2153         ret = try_release_extent_mapping(map, tree, page, gfp_flags);
2154         if (ret == 1) {
2155                 ClearPagePrivate(page);
2156                 set_page_private(page, 0);
2157                 page_cache_release(page);
2158         }
2159         return ret;
2160 }
2161
2162 static void btrfs_invalidatepage(struct page *page, unsigned long offset)
2163 {
2164         struct extent_io_tree *tree;
2165
2166         tree = &BTRFS_I(page->mapping->host)->io_tree;
2167         extent_invalidatepage(tree, page, offset);
2168         btrfs_releasepage(page, GFP_NOFS);
2169 }
2170
2171 /*
2172  * btrfs_page_mkwrite() is not allowed to change the file size as it gets
2173  * called from a page fault handler when a page is first dirtied. Hence we must
2174  * be careful to check for EOF conditions here. We set the page up correctly
2175  * for a written page which means we get ENOSPC checking when writing into
2176  * holes and correct delalloc and unwritten extent mapping on filesystems that
2177  * support these features.
2178  *
2179  * We are not allowed to take the i_mutex here so we have to play games to
2180  * protect against truncate races as the page could now be beyond EOF.  Because
2181  * vmtruncate() writes the inode size before removing pages, once we have the
2182  * page lock we can determine safely if the page is beyond EOF. If it is not
2183  * beyond EOF, then the page is guaranteed safe against truncation until we
2184  * unlock the page.
2185  */
2186 int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page)
2187 {
2188         struct inode *inode = fdentry(vma->vm_file)->d_inode;
2189         struct btrfs_root *root = BTRFS_I(inode)->root;
2190         unsigned long end;
2191         loff_t size;
2192         int ret;
2193         u64 page_start;
2194
2195         mutex_lock(&root->fs_info->fs_mutex);
2196         ret = btrfs_check_free_space(root, PAGE_CACHE_SIZE, 0);
2197         mutex_unlock(&root->fs_info->fs_mutex);
2198         if (ret)
2199                 goto out;
2200
2201         ret = -EINVAL;
2202
2203         lock_page(page);
2204         wait_on_page_writeback(page);
2205         size = i_size_read(inode);
2206         page_start = (u64)page->index << PAGE_CACHE_SHIFT;
2207
2208         if ((page->mapping != inode->i_mapping) ||
2209             (page_start > size)) {
2210                 /* page got truncated out from underneath us */
2211                 goto out_unlock;
2212         }
2213
2214         /* page is wholly or partially inside EOF */
2215         if (page_start + PAGE_CACHE_SIZE > size)
2216                 end = size & ~PAGE_CACHE_MASK;
2217         else
2218                 end = PAGE_CACHE_SIZE;
2219
2220         ret = btrfs_cow_one_page(inode, page, end);
2221
2222 out_unlock:
2223         unlock_page(page);
2224 out:
2225         return ret;
2226 }
2227
2228 static void btrfs_truncate(struct inode *inode)
2229 {
2230         struct btrfs_root *root = BTRFS_I(inode)->root;
2231         int ret;
2232         struct btrfs_trans_handle *trans;
2233         unsigned long nr;
2234
2235         if (!S_ISREG(inode->i_mode))
2236                 return;
2237         if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
2238                 return;
2239
2240         btrfs_truncate_page(inode->i_mapping, inode->i_size);
2241
2242         mutex_lock(&root->fs_info->fs_mutex);
2243         trans = btrfs_start_transaction(root, 1);
2244         btrfs_set_trans_block_group(trans, inode);
2245
2246         /* FIXME, add redo link to tree so we don't leak on crash */
2247         ret = btrfs_truncate_in_trans(trans, root, inode,
2248                                       BTRFS_EXTENT_DATA_KEY);
2249         btrfs_update_inode(trans, root, inode);
2250         nr = trans->blocks_used;
2251
2252         ret = btrfs_end_transaction(trans, root);
2253         BUG_ON(ret);
2254         mutex_unlock(&root->fs_info->fs_mutex);
2255         btrfs_btree_balance_dirty(root, nr);
2256         btrfs_throttle(root);
2257 }
2258
2259 static int noinline create_subvol(struct btrfs_root *root, char *name,
2260                                   int namelen)
2261 {
2262         struct btrfs_trans_handle *trans;
2263         struct btrfs_key key;
2264         struct btrfs_root_item root_item;
2265         struct btrfs_inode_item *inode_item;
2266         struct extent_buffer *leaf;
2267         struct btrfs_root *new_root = root;
2268         struct inode *inode;
2269         struct inode *dir;
2270         int ret;
2271         int err;
2272         u64 objectid;
2273         u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
2274         unsigned long nr = 1;
2275
2276         mutex_lock(&root->fs_info->fs_mutex);
2277         ret = btrfs_check_free_space(root, 1, 0);
2278         if (ret)
2279                 goto fail_commit;
2280
2281         trans = btrfs_start_transaction(root, 1);
2282         BUG_ON(!trans);
2283
2284         ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root,
2285                                        0, &objectid);
2286         if (ret)
2287                 goto fail;
2288
2289         leaf = __btrfs_alloc_free_block(trans, root, root->leafsize,
2290                                         objectid, trans->transid, 0, 0,
2291                                         0, 0);
2292         if (IS_ERR(leaf))
2293                 return PTR_ERR(leaf);
2294
2295         btrfs_set_header_nritems(leaf, 0);
2296         btrfs_set_header_level(leaf, 0);
2297         btrfs_set_header_bytenr(leaf, leaf->start);
2298         btrfs_set_header_generation(leaf, trans->transid);
2299         btrfs_set_header_owner(leaf, objectid);
2300
2301         write_extent_buffer(leaf, root->fs_info->fsid,
2302                             (unsigned long)btrfs_header_fsid(leaf),
2303                             BTRFS_FSID_SIZE);
2304         btrfs_mark_buffer_dirty(leaf);
2305
2306         inode_item = &root_item.inode;
2307         memset(inode_item, 0, sizeof(*inode_item));
2308         inode_item->generation = cpu_to_le64(1);
2309         inode_item->size = cpu_to_le64(3);
2310         inode_item->nlink = cpu_to_le32(1);
2311         inode_item->nblocks = cpu_to_le64(1);
2312         inode_item->mode = cpu_to_le32(S_IFDIR | 0755);
2313
2314         btrfs_set_root_bytenr(&root_item, leaf->start);
2315         btrfs_set_root_level(&root_item, 0);
2316         btrfs_set_root_refs(&root_item, 1);
2317         btrfs_set_root_used(&root_item, 0);
2318
2319         memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress));
2320         root_item.drop_level = 0;
2321
2322         free_extent_buffer(leaf);
2323         leaf = NULL;
2324
2325         btrfs_set_root_dirid(&root_item, new_dirid);
2326
2327         key.objectid = objectid;
2328         key.offset = 1;
2329         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
2330         ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
2331                                 &root_item);
2332         if (ret)
2333                 goto fail;
2334
2335         /*
2336          * insert the directory item
2337          */
2338         key.offset = (u64)-1;
2339         dir = root->fs_info->sb->s_root->d_inode;
2340         ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root,
2341                                     name, namelen, dir->i_ino, &key,
2342                                     BTRFS_FT_DIR);
2343         if (ret)
2344                 goto fail;
2345
2346         ret = btrfs_insert_inode_ref(trans, root->fs_info->tree_root,
2347                              name, namelen, objectid,
2348                              root->fs_info->sb->s_root->d_inode->i_ino);
2349         if (ret)
2350                 goto fail;
2351
2352         ret = btrfs_commit_transaction(trans, root);
2353         if (ret)
2354                 goto fail_commit;
2355
2356         new_root = btrfs_read_fs_root(root->fs_info, &key, name, namelen);
2357         BUG_ON(!new_root);
2358
2359         trans = btrfs_start_transaction(new_root, 1);
2360         BUG_ON(!trans);
2361
2362         inode = btrfs_new_inode(trans, new_root, "..", 2, new_dirid,
2363                                 new_dirid,
2364                                 BTRFS_I(dir)->block_group, S_IFDIR | 0700);
2365         if (IS_ERR(inode))
2366                 goto fail;
2367         inode->i_op = &btrfs_dir_inode_operations;
2368         inode->i_fop = &btrfs_dir_file_operations;
2369         new_root->inode = inode;
2370
2371         ret = btrfs_insert_inode_ref(trans, new_root, "..", 2, new_dirid,
2372                                      new_dirid);
2373         inode->i_nlink = 1;
2374         inode->i_size = 0;
2375         ret = btrfs_update_inode(trans, new_root, inode);
2376         if (ret)
2377                 goto fail;
2378 fail:
2379         nr = trans->blocks_used;
2380         err = btrfs_commit_transaction(trans, new_root);
2381         if (err && !ret)
2382                 ret = err;
2383 fail_commit:
2384         mutex_unlock(&root->fs_info->fs_mutex);
2385         btrfs_btree_balance_dirty(root, nr);
2386         btrfs_throttle(root);
2387         return ret;
2388 }
2389
2390 static int create_snapshot(struct btrfs_root *root, char *name, int namelen)
2391 {
2392         struct btrfs_pending_snapshot *pending_snapshot;
2393         struct btrfs_trans_handle *trans;
2394         int ret;
2395         int err;
2396         unsigned long nr = 0;
2397
2398         if (!root->ref_cows)
2399                 return -EINVAL;
2400
2401         mutex_lock(&root->fs_info->fs_mutex);
2402         ret = btrfs_check_free_space(root, 1, 0);
2403         if (ret)
2404                 goto fail_unlock;
2405
2406         pending_snapshot = kmalloc(sizeof(*pending_snapshot), GFP_NOFS);
2407         if (!pending_snapshot) {
2408                 ret = -ENOMEM;
2409                 goto fail_unlock;
2410         }
2411         pending_snapshot->name = kmalloc(namelen + 1, GFP_NOFS);
2412         if (!pending_snapshot->name) {
2413                 ret = -ENOMEM;
2414                 kfree(pending_snapshot);
2415                 goto fail_unlock;
2416         }
2417         memcpy(pending_snapshot->name, name, namelen);
2418         pending_snapshot->name[namelen] = '\0';
2419         trans = btrfs_start_transaction(root, 1);
2420         BUG_ON(!trans);
2421         pending_snapshot->root = root;
2422         list_add(&pending_snapshot->list,
2423                  &trans->transaction->pending_snapshots);
2424         ret = btrfs_update_inode(trans, root, root->inode);
2425         err = btrfs_commit_transaction(trans, root);
2426
2427 fail_unlock:
2428         mutex_unlock(&root->fs_info->fs_mutex);
2429         btrfs_btree_balance_dirty(root, nr);
2430         btrfs_throttle(root);
2431         return ret;
2432 }
2433
2434 unsigned long btrfs_force_ra(struct address_space *mapping,
2435                               struct file_ra_state *ra, struct file *file,
2436                               pgoff_t offset, pgoff_t last_index)
2437 {
2438         pgoff_t req_size;
2439
2440 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)
2441         req_size = last_index - offset + 1;
2442         offset = page_cache_readahead(mapping, ra, file, offset, req_size);
2443         return offset;
2444 #else
2445         req_size = min(last_index - offset + 1, (pgoff_t)128);
2446         page_cache_sync_readahead(mapping, ra, file, offset, req_size);
2447         return offset + req_size;
2448 #endif
2449 }
2450
2451 int btrfs_defrag_file(struct file *file) {
2452         struct inode *inode = fdentry(file)->d_inode;
2453         struct btrfs_root *root = BTRFS_I(inode)->root;
2454         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
2455         struct page *page;
2456         unsigned long last_index;
2457         unsigned long ra_index = 0;
2458         u64 page_start;
2459         u64 page_end;
2460         unsigned long i;
2461         int ret;
2462
2463         mutex_lock(&root->fs_info->fs_mutex);
2464         ret = btrfs_check_free_space(root, inode->i_size, 0);
2465         mutex_unlock(&root->fs_info->fs_mutex);
2466         if (ret)
2467                 return -ENOSPC;
2468
2469         mutex_lock(&inode->i_mutex);
2470         last_index = inode->i_size >> PAGE_CACHE_SHIFT;
2471         for (i = 0; i <= last_index; i++) {
2472                 if (i == ra_index) {
2473                         ra_index = btrfs_force_ra(inode->i_mapping,
2474                                                   &file->f_ra,
2475                                                   file, ra_index, last_index);
2476                 }
2477                 page = grab_cache_page(inode->i_mapping, i);
2478                 if (!page)
2479                         goto out_unlock;
2480                 if (!PageUptodate(page)) {
2481                         btrfs_readpage(NULL, page);
2482                         lock_page(page);
2483                         if (!PageUptodate(page)) {
2484                                 unlock_page(page);
2485                                 page_cache_release(page);
2486                                 goto out_unlock;
2487                         }
2488                 }
2489                 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
2490                 page_end = page_start + PAGE_CACHE_SIZE - 1;
2491
2492                 lock_extent(io_tree, page_start, page_end, GFP_NOFS);
2493                 set_extent_delalloc(io_tree, page_start,
2494                                     page_end, GFP_NOFS);
2495
2496                 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
2497                 set_page_dirty(page);
2498                 unlock_page(page);
2499                 page_cache_release(page);
2500                 balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1);
2501         }
2502
2503 out_unlock:
2504         mutex_unlock(&inode->i_mutex);
2505         return 0;
2506 }
2507
2508 static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg)
2509 {
2510         u64 new_size;
2511         u64 old_size;
2512         struct btrfs_ioctl_vol_args *vol_args;
2513         struct btrfs_trans_handle *trans;
2514         char *sizestr;
2515         int ret = 0;
2516         int namelen;
2517         int mod = 0;
2518
2519         vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS);
2520
2521         if (!vol_args)
2522                 return -ENOMEM;
2523
2524         if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
2525                 ret = -EFAULT;
2526                 goto out;
2527         }
2528         namelen = strlen(vol_args->name);
2529         if (namelen > BTRFS_VOL_NAME_MAX) {
2530                 ret = -EINVAL;
2531                 goto out;
2532         }
2533
2534         sizestr = vol_args->name;
2535         if (!strcmp(sizestr, "max"))
2536                 new_size = root->fs_info->sb->s_bdev->bd_inode->i_size;
2537         else {
2538                 if (sizestr[0] == '-') {
2539                         mod = -1;
2540                         sizestr++;
2541                 } else if (sizestr[0] == '+') {
2542                         mod = 1;
2543                         sizestr++;
2544                 }
2545                 new_size = btrfs_parse_size(sizestr);
2546                 if (new_size == 0) {
2547                         ret = -EINVAL;
2548                         goto out;
2549                 }
2550         }
2551
2552         mutex_lock(&root->fs_info->fs_mutex);
2553         old_size = btrfs_super_total_bytes(&root->fs_info->super_copy);
2554
2555         if (mod < 0) {
2556                 if (new_size > old_size) {
2557                         ret = -EINVAL;
2558                         goto out_unlock;
2559                 }
2560                 new_size = old_size - new_size;
2561         } else if (mod > 0) {
2562                 new_size = old_size + new_size;
2563         }
2564
2565         if (new_size < 256 * 1024 * 1024) {
2566                 ret = -EINVAL;
2567                 goto out_unlock;
2568         }
2569         if (new_size > root->fs_info->sb->s_bdev->bd_inode->i_size) {
2570                 ret = -EFBIG;
2571                 goto out_unlock;
2572         }
2573
2574         do_div(new_size, root->sectorsize);
2575         new_size *= root->sectorsize;
2576
2577 printk("new size is %Lu\n", new_size);
2578         if (new_size > old_size) {
2579                 trans = btrfs_start_transaction(root, 1);
2580                 ret = btrfs_grow_extent_tree(trans, root, new_size);
2581                 btrfs_commit_transaction(trans, root);
2582         } else {
2583                 ret = btrfs_shrink_extent_tree(root, new_size);
2584         }
2585
2586 out_unlock:
2587         mutex_unlock(&root->fs_info->fs_mutex);
2588 out:
2589         kfree(vol_args);
2590         return ret;
2591 }
2592
2593 static int noinline btrfs_ioctl_snap_create(struct btrfs_root *root,
2594                                             void __user *arg)
2595 {
2596         struct btrfs_ioctl_vol_args *vol_args;
2597         struct btrfs_dir_item *di;
2598         struct btrfs_path *path;
2599         u64 root_dirid;
2600         int namelen;
2601         int ret;
2602
2603         vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS);
2604
2605         if (!vol_args)
2606                 return -ENOMEM;
2607
2608         if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
2609                 ret = -EFAULT;
2610                 goto out;
2611         }
2612
2613         namelen = strlen(vol_args->name);
2614         if (namelen > BTRFS_VOL_NAME_MAX) {
2615                 ret = -EINVAL;
2616                 goto out;
2617         }
2618         if (strchr(vol_args->name, '/')) {
2619                 ret = -EINVAL;
2620                 goto out;
2621         }
2622
2623         path = btrfs_alloc_path();
2624         if (!path) {
2625                 ret = -ENOMEM;
2626                 goto out;
2627         }
2628
2629         root_dirid = root->fs_info->sb->s_root->d_inode->i_ino,
2630         mutex_lock(&root->fs_info->fs_mutex);
2631         di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root,
2632                             path, root_dirid,
2633                             vol_args->name, namelen, 0);
2634         mutex_unlock(&root->fs_info->fs_mutex);
2635         btrfs_free_path(path);
2636
2637         if (di && !IS_ERR(di)) {
2638                 ret = -EEXIST;
2639                 goto out;
2640         }
2641
2642         if (IS_ERR(di)) {
2643                 ret = PTR_ERR(di);
2644                 goto out;
2645         }
2646
2647         if (root == root->fs_info->tree_root)
2648                 ret = create_subvol(root, vol_args->name, namelen);
2649         else
2650                 ret = create_snapshot(root, vol_args->name, namelen);
2651 out:
2652         kfree(vol_args);
2653         return ret;
2654 }
2655
2656 static int btrfs_ioctl_defrag(struct file *file)
2657 {
2658         struct inode *inode = fdentry(file)->d_inode;
2659         struct btrfs_root *root = BTRFS_I(inode)->root;
2660
2661         switch (inode->i_mode & S_IFMT) {
2662         case S_IFDIR:
2663                 mutex_lock(&root->fs_info->fs_mutex);
2664                 btrfs_defrag_root(root, 0);
2665                 btrfs_defrag_root(root->fs_info->extent_root, 0);
2666                 mutex_unlock(&root->fs_info->fs_mutex);
2667                 break;
2668         case S_IFREG:
2669                 btrfs_defrag_file(file);
2670                 break;
2671         }
2672
2673         return 0;
2674 }
2675
2676 long btrfs_ioctl(struct file *file, unsigned int
2677                 cmd, unsigned long arg)
2678 {
2679         struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
2680
2681         switch (cmd) {
2682         case BTRFS_IOC_SNAP_CREATE:
2683                 return btrfs_ioctl_snap_create(root, (void __user *)arg);
2684         case BTRFS_IOC_DEFRAG:
2685                 return btrfs_ioctl_defrag(file);
2686         case BTRFS_IOC_RESIZE:
2687                 return btrfs_ioctl_resize(root, (void __user *)arg);
2688         }
2689
2690         return -ENOTTY;
2691 }
2692
2693 /*
2694  * Called inside transaction, so use GFP_NOFS
2695  */
2696 struct inode *btrfs_alloc_inode(struct super_block *sb)
2697 {
2698         struct btrfs_inode *ei;
2699
2700         ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS);
2701         if (!ei)
2702                 return NULL;
2703         ei->last_trans = 0;
2704         ei->ordered_trans = 0;
2705         return &ei->vfs_inode;
2706 }
2707
2708 void btrfs_destroy_inode(struct inode *inode)
2709 {
2710         WARN_ON(!list_empty(&inode->i_dentry));
2711         WARN_ON(inode->i_data.nrpages);
2712
2713         btrfs_drop_extent_cache(inode, 0, (u64)-1);
2714         kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
2715 }
2716
2717 #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
2718 static void init_once(struct kmem_cache * cachep, void *foo)
2719 #else
2720 static void init_once(void * foo, struct kmem_cache * cachep,
2721                       unsigned long flags)
2722 #endif
2723 {
2724         struct btrfs_inode *ei = (struct btrfs_inode *) foo;
2725
2726         inode_init_once(&ei->vfs_inode);
2727 }
2728
2729 void btrfs_destroy_cachep(void)
2730 {
2731         if (btrfs_inode_cachep)
2732                 kmem_cache_destroy(btrfs_inode_cachep);
2733         if (btrfs_trans_handle_cachep)
2734                 kmem_cache_destroy(btrfs_trans_handle_cachep);
2735         if (btrfs_transaction_cachep)
2736                 kmem_cache_destroy(btrfs_transaction_cachep);
2737         if (btrfs_bit_radix_cachep)
2738                 kmem_cache_destroy(btrfs_bit_radix_cachep);
2739         if (btrfs_path_cachep)
2740                 kmem_cache_destroy(btrfs_path_cachep);
2741 }
2742
2743 struct kmem_cache *btrfs_cache_create(const char *name, size_t size,
2744                                        unsigned long extra_flags,
2745 #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
2746                                        void (*ctor)(struct kmem_cache *, void *)
2747 #else
2748                                        void (*ctor)(void *, struct kmem_cache *,
2749                                                     unsigned long)
2750 #endif
2751                                      )
2752 {
2753         return kmem_cache_create(name, size, 0, (SLAB_RECLAIM_ACCOUNT |
2754                                  SLAB_MEM_SPREAD | extra_flags), ctor
2755 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)
2756                                  ,NULL
2757 #endif
2758                                 );
2759 }
2760
2761 int btrfs_init_cachep(void)
2762 {
2763         btrfs_inode_cachep = btrfs_cache_create("btrfs_inode_cache",
2764                                           sizeof(struct btrfs_inode),
2765                                           0, init_once);
2766         if (!btrfs_inode_cachep)
2767                 goto fail;
2768         btrfs_trans_handle_cachep =
2769                         btrfs_cache_create("btrfs_trans_handle_cache",
2770                                            sizeof(struct btrfs_trans_handle),
2771                                            0, NULL);
2772         if (!btrfs_trans_handle_cachep)
2773                 goto fail;
2774         btrfs_transaction_cachep = btrfs_cache_create("btrfs_transaction_cache",
2775                                              sizeof(struct btrfs_transaction),
2776                                              0, NULL);
2777         if (!btrfs_transaction_cachep)
2778                 goto fail;
2779         btrfs_path_cachep = btrfs_cache_create("btrfs_path_cache",
2780                                          sizeof(struct btrfs_path),
2781                                          0, NULL);
2782         if (!btrfs_path_cachep)
2783                 goto fail;
2784         btrfs_bit_radix_cachep = btrfs_cache_create("btrfs_radix", 256,
2785                                               SLAB_DESTROY_BY_RCU, NULL);
2786         if (!btrfs_bit_radix_cachep)
2787                 goto fail;
2788         return 0;
2789 fail:
2790         btrfs_destroy_cachep();
2791         return -ENOMEM;
2792 }
2793
2794 static int btrfs_getattr(struct vfsmount *mnt,
2795                          struct dentry *dentry, struct kstat *stat)
2796 {
2797         struct inode *inode = dentry->d_inode;
2798         generic_fillattr(inode, stat);
2799         stat->blksize = PAGE_CACHE_SIZE;
2800         return 0;
2801 }
2802
2803 static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry,
2804                            struct inode * new_dir,struct dentry *new_dentry)
2805 {
2806         struct btrfs_trans_handle *trans;
2807         struct btrfs_root *root = BTRFS_I(old_dir)->root;
2808         struct inode *new_inode = new_dentry->d_inode;
2809         struct inode *old_inode = old_dentry->d_inode;
2810         struct timespec ctime = CURRENT_TIME;
2811         struct btrfs_path *path;
2812         int ret;
2813
2814         if (S_ISDIR(old_inode->i_mode) && new_inode &&
2815             new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) {
2816                 return -ENOTEMPTY;
2817         }
2818
2819         mutex_lock(&root->fs_info->fs_mutex);
2820         ret = btrfs_check_free_space(root, 1, 0);
2821         if (ret)
2822                 goto out_unlock;
2823
2824         trans = btrfs_start_transaction(root, 1);
2825
2826         btrfs_set_trans_block_group(trans, new_dir);
2827         path = btrfs_alloc_path();
2828         if (!path) {
2829                 ret = -ENOMEM;
2830                 goto out_fail;
2831         }
2832
2833         old_dentry->d_inode->i_nlink++;
2834         old_dir->i_ctime = old_dir->i_mtime = ctime;
2835         new_dir->i_ctime = new_dir->i_mtime = ctime;
2836         old_inode->i_ctime = ctime;
2837
2838         ret = btrfs_unlink_trans(trans, root, old_dir, old_dentry);
2839         if (ret)
2840                 goto out_fail;
2841
2842         if (new_inode) {
2843                 new_inode->i_ctime = CURRENT_TIME;
2844                 ret = btrfs_unlink_trans(trans, root, new_dir, new_dentry);
2845                 if (ret)
2846                         goto out_fail;
2847         }
2848         ret = btrfs_add_link(trans, new_dentry, old_inode, 1);
2849         if (ret)
2850                 goto out_fail;
2851
2852 out_fail:
2853         btrfs_free_path(path);
2854         btrfs_end_transaction(trans, root);
2855 out_unlock:
2856         mutex_unlock(&root->fs_info->fs_mutex);
2857         return ret;
2858 }
2859
2860 static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
2861                          const char *symname)
2862 {
2863         struct btrfs_trans_handle *trans;
2864         struct btrfs_root *root = BTRFS_I(dir)->root;
2865         struct btrfs_path *path;
2866         struct btrfs_key key;
2867         struct inode *inode = NULL;
2868         int err;
2869         int drop_inode = 0;
2870         u64 objectid;
2871         int name_len;
2872         int datasize;
2873         unsigned long ptr;
2874         struct btrfs_file_extent_item *ei;
2875         struct extent_buffer *leaf;
2876         unsigned long nr = 0;
2877
2878         name_len = strlen(symname) + 1;
2879         if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root))
2880                 return -ENAMETOOLONG;
2881
2882         mutex_lock(&root->fs_info->fs_mutex);
2883         err = btrfs_check_free_space(root, 1, 0);
2884         if (err)
2885                 goto out_fail;
2886
2887         trans = btrfs_start_transaction(root, 1);
2888         btrfs_set_trans_block_group(trans, dir);
2889
2890         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
2891         if (err) {
2892                 err = -ENOSPC;
2893                 goto out_unlock;
2894         }
2895
2896         inode = btrfs_new_inode(trans, root, dentry->d_name.name,
2897                                 dentry->d_name.len,
2898                                 dentry->d_parent->d_inode->i_ino, objectid,
2899                                 BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO);
2900         err = PTR_ERR(inode);
2901         if (IS_ERR(inode))
2902                 goto out_unlock;
2903
2904         btrfs_set_trans_block_group(trans, inode);
2905         err = btrfs_add_nondir(trans, dentry, inode, 0);
2906         if (err)
2907                 drop_inode = 1;
2908         else {
2909                 inode->i_mapping->a_ops = &btrfs_aops;
2910                 inode->i_fop = &btrfs_file_operations;
2911                 inode->i_op = &btrfs_file_inode_operations;
2912                 extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS);
2913                 extent_io_tree_init(&BTRFS_I(inode)->io_tree,
2914                                      inode->i_mapping, GFP_NOFS);
2915                 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
2916         }
2917         dir->i_sb->s_dirt = 1;
2918         btrfs_update_inode_block_group(trans, inode);
2919         btrfs_update_inode_block_group(trans, dir);
2920         if (drop_inode)
2921                 goto out_unlock;
2922
2923         path = btrfs_alloc_path();
2924         BUG_ON(!path);
2925         key.objectid = inode->i_ino;
2926         key.offset = 0;
2927         btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
2928         datasize = btrfs_file_extent_calc_inline_size(name_len);
2929         err = btrfs_insert_empty_item(trans, root, path, &key,
2930                                       datasize);
2931         if (err) {
2932                 drop_inode = 1;
2933                 goto out_unlock;
2934         }
2935         leaf = path->nodes[0];
2936         ei = btrfs_item_ptr(leaf, path->slots[0],
2937                             struct btrfs_file_extent_item);
2938         btrfs_set_file_extent_generation(leaf, ei, trans->transid);
2939         btrfs_set_file_extent_type(leaf, ei,
2940                                    BTRFS_FILE_EXTENT_INLINE);
2941         ptr = btrfs_file_extent_inline_start(ei);
2942         write_extent_buffer(leaf, symname, ptr, name_len);
2943         btrfs_mark_buffer_dirty(leaf);
2944         btrfs_free_path(path);
2945
2946         inode->i_op = &btrfs_symlink_inode_operations;
2947         inode->i_mapping->a_ops = &btrfs_symlink_aops;
2948         inode->i_size = name_len - 1;
2949         err = btrfs_update_inode(trans, root, inode);
2950         if (err)
2951                 drop_inode = 1;
2952
2953 out_unlock:
2954         nr = trans->blocks_used;
2955         btrfs_end_transaction(trans, root);
2956 out_fail:
2957         mutex_unlock(&root->fs_info->fs_mutex);
2958         if (drop_inode) {
2959                 inode_dec_link_count(inode);
2960                 iput(inode);
2961         }
2962         btrfs_btree_balance_dirty(root, nr);
2963         btrfs_throttle(root);
2964         return err;
2965 }
2966 static int btrfs_permission(struct inode *inode, int mask,
2967                             struct nameidata *nd)
2968 {
2969         if (btrfs_test_flag(inode, READONLY) && (mask & MAY_WRITE))
2970                 return -EACCES;
2971         return generic_permission(inode, mask, NULL);
2972 }
2973
2974 static struct inode_operations btrfs_dir_inode_operations = {
2975         .lookup         = btrfs_lookup,
2976         .create         = btrfs_create,
2977         .unlink         = btrfs_unlink,
2978         .link           = btrfs_link,
2979         .mkdir          = btrfs_mkdir,
2980         .rmdir          = btrfs_rmdir,
2981         .rename         = btrfs_rename,
2982         .symlink        = btrfs_symlink,
2983         .setattr        = btrfs_setattr,
2984         .mknod          = btrfs_mknod,
2985         .setxattr       = generic_setxattr,
2986         .getxattr       = generic_getxattr,
2987         .listxattr      = btrfs_listxattr,
2988         .removexattr    = generic_removexattr,
2989         .permission     = btrfs_permission,
2990 };
2991 static struct inode_operations btrfs_dir_ro_inode_operations = {
2992         .lookup         = btrfs_lookup,
2993         .permission     = btrfs_permission,
2994 };
2995 static struct file_operations btrfs_dir_file_operations = {
2996         .llseek         = generic_file_llseek,
2997         .read           = generic_read_dir,
2998         .readdir        = btrfs_readdir,
2999         .unlocked_ioctl = btrfs_ioctl,
3000 #ifdef CONFIG_COMPAT
3001         .compat_ioctl   = btrfs_ioctl,
3002 #endif
3003 };
3004
3005 static struct extent_io_ops btrfs_extent_io_ops = {
3006         .fill_delalloc = run_delalloc_range,
3007         .writepage_io_hook = btrfs_writepage_io_hook,
3008         .readpage_io_hook = btrfs_readpage_io_hook,
3009         .readpage_end_io_hook = btrfs_readpage_end_io_hook,
3010         .set_bit_hook = btrfs_set_bit_hook,
3011         .clear_bit_hook = btrfs_clear_bit_hook,
3012 };
3013
3014 static struct address_space_operations btrfs_aops = {
3015         .readpage       = btrfs_readpage,
3016         .writepage      = btrfs_writepage,
3017         .writepages     = btrfs_writepages,
3018         .readpages      = btrfs_readpages,
3019         .sync_page      = block_sync_page,
3020         .bmap           = btrfs_bmap,
3021         .invalidatepage = btrfs_invalidatepage,
3022         .releasepage    = btrfs_releasepage,
3023         .set_page_dirty = __set_page_dirty_nobuffers,
3024 };
3025
3026 static struct address_space_operations btrfs_symlink_aops = {
3027         .readpage       = btrfs_readpage,
3028         .writepage      = btrfs_writepage,
3029         .invalidatepage = btrfs_invalidatepage,
3030         .releasepage    = btrfs_releasepage,
3031 };
3032
3033 static struct inode_operations btrfs_file_inode_operations = {
3034         .truncate       = btrfs_truncate,
3035         .getattr        = btrfs_getattr,
3036         .setattr        = btrfs_setattr,
3037         .setxattr       = generic_setxattr,
3038         .getxattr       = generic_getxattr,
3039         .listxattr      = btrfs_listxattr,
3040         .removexattr    = generic_removexattr,
3041         .permission     = btrfs_permission,
3042 };
3043 static struct inode_operations btrfs_special_inode_operations = {
3044         .getattr        = btrfs_getattr,
3045         .setattr        = btrfs_setattr,
3046         .permission     = btrfs_permission,
3047 };
3048 static struct inode_operations btrfs_symlink_inode_operations = {
3049         .readlink       = generic_readlink,
3050         .follow_link    = page_follow_link_light,
3051         .put_link       = page_put_link,
3052         .permission     = btrfs_permission,
3053 };