]> git.karo-electronics.de Git - karo-tx-linux.git/blob - fs/btrfs/inode.c
Btrfs: Enable delalloc accounting
[karo-tx-linux.git] / fs / btrfs / inode.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <linux/buffer_head.h>
20 #include <linux/fs.h>
21 #include <linux/pagemap.h>
22 #include <linux/highmem.h>
23 #include <linux/time.h>
24 #include <linux/init.h>
25 #include <linux/string.h>
26 #include <linux/smp_lock.h>
27 #include <linux/backing-dev.h>
28 #include <linux/mpage.h>
29 #include <linux/swap.h>
30 #include <linux/writeback.h>
31 #include <linux/statfs.h>
32 #include <linux/compat.h>
33 #include <linux/bit_spinlock.h>
34 #include <linux/version.h>
35 #include <linux/xattr.h>
36 #include "ctree.h"
37 #include "disk-io.h"
38 #include "transaction.h"
39 #include "btrfs_inode.h"
40 #include "ioctl.h"
41 #include "print-tree.h"
42
43 struct btrfs_iget_args {
44         u64 ino;
45         struct btrfs_root *root;
46 };
47
48 static struct inode_operations btrfs_dir_inode_operations;
49 static struct inode_operations btrfs_symlink_inode_operations;
50 static struct inode_operations btrfs_dir_ro_inode_operations;
51 static struct inode_operations btrfs_special_inode_operations;
52 static struct inode_operations btrfs_file_inode_operations;
53 static struct address_space_operations btrfs_aops;
54 static struct address_space_operations btrfs_symlink_aops;
55 static struct file_operations btrfs_dir_file_operations;
56 static struct extent_io_ops btrfs_extent_io_ops;
57
58 static struct kmem_cache *btrfs_inode_cachep;
59 struct kmem_cache *btrfs_trans_handle_cachep;
60 struct kmem_cache *btrfs_transaction_cachep;
61 struct kmem_cache *btrfs_bit_radix_cachep;
62 struct kmem_cache *btrfs_path_cachep;
63
64 #define S_SHIFT 12
65 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
66         [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
67         [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
68         [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
69         [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
70         [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
71         [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
72         [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
73 };
74
75 int btrfs_check_free_space(struct btrfs_root *root, u64 num_required,
76                            int for_del)
77 {
78         u64 total = btrfs_super_total_bytes(&root->fs_info->super_copy);
79         u64 used = btrfs_super_bytes_used(&root->fs_info->super_copy);
80         u64 thresh;
81         int ret = 0;
82
83         if (for_del)
84                 thresh = total * 90;
85         else
86                 thresh = total * 85;
87
88         do_div(thresh, 100);
89
90         spin_lock(&root->fs_info->delalloc_lock);
91         if (used + root->fs_info->delalloc_bytes + num_required > thresh)
92                 ret = -ENOSPC;
93         spin_unlock(&root->fs_info->delalloc_lock);
94         return ret;
95 }
96
97 static int cow_file_range(struct inode *inode, u64 start, u64 end)
98 {
99         struct btrfs_root *root = BTRFS_I(inode)->root;
100         struct btrfs_trans_handle *trans;
101         u64 alloc_hint = 0;
102         u64 num_bytes;
103         u64 cur_alloc_size;
104         u64 blocksize = root->sectorsize;
105         u64 orig_start = start;
106         u64 orig_num_bytes;
107         struct btrfs_key ins;
108         int ret;
109
110         trans = btrfs_start_transaction(root, 1);
111         BUG_ON(!trans);
112         btrfs_set_trans_block_group(trans, inode);
113
114         num_bytes = (end - start + blocksize) & ~(blocksize - 1);
115         num_bytes = max(blocksize,  num_bytes);
116         ret = btrfs_drop_extents(trans, root, inode,
117                                  start, start + num_bytes, start, &alloc_hint);
118         orig_num_bytes = num_bytes;
119
120         if (alloc_hint == EXTENT_MAP_INLINE)
121                 goto out;
122
123         while(num_bytes > 0) {
124                 cur_alloc_size = min(num_bytes, root->fs_info->max_extent);
125                 ret = btrfs_alloc_extent(trans, root, cur_alloc_size,
126                                          root->root_key.objectid,
127                                          trans->transid,
128                                          inode->i_ino, start, 0,
129                                          alloc_hint, (u64)-1, &ins, 1);
130                 if (ret) {
131                         WARN_ON(1);
132                         goto out;
133                 }
134                 ret = btrfs_insert_file_extent(trans, root, inode->i_ino,
135                                                start, ins.objectid, ins.offset,
136                                                ins.offset);
137                 btrfs_check_file(root, inode);
138                 num_bytes -= cur_alloc_size;
139                 alloc_hint = ins.objectid + ins.offset;
140                 start += cur_alloc_size;
141         }
142         btrfs_drop_extent_cache(inode, orig_start,
143                                 orig_start + orig_num_bytes - 1);
144         btrfs_add_ordered_inode(inode);
145 out:
146         btrfs_end_transaction(trans, root);
147         return ret;
148 }
149
150 static int run_delalloc_nocow(struct inode *inode, u64 start, u64 end)
151 {
152         u64 extent_start;
153         u64 extent_end;
154         u64 bytenr;
155         u64 cow_end;
156         u64 loops = 0;
157         u64 total_fs_bytes;
158         struct btrfs_root *root = BTRFS_I(inode)->root;
159         struct extent_buffer *leaf;
160         int found_type;
161         struct btrfs_path *path;
162         struct btrfs_file_extent_item *item;
163         int ret;
164         int err;
165         struct btrfs_key found_key;
166
167         total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
168         path = btrfs_alloc_path();
169         BUG_ON(!path);
170 again:
171         ret = btrfs_lookup_file_extent(NULL, root, path,
172                                        inode->i_ino, start, 0);
173         if (ret < 0) {
174                 btrfs_free_path(path);
175                 return ret;
176         }
177
178         cow_end = end;
179         if (ret != 0) {
180                 if (path->slots[0] == 0)
181                         goto not_found;
182                 path->slots[0]--;
183         }
184
185         leaf = path->nodes[0];
186         item = btrfs_item_ptr(leaf, path->slots[0],
187                               struct btrfs_file_extent_item);
188
189         /* are we inside the extent that was found? */
190         btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
191         found_type = btrfs_key_type(&found_key);
192         if (found_key.objectid != inode->i_ino ||
193             found_type != BTRFS_EXTENT_DATA_KEY) {
194                 goto not_found;
195         }
196
197         found_type = btrfs_file_extent_type(leaf, item);
198         extent_start = found_key.offset;
199         if (found_type == BTRFS_FILE_EXTENT_REG) {
200                 u64 extent_num_bytes;
201
202                 extent_num_bytes = btrfs_file_extent_num_bytes(leaf, item);
203                 extent_end = extent_start + extent_num_bytes;
204                 err = 0;
205
206                 if (loops && start != extent_start)
207                         goto not_found;
208
209                 if (start < extent_start || start >= extent_end)
210                         goto not_found;
211
212                 cow_end = min(end, extent_end - 1);
213                 bytenr = btrfs_file_extent_disk_bytenr(leaf, item);
214                 if (bytenr == 0)
215                         goto not_found;
216
217                 /*
218                  * we may be called by the resizer, make sure we're inside
219                  * the limits of the FS
220                  */
221                 if (bytenr + extent_num_bytes > total_fs_bytes)
222                         goto not_found;
223
224                 if (btrfs_count_snapshots_in_path(root, path, bytenr) != 1) {
225                         goto not_found;
226                 }
227
228                 start = extent_end;
229         } else {
230                 goto not_found;
231         }
232 loop:
233         if (start > end) {
234                 btrfs_free_path(path);
235                 return 0;
236         }
237         btrfs_release_path(root, path);
238         loops++;
239         goto again;
240
241 not_found:
242         cow_file_range(inode, start, cow_end);
243         start = cow_end + 1;
244         goto loop;
245 }
246
247 static int run_delalloc_range(struct inode *inode, u64 start, u64 end)
248 {
249         struct btrfs_root *root = BTRFS_I(inode)->root;
250         int ret;
251         mutex_lock(&root->fs_info->fs_mutex);
252         if (btrfs_test_opt(root, NODATACOW) ||
253             btrfs_test_flag(inode, NODATACOW))
254                 ret = run_delalloc_nocow(inode, start, end);
255         else
256                 ret = cow_file_range(inode, start, end);
257
258         mutex_unlock(&root->fs_info->fs_mutex);
259         return ret;
260 }
261
262 int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end,
263                        unsigned long old, unsigned long bits)
264 {
265         if (!(old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) {
266                 struct btrfs_root *root = BTRFS_I(inode)->root;
267                 spin_lock(&root->fs_info->delalloc_lock);
268                 root->fs_info->delalloc_bytes += end - start + 1;
269                 spin_unlock(&root->fs_info->delalloc_lock);
270         }
271         return 0;
272 }
273
274 int btrfs_clear_bit_hook(struct inode *inode, u64 start, u64 end,
275                          unsigned long old, unsigned long bits)
276 {
277         if ((old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) {
278                 struct btrfs_root *root = BTRFS_I(inode)->root;
279                 spin_lock(&root->fs_info->delalloc_lock);
280                 if (end - start + 1 > root->fs_info->delalloc_bytes) {
281                         printk("warning: delalloc account %Lu %Lu\n",
282                                end - start + 1, root->fs_info->delalloc_bytes);
283                         root->fs_info->delalloc_bytes = 0;
284                 } else {
285                         root->fs_info->delalloc_bytes -= end - start + 1;
286                 }
287                 spin_unlock(&root->fs_info->delalloc_lock);
288         }
289         return 0;
290 }
291
292 int btrfs_writepage_io_hook(struct page *page, u64 start, u64 end)
293 {
294         struct inode *inode = page->mapping->host;
295         struct btrfs_root *root = BTRFS_I(inode)->root;
296         struct btrfs_trans_handle *trans;
297         char *kaddr;
298         int ret = 0;
299         u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
300         size_t offset = start - page_start;
301         if (btrfs_test_opt(root, NODATASUM) ||
302             btrfs_test_flag(inode, NODATASUM))
303                 return 0;
304         mutex_lock(&root->fs_info->fs_mutex);
305         trans = btrfs_start_transaction(root, 1);
306         btrfs_set_trans_block_group(trans, inode);
307         kaddr = kmap(page);
308         btrfs_csum_file_block(trans, root, inode, inode->i_ino,
309                               start, kaddr + offset, end - start + 1);
310         kunmap(page);
311         ret = btrfs_end_transaction(trans, root);
312         BUG_ON(ret);
313         mutex_unlock(&root->fs_info->fs_mutex);
314         return ret;
315 }
316
317 int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end)
318 {
319         int ret = 0;
320         struct inode *inode = page->mapping->host;
321         struct btrfs_root *root = BTRFS_I(inode)->root;
322         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
323         struct btrfs_csum_item *item;
324         struct btrfs_path *path = NULL;
325         u32 csum;
326         if (btrfs_test_opt(root, NODATASUM) ||
327             btrfs_test_flag(inode, NODATASUM))
328                 return 0;
329         mutex_lock(&root->fs_info->fs_mutex);
330         path = btrfs_alloc_path();
331         item = btrfs_lookup_csum(NULL, root, path, inode->i_ino, start, 0);
332         if (IS_ERR(item)) {
333                 ret = PTR_ERR(item);
334                 /* a csum that isn't present is a preallocated region. */
335                 if (ret == -ENOENT || ret == -EFBIG)
336                         ret = 0;
337                 csum = 0;
338                 printk("no csum found for inode %lu start %Lu\n", inode->i_ino, start);
339                 goto out;
340         }
341         read_extent_buffer(path->nodes[0], &csum, (unsigned long)item,
342                            BTRFS_CRC32_SIZE);
343         set_state_private(io_tree, start, csum);
344 out:
345         if (path)
346                 btrfs_free_path(path);
347         mutex_unlock(&root->fs_info->fs_mutex);
348         return ret;
349 }
350
351 int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end,
352                                struct extent_state *state)
353 {
354         size_t offset = start - ((u64)page->index << PAGE_CACHE_SHIFT);
355         struct inode *inode = page->mapping->host;
356         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
357         char *kaddr;
358         u64 private = ~(u32)0;
359         int ret;
360         struct btrfs_root *root = BTRFS_I(inode)->root;
361         u32 csum = ~(u32)0;
362         unsigned long flags;
363
364         if (btrfs_test_opt(root, NODATASUM) ||
365             btrfs_test_flag(inode, NODATASUM))
366                 return 0;
367
368         if (state->start == start) {
369                 private = state->private;
370                 ret = 0;
371         } else {
372                 ret = get_state_private(io_tree, start, &private);
373         }
374         local_irq_save(flags);
375         kaddr = kmap_atomic(page, KM_IRQ0);
376         if (ret) {
377                 goto zeroit;
378         }
379         csum = btrfs_csum_data(root, kaddr + offset, csum,  end - start + 1);
380         btrfs_csum_final(csum, (char *)&csum);
381         if (csum != private) {
382                 goto zeroit;
383         }
384         kunmap_atomic(kaddr, KM_IRQ0);
385         local_irq_restore(flags);
386         return 0;
387
388 zeroit:
389         printk("btrfs csum failed ino %lu off %llu csum %u private %Lu\n",
390                page->mapping->host->i_ino, (unsigned long long)start, csum,
391                private);
392         memset(kaddr + offset, 1, end - start + 1);
393         flush_dcache_page(page);
394         kunmap_atomic(kaddr, KM_IRQ0);
395         local_irq_restore(flags);
396         return 0;
397 }
398
399 void btrfs_read_locked_inode(struct inode *inode)
400 {
401         struct btrfs_path *path;
402         struct extent_buffer *leaf;
403         struct btrfs_inode_item *inode_item;
404         struct btrfs_inode_timespec *tspec;
405         struct btrfs_root *root = BTRFS_I(inode)->root;
406         struct btrfs_key location;
407         u64 alloc_group_block;
408         u32 rdev;
409         int ret;
410
411         path = btrfs_alloc_path();
412         BUG_ON(!path);
413         mutex_lock(&root->fs_info->fs_mutex);
414         memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
415
416         ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
417         if (ret)
418                 goto make_bad;
419
420         leaf = path->nodes[0];
421         inode_item = btrfs_item_ptr(leaf, path->slots[0],
422                                     struct btrfs_inode_item);
423
424         inode->i_mode = btrfs_inode_mode(leaf, inode_item);
425         inode->i_nlink = btrfs_inode_nlink(leaf, inode_item);
426         inode->i_uid = btrfs_inode_uid(leaf, inode_item);
427         inode->i_gid = btrfs_inode_gid(leaf, inode_item);
428         inode->i_size = btrfs_inode_size(leaf, inode_item);
429
430         tspec = btrfs_inode_atime(inode_item);
431         inode->i_atime.tv_sec = btrfs_timespec_sec(leaf, tspec);
432         inode->i_atime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
433
434         tspec = btrfs_inode_mtime(inode_item);
435         inode->i_mtime.tv_sec = btrfs_timespec_sec(leaf, tspec);
436         inode->i_mtime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
437
438         tspec = btrfs_inode_ctime(inode_item);
439         inode->i_ctime.tv_sec = btrfs_timespec_sec(leaf, tspec);
440         inode->i_ctime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
441
442         inode->i_blocks = btrfs_inode_nblocks(leaf, inode_item);
443         inode->i_generation = btrfs_inode_generation(leaf, inode_item);
444         inode->i_rdev = 0;
445         rdev = btrfs_inode_rdev(leaf, inode_item);
446
447         alloc_group_block = btrfs_inode_block_group(leaf, inode_item);
448         BTRFS_I(inode)->block_group = btrfs_lookup_block_group(root->fs_info,
449                                                        alloc_group_block);
450         BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);
451         if (!BTRFS_I(inode)->block_group) {
452                 BTRFS_I(inode)->block_group = btrfs_find_block_group(root,
453                                                          NULL, 0, 0, 0);
454         }
455         btrfs_free_path(path);
456         inode_item = NULL;
457
458         mutex_unlock(&root->fs_info->fs_mutex);
459
460         switch (inode->i_mode & S_IFMT) {
461         case S_IFREG:
462                 inode->i_mapping->a_ops = &btrfs_aops;
463                 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
464                 inode->i_fop = &btrfs_file_operations;
465                 inode->i_op = &btrfs_file_inode_operations;
466                 break;
467         case S_IFDIR:
468                 inode->i_fop = &btrfs_dir_file_operations;
469                 if (root == root->fs_info->tree_root)
470                         inode->i_op = &btrfs_dir_ro_inode_operations;
471                 else
472                         inode->i_op = &btrfs_dir_inode_operations;
473                 break;
474         case S_IFLNK:
475                 inode->i_op = &btrfs_symlink_inode_operations;
476                 inode->i_mapping->a_ops = &btrfs_symlink_aops;
477                 break;
478         default:
479                 init_special_inode(inode, inode->i_mode, rdev);
480                 break;
481         }
482         return;
483
484 make_bad:
485         btrfs_release_path(root, path);
486         btrfs_free_path(path);
487         mutex_unlock(&root->fs_info->fs_mutex);
488         make_bad_inode(inode);
489 }
490
491 static void fill_inode_item(struct extent_buffer *leaf,
492                             struct btrfs_inode_item *item,
493                             struct inode *inode)
494 {
495         btrfs_set_inode_uid(leaf, item, inode->i_uid);
496         btrfs_set_inode_gid(leaf, item, inode->i_gid);
497         btrfs_set_inode_size(leaf, item, inode->i_size);
498         btrfs_set_inode_mode(leaf, item, inode->i_mode);
499         btrfs_set_inode_nlink(leaf, item, inode->i_nlink);
500
501         btrfs_set_timespec_sec(leaf, btrfs_inode_atime(item),
502                                inode->i_atime.tv_sec);
503         btrfs_set_timespec_nsec(leaf, btrfs_inode_atime(item),
504                                 inode->i_atime.tv_nsec);
505
506         btrfs_set_timespec_sec(leaf, btrfs_inode_mtime(item),
507                                inode->i_mtime.tv_sec);
508         btrfs_set_timespec_nsec(leaf, btrfs_inode_mtime(item),
509                                 inode->i_mtime.tv_nsec);
510
511         btrfs_set_timespec_sec(leaf, btrfs_inode_ctime(item),
512                                inode->i_ctime.tv_sec);
513         btrfs_set_timespec_nsec(leaf, btrfs_inode_ctime(item),
514                                 inode->i_ctime.tv_nsec);
515
516         btrfs_set_inode_nblocks(leaf, item, inode->i_blocks);
517         btrfs_set_inode_generation(leaf, item, inode->i_generation);
518         btrfs_set_inode_rdev(leaf, item, inode->i_rdev);
519         btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags);
520         btrfs_set_inode_block_group(leaf, item,
521                                     BTRFS_I(inode)->block_group->key.objectid);
522 }
523
524 int btrfs_update_inode(struct btrfs_trans_handle *trans,
525                               struct btrfs_root *root,
526                               struct inode *inode)
527 {
528         struct btrfs_inode_item *inode_item;
529         struct btrfs_path *path;
530         struct extent_buffer *leaf;
531         int ret;
532
533         path = btrfs_alloc_path();
534         BUG_ON(!path);
535         ret = btrfs_lookup_inode(trans, root, path,
536                                  &BTRFS_I(inode)->location, 1);
537         if (ret) {
538                 if (ret > 0)
539                         ret = -ENOENT;
540                 goto failed;
541         }
542
543         leaf = path->nodes[0];
544         inode_item = btrfs_item_ptr(leaf, path->slots[0],
545                                   struct btrfs_inode_item);
546
547         fill_inode_item(leaf, inode_item, inode);
548         btrfs_mark_buffer_dirty(leaf);
549         btrfs_set_inode_last_trans(trans, inode);
550         ret = 0;
551 failed:
552         btrfs_release_path(root, path);
553         btrfs_free_path(path);
554         return ret;
555 }
556
557
558 static int btrfs_unlink_trans(struct btrfs_trans_handle *trans,
559                               struct btrfs_root *root,
560                               struct inode *dir,
561                               struct dentry *dentry)
562 {
563         struct btrfs_path *path;
564         const char *name = dentry->d_name.name;
565         int name_len = dentry->d_name.len;
566         int ret = 0;
567         struct extent_buffer *leaf;
568         struct btrfs_dir_item *di;
569         struct btrfs_key key;
570
571         path = btrfs_alloc_path();
572         if (!path) {
573                 ret = -ENOMEM;
574                 goto err;
575         }
576
577         di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino,
578                                     name, name_len, -1);
579         if (IS_ERR(di)) {
580                 ret = PTR_ERR(di);
581                 goto err;
582         }
583         if (!di) {
584                 ret = -ENOENT;
585                 goto err;
586         }
587         leaf = path->nodes[0];
588         btrfs_dir_item_key_to_cpu(leaf, di, &key);
589         ret = btrfs_delete_one_dir_name(trans, root, path, di);
590         if (ret)
591                 goto err;
592         btrfs_release_path(root, path);
593
594         di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino,
595                                          key.objectid, name, name_len, -1);
596         if (IS_ERR(di)) {
597                 ret = PTR_ERR(di);
598                 goto err;
599         }
600         if (!di) {
601                 ret = -ENOENT;
602                 goto err;
603         }
604         ret = btrfs_delete_one_dir_name(trans, root, path, di);
605
606         dentry->d_inode->i_ctime = dir->i_ctime;
607         ret = btrfs_del_inode_ref(trans, root, name, name_len,
608                                   dentry->d_inode->i_ino,
609                                   dentry->d_parent->d_inode->i_ino);
610         if (ret) {
611                 printk("failed to delete reference to %.*s, "
612                        "inode %lu parent %lu\n", name_len, name,
613                        dentry->d_inode->i_ino,
614                        dentry->d_parent->d_inode->i_ino);
615         }
616 err:
617         btrfs_free_path(path);
618         if (!ret) {
619                 dir->i_size -= name_len * 2;
620                 dir->i_mtime = dir->i_ctime = CURRENT_TIME;
621                 btrfs_update_inode(trans, root, dir);
622 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
623                 dentry->d_inode->i_nlink--;
624 #else
625                 drop_nlink(dentry->d_inode);
626 #endif
627                 ret = btrfs_update_inode(trans, root, dentry->d_inode);
628                 dir->i_sb->s_dirt = 1;
629         }
630         return ret;
631 }
632
633 static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
634 {
635         struct btrfs_root *root;
636         struct btrfs_trans_handle *trans;
637         struct inode *inode = dentry->d_inode;
638         int ret;
639         unsigned long nr = 0;
640
641         root = BTRFS_I(dir)->root;
642         mutex_lock(&root->fs_info->fs_mutex);
643
644         ret = btrfs_check_free_space(root, 1, 1);
645         if (ret)
646                 goto fail;
647
648         trans = btrfs_start_transaction(root, 1);
649
650         btrfs_set_trans_block_group(trans, dir);
651         ret = btrfs_unlink_trans(trans, root, dir, dentry);
652         nr = trans->blocks_used;
653
654         if (inode->i_nlink == 0) {
655                 int found;
656                 /* if the inode isn't linked anywhere,
657                  * we don't need to worry about
658                  * data=ordered
659                  */
660                 found = btrfs_del_ordered_inode(inode);
661                 if (found == 1) {
662                         atomic_dec(&inode->i_count);
663                 }
664         }
665
666         btrfs_end_transaction(trans, root);
667 fail:
668         mutex_unlock(&root->fs_info->fs_mutex);
669         btrfs_btree_balance_dirty(root, nr);
670         btrfs_throttle(root);
671         return ret;
672 }
673
674 static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
675 {
676         struct inode *inode = dentry->d_inode;
677         int err = 0;
678         int ret;
679         struct btrfs_root *root = BTRFS_I(dir)->root;
680         struct btrfs_trans_handle *trans;
681         unsigned long nr = 0;
682
683         if (inode->i_size > BTRFS_EMPTY_DIR_SIZE)
684                 return -ENOTEMPTY;
685
686         mutex_lock(&root->fs_info->fs_mutex);
687         ret = btrfs_check_free_space(root, 1, 1);
688         if (ret)
689                 goto fail;
690
691         trans = btrfs_start_transaction(root, 1);
692         btrfs_set_trans_block_group(trans, dir);
693
694         /* now the directory is empty */
695         err = btrfs_unlink_trans(trans, root, dir, dentry);
696         if (!err) {
697                 inode->i_size = 0;
698         }
699
700         nr = trans->blocks_used;
701         ret = btrfs_end_transaction(trans, root);
702 fail:
703         mutex_unlock(&root->fs_info->fs_mutex);
704         btrfs_btree_balance_dirty(root, nr);
705         btrfs_throttle(root);
706
707         if (ret && !err)
708                 err = ret;
709         return err;
710 }
711
712 /*
713  * this can truncate away extent items, csum items and directory items.
714  * It starts at a high offset and removes keys until it can't find
715  * any higher than i_size.
716  *
717  * csum items that cross the new i_size are truncated to the new size
718  * as well.
719  */
720 static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans,
721                                    struct btrfs_root *root,
722                                    struct inode *inode,
723                                    u32 min_type)
724 {
725         int ret;
726         struct btrfs_path *path;
727         struct btrfs_key key;
728         struct btrfs_key found_key;
729         u32 found_type;
730         struct extent_buffer *leaf;
731         struct btrfs_file_extent_item *fi;
732         u64 extent_start = 0;
733         u64 extent_num_bytes = 0;
734         u64 item_end = 0;
735         u64 root_gen = 0;
736         u64 root_owner = 0;
737         int found_extent;
738         int del_item;
739         int pending_del_nr = 0;
740         int pending_del_slot = 0;
741         int extent_type = -1;
742
743         btrfs_drop_extent_cache(inode, inode->i_size, (u64)-1);
744         path = btrfs_alloc_path();
745         path->reada = -1;
746         BUG_ON(!path);
747
748         /* FIXME, add redo link to tree so we don't leak on crash */
749         key.objectid = inode->i_ino;
750         key.offset = (u64)-1;
751         key.type = (u8)-1;
752
753         btrfs_init_path(path);
754 search_again:
755         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
756         if (ret < 0) {
757                 goto error;
758         }
759         if (ret > 0) {
760                 BUG_ON(path->slots[0] == 0);
761                 path->slots[0]--;
762         }
763
764         while(1) {
765                 fi = NULL;
766                 leaf = path->nodes[0];
767                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
768                 found_type = btrfs_key_type(&found_key);
769
770                 if (found_key.objectid != inode->i_ino)
771                         break;
772
773                 if (found_type < min_type)
774                         break;
775
776                 item_end = found_key.offset;
777                 if (found_type == BTRFS_EXTENT_DATA_KEY) {
778                         fi = btrfs_item_ptr(leaf, path->slots[0],
779                                             struct btrfs_file_extent_item);
780                         extent_type = btrfs_file_extent_type(leaf, fi);
781                         if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
782                                 item_end +=
783                                     btrfs_file_extent_num_bytes(leaf, fi);
784                         } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
785                                 struct btrfs_item *item = btrfs_item_nr(leaf,
786                                                                 path->slots[0]);
787                                 item_end += btrfs_file_extent_inline_len(leaf,
788                                                                          item);
789                         }
790                         item_end--;
791                 }
792                 if (found_type == BTRFS_CSUM_ITEM_KEY) {
793                         ret = btrfs_csum_truncate(trans, root, path,
794                                                   inode->i_size);
795                         BUG_ON(ret);
796                 }
797                 if (item_end < inode->i_size) {
798                         if (found_type == BTRFS_DIR_ITEM_KEY) {
799                                 found_type = BTRFS_INODE_ITEM_KEY;
800                         } else if (found_type == BTRFS_EXTENT_ITEM_KEY) {
801                                 found_type = BTRFS_CSUM_ITEM_KEY;
802                         } else if (found_type == BTRFS_EXTENT_DATA_KEY) {
803                                 found_type = BTRFS_XATTR_ITEM_KEY;
804                         } else if (found_type == BTRFS_XATTR_ITEM_KEY) {
805                                 found_type = BTRFS_INODE_REF_KEY;
806                         } else if (found_type) {
807                                 found_type--;
808                         } else {
809                                 break;
810                         }
811                         btrfs_set_key_type(&key, found_type);
812                         goto next;
813                 }
814                 if (found_key.offset >= inode->i_size)
815                         del_item = 1;
816                 else
817                         del_item = 0;
818                 found_extent = 0;
819
820                 /* FIXME, shrink the extent if the ref count is only 1 */
821                 if (found_type != BTRFS_EXTENT_DATA_KEY)
822                         goto delete;
823
824                 if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
825                         u64 num_dec;
826                         extent_start = btrfs_file_extent_disk_bytenr(leaf, fi);
827                         if (!del_item) {
828                                 u64 orig_num_bytes =
829                                         btrfs_file_extent_num_bytes(leaf, fi);
830                                 extent_num_bytes = inode->i_size -
831                                         found_key.offset + root->sectorsize - 1;
832                                 extent_num_bytes = extent_num_bytes &
833                                         ~((u64)root->sectorsize - 1);
834                                 btrfs_set_file_extent_num_bytes(leaf, fi,
835                                                          extent_num_bytes);
836                                 num_dec = (orig_num_bytes -
837                                            extent_num_bytes) >> 9;
838                                 if (extent_start != 0) {
839                                         inode->i_blocks -= num_dec;
840                                 }
841                                 btrfs_mark_buffer_dirty(leaf);
842                         } else {
843                                 extent_num_bytes =
844                                         btrfs_file_extent_disk_num_bytes(leaf,
845                                                                          fi);
846                                 /* FIXME blocksize != 4096 */
847                                 num_dec = btrfs_file_extent_num_bytes(leaf,
848                                                                        fi) >> 9;
849                                 if (extent_start != 0) {
850                                         found_extent = 1;
851                                         inode->i_blocks -= num_dec;
852                                 }
853                                 root_gen = btrfs_header_generation(leaf);
854                                 root_owner = btrfs_header_owner(leaf);
855                         }
856                 } else if (extent_type == BTRFS_FILE_EXTENT_INLINE &&
857                            !del_item) {
858                         u32 newsize = inode->i_size - found_key.offset;
859                         newsize = btrfs_file_extent_calc_inline_size(newsize);
860                         ret = btrfs_truncate_item(trans, root, path,
861                                                   newsize, 1);
862                         BUG_ON(ret);
863                 }
864 delete:
865                 if (del_item) {
866                         if (!pending_del_nr) {
867                                 /* no pending yet, add ourselves */
868                                 pending_del_slot = path->slots[0];
869                                 pending_del_nr = 1;
870                         } else if (pending_del_nr &&
871                                    path->slots[0] + 1 == pending_del_slot) {
872                                 /* hop on the pending chunk */
873                                 pending_del_nr++;
874                                 pending_del_slot = path->slots[0];
875                         } else {
876                                 printk("bad pending slot %d pending_del_nr %d pending_del_slot %d\n", path->slots[0], pending_del_nr, pending_del_slot);
877                         }
878                 } else {
879                         break;
880                 }
881                 if (found_extent) {
882                         ret = btrfs_free_extent(trans, root, extent_start,
883                                                 extent_num_bytes,
884                                                 root_owner,
885                                                 root_gen, inode->i_ino,
886                                                 found_key.offset, 0);
887                         BUG_ON(ret);
888                 }
889 next:
890                 if (path->slots[0] == 0) {
891                         if (pending_del_nr)
892                                 goto del_pending;
893                         btrfs_release_path(root, path);
894                         goto search_again;
895                 }
896
897                 path->slots[0]--;
898                 if (pending_del_nr &&
899                     path->slots[0] + 1 != pending_del_slot) {
900                         struct btrfs_key debug;
901 del_pending:
902                         btrfs_item_key_to_cpu(path->nodes[0], &debug,
903                                               pending_del_slot);
904                         ret = btrfs_del_items(trans, root, path,
905                                               pending_del_slot,
906                                               pending_del_nr);
907                         BUG_ON(ret);
908                         pending_del_nr = 0;
909                         btrfs_release_path(root, path);
910                         goto search_again;
911                 }
912         }
913         ret = 0;
914 error:
915         if (pending_del_nr) {
916                 ret = btrfs_del_items(trans, root, path, pending_del_slot,
917                                       pending_del_nr);
918         }
919         btrfs_release_path(root, path);
920         btrfs_free_path(path);
921         inode->i_sb->s_dirt = 1;
922         return ret;
923 }
924
925 static int btrfs_cow_one_page(struct inode *inode, struct page *page,
926                               size_t zero_start)
927 {
928         char *kaddr;
929         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
930         u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
931         u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
932         int ret = 0;
933
934         WARN_ON(!PageLocked(page));
935         set_page_extent_mapped(page);
936
937         lock_extent(io_tree, page_start, page_end, GFP_NOFS);
938         set_extent_delalloc(&BTRFS_I(inode)->io_tree, page_start,
939                             page_end, GFP_NOFS);
940
941         if (zero_start != PAGE_CACHE_SIZE) {
942                 kaddr = kmap(page);
943                 memset(kaddr + zero_start, 0, PAGE_CACHE_SIZE - zero_start);
944                 flush_dcache_page(page);
945                 kunmap(page);
946         }
947         set_page_dirty(page);
948         unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
949
950         return ret;
951 }
952
953 /*
954  * taken from block_truncate_page, but does cow as it zeros out
955  * any bytes left in the last page in the file.
956  */
957 static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
958 {
959         struct inode *inode = mapping->host;
960         struct btrfs_root *root = BTRFS_I(inode)->root;
961         u32 blocksize = root->sectorsize;
962         pgoff_t index = from >> PAGE_CACHE_SHIFT;
963         unsigned offset = from & (PAGE_CACHE_SIZE-1);
964         struct page *page;
965         int ret = 0;
966         u64 page_start;
967
968         if ((offset & (blocksize - 1)) == 0)
969                 goto out;
970
971         ret = -ENOMEM;
972         page = grab_cache_page(mapping, index);
973         if (!page)
974                 goto out;
975         if (!PageUptodate(page)) {
976                 ret = btrfs_readpage(NULL, page);
977                 lock_page(page);
978                 if (!PageUptodate(page)) {
979                         ret = -EIO;
980                         goto out;
981                 }
982         }
983         page_start = (u64)page->index << PAGE_CACHE_SHIFT;
984
985         ret = btrfs_cow_one_page(inode, page, offset);
986
987         unlock_page(page);
988         page_cache_release(page);
989 out:
990         return ret;
991 }
992
993 static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
994 {
995         struct inode *inode = dentry->d_inode;
996         int err;
997
998         err = inode_change_ok(inode, attr);
999         if (err)
1000                 return err;
1001
1002         if (S_ISREG(inode->i_mode) &&
1003             attr->ia_valid & ATTR_SIZE && attr->ia_size > inode->i_size) {
1004                 struct btrfs_trans_handle *trans;
1005                 struct btrfs_root *root = BTRFS_I(inode)->root;
1006                 struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
1007
1008                 u64 mask = root->sectorsize - 1;
1009                 u64 hole_start = (inode->i_size + mask) & ~mask;
1010                 u64 block_end = (attr->ia_size + mask) & ~mask;
1011                 u64 hole_size;
1012                 u64 alloc_hint = 0;
1013
1014                 if (attr->ia_size <= hole_start)
1015                         goto out;
1016
1017                 mutex_lock(&root->fs_info->fs_mutex);
1018                 err = btrfs_check_free_space(root, 1, 0);
1019                 mutex_unlock(&root->fs_info->fs_mutex);
1020                 if (err)
1021                         goto fail;
1022
1023                 btrfs_truncate_page(inode->i_mapping, inode->i_size);
1024
1025                 lock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS);
1026                 hole_size = block_end - hole_start;
1027
1028                 mutex_lock(&root->fs_info->fs_mutex);
1029                 trans = btrfs_start_transaction(root, 1);
1030                 btrfs_set_trans_block_group(trans, inode);
1031                 err = btrfs_drop_extents(trans, root, inode,
1032                                          hole_start, block_end, hole_start,
1033                                          &alloc_hint);
1034
1035                 if (alloc_hint != EXTENT_MAP_INLINE) {
1036                         err = btrfs_insert_file_extent(trans, root,
1037                                                        inode->i_ino,
1038                                                        hole_start, 0, 0,
1039                                                        hole_size);
1040                         btrfs_drop_extent_cache(inode, hole_start,
1041                                                 hole_size - 1);
1042                         btrfs_check_file(root, inode);
1043                 }
1044                 btrfs_end_transaction(trans, root);
1045                 mutex_unlock(&root->fs_info->fs_mutex);
1046                 unlock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS);
1047                 if (err)
1048                         return err;
1049         }
1050 out:
1051         err = inode_setattr(inode, attr);
1052 fail:
1053         return err;
1054 }
1055
1056 void btrfs_put_inode(struct inode *inode)
1057 {
1058         int ret;
1059
1060         if (!BTRFS_I(inode)->ordered_trans) {
1061                 return;
1062         }
1063
1064         if (mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY) ||
1065             mapping_tagged(inode->i_mapping, PAGECACHE_TAG_WRITEBACK))
1066                 return;
1067
1068         ret = btrfs_del_ordered_inode(inode);
1069         if (ret == 1) {
1070                 atomic_dec(&inode->i_count);
1071         }
1072 }
1073
1074 void btrfs_delete_inode(struct inode *inode)
1075 {
1076         struct btrfs_trans_handle *trans;
1077         struct btrfs_root *root = BTRFS_I(inode)->root;
1078         unsigned long nr;
1079         int ret;
1080
1081         truncate_inode_pages(&inode->i_data, 0);
1082         if (is_bad_inode(inode)) {
1083                 goto no_delete;
1084         }
1085
1086         inode->i_size = 0;
1087         mutex_lock(&root->fs_info->fs_mutex);
1088         trans = btrfs_start_transaction(root, 1);
1089
1090         btrfs_set_trans_block_group(trans, inode);
1091         ret = btrfs_truncate_in_trans(trans, root, inode, 0);
1092         if (ret)
1093                 goto no_delete_lock;
1094
1095         nr = trans->blocks_used;
1096         clear_inode(inode);
1097
1098         btrfs_end_transaction(trans, root);
1099         mutex_unlock(&root->fs_info->fs_mutex);
1100         btrfs_btree_balance_dirty(root, nr);
1101         btrfs_throttle(root);
1102         return;
1103
1104 no_delete_lock:
1105         nr = trans->blocks_used;
1106         btrfs_end_transaction(trans, root);
1107         mutex_unlock(&root->fs_info->fs_mutex);
1108         btrfs_btree_balance_dirty(root, nr);
1109         btrfs_throttle(root);
1110 no_delete:
1111         clear_inode(inode);
1112 }
1113
1114 /*
1115  * this returns the key found in the dir entry in the location pointer.
1116  * If no dir entries were found, location->objectid is 0.
1117  */
1118 static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
1119                                struct btrfs_key *location)
1120 {
1121         const char *name = dentry->d_name.name;
1122         int namelen = dentry->d_name.len;
1123         struct btrfs_dir_item *di;
1124         struct btrfs_path *path;
1125         struct btrfs_root *root = BTRFS_I(dir)->root;
1126         int ret = 0;
1127
1128         if (namelen == 1 && strcmp(name, ".") == 0) {
1129                 location->objectid = dir->i_ino;
1130                 location->type = BTRFS_INODE_ITEM_KEY;
1131                 location->offset = 0;
1132                 return 0;
1133         }
1134         path = btrfs_alloc_path();
1135         BUG_ON(!path);
1136
1137         if (namelen == 2 && strcmp(name, "..") == 0) {
1138                 struct btrfs_key key;
1139                 struct extent_buffer *leaf;
1140                 u32 nritems;
1141                 int slot;
1142
1143                 key.objectid = dir->i_ino;
1144                 btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY);
1145                 key.offset = 0;
1146                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1147                 BUG_ON(ret == 0);
1148                 ret = 0;
1149
1150                 leaf = path->nodes[0];
1151                 slot = path->slots[0];
1152                 nritems = btrfs_header_nritems(leaf);
1153                 if (slot >= nritems)
1154                         goto out_err;
1155
1156                 btrfs_item_key_to_cpu(leaf, &key, slot);
1157                 if (key.objectid != dir->i_ino ||
1158                     key.type != BTRFS_INODE_REF_KEY) {
1159                         goto out_err;
1160                 }
1161                 location->objectid = key.offset;
1162                 location->type = BTRFS_INODE_ITEM_KEY;
1163                 location->offset = 0;
1164                 goto out;
1165         }
1166
1167         di = btrfs_lookup_dir_item(NULL, root, path, dir->i_ino, name,
1168                                     namelen, 0);
1169         if (IS_ERR(di))
1170                 ret = PTR_ERR(di);
1171         if (!di || IS_ERR(di)) {
1172                 goto out_err;
1173         }
1174         btrfs_dir_item_key_to_cpu(path->nodes[0], di, location);
1175 out:
1176         btrfs_free_path(path);
1177         return ret;
1178 out_err:
1179         location->objectid = 0;
1180         goto out;
1181 }
1182
1183 /*
1184  * when we hit a tree root in a directory, the btrfs part of the inode
1185  * needs to be changed to reflect the root directory of the tree root.  This
1186  * is kind of like crossing a mount point.
1187  */
1188 static int fixup_tree_root_location(struct btrfs_root *root,
1189                              struct btrfs_key *location,
1190                              struct btrfs_root **sub_root,
1191                              struct dentry *dentry)
1192 {
1193         struct btrfs_path *path;
1194         struct btrfs_root_item *ri;
1195
1196         if (btrfs_key_type(location) != BTRFS_ROOT_ITEM_KEY)
1197                 return 0;
1198         if (location->objectid == BTRFS_ROOT_TREE_OBJECTID)
1199                 return 0;
1200
1201         path = btrfs_alloc_path();
1202         BUG_ON(!path);
1203         mutex_lock(&root->fs_info->fs_mutex);
1204
1205         *sub_root = btrfs_read_fs_root(root->fs_info, location,
1206                                         dentry->d_name.name,
1207                                         dentry->d_name.len);
1208         if (IS_ERR(*sub_root))
1209                 return PTR_ERR(*sub_root);
1210
1211         ri = &(*sub_root)->root_item;
1212         location->objectid = btrfs_root_dirid(ri);
1213         btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
1214         location->offset = 0;
1215
1216         btrfs_free_path(path);
1217         mutex_unlock(&root->fs_info->fs_mutex);
1218         return 0;
1219 }
1220
1221 static int btrfs_init_locked_inode(struct inode *inode, void *p)
1222 {
1223         struct btrfs_iget_args *args = p;
1224         inode->i_ino = args->ino;
1225         BTRFS_I(inode)->root = args->root;
1226         extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS);
1227         extent_io_tree_init(&BTRFS_I(inode)->io_tree,
1228                              inode->i_mapping, GFP_NOFS);
1229         return 0;
1230 }
1231
1232 static int btrfs_find_actor(struct inode *inode, void *opaque)
1233 {
1234         struct btrfs_iget_args *args = opaque;
1235         return (args->ino == inode->i_ino &&
1236                 args->root == BTRFS_I(inode)->root);
1237 }
1238
1239 struct inode *btrfs_ilookup(struct super_block *s, u64 objectid,
1240                             u64 root_objectid)
1241 {
1242         struct btrfs_iget_args args;
1243         args.ino = objectid;
1244         args.root = btrfs_lookup_fs_root(btrfs_sb(s)->fs_info, root_objectid);
1245
1246         if (!args.root)
1247                 return NULL;
1248
1249         return ilookup5(s, objectid, btrfs_find_actor, (void *)&args);
1250 }
1251
1252 struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid,
1253                                 struct btrfs_root *root)
1254 {
1255         struct inode *inode;
1256         struct btrfs_iget_args args;
1257         args.ino = objectid;
1258         args.root = root;
1259
1260         inode = iget5_locked(s, objectid, btrfs_find_actor,
1261                              btrfs_init_locked_inode,
1262                              (void *)&args);
1263         return inode;
1264 }
1265
1266 static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
1267                                    struct nameidata *nd)
1268 {
1269         struct inode * inode;
1270         struct btrfs_inode *bi = BTRFS_I(dir);
1271         struct btrfs_root *root = bi->root;
1272         struct btrfs_root *sub_root = root;
1273         struct btrfs_key location;
1274         int ret;
1275
1276         if (dentry->d_name.len > BTRFS_NAME_LEN)
1277                 return ERR_PTR(-ENAMETOOLONG);
1278
1279         mutex_lock(&root->fs_info->fs_mutex);
1280         ret = btrfs_inode_by_name(dir, dentry, &location);
1281         mutex_unlock(&root->fs_info->fs_mutex);
1282
1283         if (ret < 0)
1284                 return ERR_PTR(ret);
1285
1286         inode = NULL;
1287         if (location.objectid) {
1288                 ret = fixup_tree_root_location(root, &location, &sub_root,
1289                                                 dentry);
1290                 if (ret < 0)
1291                         return ERR_PTR(ret);
1292                 if (ret > 0)
1293                         return ERR_PTR(-ENOENT);
1294                 inode = btrfs_iget_locked(dir->i_sb, location.objectid,
1295                                           sub_root);
1296                 if (!inode)
1297                         return ERR_PTR(-EACCES);
1298                 if (inode->i_state & I_NEW) {
1299                         /* the inode and parent dir are two different roots */
1300                         if (sub_root != root) {
1301                                 igrab(inode);
1302                                 sub_root->inode = inode;
1303                         }
1304                         BTRFS_I(inode)->root = sub_root;
1305                         memcpy(&BTRFS_I(inode)->location, &location,
1306                                sizeof(location));
1307                         btrfs_read_locked_inode(inode);
1308                         unlock_new_inode(inode);
1309                 }
1310         }
1311         return d_splice_alias(inode, dentry);
1312 }
1313
1314 static unsigned char btrfs_filetype_table[] = {
1315         DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
1316 };
1317
1318 static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
1319 {
1320         struct inode *inode = filp->f_dentry->d_inode;
1321         struct btrfs_root *root = BTRFS_I(inode)->root;
1322         struct btrfs_item *item;
1323         struct btrfs_dir_item *di;
1324         struct btrfs_key key;
1325         struct btrfs_key found_key;
1326         struct btrfs_path *path;
1327         int ret;
1328         u32 nritems;
1329         struct extent_buffer *leaf;
1330         int slot;
1331         int advance;
1332         unsigned char d_type;
1333         int over = 0;
1334         u32 di_cur;
1335         u32 di_total;
1336         u32 di_len;
1337         int key_type = BTRFS_DIR_INDEX_KEY;
1338         char tmp_name[32];
1339         char *name_ptr;
1340         int name_len;
1341
1342         /* FIXME, use a real flag for deciding about the key type */
1343         if (root->fs_info->tree_root == root)
1344                 key_type = BTRFS_DIR_ITEM_KEY;
1345
1346         /* special case for "." */
1347         if (filp->f_pos == 0) {
1348                 over = filldir(dirent, ".", 1,
1349                                1, inode->i_ino,
1350                                DT_DIR);
1351                 if (over)
1352                         return 0;
1353                 filp->f_pos = 1;
1354         }
1355
1356         mutex_lock(&root->fs_info->fs_mutex);
1357         key.objectid = inode->i_ino;
1358         path = btrfs_alloc_path();
1359         path->reada = 2;
1360
1361         /* special case for .., just use the back ref */
1362         if (filp->f_pos == 1) {
1363                 btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY);
1364                 key.offset = 0;
1365                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1366                 BUG_ON(ret == 0);
1367                 leaf = path->nodes[0];
1368                 slot = path->slots[0];
1369                 nritems = btrfs_header_nritems(leaf);
1370                 if (slot >= nritems) {
1371                         btrfs_release_path(root, path);
1372                         goto read_dir_items;
1373                 }
1374                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
1375                 btrfs_release_path(root, path);
1376                 if (found_key.objectid != key.objectid ||
1377                     found_key.type != BTRFS_INODE_REF_KEY)
1378                         goto read_dir_items;
1379                 over = filldir(dirent, "..", 2,
1380                                2, found_key.offset, DT_DIR);
1381                 if (over)
1382                         goto nopos;
1383                 filp->f_pos = 2;
1384         }
1385
1386 read_dir_items:
1387         btrfs_set_key_type(&key, key_type);
1388         key.offset = filp->f_pos;
1389
1390         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1391         if (ret < 0)
1392                 goto err;
1393         advance = 0;
1394         while(1) {
1395                 leaf = path->nodes[0];
1396                 nritems = btrfs_header_nritems(leaf);
1397                 slot = path->slots[0];
1398                 if (advance || slot >= nritems) {
1399                         if (slot >= nritems -1) {
1400                                 ret = btrfs_next_leaf(root, path);
1401                                 if (ret)
1402                                         break;
1403                                 leaf = path->nodes[0];
1404                                 nritems = btrfs_header_nritems(leaf);
1405                                 slot = path->slots[0];
1406                         } else {
1407                                 slot++;
1408                                 path->slots[0]++;
1409                         }
1410                 }
1411                 advance = 1;
1412                 item = btrfs_item_nr(leaf, slot);
1413                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
1414
1415                 if (found_key.objectid != key.objectid)
1416                         break;
1417                 if (btrfs_key_type(&found_key) != key_type)
1418                         break;
1419                 if (found_key.offset < filp->f_pos)
1420                         continue;
1421
1422                 filp->f_pos = found_key.offset;
1423                 advance = 1;
1424                 di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
1425                 di_cur = 0;
1426                 di_total = btrfs_item_size(leaf, item);
1427                 while(di_cur < di_total) {
1428                         struct btrfs_key location;
1429
1430                         name_len = btrfs_dir_name_len(leaf, di);
1431                         if (name_len < 32) {
1432                                 name_ptr = tmp_name;
1433                         } else {
1434                                 name_ptr = kmalloc(name_len, GFP_NOFS);
1435                                 BUG_ON(!name_ptr);
1436                         }
1437                         read_extent_buffer(leaf, name_ptr,
1438                                            (unsigned long)(di + 1), name_len);
1439
1440                         d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)];
1441                         btrfs_dir_item_key_to_cpu(leaf, di, &location);
1442                         over = filldir(dirent, name_ptr, name_len,
1443                                        found_key.offset,
1444                                        location.objectid,
1445                                        d_type);
1446
1447                         if (name_ptr != tmp_name)
1448                                 kfree(name_ptr);
1449
1450                         if (over)
1451                                 goto nopos;
1452                         di_len = btrfs_dir_name_len(leaf, di) +
1453                                 btrfs_dir_data_len(leaf, di) +sizeof(*di);
1454                         di_cur += di_len;
1455                         di = (struct btrfs_dir_item *)((char *)di + di_len);
1456                 }
1457         }
1458         filp->f_pos = INT_LIMIT(typeof(filp->f_pos));
1459 nopos:
1460         ret = 0;
1461 err:
1462         btrfs_release_path(root, path);
1463         btrfs_free_path(path);
1464         mutex_unlock(&root->fs_info->fs_mutex);
1465         return ret;
1466 }
1467
1468 int btrfs_write_inode(struct inode *inode, int wait)
1469 {
1470         struct btrfs_root *root = BTRFS_I(inode)->root;
1471         struct btrfs_trans_handle *trans;
1472         int ret = 0;
1473
1474         if (wait) {
1475                 mutex_lock(&root->fs_info->fs_mutex);
1476                 trans = btrfs_start_transaction(root, 1);
1477                 btrfs_set_trans_block_group(trans, inode);
1478                 ret = btrfs_commit_transaction(trans, root);
1479                 mutex_unlock(&root->fs_info->fs_mutex);
1480         }
1481         return ret;
1482 }
1483
1484 /*
1485  * This is somewhat expensive, updating the tree every time the
1486  * inode changes.  But, it is most likely to find the inode in cache.
1487  * FIXME, needs more benchmarking...there are no reasons other than performance
1488  * to keep or drop this code.
1489  */
1490 void btrfs_dirty_inode(struct inode *inode)
1491 {
1492         struct btrfs_root *root = BTRFS_I(inode)->root;
1493         struct btrfs_trans_handle *trans;
1494
1495         mutex_lock(&root->fs_info->fs_mutex);
1496         trans = btrfs_start_transaction(root, 1);
1497         btrfs_set_trans_block_group(trans, inode);
1498         btrfs_update_inode(trans, root, inode);
1499         btrfs_end_transaction(trans, root);
1500         mutex_unlock(&root->fs_info->fs_mutex);
1501 }
1502
1503 static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
1504                                      struct btrfs_root *root,
1505                                      const char *name, int name_len,
1506                                      u64 ref_objectid,
1507                                      u64 objectid,
1508                                      struct btrfs_block_group_cache *group,
1509                                      int mode)
1510 {
1511         struct inode *inode;
1512         struct btrfs_inode_item *inode_item;
1513         struct btrfs_key *location;
1514         struct btrfs_path *path;
1515         struct btrfs_inode_ref *ref;
1516         struct btrfs_key key[2];
1517         u32 sizes[2];
1518         unsigned long ptr;
1519         int ret;
1520         int owner;
1521
1522         path = btrfs_alloc_path();
1523         BUG_ON(!path);
1524
1525         inode = new_inode(root->fs_info->sb);
1526         if (!inode)
1527                 return ERR_PTR(-ENOMEM);
1528
1529         extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS);
1530         extent_io_tree_init(&BTRFS_I(inode)->io_tree,
1531                              inode->i_mapping, GFP_NOFS);
1532         BTRFS_I(inode)->root = root;
1533
1534         if (mode & S_IFDIR)
1535                 owner = 0;
1536         else
1537                 owner = 1;
1538         group = btrfs_find_block_group(root, group, 0, 0, owner);
1539         BTRFS_I(inode)->block_group = group;
1540         BTRFS_I(inode)->flags = 0;
1541
1542         key[0].objectid = objectid;
1543         btrfs_set_key_type(&key[0], BTRFS_INODE_ITEM_KEY);
1544         key[0].offset = 0;
1545
1546         key[1].objectid = objectid;
1547         btrfs_set_key_type(&key[1], BTRFS_INODE_REF_KEY);
1548         key[1].offset = ref_objectid;
1549
1550         sizes[0] = sizeof(struct btrfs_inode_item);
1551         sizes[1] = name_len + sizeof(*ref);
1552
1553         ret = btrfs_insert_empty_items(trans, root, path, key, sizes, 2);
1554         if (ret != 0)
1555                 goto fail;
1556
1557         if (objectid > root->highest_inode)
1558                 root->highest_inode = objectid;
1559
1560         inode->i_uid = current->fsuid;
1561         inode->i_gid = current->fsgid;
1562         inode->i_mode = mode;
1563         inode->i_ino = objectid;
1564         inode->i_blocks = 0;
1565         inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
1566         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
1567                                   struct btrfs_inode_item);
1568         fill_inode_item(path->nodes[0], inode_item, inode);
1569
1570         ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1,
1571                              struct btrfs_inode_ref);
1572         btrfs_set_inode_ref_name_len(path->nodes[0], ref, name_len);
1573         ptr = (unsigned long)(ref + 1);
1574         write_extent_buffer(path->nodes[0], name, ptr, name_len);
1575
1576         btrfs_mark_buffer_dirty(path->nodes[0]);
1577         btrfs_free_path(path);
1578
1579         location = &BTRFS_I(inode)->location;
1580         location->objectid = objectid;
1581         location->offset = 0;
1582         btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
1583
1584         insert_inode_hash(inode);
1585         return inode;
1586 fail:
1587         btrfs_free_path(path);
1588         return ERR_PTR(ret);
1589 }
1590
1591 static inline u8 btrfs_inode_type(struct inode *inode)
1592 {
1593         return btrfs_type_by_mode[(inode->i_mode & S_IFMT) >> S_SHIFT];
1594 }
1595
1596 static int btrfs_add_link(struct btrfs_trans_handle *trans,
1597                             struct dentry *dentry, struct inode *inode,
1598                             int add_backref)
1599 {
1600         int ret;
1601         struct btrfs_key key;
1602         struct btrfs_root *root = BTRFS_I(dentry->d_parent->d_inode)->root;
1603         struct inode *parent_inode;
1604
1605         key.objectid = inode->i_ino;
1606         btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
1607         key.offset = 0;
1608
1609         ret = btrfs_insert_dir_item(trans, root,
1610                                     dentry->d_name.name, dentry->d_name.len,
1611                                     dentry->d_parent->d_inode->i_ino,
1612                                     &key, btrfs_inode_type(inode));
1613         if (ret == 0) {
1614                 if (add_backref) {
1615                         ret = btrfs_insert_inode_ref(trans, root,
1616                                              dentry->d_name.name,
1617                                              dentry->d_name.len,
1618                                              inode->i_ino,
1619                                              dentry->d_parent->d_inode->i_ino);
1620                 }
1621                 parent_inode = dentry->d_parent->d_inode;
1622                 parent_inode->i_size += dentry->d_name.len * 2;
1623                 parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME;
1624                 ret = btrfs_update_inode(trans, root,
1625                                          dentry->d_parent->d_inode);
1626         }
1627         return ret;
1628 }
1629
1630 static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
1631                             struct dentry *dentry, struct inode *inode,
1632                             int backref)
1633 {
1634         int err = btrfs_add_link(trans, dentry, inode, backref);
1635         if (!err) {
1636                 d_instantiate(dentry, inode);
1637                 return 0;
1638         }
1639         if (err > 0)
1640                 err = -EEXIST;
1641         return err;
1642 }
1643
1644 static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
1645                         int mode, dev_t rdev)
1646 {
1647         struct btrfs_trans_handle *trans;
1648         struct btrfs_root *root = BTRFS_I(dir)->root;
1649         struct inode *inode = NULL;
1650         int err;
1651         int drop_inode = 0;
1652         u64 objectid;
1653         unsigned long nr = 0;
1654
1655         if (!new_valid_dev(rdev))
1656                 return -EINVAL;
1657
1658         mutex_lock(&root->fs_info->fs_mutex);
1659         err = btrfs_check_free_space(root, 1, 0);
1660         if (err)
1661                 goto fail;
1662
1663         trans = btrfs_start_transaction(root, 1);
1664         btrfs_set_trans_block_group(trans, dir);
1665
1666         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
1667         if (err) {
1668                 err = -ENOSPC;
1669                 goto out_unlock;
1670         }
1671
1672         inode = btrfs_new_inode(trans, root, dentry->d_name.name,
1673                                 dentry->d_name.len,
1674                                 dentry->d_parent->d_inode->i_ino, objectid,
1675                                 BTRFS_I(dir)->block_group, mode);
1676         err = PTR_ERR(inode);
1677         if (IS_ERR(inode))
1678                 goto out_unlock;
1679
1680         btrfs_set_trans_block_group(trans, inode);
1681         err = btrfs_add_nondir(trans, dentry, inode, 0);
1682         if (err)
1683                 drop_inode = 1;
1684         else {
1685                 inode->i_op = &btrfs_special_inode_operations;
1686                 init_special_inode(inode, inode->i_mode, rdev);
1687                 btrfs_update_inode(trans, root, inode);
1688         }
1689         dir->i_sb->s_dirt = 1;
1690         btrfs_update_inode_block_group(trans, inode);
1691         btrfs_update_inode_block_group(trans, dir);
1692 out_unlock:
1693         nr = trans->blocks_used;
1694         btrfs_end_transaction(trans, root);
1695 fail:
1696         mutex_unlock(&root->fs_info->fs_mutex);
1697
1698         if (drop_inode) {
1699                 inode_dec_link_count(inode);
1700                 iput(inode);
1701         }
1702         btrfs_btree_balance_dirty(root, nr);
1703         btrfs_throttle(root);
1704         return err;
1705 }
1706
1707 static int btrfs_create(struct inode *dir, struct dentry *dentry,
1708                         int mode, struct nameidata *nd)
1709 {
1710         struct btrfs_trans_handle *trans;
1711         struct btrfs_root *root = BTRFS_I(dir)->root;
1712         struct inode *inode = NULL;
1713         int err;
1714         int drop_inode = 0;
1715         unsigned long nr = 0;
1716         u64 objectid;
1717
1718         mutex_lock(&root->fs_info->fs_mutex);
1719         err = btrfs_check_free_space(root, 1, 0);
1720         if (err)
1721                 goto fail;
1722         trans = btrfs_start_transaction(root, 1);
1723         btrfs_set_trans_block_group(trans, dir);
1724
1725         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
1726         if (err) {
1727                 err = -ENOSPC;
1728                 goto out_unlock;
1729         }
1730
1731         inode = btrfs_new_inode(trans, root, dentry->d_name.name,
1732                                 dentry->d_name.len,
1733                                 dentry->d_parent->d_inode->i_ino,
1734                                 objectid, BTRFS_I(dir)->block_group, mode);
1735         err = PTR_ERR(inode);
1736         if (IS_ERR(inode))
1737                 goto out_unlock;
1738
1739         btrfs_set_trans_block_group(trans, inode);
1740         err = btrfs_add_nondir(trans, dentry, inode, 0);
1741         if (err)
1742                 drop_inode = 1;
1743         else {
1744                 inode->i_mapping->a_ops = &btrfs_aops;
1745                 inode->i_fop = &btrfs_file_operations;
1746                 inode->i_op = &btrfs_file_inode_operations;
1747                 extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS);
1748                 extent_io_tree_init(&BTRFS_I(inode)->io_tree,
1749                                      inode->i_mapping, GFP_NOFS);
1750                 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
1751         }
1752         dir->i_sb->s_dirt = 1;
1753         btrfs_update_inode_block_group(trans, inode);
1754         btrfs_update_inode_block_group(trans, dir);
1755 out_unlock:
1756         nr = trans->blocks_used;
1757         btrfs_end_transaction(trans, root);
1758 fail:
1759         mutex_unlock(&root->fs_info->fs_mutex);
1760
1761         if (drop_inode) {
1762                 inode_dec_link_count(inode);
1763                 iput(inode);
1764         }
1765         btrfs_btree_balance_dirty(root, nr);
1766         btrfs_throttle(root);
1767         return err;
1768 }
1769
1770 static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
1771                       struct dentry *dentry)
1772 {
1773         struct btrfs_trans_handle *trans;
1774         struct btrfs_root *root = BTRFS_I(dir)->root;
1775         struct inode *inode = old_dentry->d_inode;
1776         unsigned long nr = 0;
1777         int err;
1778         int drop_inode = 0;
1779
1780         if (inode->i_nlink == 0)
1781                 return -ENOENT;
1782
1783 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
1784         inode->i_nlink++;
1785 #else
1786         inc_nlink(inode);
1787 #endif
1788         mutex_lock(&root->fs_info->fs_mutex);
1789         err = btrfs_check_free_space(root, 1, 0);
1790         if (err)
1791                 goto fail;
1792         trans = btrfs_start_transaction(root, 1);
1793
1794         btrfs_set_trans_block_group(trans, dir);
1795         atomic_inc(&inode->i_count);
1796         err = btrfs_add_nondir(trans, dentry, inode, 1);
1797
1798         if (err)
1799                 drop_inode = 1;
1800
1801         dir->i_sb->s_dirt = 1;
1802         btrfs_update_inode_block_group(trans, dir);
1803         err = btrfs_update_inode(trans, root, inode);
1804
1805         if (err)
1806                 drop_inode = 1;
1807
1808         nr = trans->blocks_used;
1809         btrfs_end_transaction(trans, root);
1810 fail:
1811         mutex_unlock(&root->fs_info->fs_mutex);
1812
1813         if (drop_inode) {
1814                 inode_dec_link_count(inode);
1815                 iput(inode);
1816         }
1817         btrfs_btree_balance_dirty(root, nr);
1818         btrfs_throttle(root);
1819         return err;
1820 }
1821
1822 static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1823 {
1824         struct inode *inode;
1825         struct btrfs_trans_handle *trans;
1826         struct btrfs_root *root = BTRFS_I(dir)->root;
1827         int err = 0;
1828         int drop_on_err = 0;
1829         u64 objectid;
1830         unsigned long nr = 1;
1831
1832         mutex_lock(&root->fs_info->fs_mutex);
1833         err = btrfs_check_free_space(root, 1, 0);
1834         if (err)
1835                 goto out_unlock;
1836
1837         trans = btrfs_start_transaction(root, 1);
1838         btrfs_set_trans_block_group(trans, dir);
1839
1840         if (IS_ERR(trans)) {
1841                 err = PTR_ERR(trans);
1842                 goto out_unlock;
1843         }
1844
1845         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
1846         if (err) {
1847                 err = -ENOSPC;
1848                 goto out_unlock;
1849         }
1850
1851         inode = btrfs_new_inode(trans, root, dentry->d_name.name,
1852                                 dentry->d_name.len,
1853                                 dentry->d_parent->d_inode->i_ino, objectid,
1854                                 BTRFS_I(dir)->block_group, S_IFDIR | mode);
1855         if (IS_ERR(inode)) {
1856                 err = PTR_ERR(inode);
1857                 goto out_fail;
1858         }
1859
1860         drop_on_err = 1;
1861         inode->i_op = &btrfs_dir_inode_operations;
1862         inode->i_fop = &btrfs_dir_file_operations;
1863         btrfs_set_trans_block_group(trans, inode);
1864
1865         inode->i_size = 0;
1866         err = btrfs_update_inode(trans, root, inode);
1867         if (err)
1868                 goto out_fail;
1869
1870         err = btrfs_add_link(trans, dentry, inode, 0);
1871         if (err)
1872                 goto out_fail;
1873
1874         d_instantiate(dentry, inode);
1875         drop_on_err = 0;
1876         dir->i_sb->s_dirt = 1;
1877         btrfs_update_inode_block_group(trans, inode);
1878         btrfs_update_inode_block_group(trans, dir);
1879
1880 out_fail:
1881         nr = trans->blocks_used;
1882         btrfs_end_transaction(trans, root);
1883
1884 out_unlock:
1885         mutex_unlock(&root->fs_info->fs_mutex);
1886         if (drop_on_err)
1887                 iput(inode);
1888         btrfs_btree_balance_dirty(root, nr);
1889         btrfs_throttle(root);
1890         return err;
1891 }
1892
/*
 * Return the extent_map covering file offset 'start' of 'inode'.
 *
 * The inode's cached extent map tree is consulted first.  On a miss (or
 * when the cached mapping is inline and a page was supplied) the file
 * extent item is looked up in the btree under fs_mutex and turned into a
 * new extent_map:
 *   - BTRFS_FILE_EXTENT_REG with a non-zero disk bytenr: block_start is
 *     the disk bytenr plus the item's offset field;
 *   - BTRFS_FILE_EXTENT_REG with disk bytenr 0, or no usable item:
 *     block_start is EXTENT_MAP_HOLE;
 *   - BTRFS_FILE_EXTENT_INLINE: block_start is EXTENT_MAP_INLINE.  When
 *     'page' is given, data is copied leaf -> page if !create and the
 *     page is not uptodate, or page -> leaf (inside a transaction) if
 *     create and the page is uptodate.
 *
 * The new mapping is added to the extent map tree, except on the inline
 * path with page == NULL, which returns it without inserting.
 *
 * Returns the extent_map on success or ERR_PTR(err) on failure.
 */
1893 struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
1894                                     size_t pg_offset, u64 start, u64 len,
1895                                     int create)
1896 {
1897         int ret;
1898         int err = 0;
1899         u64 bytenr;
1900         u64 extent_start = 0;
1901         u64 extent_end = 0;
1902         u64 objectid = inode->i_ino;
1903         u32 found_type;
1904         struct btrfs_path *path;
1905         struct btrfs_root *root = BTRFS_I(inode)->root;
1906         struct btrfs_file_extent_item *item;
1907         struct extent_buffer *leaf;
1908         struct btrfs_key found_key;
1909         struct extent_map *em = NULL;
1910         struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
1911         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
1912         struct btrfs_trans_handle *trans = NULL;
1913
1914         path = btrfs_alloc_path();
1915         BUG_ON(!path);
1916         mutex_lock(&root->fs_info->fs_mutex);
1917
             /*
              * Re-entered from the inline write path below once a
              * transaction has been started.
              */
1918 again:
1919         spin_lock(&em_tree->lock);
1920         em = lookup_extent_mapping(em_tree, start, len);
1921         spin_unlock(&em_tree->lock);
1922
1923         if (em) {
1924                 if (em->start > start) {
1925                         printk("get_extent lookup [%Lu %Lu] em [%Lu %Lu]\n",
1926                                start, len, em->start, em->len);
1927                         WARN_ON(1);
1928                 }
                     /*
                      * A cached inline mapping is discarded when a page was
                      * passed in, so the inline branch below runs again for
                      * this page; any other cached mapping is returned as is.
                      */
1929                 if (em->block_start == EXTENT_MAP_INLINE && page)
1930                         free_extent_map(em);
1931                 else
1932                         goto out;
1933         }
1934         em = alloc_extent_map(GFP_NOFS);
1935         if (!em) {
1936                 err = -ENOMEM;
1937                 goto out;
1938         }
1939
1940         em->start = EXTENT_MAP_HOLE;
1941         em->len = (u64)-1;
1942         em->bdev = inode->i_sb->s_bdev;
1943         ret = btrfs_lookup_file_extent(trans, root, path,
1944                                        objectid, start, trans != NULL);
1945         if (ret < 0) {
1946                 err = ret;
1947                 goto out;
1948         }
1949
             /* non-zero result: step back one slot, if there is one */
1950         if (ret != 0) {
1951                 if (path->slots[0] == 0)
1952                         goto not_found;
1953                 path->slots[0]--;
1954         }
1955
1956         leaf = path->nodes[0];
1957         item = btrfs_item_ptr(leaf, path->slots[0],
1958                               struct btrfs_file_extent_item);
1959         /* are we inside the extent that was found? */
1960         btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
1961         found_type = btrfs_key_type(&found_key);
1962         if (found_key.objectid != objectid ||
1963             found_type != BTRFS_EXTENT_DATA_KEY) {
1964                 goto not_found;
1965         }
1966
             /* found_type is reused: from here on it is the file extent type */
1967         found_type = btrfs_file_extent_type(leaf, item);
1968         extent_start = found_key.offset;
1969         if (found_type == BTRFS_FILE_EXTENT_REG) {
1970                 extent_end = extent_start +
1971                        btrfs_file_extent_num_bytes(leaf, item);
1972                 err = 0;
                     /* 'start' lies outside the item found: map it as a hole */
1973                 if (start < extent_start || start >= extent_end) {
1974                         em->start = start;
1975                         if (start < extent_start) {
1976                                 if (start + len <= extent_start)
1977                                         goto not_found;
1978                                 em->len = extent_end - extent_start;
1979                         } else {
1980                                 em->len = len;
1981                         }
1982                         goto not_found_em;
1983                 }
1984                 bytenr = btrfs_file_extent_disk_bytenr(leaf, item);
                     /* a disk bytenr of 0 is mapped as a hole */
1985                 if (bytenr == 0) {
1986                         em->start = extent_start;
1987                         em->len = extent_end - extent_start;
1988                         em->block_start = EXTENT_MAP_HOLE;
1989                         goto insert;
1990                 }
1991                 bytenr += btrfs_file_extent_offset(leaf, item);
1992                 em->block_start = bytenr;
1993                 em->start = extent_start;
1994                 em->len = extent_end - extent_start;
1995                 goto insert;
1996         } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
1997                 u64 page_start;
1998                 unsigned long ptr;
1999                 char *map;
2000                 size_t size;
2001                 size_t extent_offset;
2002                 size_t copy_size;
2003
2004                 size = btrfs_file_extent_inline_len(leaf, btrfs_item_nr(leaf,
2005                                                     path->slots[0]));
                     /* round the end of the inline data up to a sectorsize multiple */
2006                 extent_end = (extent_start + size + root->sectorsize - 1) &
2007                         ~((u64)root->sectorsize - 1);
2008                 if (start < extent_start || start >= extent_end) {
2009                         em->start = start;
2010                         if (start < extent_start) {
2011                                 if (start + len <= extent_start)
2012                                         goto not_found;
2013                                 em->len = extent_end - extent_start;
2014                         } else {
2015                                 em->len = len;
2016                         }
2017                         goto not_found_em;
2018                 }
2019                 em->block_start = EXTENT_MAP_INLINE;
2020
                     /* no page to fill: return the mapping without inserting it */
2021                 if (!page) {
2022                         em->start = extent_start;
2023                         em->len = size;
2024                         goto out;
2025                 }
2026
2027                 page_start = page_offset(page) + pg_offset;
2028                 extent_offset = page_start - extent_start;
2029                 copy_size = min_t(u64, PAGE_CACHE_SIZE - pg_offset,
2030                                 size - extent_offset);
2031                 em->start = extent_start + extent_offset;
2032                 em->len = (copy_size + root->sectorsize - 1) &
2033                         ~((u64)root->sectorsize - 1);
2034                 map = kmap(page);
2035                 ptr = btrfs_file_extent_inline_start(item) + extent_offset;
2036                 if (create == 0 && !PageUptodate(page)) {
2037                         read_extent_buffer(leaf, map + pg_offset, ptr,
2038                                            copy_size);
2039                         flush_dcache_page(page);
2040                 } else if (create && PageUptodate(page)) {
                             /*
                              * Writing into the leaf needs a transaction:
                              * undo everything, start one and redo the
                              * lookup from the top.
                              */
2041                         if (!trans) {
2042                                 kunmap(page);
2043                                 free_extent_map(em);
2044                                 em = NULL;
2045                                 btrfs_release_path(root, path);
2046                                 trans = btrfs_start_transaction(root, 1);
2047                                 goto again;
2048                         }
2049                         write_extent_buffer(leaf, map + pg_offset, ptr,
2050                                             copy_size);
2051                         btrfs_mark_buffer_dirty(leaf);
2052                 }
2053                 kunmap(page);
2054                 set_extent_uptodate(io_tree, em->start,
2055                                     extent_map_end(em) - 1, GFP_NOFS);
2056                 goto insert;
2057         } else {
2058                 printk("unkknown found_type %d\n", found_type);
2059                 WARN_ON(1);
2060         }
             /* nothing usable found: describe [start, start + len) as a hole */
2061 not_found:
2062         em->start = start;
2063         em->len = len;
2064 not_found_em:
2065         em->block_start = EXTENT_MAP_HOLE;
2066 insert:
2067         btrfs_release_path(root, path);
             /* the mapping built above must contain 'start' */
2068         if (em->start > start || extent_map_end(em) <= start) {
2069                 printk("bad extent! em: [%Lu %Lu] passed [%Lu %Lu]\n", em->start, em->len, start, len);
2070                 err = -EIO;
2071                 goto out;
2072         }
2073
2074         err = 0;
2075         spin_lock(&em_tree->lock);
2076         ret = add_extent_mapping(em_tree, em);
             /* on -EEXIST drop our mapping and look the range up again */
2077         if (ret == -EEXIST) {
2078                 free_extent_map(em);
2079                 em = lookup_extent_mapping(em_tree, start, len);
2080                 if (!em) {
2081                         err = -EIO;
2082                         printk("failing to insert %Lu %Lu\n", start, len);
2083                 }
2084         }
2085         spin_unlock(&em_tree->lock);
2086 out:
2087         btrfs_free_path(path);
             /* trans is only set when the inline write path started one */
2088         if (trans) {
2089                 ret = btrfs_end_transaction(trans, root);
2090                 if (!err)
2091                         err = ret;
2092         }
2093         mutex_unlock(&root->fs_info->fs_mutex);
2094         if (err) {
2095                 free_extent_map(em);
2096                 WARN_ON(1);
2097                 return ERR_PTR(err);
2098         }
2099         return em;
2100 }
2101
2102 static sector_t btrfs_bmap(struct address_space *mapping, sector_t iblock)
2103 {
2104         return extent_bmap(mapping, iblock, btrfs_get_extent);
2105 }
2106
2107 int btrfs_readpage(struct file *file, struct page *page)
2108 {
2109         struct extent_io_tree *tree;
2110         tree = &BTRFS_I(page->mapping->host)->io_tree;
2111         return extent_read_full_page(tree, page, btrfs_get_extent);
2112 }
2113
2114 static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
2115 {
2116         struct extent_io_tree *tree;
2117
2118
2119         if (current->flags & PF_MEMALLOC) {
2120                 redirty_page_for_writepage(wbc, page);
2121                 unlock_page(page);
2122                 return 0;
2123         }
2124         tree = &BTRFS_I(page->mapping->host)->io_tree;
2125         return extent_write_full_page(tree, page, btrfs_get_extent, wbc);
2126 }
2127
2128 static int btrfs_writepages(struct address_space *mapping,
2129                             struct writeback_control *wbc)
2130 {
2131         struct extent_io_tree *tree;
2132         tree = &BTRFS_I(mapping->host)->io_tree;
2133         return extent_writepages(tree, mapping, btrfs_get_extent, wbc);
2134 }
2135
2136 static int
2137 btrfs_readpages(struct file *file, struct address_space *mapping,
2138                 struct list_head *pages, unsigned nr_pages)
2139 {
2140         struct extent_io_tree *tree;
2141         tree = &BTRFS_I(mapping->host)->io_tree;
2142         return extent_readpages(tree, mapping, pages, nr_pages,
2143                                 btrfs_get_extent);
2144 }
2145
2146 static int btrfs_releasepage(struct page *page, gfp_t gfp_flags)
2147 {
2148         struct extent_io_tree *tree;
2149         struct extent_map_tree *map;
2150         int ret;
2151
2152         tree = &BTRFS_I(page->mapping->host)->io_tree;
2153         map = &BTRFS_I(page->mapping->host)->extent_tree;
2154         ret = try_release_extent_mapping(map, tree, page, gfp_flags);
2155         if (ret == 1) {
2156                 ClearPagePrivate(page);
2157                 set_page_private(page, 0);
2158                 page_cache_release(page);
2159         }
2160         return ret;
2161 }
2162
2163 static void btrfs_invalidatepage(struct page *page, unsigned long offset)
2164 {
2165         struct extent_io_tree *tree;
2166
2167         tree = &BTRFS_I(page->mapping->host)->io_tree;
2168         extent_invalidatepage(tree, page, offset);
2169         btrfs_releasepage(page, GFP_NOFS);
2170 }
2171
2172 /*
2173  * btrfs_page_mkwrite() is not allowed to change the file size as it gets
2174  * called from a page fault handler when a page is first dirtied. Hence we must
2175  * be careful to check for EOF conditions here. We set the page up correctly
2176  * for a written page which means we get ENOSPC checking when writing into
2177  * holes and correct delalloc and unwritten extent mapping on filesystems that
2178  * support these features.
2179  *
2180  * We are not allowed to take the i_mutex here so we have to play games to
2181  * protect against truncate races as the page could now be beyond EOF.  Because
2182  * vmtruncate() writes the inode size before removing pages, once we have the
2183  * page lock we can determine safely if the page is beyond EOF. If it is not
2184  * beyond EOF, then the page is guaranteed safe against truncation until we
2185  * unlock the page.
2186  */
2187 int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page)
2188 {
2189         struct inode *inode = fdentry(vma->vm_file)->d_inode;
2190         struct btrfs_root *root = BTRFS_I(inode)->root;
2191         unsigned long end;
2192         loff_t size;
2193         int ret;
2194         u64 page_start;
2195
2196         mutex_lock(&root->fs_info->fs_mutex);
2197         ret = btrfs_check_free_space(root, PAGE_CACHE_SIZE, 0);
2198         mutex_unlock(&root->fs_info->fs_mutex);
2199         if (ret)
2200                 goto out;
2201
2202         ret = -EINVAL;
2203
2204         lock_page(page);
2205         wait_on_page_writeback(page);
2206         size = i_size_read(inode);
2207         page_start = (u64)page->index << PAGE_CACHE_SHIFT;
2208
2209         if ((page->mapping != inode->i_mapping) ||
2210             (page_start > size)) {
2211                 /* page got truncated out from underneath us */
2212                 goto out_unlock;
2213         }
2214
2215         /* page is wholly or partially inside EOF */
2216         if (page_start + PAGE_CACHE_SIZE > size)
2217                 end = size & ~PAGE_CACHE_MASK;
2218         else
2219                 end = PAGE_CACHE_SIZE;
2220
2221         ret = btrfs_cow_one_page(inode, page, end);
2222
2223 out_unlock:
2224         unlock_page(page);
2225 out:
2226         return ret;
2227 }
2228
2229 static void btrfs_truncate(struct inode *inode)
2230 {
2231         struct btrfs_root *root = BTRFS_I(inode)->root;
2232         int ret;
2233         struct btrfs_trans_handle *trans;
2234         unsigned long nr;
2235
2236         if (!S_ISREG(inode->i_mode))
2237                 return;
2238         if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
2239                 return;
2240
2241         btrfs_truncate_page(inode->i_mapping, inode->i_size);
2242
2243         mutex_lock(&root->fs_info->fs_mutex);
2244         trans = btrfs_start_transaction(root, 1);
2245         btrfs_set_trans_block_group(trans, inode);
2246
2247         /* FIXME, add redo link to tree so we don't leak on crash */
2248         ret = btrfs_truncate_in_trans(trans, root, inode,
2249                                       BTRFS_EXTENT_DATA_KEY);
2250         btrfs_update_inode(trans, root, inode);
2251         nr = trans->blocks_used;
2252
2253         ret = btrfs_end_transaction(trans, root);
2254         BUG_ON(ret);
2255         mutex_unlock(&root->fs_info->fs_mutex);
2256         btrfs_btree_balance_dirty(root, nr);
2257         btrfs_throttle(root);
2258 }
2259
2260 static int noinline create_subvol(struct btrfs_root *root, char *name,
2261                                   int namelen)
2262 {
2263         struct btrfs_trans_handle *trans;
2264         struct btrfs_key key;
2265         struct btrfs_root_item root_item;
2266         struct btrfs_inode_item *inode_item;
2267         struct extent_buffer *leaf;
2268         struct btrfs_root *new_root = root;
2269         struct inode *inode;
2270         struct inode *dir;
2271         int ret;
2272         int err;
2273         u64 objectid;
2274         u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
2275         unsigned long nr = 1;
2276
2277         mutex_lock(&root->fs_info->fs_mutex);
2278         ret = btrfs_check_free_space(root, 1, 0);
2279         if (ret)
2280                 goto fail_commit;
2281
2282         trans = btrfs_start_transaction(root, 1);
2283         BUG_ON(!trans);
2284
2285         ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root,
2286                                        0, &objectid);
2287         if (ret)
2288                 goto fail;
2289
2290         leaf = __btrfs_alloc_free_block(trans, root, root->leafsize,
2291                                         objectid, trans->transid, 0, 0,
2292                                         0, 0);
2293         if (IS_ERR(leaf))
2294                 return PTR_ERR(leaf);
2295
2296         btrfs_set_header_nritems(leaf, 0);
2297         btrfs_set_header_level(leaf, 0);
2298         btrfs_set_header_bytenr(leaf, leaf->start);
2299         btrfs_set_header_generation(leaf, trans->transid);
2300         btrfs_set_header_owner(leaf, objectid);
2301
2302         write_extent_buffer(leaf, root->fs_info->fsid,
2303                             (unsigned long)btrfs_header_fsid(leaf),
2304                             BTRFS_FSID_SIZE);
2305         btrfs_mark_buffer_dirty(leaf);
2306
2307         inode_item = &root_item.inode;
2308         memset(inode_item, 0, sizeof(*inode_item));
2309         inode_item->generation = cpu_to_le64(1);
2310         inode_item->size = cpu_to_le64(3);
2311         inode_item->nlink = cpu_to_le32(1);
2312         inode_item->nblocks = cpu_to_le64(1);
2313         inode_item->mode = cpu_to_le32(S_IFDIR | 0755);
2314
2315         btrfs_set_root_bytenr(&root_item, leaf->start);
2316         btrfs_set_root_level(&root_item, 0);
2317         btrfs_set_root_refs(&root_item, 1);
2318         btrfs_set_root_used(&root_item, 0);
2319
2320         memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress));
2321         root_item.drop_level = 0;
2322
2323         free_extent_buffer(leaf);
2324         leaf = NULL;
2325
2326         btrfs_set_root_dirid(&root_item, new_dirid);
2327
2328         key.objectid = objectid;
2329         key.offset = 1;
2330         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
2331         ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
2332                                 &root_item);
2333         if (ret)
2334                 goto fail;
2335
2336         /*
2337          * insert the directory item
2338          */
2339         key.offset = (u64)-1;
2340         dir = root->fs_info->sb->s_root->d_inode;
2341         ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root,
2342                                     name, namelen, dir->i_ino, &key,
2343                                     BTRFS_FT_DIR);
2344         if (ret)
2345                 goto fail;
2346
2347         ret = btrfs_insert_inode_ref(trans, root->fs_info->tree_root,
2348                              name, namelen, objectid,
2349                              root->fs_info->sb->s_root->d_inode->i_ino);
2350         if (ret)
2351                 goto fail;
2352
2353         ret = btrfs_commit_transaction(trans, root);
2354         if (ret)
2355                 goto fail_commit;
2356
2357         new_root = btrfs_read_fs_root(root->fs_info, &key, name, namelen);
2358         BUG_ON(!new_root);
2359
2360         trans = btrfs_start_transaction(new_root, 1);
2361         BUG_ON(!trans);
2362
2363         inode = btrfs_new_inode(trans, new_root, "..", 2, new_dirid,
2364                                 new_dirid,
2365                                 BTRFS_I(dir)->block_group, S_IFDIR | 0700);
2366         if (IS_ERR(inode))
2367                 goto fail;
2368         inode->i_op = &btrfs_dir_inode_operations;
2369         inode->i_fop = &btrfs_dir_file_operations;
2370         new_root->inode = inode;
2371
2372         ret = btrfs_insert_inode_ref(trans, new_root, "..", 2, new_dirid,
2373                                      new_dirid);
2374         inode->i_nlink = 1;
2375         inode->i_size = 0;
2376         ret = btrfs_update_inode(trans, new_root, inode);
2377         if (ret)
2378                 goto fail;
2379 fail:
2380         nr = trans->blocks_used;
2381         err = btrfs_commit_transaction(trans, new_root);
2382         if (err && !ret)
2383                 ret = err;
2384 fail_commit:
2385         mutex_unlock(&root->fs_info->fs_mutex);
2386         btrfs_btree_balance_dirty(root, nr);
2387         btrfs_throttle(root);
2388         return ret;
2389 }
2390
2391 static int create_snapshot(struct btrfs_root *root, char *name, int namelen)
2392 {
2393         struct btrfs_pending_snapshot *pending_snapshot;
2394         struct btrfs_trans_handle *trans;
2395         int ret;
2396         int err;
2397         unsigned long nr = 0;
2398
2399         if (!root->ref_cows)
2400                 return -EINVAL;
2401
2402         mutex_lock(&root->fs_info->fs_mutex);
2403         ret = btrfs_check_free_space(root, 1, 0);
2404         if (ret)
2405                 goto fail_unlock;
2406
2407         pending_snapshot = kmalloc(sizeof(*pending_snapshot), GFP_NOFS);
2408         if (!pending_snapshot) {
2409                 ret = -ENOMEM;
2410                 goto fail_unlock;
2411         }
2412         pending_snapshot->name = kmalloc(namelen + 1, GFP_NOFS);
2413         if (!pending_snapshot->name) {
2414                 ret = -ENOMEM;
2415                 kfree(pending_snapshot);
2416                 goto fail_unlock;
2417         }
2418         memcpy(pending_snapshot->name, name, namelen);
2419         pending_snapshot->name[namelen] = '\0';
2420         trans = btrfs_start_transaction(root, 1);
2421         BUG_ON(!trans);
2422         pending_snapshot->root = root;
2423         list_add(&pending_snapshot->list,
2424                  &trans->transaction->pending_snapshots);
2425         ret = btrfs_update_inode(trans, root, root->inode);
2426         err = btrfs_commit_transaction(trans, root);
2427
2428 fail_unlock:
2429         mutex_unlock(&root->fs_info->fs_mutex);
2430         btrfs_btree_balance_dirty(root, nr);
2431         btrfs_throttle(root);
2432         return ret;
2433 }
2434
2435 unsigned long btrfs_force_ra(struct address_space *mapping,
2436                               struct file_ra_state *ra, struct file *file,
2437                               pgoff_t offset, pgoff_t last_index)
2438 {
2439         pgoff_t req_size;
2440
2441 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)
2442         req_size = last_index - offset + 1;
2443         offset = page_cache_readahead(mapping, ra, file, offset, req_size);
2444         return offset;
2445 #else
2446         req_size = min(last_index - offset + 1, (pgoff_t)128);
2447         page_cache_sync_readahead(mapping, ra, file, offset, req_size);
2448         return offset + req_size;
2449 #endif
2450 }
2451
2452 int btrfs_defrag_file(struct file *file) {
2453         struct inode *inode = fdentry(file)->d_inode;
2454         struct btrfs_root *root = BTRFS_I(inode)->root;
2455         struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
2456         struct page *page;
2457         unsigned long last_index;
2458         unsigned long ra_index = 0;
2459         u64 page_start;
2460         u64 page_end;
2461         unsigned long i;
2462         int ret;
2463
2464         mutex_lock(&root->fs_info->fs_mutex);
2465         ret = btrfs_check_free_space(root, inode->i_size, 0);
2466         mutex_unlock(&root->fs_info->fs_mutex);
2467         if (ret)
2468                 return -ENOSPC;
2469
2470         mutex_lock(&inode->i_mutex);
2471         last_index = inode->i_size >> PAGE_CACHE_SHIFT;
2472         for (i = 0; i <= last_index; i++) {
2473                 if (i == ra_index) {
2474                         ra_index = btrfs_force_ra(inode->i_mapping,
2475                                                   &file->f_ra,
2476                                                   file, ra_index, last_index);
2477                 }
2478                 page = grab_cache_page(inode->i_mapping, i);
2479                 if (!page)
2480                         goto out_unlock;
2481                 if (!PageUptodate(page)) {
2482                         btrfs_readpage(NULL, page);
2483                         lock_page(page);
2484                         if (!PageUptodate(page)) {
2485                                 unlock_page(page);
2486                                 page_cache_release(page);
2487                                 goto out_unlock;
2488                         }
2489                 }
2490                 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
2491                 page_end = page_start + PAGE_CACHE_SIZE - 1;
2492
2493                 lock_extent(io_tree, page_start, page_end, GFP_NOFS);
2494                 set_extent_delalloc(io_tree, page_start,
2495                                     page_end, GFP_NOFS);
2496
2497                 unlock_extent(io_tree, page_start, page_end, GFP_NOFS);
2498                 set_page_dirty(page);
2499                 unlock_page(page);
2500                 page_cache_release(page);
2501                 balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1);
2502         }
2503
2504 out_unlock:
2505         mutex_unlock(&inode->i_mutex);
2506         return 0;
2507 }
2508
2509 static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg)
2510 {
2511         u64 new_size;
2512         u64 old_size;
2513         struct btrfs_ioctl_vol_args *vol_args;
2514         struct btrfs_trans_handle *trans;
2515         char *sizestr;
2516         int ret = 0;
2517         int namelen;
2518         int mod = 0;
2519
2520         vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS);
2521
2522         if (!vol_args)
2523                 return -ENOMEM;
2524
2525         if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
2526                 ret = -EFAULT;
2527                 goto out;
2528         }
2529         namelen = strlen(vol_args->name);
2530         if (namelen > BTRFS_VOL_NAME_MAX) {
2531                 ret = -EINVAL;
2532                 goto out;
2533         }
2534
2535         sizestr = vol_args->name;
2536         if (!strcmp(sizestr, "max"))
2537                 new_size = root->fs_info->sb->s_bdev->bd_inode->i_size;
2538         else {
2539                 if (sizestr[0] == '-') {
2540                         mod = -1;
2541                         sizestr++;
2542                 } else if (sizestr[0] == '+') {
2543                         mod = 1;
2544                         sizestr++;
2545                 }
2546                 new_size = btrfs_parse_size(sizestr);
2547                 if (new_size == 0) {
2548                         ret = -EINVAL;
2549                         goto out;
2550                 }
2551         }
2552
2553         mutex_lock(&root->fs_info->fs_mutex);
2554         old_size = btrfs_super_total_bytes(&root->fs_info->super_copy);
2555
2556         if (mod < 0) {
2557                 if (new_size > old_size) {
2558                         ret = -EINVAL;
2559                         goto out_unlock;
2560                 }
2561                 new_size = old_size - new_size;
2562         } else if (mod > 0) {
2563                 new_size = old_size + new_size;
2564         }
2565
2566         if (new_size < 256 * 1024 * 1024) {
2567                 ret = -EINVAL;
2568                 goto out_unlock;
2569         }
2570         if (new_size > root->fs_info->sb->s_bdev->bd_inode->i_size) {
2571                 ret = -EFBIG;
2572                 goto out_unlock;
2573         }
2574
2575         do_div(new_size, root->sectorsize);
2576         new_size *= root->sectorsize;
2577
2578 printk("new size is %Lu\n", new_size);
2579         if (new_size > old_size) {
2580                 trans = btrfs_start_transaction(root, 1);
2581                 ret = btrfs_grow_extent_tree(trans, root, new_size);
2582                 btrfs_commit_transaction(trans, root);
2583         } else {
2584                 ret = btrfs_shrink_extent_tree(root, new_size);
2585         }
2586
2587 out_unlock:
2588         mutex_unlock(&root->fs_info->fs_mutex);
2589 out:
2590         kfree(vol_args);
2591         return ret;
2592 }
2593
2594 static int noinline btrfs_ioctl_snap_create(struct btrfs_root *root,
2595                                             void __user *arg)
2596 {
2597         struct btrfs_ioctl_vol_args *vol_args;
2598         struct btrfs_dir_item *di;
2599         struct btrfs_path *path;
2600         u64 root_dirid;
2601         int namelen;
2602         int ret;
2603
2604         vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS);
2605
2606         if (!vol_args)
2607                 return -ENOMEM;
2608
2609         if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
2610                 ret = -EFAULT;
2611                 goto out;
2612         }
2613
2614         namelen = strlen(vol_args->name);
2615         if (namelen > BTRFS_VOL_NAME_MAX) {
2616                 ret = -EINVAL;
2617                 goto out;
2618         }
2619         if (strchr(vol_args->name, '/')) {
2620                 ret = -EINVAL;
2621                 goto out;
2622         }
2623
2624         path = btrfs_alloc_path();
2625         if (!path) {
2626                 ret = -ENOMEM;
2627                 goto out;
2628         }
2629
2630         root_dirid = root->fs_info->sb->s_root->d_inode->i_ino,
2631         mutex_lock(&root->fs_info->fs_mutex);
2632         di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root,
2633                             path, root_dirid,
2634                             vol_args->name, namelen, 0);
2635         mutex_unlock(&root->fs_info->fs_mutex);
2636         btrfs_free_path(path);
2637
2638         if (di && !IS_ERR(di)) {
2639                 ret = -EEXIST;
2640                 goto out;
2641         }
2642
2643         if (IS_ERR(di)) {
2644                 ret = PTR_ERR(di);
2645                 goto out;
2646         }
2647
2648         if (root == root->fs_info->tree_root)
2649                 ret = create_subvol(root, vol_args->name, namelen);
2650         else
2651                 ret = create_snapshot(root, vol_args->name, namelen);
2652 out:
2653         kfree(vol_args);
2654         return ret;
2655 }
2656
2657 static int btrfs_ioctl_defrag(struct file *file)
2658 {
2659         struct inode *inode = fdentry(file)->d_inode;
2660         struct btrfs_root *root = BTRFS_I(inode)->root;
2661
2662         switch (inode->i_mode & S_IFMT) {
2663         case S_IFDIR:
2664                 mutex_lock(&root->fs_info->fs_mutex);
2665                 btrfs_defrag_root(root, 0);
2666                 btrfs_defrag_root(root->fs_info->extent_root, 0);
2667                 mutex_unlock(&root->fs_info->fs_mutex);
2668                 break;
2669         case S_IFREG:
2670                 btrfs_defrag_file(file);
2671                 break;
2672         }
2673
2674         return 0;
2675 }
2676
2677 long btrfs_ioctl(struct file *file, unsigned int
2678                 cmd, unsigned long arg)
2679 {
2680         struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
2681
2682         switch (cmd) {
2683         case BTRFS_IOC_SNAP_CREATE:
2684                 return btrfs_ioctl_snap_create(root, (void __user *)arg);
2685         case BTRFS_IOC_DEFRAG:
2686                 return btrfs_ioctl_defrag(file);
2687         case BTRFS_IOC_RESIZE:
2688                 return btrfs_ioctl_resize(root, (void __user *)arg);
2689         }
2690
2691         return -ENOTTY;
2692 }
2693
2694 /*
2695  * Called inside transaction, so use GFP_NOFS
2696  */
2697 struct inode *btrfs_alloc_inode(struct super_block *sb)
2698 {
2699         struct btrfs_inode *ei;
2700
2701         ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS);
2702         if (!ei)
2703                 return NULL;
2704         ei->last_trans = 0;
2705         ei->ordered_trans = 0;
2706         return &ei->vfs_inode;
2707 }
2708
2709 void btrfs_destroy_inode(struct inode *inode)
2710 {
2711         WARN_ON(!list_empty(&inode->i_dentry));
2712         WARN_ON(inode->i_data.nrpages);
2713
2714         btrfs_drop_extent_cache(inode, 0, (u64)-1);
2715         kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
2716 }
2717
2718 #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
2719 static void init_once(struct kmem_cache * cachep, void *foo)
2720 #else
2721 static void init_once(void * foo, struct kmem_cache * cachep,
2722                       unsigned long flags)
2723 #endif
2724 {
2725         struct btrfs_inode *ei = (struct btrfs_inode *) foo;
2726
2727         inode_init_once(&ei->vfs_inode);
2728 }
2729
2730 void btrfs_destroy_cachep(void)
2731 {
2732         if (btrfs_inode_cachep)
2733                 kmem_cache_destroy(btrfs_inode_cachep);
2734         if (btrfs_trans_handle_cachep)
2735                 kmem_cache_destroy(btrfs_trans_handle_cachep);
2736         if (btrfs_transaction_cachep)
2737                 kmem_cache_destroy(btrfs_transaction_cachep);
2738         if (btrfs_bit_radix_cachep)
2739                 kmem_cache_destroy(btrfs_bit_radix_cachep);
2740         if (btrfs_path_cachep)
2741                 kmem_cache_destroy(btrfs_path_cachep);
2742 }
2743
2744 struct kmem_cache *btrfs_cache_create(const char *name, size_t size,
2745                                        unsigned long extra_flags,
2746 #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
2747                                        void (*ctor)(struct kmem_cache *, void *)
2748 #else
2749                                        void (*ctor)(void *, struct kmem_cache *,
2750                                                     unsigned long)
2751 #endif
2752                                      )
2753 {
2754         return kmem_cache_create(name, size, 0, (SLAB_RECLAIM_ACCOUNT |
2755                                  SLAB_MEM_SPREAD | extra_flags), ctor
2756 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)
2757                                  ,NULL
2758 #endif
2759                                 );
2760 }
2761
2762 int btrfs_init_cachep(void)
2763 {
2764         btrfs_inode_cachep = btrfs_cache_create("btrfs_inode_cache",
2765                                           sizeof(struct btrfs_inode),
2766                                           0, init_once);
2767         if (!btrfs_inode_cachep)
2768                 goto fail;
2769         btrfs_trans_handle_cachep =
2770                         btrfs_cache_create("btrfs_trans_handle_cache",
2771                                            sizeof(struct btrfs_trans_handle),
2772                                            0, NULL);
2773         if (!btrfs_trans_handle_cachep)
2774                 goto fail;
2775         btrfs_transaction_cachep = btrfs_cache_create("btrfs_transaction_cache",
2776                                              sizeof(struct btrfs_transaction),
2777                                              0, NULL);
2778         if (!btrfs_transaction_cachep)
2779                 goto fail;
2780         btrfs_path_cachep = btrfs_cache_create("btrfs_path_cache",
2781                                          sizeof(struct btrfs_path),
2782                                          0, NULL);
2783         if (!btrfs_path_cachep)
2784                 goto fail;
2785         btrfs_bit_radix_cachep = btrfs_cache_create("btrfs_radix", 256,
2786                                               SLAB_DESTROY_BY_RCU, NULL);
2787         if (!btrfs_bit_radix_cachep)
2788                 goto fail;
2789         return 0;
2790 fail:
2791         btrfs_destroy_cachep();
2792         return -ENOMEM;
2793 }
2794
2795 static int btrfs_getattr(struct vfsmount *mnt,
2796                          struct dentry *dentry, struct kstat *stat)
2797 {
2798         struct inode *inode = dentry->d_inode;
2799         generic_fillattr(inode, stat);
2800         stat->blksize = PAGE_CACHE_SIZE;
2801         return 0;
2802 }
2803
2804 static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry,
2805                            struct inode * new_dir,struct dentry *new_dentry)
2806 {
2807         struct btrfs_trans_handle *trans;
2808         struct btrfs_root *root = BTRFS_I(old_dir)->root;
2809         struct inode *new_inode = new_dentry->d_inode;
2810         struct inode *old_inode = old_dentry->d_inode;
2811         struct timespec ctime = CURRENT_TIME;
2812         struct btrfs_path *path;
2813         int ret;
2814
2815         if (S_ISDIR(old_inode->i_mode) && new_inode &&
2816             new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) {
2817                 return -ENOTEMPTY;
2818         }
2819
2820         mutex_lock(&root->fs_info->fs_mutex);
2821         ret = btrfs_check_free_space(root, 1, 0);
2822         if (ret)
2823                 goto out_unlock;
2824
2825         trans = btrfs_start_transaction(root, 1);
2826
2827         btrfs_set_trans_block_group(trans, new_dir);
2828         path = btrfs_alloc_path();
2829         if (!path) {
2830                 ret = -ENOMEM;
2831                 goto out_fail;
2832         }
2833
2834         old_dentry->d_inode->i_nlink++;
2835         old_dir->i_ctime = old_dir->i_mtime = ctime;
2836         new_dir->i_ctime = new_dir->i_mtime = ctime;
2837         old_inode->i_ctime = ctime;
2838
2839         ret = btrfs_unlink_trans(trans, root, old_dir, old_dentry);
2840         if (ret)
2841                 goto out_fail;
2842
2843         if (new_inode) {
2844                 new_inode->i_ctime = CURRENT_TIME;
2845                 ret = btrfs_unlink_trans(trans, root, new_dir, new_dentry);
2846                 if (ret)
2847                         goto out_fail;
2848         }
2849         ret = btrfs_add_link(trans, new_dentry, old_inode, 1);
2850         if (ret)
2851                 goto out_fail;
2852
2853 out_fail:
2854         btrfs_free_path(path);
2855         btrfs_end_transaction(trans, root);
2856 out_unlock:
2857         mutex_unlock(&root->fs_info->fs_mutex);
2858         return ret;
2859 }
2860
2861 static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
2862                          const char *symname)
2863 {
2864         struct btrfs_trans_handle *trans;
2865         struct btrfs_root *root = BTRFS_I(dir)->root;
2866         struct btrfs_path *path;
2867         struct btrfs_key key;
2868         struct inode *inode = NULL;
2869         int err;
2870         int drop_inode = 0;
2871         u64 objectid;
2872         int name_len;
2873         int datasize;
2874         unsigned long ptr;
2875         struct btrfs_file_extent_item *ei;
2876         struct extent_buffer *leaf;
2877         unsigned long nr = 0;
2878
2879         name_len = strlen(symname) + 1;
2880         if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root))
2881                 return -ENAMETOOLONG;
2882
2883         mutex_lock(&root->fs_info->fs_mutex);
2884         err = btrfs_check_free_space(root, 1, 0);
2885         if (err)
2886                 goto out_fail;
2887
2888         trans = btrfs_start_transaction(root, 1);
2889         btrfs_set_trans_block_group(trans, dir);
2890
2891         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
2892         if (err) {
2893                 err = -ENOSPC;
2894                 goto out_unlock;
2895         }
2896
2897         inode = btrfs_new_inode(trans, root, dentry->d_name.name,
2898                                 dentry->d_name.len,
2899                                 dentry->d_parent->d_inode->i_ino, objectid,
2900                                 BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO);
2901         err = PTR_ERR(inode);
2902         if (IS_ERR(inode))
2903                 goto out_unlock;
2904
2905         btrfs_set_trans_block_group(trans, inode);
2906         err = btrfs_add_nondir(trans, dentry, inode, 0);
2907         if (err)
2908                 drop_inode = 1;
2909         else {
2910                 inode->i_mapping->a_ops = &btrfs_aops;
2911                 inode->i_fop = &btrfs_file_operations;
2912                 inode->i_op = &btrfs_file_inode_operations;
2913                 extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS);
2914                 extent_io_tree_init(&BTRFS_I(inode)->io_tree,
2915                                      inode->i_mapping, GFP_NOFS);
2916                 BTRFS_I(inode)->io_tree.ops = &btrfs_extent_io_ops;
2917         }
2918         dir->i_sb->s_dirt = 1;
2919         btrfs_update_inode_block_group(trans, inode);
2920         btrfs_update_inode_block_group(trans, dir);
2921         if (drop_inode)
2922                 goto out_unlock;
2923
2924         path = btrfs_alloc_path();
2925         BUG_ON(!path);
2926         key.objectid = inode->i_ino;
2927         key.offset = 0;
2928         btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
2929         datasize = btrfs_file_extent_calc_inline_size(name_len);
2930         err = btrfs_insert_empty_item(trans, root, path, &key,
2931                                       datasize);
2932         if (err) {
2933                 drop_inode = 1;
2934                 goto out_unlock;
2935         }
2936         leaf = path->nodes[0];
2937         ei = btrfs_item_ptr(leaf, path->slots[0],
2938                             struct btrfs_file_extent_item);
2939         btrfs_set_file_extent_generation(leaf, ei, trans->transid);
2940         btrfs_set_file_extent_type(leaf, ei,
2941                                    BTRFS_FILE_EXTENT_INLINE);
2942         ptr = btrfs_file_extent_inline_start(ei);
2943         write_extent_buffer(leaf, symname, ptr, name_len);
2944         btrfs_mark_buffer_dirty(leaf);
2945         btrfs_free_path(path);
2946
2947         inode->i_op = &btrfs_symlink_inode_operations;
2948         inode->i_mapping->a_ops = &btrfs_symlink_aops;
2949         inode->i_size = name_len - 1;
2950         err = btrfs_update_inode(trans, root, inode);
2951         if (err)
2952                 drop_inode = 1;
2953
2954 out_unlock:
2955         nr = trans->blocks_used;
2956         btrfs_end_transaction(trans, root);
2957 out_fail:
2958         mutex_unlock(&root->fs_info->fs_mutex);
2959         if (drop_inode) {
2960                 inode_dec_link_count(inode);
2961                 iput(inode);
2962         }
2963         btrfs_btree_balance_dirty(root, nr);
2964         btrfs_throttle(root);
2965         return err;
2966 }
2967 static int btrfs_permission(struct inode *inode, int mask,
2968                             struct nameidata *nd)
2969 {
2970         if (btrfs_test_flag(inode, READONLY) && (mask & MAY_WRITE))
2971                 return -EACCES;
2972         return generic_permission(inode, mask, NULL);
2973 }
2974
2975 static struct inode_operations btrfs_dir_inode_operations = {
2976         .lookup         = btrfs_lookup,
2977         .create         = btrfs_create,
2978         .unlink         = btrfs_unlink,
2979         .link           = btrfs_link,
2980         .mkdir          = btrfs_mkdir,
2981         .rmdir          = btrfs_rmdir,
2982         .rename         = btrfs_rename,
2983         .symlink        = btrfs_symlink,
2984         .setattr        = btrfs_setattr,
2985         .mknod          = btrfs_mknod,
2986         .setxattr       = generic_setxattr,
2987         .getxattr       = generic_getxattr,
2988         .listxattr      = btrfs_listxattr,
2989         .removexattr    = generic_removexattr,
2990         .permission     = btrfs_permission,
2991 };
2992 static struct inode_operations btrfs_dir_ro_inode_operations = {
2993         .lookup         = btrfs_lookup,
2994         .permission     = btrfs_permission,
2995 };
2996 static struct file_operations btrfs_dir_file_operations = {
2997         .llseek         = generic_file_llseek,
2998         .read           = generic_read_dir,
2999         .readdir        = btrfs_readdir,
3000         .unlocked_ioctl = btrfs_ioctl,
3001 #ifdef CONFIG_COMPAT
3002         .compat_ioctl   = btrfs_ioctl,
3003 #endif
3004 };
3005
3006 static struct extent_io_ops btrfs_extent_io_ops = {
3007         .fill_delalloc = run_delalloc_range,
3008         .writepage_io_hook = btrfs_writepage_io_hook,
3009         .readpage_io_hook = btrfs_readpage_io_hook,
3010         .readpage_end_io_hook = btrfs_readpage_end_io_hook,
3011         .set_bit_hook = btrfs_set_bit_hook,
3012         .clear_bit_hook = btrfs_clear_bit_hook,
3013 };
3014
3015 static struct address_space_operations btrfs_aops = {
3016         .readpage       = btrfs_readpage,
3017         .writepage      = btrfs_writepage,
3018         .writepages     = btrfs_writepages,
3019         .readpages      = btrfs_readpages,
3020         .sync_page      = block_sync_page,
3021         .bmap           = btrfs_bmap,
3022         .invalidatepage = btrfs_invalidatepage,
3023         .releasepage    = btrfs_releasepage,
3024         .set_page_dirty = __set_page_dirty_nobuffers,
3025 };
3026
3027 static struct address_space_operations btrfs_symlink_aops = {
3028         .readpage       = btrfs_readpage,
3029         .writepage      = btrfs_writepage,
3030         .invalidatepage = btrfs_invalidatepage,
3031         .releasepage    = btrfs_releasepage,
3032 };
3033
3034 static struct inode_operations btrfs_file_inode_operations = {
3035         .truncate       = btrfs_truncate,
3036         .getattr        = btrfs_getattr,
3037         .setattr        = btrfs_setattr,
3038         .setxattr       = generic_setxattr,
3039         .getxattr       = generic_getxattr,
3040         .listxattr      = btrfs_listxattr,
3041         .removexattr    = generic_removexattr,
3042         .permission     = btrfs_permission,
3043 };
3044 static struct inode_operations btrfs_special_inode_operations = {
3045         .getattr        = btrfs_getattr,
3046         .setattr        = btrfs_setattr,
3047         .permission     = btrfs_permission,
3048 };
3049 static struct inode_operations btrfs_symlink_inode_operations = {
3050         .readlink       = generic_readlink,
3051         .follow_link    = page_follow_link_light,
3052         .put_link       = page_put_link,
3053         .permission     = btrfs_permission,
3054 };