]> git.karo-electronics.de Git - karo-tx-linux.git/blob - fs/btrfs/inode.c
Btrfs: Disable delalloc accounting for now
[karo-tx-linux.git] / fs / btrfs / inode.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <linux/buffer_head.h>
20 #include <linux/fs.h>
21 #include <linux/pagemap.h>
22 #include <linux/highmem.h>
23 #include <linux/time.h>
24 #include <linux/init.h>
25 #include <linux/string.h>
26 #include <linux/smp_lock.h>
27 #include <linux/backing-dev.h>
28 #include <linux/mpage.h>
29 #include <linux/swap.h>
30 #include <linux/writeback.h>
31 #include <linux/statfs.h>
32 #include <linux/compat.h>
33 #include <linux/bit_spinlock.h>
34 #include <linux/version.h>
35 #include <linux/xattr.h>
36 #include "ctree.h"
37 #include "disk-io.h"
38 #include "transaction.h"
39 #include "btrfs_inode.h"
40 #include "ioctl.h"
41 #include "print-tree.h"
42
/*
 * Bundle of an inode number and the btrfs root it belongs to.
 * NOTE(review): presumably handed to the inode lookup helpers further
 * down in this file; the users are not visible in this part of the source.
 */
struct btrfs_iget_args {
        u64 ino;                        /* inode number */
        struct btrfs_root *root;        /* root paired with that number */
};
47
/*
 * Forward declarations of the operation tables that the inode setup code
 * below points inodes at.  Their initialized definitions are expected
 * later in this file (not visible in this part of the source).
 */
static struct inode_operations btrfs_dir_inode_operations;
static struct inode_operations btrfs_symlink_inode_operations;
static struct inode_operations btrfs_dir_ro_inode_operations;
static struct inode_operations btrfs_special_inode_operations;
static struct inode_operations btrfs_file_inode_operations;
static struct address_space_operations btrfs_aops;
static struct address_space_operations btrfs_symlink_aops;
static struct file_operations btrfs_dir_file_operations;
static struct extent_map_ops btrfs_extent_map_ops;

/*
 * Slab cache pointers.  Only the inode cache is private to this file; the
 * remaining four have external linkage.
 */
static struct kmem_cache *btrfs_inode_cachep;
struct kmem_cache *btrfs_trans_handle_cachep;
struct kmem_cache *btrfs_transaction_cachep;
struct kmem_cache *btrfs_bit_radix_cachep;
struct kmem_cache *btrfs_path_cachep;
63
/*
 * Table translating the file-type bits of a mode into the matching
 * BTRFS_FT_* value.  Entries are placed at (S_IFxxx >> S_SHIFT), so a
 * lookup is expected to index with (mode & S_IFMT) >> S_SHIFT.  Slots
 * without an explicit initializer are zero.
 */
#define S_SHIFT 12
static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
        [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
        [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
        [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
        [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
        [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
        [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
        [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
};
74
75 int btrfs_check_free_space(struct btrfs_root *root, u64 num_required,
76                            int for_del)
77 {
78         u64 total = btrfs_super_total_bytes(&root->fs_info->super_copy);
79         u64 used = btrfs_super_bytes_used(&root->fs_info->super_copy);
80         u64 thresh;
81         int ret = 0;
82
83         return 0;
84
85         if (for_del)
86                 thresh = total * 90;
87         else
88                 thresh = total * 85;
89
90         do_div(thresh, 100);
91
92         spin_lock(&root->fs_info->delalloc_lock);
93         if (used + root->fs_info->delalloc_bytes + num_required > thresh)
94                 ret = -ENOSPC;
95         spin_unlock(&root->fs_info->delalloc_lock);
96         return ret;
97 }
98
99 static int cow_file_range(struct inode *inode, u64 start, u64 end)
100 {
101         struct btrfs_root *root = BTRFS_I(inode)->root;
102         struct btrfs_trans_handle *trans;
103         u64 alloc_hint = 0;
104         u64 num_bytes;
105         u64 cur_alloc_size;
106         u64 blocksize = root->sectorsize;
107         struct btrfs_key ins;
108         int ret;
109
110         trans = btrfs_start_transaction(root, 1);
111         BUG_ON(!trans);
112         btrfs_set_trans_block_group(trans, inode);
113
114         num_bytes = (end - start + blocksize) & ~(blocksize - 1);
115         num_bytes = max(blocksize,  num_bytes);
116         ret = btrfs_drop_extents(trans, root, inode,
117                                  start, start + num_bytes, start, &alloc_hint);
118
119         if (alloc_hint == EXTENT_MAP_INLINE)
120                 goto out;
121
122         while(num_bytes > 0) {
123                 cur_alloc_size = min(num_bytes, root->fs_info->max_extent);
124                 ret = btrfs_alloc_extent(trans, root, cur_alloc_size,
125                                          root->root_key.objectid,
126                                          trans->transid,
127                                          inode->i_ino, start, 0,
128                                          alloc_hint, (u64)-1, &ins, 1);
129                 if (ret) {
130                         WARN_ON(1);
131                         goto out;
132                 }
133                 ret = btrfs_insert_file_extent(trans, root, inode->i_ino,
134                                                start, ins.objectid, ins.offset,
135                                                ins.offset);
136                 num_bytes -= cur_alloc_size;
137                 alloc_hint = ins.objectid + ins.offset;
138                 start += cur_alloc_size;
139         }
140         btrfs_add_ordered_inode(inode);
141 out:
142         btrfs_end_transaction(trans, root);
143         return ret;
144 }
145
/*
 * Walk the inclusive byte range [start, end] of @inode and call
 * cow_file_range() only for the parts that cannot be left in place.
 *
 * Each pass looks up the file extent item at the current start.  The
 * extent is skipped (start advances to its end, no COW) only when all of
 * these hold: it is a regular extent belonging to this inode, start lies
 * inside it, it has a non-zero disk bytenr, it ends within the size
 * recorded in the super block, and btrfs_count_snapshots_in_path()
 * reports exactly 1.  Anything else lands at not_found, which hands
 * [start, cow_end] to cow_file_range().
 *
 * Returns a negative value if the extent lookup fails, 0 otherwise.  The
 * result of cow_file_range() is not examined.
 */
static int run_delalloc_nocow(struct inode *inode, u64 start, u64 end)
{
        u64 extent_start;
        u64 extent_end;
        u64 bytenr;
        u64 cow_end;
        u64 loops = 0;
        u64 total_fs_bytes;
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct extent_buffer *leaf;
        int found_type;
        struct btrfs_path *path;
        struct btrfs_file_extent_item *item;
        int ret;
        int err;        /* NOTE(review): assigned below but never read */
        struct btrfs_key found_key;

        total_fs_bytes = btrfs_super_total_bytes(&root->fs_info->super_copy);
        path = btrfs_alloc_path();
        BUG_ON(!path);
again:
        ret = btrfs_lookup_file_extent(NULL, root, path,
                                       inode->i_ino, start, 0);
        if (ret < 0) {
                btrfs_free_path(path);
                return ret;
        }

        /* until an extent narrows it, a COW covers the rest of the range */
        cow_end = end;
        if (ret != 0) {
                /* no exact match: step back to the preceding item, if any */
                if (path->slots[0] == 0)
                        goto not_found;
                path->slots[0]--;
        }

        leaf = path->nodes[0];
        item = btrfs_item_ptr(leaf, path->slots[0],
                              struct btrfs_file_extent_item);

        /* are we inside the extent that was found? */
        btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
        found_type = btrfs_key_type(&found_key);
        if (found_key.objectid != inode->i_ino ||
            found_type != BTRFS_EXTENT_DATA_KEY) {
                goto not_found;
        }

        /* found_type is reused: from here on it holds the extent type */
        found_type = btrfs_file_extent_type(leaf, item);
        extent_start = found_key.offset;
        if (found_type == BTRFS_FILE_EXTENT_REG) {
                u64 extent_num_bytes;

                extent_num_bytes = btrfs_file_extent_num_bytes(leaf, item);
                extent_end = extent_start + extent_num_bytes;
                err = 0;

                /* after the first pass the next extent must begin at start */
                if (loops && start != extent_start)
                        goto not_found;

                if (start < extent_start || start >= extent_end)
                        goto not_found;

                /* from here a COW is limited to the end of this extent */
                cow_end = min(end, extent_end - 1);
                bytenr = btrfs_file_extent_disk_bytenr(leaf, item);
                if (bytenr == 0)
                        goto not_found;

                /*
                 * we may be called by the resizer, make sure we're inside
                 * the limits of the FS
                 */
                if (bytenr + extent_num_bytes > total_fs_bytes)
                        goto not_found;

                if (btrfs_count_snapshots_in_path(root, path, bytenr) != 1) {
                        goto not_found;
                }

                /* extent can stay where it is: continue right behind it */
                start = extent_end;
        } else {
                goto not_found;
        }
loop:
        if (start > end) {
                btrfs_free_path(path);
                return 0;
        }
        btrfs_release_path(root, path);
        loops++;
        goto again;

not_found:
        /* COW [start, cow_end], then resume with whatever follows it */
        cow_file_range(inode, start, cow_end);
        start = cow_end + 1;
        goto loop;
}
242
243 static int run_delalloc_range(struct inode *inode, u64 start, u64 end)
244 {
245         struct btrfs_root *root = BTRFS_I(inode)->root;
246         u64 num_bytes;
247         int ret;
248         mutex_lock(&root->fs_info->fs_mutex);
249         if (btrfs_test_opt(root, NODATACOW) ||
250             btrfs_test_flag(inode, NODATACOW))
251                 ret = run_delalloc_nocow(inode, start, end);
252         else
253                 ret = cow_file_range(inode, start, end);
254
255         spin_lock(&root->fs_info->delalloc_lock);
256         num_bytes = end + 1 - start;
257         if (root->fs_info->delalloc_bytes < num_bytes) {
258                 printk("delalloc accounting error total %llu sub %llu\n",
259                        root->fs_info->delalloc_bytes, num_bytes);
260         } else {
261                 root->fs_info->delalloc_bytes -= num_bytes;
262         }
263         spin_unlock(&root->fs_info->delalloc_lock);
264
265         mutex_unlock(&root->fs_info->fs_mutex);
266         return ret;
267 }
268
269 int btrfs_writepage_io_hook(struct page *page, u64 start, u64 end)
270 {
271         struct inode *inode = page->mapping->host;
272         struct btrfs_root *root = BTRFS_I(inode)->root;
273         struct btrfs_trans_handle *trans;
274         char *kaddr;
275         int ret = 0;
276         u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
277         size_t offset = start - page_start;
278         if (btrfs_test_opt(root, NODATASUM) ||
279             btrfs_test_flag(inode, NODATASUM))
280                 return 0;
281         mutex_lock(&root->fs_info->fs_mutex);
282         trans = btrfs_start_transaction(root, 1);
283         btrfs_set_trans_block_group(trans, inode);
284         kaddr = kmap(page);
285         btrfs_csum_file_block(trans, root, inode, inode->i_ino,
286                               start, kaddr + offset, end - start + 1);
287         kunmap(page);
288         ret = btrfs_end_transaction(trans, root);
289         BUG_ON(ret);
290         mutex_unlock(&root->fs_info->fs_mutex);
291         return ret;
292 }
293
294 int btrfs_readpage_io_hook(struct page *page, u64 start, u64 end)
295 {
296         int ret = 0;
297         struct inode *inode = page->mapping->host;
298         struct btrfs_root *root = BTRFS_I(inode)->root;
299         struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
300         struct btrfs_csum_item *item;
301         struct btrfs_path *path = NULL;
302         u32 csum;
303         if (btrfs_test_opt(root, NODATASUM) ||
304             btrfs_test_flag(inode, NODATASUM))
305                 return 0;
306         mutex_lock(&root->fs_info->fs_mutex);
307         path = btrfs_alloc_path();
308         item = btrfs_lookup_csum(NULL, root, path, inode->i_ino, start, 0);
309         if (IS_ERR(item)) {
310                 ret = PTR_ERR(item);
311                 /* a csum that isn't present is a preallocated region. */
312                 if (ret == -ENOENT || ret == -EFBIG)
313                         ret = 0;
314                 csum = 0;
315                 goto out;
316         }
317         read_extent_buffer(path->nodes[0], &csum, (unsigned long)item,
318                            BTRFS_CRC32_SIZE);
319         set_state_private(em_tree, start, csum);
320 out:
321         if (path)
322                 btrfs_free_path(path);
323         mutex_unlock(&root->fs_info->fs_mutex);
324         return ret;
325 }
326
327 int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end)
328 {
329         size_t offset = start - ((u64)page->index << PAGE_CACHE_SHIFT);
330         struct inode *inode = page->mapping->host;
331         struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
332         char *kaddr;
333         u64 private;
334         int ret;
335         struct btrfs_root *root = BTRFS_I(inode)->root;
336         u32 csum = ~(u32)0;
337         unsigned long flags;
338         if (btrfs_test_opt(root, NODATASUM) ||
339             btrfs_test_flag(inode, NODATASUM))
340                 return 0;
341         ret = get_state_private(em_tree, start, &private);
342         local_irq_save(flags);
343         kaddr = kmap_atomic(page, KM_IRQ0);
344         if (ret) {
345                 goto zeroit;
346         }
347         csum = btrfs_csum_data(root, kaddr + offset, csum,  end - start + 1);
348         btrfs_csum_final(csum, (char *)&csum);
349         if (csum != private) {
350                 goto zeroit;
351         }
352         kunmap_atomic(kaddr, KM_IRQ0);
353         local_irq_restore(flags);
354         return 0;
355
356 zeroit:
357         printk("btrfs csum failed ino %lu off %llu\n",
358                page->mapping->host->i_ino, (unsigned long long)start);
359         memset(kaddr + offset, 1, end - start + 1);
360         flush_dcache_page(page);
361         kunmap_atomic(kaddr, KM_IRQ0);
362         local_irq_restore(flags);
363         return 0;
364 }
365
366 void btrfs_read_locked_inode(struct inode *inode)
367 {
368         struct btrfs_path *path;
369         struct extent_buffer *leaf;
370         struct btrfs_inode_item *inode_item;
371         struct btrfs_inode_timespec *tspec;
372         struct btrfs_root *root = BTRFS_I(inode)->root;
373         struct btrfs_key location;
374         u64 alloc_group_block;
375         u32 rdev;
376         int ret;
377
378         path = btrfs_alloc_path();
379         BUG_ON(!path);
380         mutex_lock(&root->fs_info->fs_mutex);
381         memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
382
383         ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
384         if (ret)
385                 goto make_bad;
386
387         leaf = path->nodes[0];
388         inode_item = btrfs_item_ptr(leaf, path->slots[0],
389                                     struct btrfs_inode_item);
390
391         inode->i_mode = btrfs_inode_mode(leaf, inode_item);
392         inode->i_nlink = btrfs_inode_nlink(leaf, inode_item);
393         inode->i_uid = btrfs_inode_uid(leaf, inode_item);
394         inode->i_gid = btrfs_inode_gid(leaf, inode_item);
395         inode->i_size = btrfs_inode_size(leaf, inode_item);
396
397         tspec = btrfs_inode_atime(inode_item);
398         inode->i_atime.tv_sec = btrfs_timespec_sec(leaf, tspec);
399         inode->i_atime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
400
401         tspec = btrfs_inode_mtime(inode_item);
402         inode->i_mtime.tv_sec = btrfs_timespec_sec(leaf, tspec);
403         inode->i_mtime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
404
405         tspec = btrfs_inode_ctime(inode_item);
406         inode->i_ctime.tv_sec = btrfs_timespec_sec(leaf, tspec);
407         inode->i_ctime.tv_nsec = btrfs_timespec_nsec(leaf, tspec);
408
409         inode->i_blocks = btrfs_inode_nblocks(leaf, inode_item);
410         inode->i_generation = btrfs_inode_generation(leaf, inode_item);
411         inode->i_rdev = 0;
412         rdev = btrfs_inode_rdev(leaf, inode_item);
413
414         alloc_group_block = btrfs_inode_block_group(leaf, inode_item);
415         BTRFS_I(inode)->block_group = btrfs_lookup_block_group(root->fs_info,
416                                                        alloc_group_block);
417         BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);
418         if (!BTRFS_I(inode)->block_group) {
419                 BTRFS_I(inode)->block_group = btrfs_find_block_group(root,
420                                                          NULL, 0, 0, 0);
421         }
422         btrfs_free_path(path);
423         inode_item = NULL;
424
425         mutex_unlock(&root->fs_info->fs_mutex);
426
427         switch (inode->i_mode & S_IFMT) {
428         case S_IFREG:
429                 inode->i_mapping->a_ops = &btrfs_aops;
430                 BTRFS_I(inode)->extent_tree.ops = &btrfs_extent_map_ops;
431                 inode->i_fop = &btrfs_file_operations;
432                 inode->i_op = &btrfs_file_inode_operations;
433                 break;
434         case S_IFDIR:
435                 inode->i_fop = &btrfs_dir_file_operations;
436                 if (root == root->fs_info->tree_root)
437                         inode->i_op = &btrfs_dir_ro_inode_operations;
438                 else
439                         inode->i_op = &btrfs_dir_inode_operations;
440                 break;
441         case S_IFLNK:
442                 inode->i_op = &btrfs_symlink_inode_operations;
443                 inode->i_mapping->a_ops = &btrfs_symlink_aops;
444                 break;
445         default:
446                 init_special_inode(inode, inode->i_mode, rdev);
447                 break;
448         }
449         return;
450
451 make_bad:
452         btrfs_release_path(root, path);
453         btrfs_free_path(path);
454         mutex_unlock(&root->fs_info->fs_mutex);
455         make_bad_inode(inode);
456 }
457
458 static void fill_inode_item(struct extent_buffer *leaf,
459                             struct btrfs_inode_item *item,
460                             struct inode *inode)
461 {
462         btrfs_set_inode_uid(leaf, item, inode->i_uid);
463         btrfs_set_inode_gid(leaf, item, inode->i_gid);
464         btrfs_set_inode_size(leaf, item, inode->i_size);
465         btrfs_set_inode_mode(leaf, item, inode->i_mode);
466         btrfs_set_inode_nlink(leaf, item, inode->i_nlink);
467
468         btrfs_set_timespec_sec(leaf, btrfs_inode_atime(item),
469                                inode->i_atime.tv_sec);
470         btrfs_set_timespec_nsec(leaf, btrfs_inode_atime(item),
471                                 inode->i_atime.tv_nsec);
472
473         btrfs_set_timespec_sec(leaf, btrfs_inode_mtime(item),
474                                inode->i_mtime.tv_sec);
475         btrfs_set_timespec_nsec(leaf, btrfs_inode_mtime(item),
476                                 inode->i_mtime.tv_nsec);
477
478         btrfs_set_timespec_sec(leaf, btrfs_inode_ctime(item),
479                                inode->i_ctime.tv_sec);
480         btrfs_set_timespec_nsec(leaf, btrfs_inode_ctime(item),
481                                 inode->i_ctime.tv_nsec);
482
483         btrfs_set_inode_nblocks(leaf, item, inode->i_blocks);
484         btrfs_set_inode_generation(leaf, item, inode->i_generation);
485         btrfs_set_inode_rdev(leaf, item, inode->i_rdev);
486         btrfs_set_inode_flags(leaf, item, BTRFS_I(inode)->flags);
487         btrfs_set_inode_block_group(leaf, item,
488                                     BTRFS_I(inode)->block_group->key.objectid);
489 }
490
491 int btrfs_update_inode(struct btrfs_trans_handle *trans,
492                               struct btrfs_root *root,
493                               struct inode *inode)
494 {
495         struct btrfs_inode_item *inode_item;
496         struct btrfs_path *path;
497         struct extent_buffer *leaf;
498         int ret;
499
500         path = btrfs_alloc_path();
501         BUG_ON(!path);
502         ret = btrfs_lookup_inode(trans, root, path,
503                                  &BTRFS_I(inode)->location, 1);
504         if (ret) {
505                 if (ret > 0)
506                         ret = -ENOENT;
507                 goto failed;
508         }
509
510         leaf = path->nodes[0];
511         inode_item = btrfs_item_ptr(leaf, path->slots[0],
512                                   struct btrfs_inode_item);
513
514         fill_inode_item(leaf, inode_item, inode);
515         btrfs_mark_buffer_dirty(leaf);
516         btrfs_set_inode_last_trans(trans, inode);
517         ret = 0;
518 failed:
519         btrfs_release_path(root, path);
520         btrfs_free_path(path);
521         return ret;
522 }
523
524
525 static int btrfs_unlink_trans(struct btrfs_trans_handle *trans,
526                               struct btrfs_root *root,
527                               struct inode *dir,
528                               struct dentry *dentry)
529 {
530         struct btrfs_path *path;
531         const char *name = dentry->d_name.name;
532         int name_len = dentry->d_name.len;
533         int ret = 0;
534         struct extent_buffer *leaf;
535         struct btrfs_dir_item *di;
536         struct btrfs_key key;
537
538         path = btrfs_alloc_path();
539         if (!path) {
540                 ret = -ENOMEM;
541                 goto err;
542         }
543
544         di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino,
545                                     name, name_len, -1);
546         if (IS_ERR(di)) {
547                 ret = PTR_ERR(di);
548                 goto err;
549         }
550         if (!di) {
551                 ret = -ENOENT;
552                 goto err;
553         }
554         leaf = path->nodes[0];
555         btrfs_dir_item_key_to_cpu(leaf, di, &key);
556         ret = btrfs_delete_one_dir_name(trans, root, path, di);
557         if (ret)
558                 goto err;
559         btrfs_release_path(root, path);
560
561         di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino,
562                                          key.objectid, name, name_len, -1);
563         if (IS_ERR(di)) {
564                 ret = PTR_ERR(di);
565                 goto err;
566         }
567         if (!di) {
568                 ret = -ENOENT;
569                 goto err;
570         }
571         ret = btrfs_delete_one_dir_name(trans, root, path, di);
572
573         dentry->d_inode->i_ctime = dir->i_ctime;
574         ret = btrfs_del_inode_ref(trans, root, name, name_len,
575                                   dentry->d_inode->i_ino,
576                                   dentry->d_parent->d_inode->i_ino);
577         if (ret) {
578                 printk("failed to delete reference to %.*s, "
579                        "inode %lu parent %lu\n", name_len, name,
580                        dentry->d_inode->i_ino,
581                        dentry->d_parent->d_inode->i_ino);
582         }
583 err:
584         btrfs_free_path(path);
585         if (!ret) {
586                 dir->i_size -= name_len * 2;
587                 dir->i_mtime = dir->i_ctime = CURRENT_TIME;
588                 btrfs_update_inode(trans, root, dir);
589 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
590                 dentry->d_inode->i_nlink--;
591 #else
592                 drop_nlink(dentry->d_inode);
593 #endif
594                 ret = btrfs_update_inode(trans, root, dentry->d_inode);
595                 dir->i_sb->s_dirt = 1;
596         }
597         return ret;
598 }
599
600 static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
601 {
602         struct btrfs_root *root;
603         struct btrfs_trans_handle *trans;
604         int ret;
605         unsigned long nr = 0;
606
607         root = BTRFS_I(dir)->root;
608         mutex_lock(&root->fs_info->fs_mutex);
609
610         ret = btrfs_check_free_space(root, 1, 1);
611         if (ret)
612                 goto fail;
613
614         trans = btrfs_start_transaction(root, 1);
615
616         btrfs_set_trans_block_group(trans, dir);
617         ret = btrfs_unlink_trans(trans, root, dir, dentry);
618         nr = trans->blocks_used;
619
620         btrfs_end_transaction(trans, root);
621 fail:
622         mutex_unlock(&root->fs_info->fs_mutex);
623         btrfs_btree_balance_dirty(root, nr);
624         btrfs_throttle(root);
625         return ret;
626 }
627
628 static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
629 {
630         struct inode *inode = dentry->d_inode;
631         int err = 0;
632         int ret;
633         struct btrfs_root *root = BTRFS_I(dir)->root;
634         struct btrfs_trans_handle *trans;
635         unsigned long nr = 0;
636
637         if (inode->i_size > BTRFS_EMPTY_DIR_SIZE)
638                 return -ENOTEMPTY;
639
640         mutex_lock(&root->fs_info->fs_mutex);
641         ret = btrfs_check_free_space(root, 1, 1);
642         if (ret)
643                 goto fail;
644
645         trans = btrfs_start_transaction(root, 1);
646         btrfs_set_trans_block_group(trans, dir);
647
648         /* now the directory is empty */
649         err = btrfs_unlink_trans(trans, root, dir, dentry);
650         if (!err) {
651                 inode->i_size = 0;
652         }
653
654         nr = trans->blocks_used;
655         ret = btrfs_end_transaction(trans, root);
656 fail:
657         mutex_unlock(&root->fs_info->fs_mutex);
658         btrfs_btree_balance_dirty(root, nr);
659         btrfs_throttle(root);
660
661         if (ret && !err)
662                 err = ret;
663         return err;
664 }
665
666 static int btrfs_free_inode(struct btrfs_trans_handle *trans,
667                             struct btrfs_root *root,
668                             struct inode *inode)
669 {
670         struct btrfs_path *path;
671         int ret;
672
673         clear_inode(inode);
674
675         path = btrfs_alloc_path();
676         BUG_ON(!path);
677         ret = btrfs_lookup_inode(trans, root, path,
678                                  &BTRFS_I(inode)->location, -1);
679         if (ret > 0)
680                 ret = -ENOENT;
681         if (!ret)
682                 ret = btrfs_del_item(trans, root, path);
683         btrfs_free_path(path);
684         return ret;
685 }
686
687 /*
688  * this can truncate away extent items, csum items and directory items.
689  * It starts at a high offset and removes keys until it can't find
690  * any higher than i_size.
691  *
692  * csum items that cross the new i_size are truncated to the new size
693  * as well.
694  */
695 static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans,
696                                    struct btrfs_root *root,
697                                    struct inode *inode)
698 {
699         int ret;
700         struct btrfs_path *path;
701         struct btrfs_key key;
702         struct btrfs_key found_key;
703         u32 found_type;
704         struct extent_buffer *leaf;
705         struct btrfs_file_extent_item *fi;
706         u64 extent_start = 0;
707         u64 extent_num_bytes = 0;
708         u64 item_end = 0;
709         u64 root_gen = 0;
710         u64 root_owner = 0;
711         int found_extent;
712         int del_item;
713         int extent_type = -1;
714
715         btrfs_drop_extent_cache(inode, inode->i_size, (u64)-1);
716         path = btrfs_alloc_path();
717         path->reada = -1;
718         BUG_ON(!path);
719
720         /* FIXME, add redo link to tree so we don't leak on crash */
721         key.objectid = inode->i_ino;
722         key.offset = (u64)-1;
723         key.type = (u8)-1;
724
725         while(1) {
726                 btrfs_init_path(path);
727                 fi = NULL;
728                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
729                 if (ret < 0) {
730                         goto error;
731                 }
732                 if (ret > 0) {
733                         BUG_ON(path->slots[0] == 0);
734                         path->slots[0]--;
735                 }
736                 leaf = path->nodes[0];
737                 btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
738                 found_type = btrfs_key_type(&found_key);
739
740                 if (found_key.objectid != inode->i_ino)
741                         break;
742
743                 if (found_type != BTRFS_CSUM_ITEM_KEY &&
744                     found_type != BTRFS_DIR_ITEM_KEY &&
745                     found_type != BTRFS_DIR_INDEX_KEY &&
746                     found_type != BTRFS_EXTENT_DATA_KEY)
747                         break;
748
749                 item_end = found_key.offset;
750                 if (found_type == BTRFS_EXTENT_DATA_KEY) {
751                         fi = btrfs_item_ptr(leaf, path->slots[0],
752                                             struct btrfs_file_extent_item);
753                         extent_type = btrfs_file_extent_type(leaf, fi);
754                         if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
755                                 item_end +=
756                                     btrfs_file_extent_num_bytes(leaf, fi);
757                         } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
758                                 struct btrfs_item *item = btrfs_item_nr(leaf,
759                                                                 path->slots[0]);
760                                 item_end += btrfs_file_extent_inline_len(leaf,
761                                                                          item);
762                         }
763                         item_end--;
764                 }
765                 if (found_type == BTRFS_CSUM_ITEM_KEY) {
766                         ret = btrfs_csum_truncate(trans, root, path,
767                                                   inode->i_size);
768                         BUG_ON(ret);
769                 }
770                 if (item_end < inode->i_size) {
771                         if (found_type == BTRFS_DIR_ITEM_KEY) {
772                                 found_type = BTRFS_INODE_ITEM_KEY;
773                         } else if (found_type == BTRFS_EXTENT_ITEM_KEY) {
774                                 found_type = BTRFS_CSUM_ITEM_KEY;
775                         } else if (found_type) {
776                                 found_type--;
777                         } else {
778                                 break;
779                         }
780                         btrfs_set_key_type(&key, found_type);
781                         btrfs_release_path(root, path);
782                         continue;
783                 }
784                 if (found_key.offset >= inode->i_size)
785                         del_item = 1;
786                 else
787                         del_item = 0;
788                 found_extent = 0;
789
790                 /* FIXME, shrink the extent if the ref count is only 1 */
791                 if (found_type != BTRFS_EXTENT_DATA_KEY)
792                         goto delete;
793
794                 if (extent_type != BTRFS_FILE_EXTENT_INLINE) {
795                         u64 num_dec;
796                         extent_start = btrfs_file_extent_disk_bytenr(leaf, fi);
797                         if (!del_item) {
798                                 u64 orig_num_bytes =
799                                         btrfs_file_extent_num_bytes(leaf, fi);
800                                 extent_num_bytes = inode->i_size -
801                                         found_key.offset + root->sectorsize - 1;
802                                 btrfs_set_file_extent_num_bytes(leaf, fi,
803                                                          extent_num_bytes);
804                                 num_dec = (orig_num_bytes -
805                                            extent_num_bytes) >> 9;
806                                 if (extent_start != 0) {
807                                         inode->i_blocks -= num_dec;
808                                 }
809                                 btrfs_mark_buffer_dirty(leaf);
810                         } else {
811                                 extent_num_bytes =
812                                         btrfs_file_extent_disk_num_bytes(leaf,
813                                                                          fi);
814                                 /* FIXME blocksize != 4096 */
815                                 num_dec = btrfs_file_extent_num_bytes(leaf,
816                                                                        fi) >> 9;
817                                 if (extent_start != 0) {
818                                         found_extent = 1;
819                                         inode->i_blocks -= num_dec;
820                                 }
821                                 root_gen = btrfs_header_generation(leaf);
822                                 root_owner = btrfs_header_owner(leaf);
823                         }
824                 } else if (extent_type == BTRFS_FILE_EXTENT_INLINE &&
825                            !del_item) {
826                         u32 newsize = inode->i_size - found_key.offset;
827                         newsize = btrfs_file_extent_calc_inline_size(newsize);
828                         ret = btrfs_truncate_item(trans, root, path,
829                                                   newsize, 1);
830                         BUG_ON(ret);
831                 }
832 delete:
833                 if (del_item) {
834                         ret = btrfs_del_item(trans, root, path);
835                         if (ret)
836                                 goto error;
837                 } else {
838                         break;
839                 }
840                 btrfs_release_path(root, path);
841                 if (found_extent) {
842                         ret = btrfs_free_extent(trans, root, extent_start,
843                                                 extent_num_bytes,
844                                                 root_owner,
845                                                 root_gen, inode->i_ino,
846                                                 found_key.offset, 0);
847                         BUG_ON(ret);
848                 }
849         }
850         ret = 0;
851 error:
852         btrfs_release_path(root, path);
853         btrfs_free_path(path);
854         inode->i_sb->s_dirt = 1;
855         return ret;
856 }
857
858 static int btrfs_cow_one_page(struct inode *inode, struct page *page,
859                               size_t zero_start)
860 {
861         char *kaddr;
862         struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
863         struct btrfs_root *root = BTRFS_I(inode)->root;
864         u64 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
865         u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
866         u64 existing_delalloc;
867         u64 delalloc_start;
868         int ret = 0;
869
870         WARN_ON(!PageLocked(page));
871         set_page_extent_mapped(page);
872
873         lock_extent(em_tree, page_start, page_end, GFP_NOFS);
874         delalloc_start = page_start;
875         existing_delalloc = count_range_bits(&BTRFS_I(inode)->extent_tree,
876                                              &delalloc_start, page_end,
877                                              PAGE_CACHE_SIZE, EXTENT_DELALLOC);
878         set_extent_delalloc(&BTRFS_I(inode)->extent_tree, page_start,
879                             page_end, GFP_NOFS);
880
881         spin_lock(&root->fs_info->delalloc_lock);
882         root->fs_info->delalloc_bytes += PAGE_CACHE_SIZE - existing_delalloc;
883         spin_unlock(&root->fs_info->delalloc_lock);
884
885         if (zero_start != PAGE_CACHE_SIZE) {
886                 kaddr = kmap(page);
887                 memset(kaddr + zero_start, 0, PAGE_CACHE_SIZE - zero_start);
888                 flush_dcache_page(page);
889                 kunmap(page);
890         }
891         set_page_dirty(page);
892         unlock_extent(em_tree, page_start, page_end, GFP_NOFS);
893
894         return ret;
895 }
896
897 /*
898  * taken from block_truncate_page, but does cow as it zeros out
899  * any bytes left in the last page in the file.
900  */
901 static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
902 {
903         struct inode *inode = mapping->host;
904         struct btrfs_root *root = BTRFS_I(inode)->root;
905         u32 blocksize = root->sectorsize;
906         pgoff_t index = from >> PAGE_CACHE_SHIFT;
907         unsigned offset = from & (PAGE_CACHE_SIZE-1);
908         struct page *page;
909         int ret = 0;
910         u64 page_start;
911
912         if ((offset & (blocksize - 1)) == 0)
913                 goto out;
914
915         ret = -ENOMEM;
916         page = grab_cache_page(mapping, index);
917         if (!page)
918                 goto out;
919         if (!PageUptodate(page)) {
920                 ret = btrfs_readpage(NULL, page);
921                 lock_page(page);
922                 if (!PageUptodate(page)) {
923                         ret = -EIO;
924                         goto out;
925                 }
926         }
927         page_start = (u64)page->index << PAGE_CACHE_SHIFT;
928
929         ret = btrfs_cow_one_page(inode, page, offset);
930
931         unlock_page(page);
932         page_cache_release(page);
933 out:
934         return ret;
935 }
936
937 static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
938 {
939         struct inode *inode = dentry->d_inode;
940         int err;
941
942         err = inode_change_ok(inode, attr);
943         if (err)
944                 return err;
945
946         if (S_ISREG(inode->i_mode) &&
947             attr->ia_valid & ATTR_SIZE && attr->ia_size > inode->i_size) {
948                 struct btrfs_trans_handle *trans;
949                 struct btrfs_root *root = BTRFS_I(inode)->root;
950                 struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
951
952                 u64 mask = root->sectorsize - 1;
953                 u64 pos = (inode->i_size + mask) & ~mask;
954                 u64 block_end = attr->ia_size | mask;
955                 u64 hole_size;
956                 u64 alloc_hint = 0;
957
958                 if (attr->ia_size <= pos)
959                         goto out;
960
961                 mutex_lock(&root->fs_info->fs_mutex);
962                 err = btrfs_check_free_space(root, 1, 0);
963                 mutex_unlock(&root->fs_info->fs_mutex);
964                 if (err)
965                         goto fail;
966
967                 btrfs_truncate_page(inode->i_mapping, inode->i_size);
968
969                 lock_extent(em_tree, pos, block_end, GFP_NOFS);
970                 hole_size = (attr->ia_size - pos + mask) & ~mask;
971
972                 mutex_lock(&root->fs_info->fs_mutex);
973                 trans = btrfs_start_transaction(root, 1);
974                 btrfs_set_trans_block_group(trans, inode);
975                 err = btrfs_drop_extents(trans, root, inode,
976                                          pos, pos + hole_size, pos,
977                                          &alloc_hint);
978
979                 if (alloc_hint != EXTENT_MAP_INLINE) {
980                         err = btrfs_insert_file_extent(trans, root,
981                                                        inode->i_ino,
982                                                        pos, 0, 0, hole_size);
983                 }
984                 btrfs_end_transaction(trans, root);
985                 mutex_unlock(&root->fs_info->fs_mutex);
986                 unlock_extent(em_tree, pos, block_end, GFP_NOFS);
987                 if (err)
988                         return err;
989         }
990 out:
991         err = inode_setattr(inode, attr);
992 fail:
993         return err;
994 }
995
996 void btrfs_drop_inode(struct inode *inode)
997 {
998         if (!BTRFS_I(inode)->ordered_trans || inode->i_nlink) {
999                 generic_drop_inode(inode);
1000                 return;
1001         }
1002         /* FIXME, make sure this delete actually ends up in the transaction */
1003         btrfs_del_ordered_inode(inode);
1004         generic_drop_inode(inode);
1005 }
1006
1007 void btrfs_delete_inode(struct inode *inode)
1008 {
1009         struct btrfs_trans_handle *trans;
1010         struct btrfs_root *root = BTRFS_I(inode)->root;
1011         unsigned long nr;
1012         int ret;
1013
1014         truncate_inode_pages(&inode->i_data, 0);
1015         if (is_bad_inode(inode)) {
1016                 goto no_delete;
1017         }
1018
1019         inode->i_size = 0;
1020         mutex_lock(&root->fs_info->fs_mutex);
1021         trans = btrfs_start_transaction(root, 1);
1022
1023         btrfs_set_trans_block_group(trans, inode);
1024         ret = btrfs_truncate_in_trans(trans, root, inode);
1025         if (ret)
1026                 goto no_delete_lock;
1027         ret = btrfs_delete_xattrs(trans, root, inode);
1028         if (ret)
1029                 goto no_delete_lock;
1030         ret = btrfs_free_inode(trans, root, inode);
1031         if (ret)
1032                 goto no_delete_lock;
1033         nr = trans->blocks_used;
1034
1035         btrfs_end_transaction(trans, root);
1036         mutex_unlock(&root->fs_info->fs_mutex);
1037         btrfs_btree_balance_dirty(root, nr);
1038         btrfs_throttle(root);
1039         return;
1040
1041 no_delete_lock:
1042         nr = trans->blocks_used;
1043         btrfs_end_transaction(trans, root);
1044         mutex_unlock(&root->fs_info->fs_mutex);
1045         btrfs_btree_balance_dirty(root, nr);
1046         btrfs_throttle(root);
1047 no_delete:
1048         clear_inode(inode);
1049 }
1050
1051 /*
1052  * this returns the key found in the dir entry in the location pointer.
1053  * If no dir entries were found, location->objectid is 0.
1054  */
1055 static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
1056                                struct btrfs_key *location)
1057 {
1058         const char *name = dentry->d_name.name;
1059         int namelen = dentry->d_name.len;
1060         struct btrfs_dir_item *di;
1061         struct btrfs_path *path;
1062         struct btrfs_root *root = BTRFS_I(dir)->root;
1063         int ret = 0;
1064
1065         if (namelen == 1 && strcmp(name, ".") == 0) {
1066                 location->objectid = dir->i_ino;
1067                 location->type = BTRFS_INODE_ITEM_KEY;
1068                 location->offset = 0;
1069                 return 0;
1070         }
1071         path = btrfs_alloc_path();
1072         BUG_ON(!path);
1073
1074         if (namelen == 2 && strcmp(name, "..") == 0) {
1075                 struct btrfs_key key;
1076                 struct extent_buffer *leaf;
1077                 u32 nritems;
1078                 int slot;
1079
1080                 key.objectid = dir->i_ino;
1081                 btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY);
1082                 key.offset = 0;
1083                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1084                 BUG_ON(ret == 0);
1085                 ret = 0;
1086
1087                 leaf = path->nodes[0];
1088                 slot = path->slots[0];
1089                 nritems = btrfs_header_nritems(leaf);
1090                 if (slot >= nritems)
1091                         goto out_err;
1092
1093                 btrfs_item_key_to_cpu(leaf, &key, slot);
1094                 if (key.objectid != dir->i_ino ||
1095                     key.type != BTRFS_INODE_REF_KEY) {
1096                         goto out_err;
1097                 }
1098                 location->objectid = key.offset;
1099                 location->type = BTRFS_INODE_ITEM_KEY;
1100                 location->offset = 0;
1101                 goto out;
1102         }
1103
1104         di = btrfs_lookup_dir_item(NULL, root, path, dir->i_ino, name,
1105                                     namelen, 0);
1106         if (IS_ERR(di))
1107                 ret = PTR_ERR(di);
1108         if (!di || IS_ERR(di)) {
1109                 goto out_err;
1110         }
1111         btrfs_dir_item_key_to_cpu(path->nodes[0], di, location);
1112 out:
1113         btrfs_free_path(path);
1114         return ret;
1115 out_err:
1116         location->objectid = 0;
1117         goto out;
1118 }
1119
1120 /*
1121  * when we hit a tree root in a directory, the btrfs part of the inode
1122  * needs to be changed to reflect the root directory of the tree root.  This
1123  * is kind of like crossing a mount point.
1124  */
1125 static int fixup_tree_root_location(struct btrfs_root *root,
1126                              struct btrfs_key *location,
1127                              struct btrfs_root **sub_root,
1128                              struct dentry *dentry)
1129 {
1130         struct btrfs_path *path;
1131         struct btrfs_root_item *ri;
1132
1133         if (btrfs_key_type(location) != BTRFS_ROOT_ITEM_KEY)
1134                 return 0;
1135         if (location->objectid == BTRFS_ROOT_TREE_OBJECTID)
1136                 return 0;
1137
1138         path = btrfs_alloc_path();
1139         BUG_ON(!path);
1140         mutex_lock(&root->fs_info->fs_mutex);
1141
1142         *sub_root = btrfs_read_fs_root(root->fs_info, location,
1143                                         dentry->d_name.name,
1144                                         dentry->d_name.len);
1145         if (IS_ERR(*sub_root))
1146                 return PTR_ERR(*sub_root);
1147
1148         ri = &(*sub_root)->root_item;
1149         location->objectid = btrfs_root_dirid(ri);
1150         btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
1151         location->offset = 0;
1152
1153         btrfs_free_path(path);
1154         mutex_unlock(&root->fs_info->fs_mutex);
1155         return 0;
1156 }
1157
1158 static int btrfs_init_locked_inode(struct inode *inode, void *p)
1159 {
1160         struct btrfs_iget_args *args = p;
1161         inode->i_ino = args->ino;
1162         BTRFS_I(inode)->root = args->root;
1163         extent_map_tree_init(&BTRFS_I(inode)->extent_tree,
1164                              inode->i_mapping, GFP_NOFS);
1165         return 0;
1166 }
1167
1168 static int btrfs_find_actor(struct inode *inode, void *opaque)
1169 {
1170         struct btrfs_iget_args *args = opaque;
1171         return (args->ino == inode->i_ino &&
1172                 args->root == BTRFS_I(inode)->root);
1173 }
1174
1175 struct inode *btrfs_ilookup(struct super_block *s, u64 objectid,
1176                             u64 root_objectid)
1177 {
1178         struct btrfs_iget_args args;
1179         args.ino = objectid;
1180         args.root = btrfs_lookup_fs_root(btrfs_sb(s)->fs_info, root_objectid);
1181
1182         if (!args.root)
1183                 return NULL;
1184
1185         return ilookup5(s, objectid, btrfs_find_actor, (void *)&args);
1186 }
1187
1188 struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid,
1189                                 struct btrfs_root *root)
1190 {
1191         struct inode *inode;
1192         struct btrfs_iget_args args;
1193         args.ino = objectid;
1194         args.root = root;
1195
1196         inode = iget5_locked(s, objectid, btrfs_find_actor,
1197                              btrfs_init_locked_inode,
1198                              (void *)&args);
1199         return inode;
1200 }
1201
1202 static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
1203                                    struct nameidata *nd)
1204 {
1205         struct inode * inode;
1206         struct btrfs_inode *bi = BTRFS_I(dir);
1207         struct btrfs_root *root = bi->root;
1208         struct btrfs_root *sub_root = root;
1209         struct btrfs_key location;
1210         int ret;
1211
1212         if (dentry->d_name.len > BTRFS_NAME_LEN)
1213                 return ERR_PTR(-ENAMETOOLONG);
1214
1215         mutex_lock(&root->fs_info->fs_mutex);
1216         ret = btrfs_inode_by_name(dir, dentry, &location);
1217         mutex_unlock(&root->fs_info->fs_mutex);
1218
1219         if (ret < 0)
1220                 return ERR_PTR(ret);
1221
1222         inode = NULL;
1223         if (location.objectid) {
1224                 ret = fixup_tree_root_location(root, &location, &sub_root,
1225                                                 dentry);
1226                 if (ret < 0)
1227                         return ERR_PTR(ret);
1228                 if (ret > 0)
1229                         return ERR_PTR(-ENOENT);
1230                 inode = btrfs_iget_locked(dir->i_sb, location.objectid,
1231                                           sub_root);
1232                 if (!inode)
1233                         return ERR_PTR(-EACCES);
1234                 if (inode->i_state & I_NEW) {
1235                         /* the inode and parent dir are two different roots */
1236                         if (sub_root != root) {
1237                                 igrab(inode);
1238                                 sub_root->inode = inode;
1239                         }
1240                         BTRFS_I(inode)->root = sub_root;
1241                         memcpy(&BTRFS_I(inode)->location, &location,
1242                                sizeof(location));
1243                         btrfs_read_locked_inode(inode);
1244                         unlock_new_inode(inode);
1245                 }
1246         }
1247         return d_splice_alias(inode, dentry);
1248 }
1249
1250 static unsigned char btrfs_filetype_table[] = {
1251         DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
1252 };
1253
1254 static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
1255 {
1256         struct inode *inode = filp->f_dentry->d_inode;
1257         struct btrfs_root *root = BTRFS_I(inode)->root;
1258         struct btrfs_item *item;
1259         struct btrfs_dir_item *di;
1260         struct btrfs_key key;
1261         struct btrfs_key found_key;
1262         struct btrfs_path *path;
1263         int ret;
1264         u32 nritems;
1265         struct extent_buffer *leaf;
1266         int slot;
1267         int advance;
1268         unsigned char d_type;
1269         int over = 0;
1270         u32 di_cur;
1271         u32 di_total;
1272         u32 di_len;
1273         int key_type = BTRFS_DIR_INDEX_KEY;
1274         char tmp_name[32];
1275         char *name_ptr;
1276         int name_len;
1277
1278         /* FIXME, use a real flag for deciding about the key type */
1279         if (root->fs_info->tree_root == root)
1280                 key_type = BTRFS_DIR_ITEM_KEY;
1281
1282         /* special case for "." */
1283         if (filp->f_pos == 0) {
1284                 over = filldir(dirent, ".", 1,
1285                                1, inode->i_ino,
1286                                DT_DIR);
1287                 if (over)
1288                         return 0;
1289                 filp->f_pos = 1;
1290         }
1291
1292         mutex_lock(&root->fs_info->fs_mutex);
1293         key.objectid = inode->i_ino;
1294         path = btrfs_alloc_path();
1295         path->reada = 2;
1296
1297         /* special case for .., just use the back ref */
1298         if (filp->f_pos == 1) {
1299                 btrfs_set_key_type(&key, BTRFS_INODE_REF_KEY);
1300                 key.offset = 0;
1301                 ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1302                 BUG_ON(ret == 0);
1303                 leaf = path->nodes[0];
1304                 slot = path->slots[0];
1305                 nritems = btrfs_header_nritems(leaf);
1306                 if (slot >= nritems) {
1307                         btrfs_release_path(root, path);
1308                         goto read_dir_items;
1309                 }
1310                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
1311                 btrfs_release_path(root, path);
1312                 if (found_key.objectid != key.objectid ||
1313                     found_key.type != BTRFS_INODE_REF_KEY)
1314                         goto read_dir_items;
1315                 over = filldir(dirent, "..", 2,
1316                                2, found_key.offset, DT_DIR);
1317                 if (over)
1318                         goto nopos;
1319                 filp->f_pos = 2;
1320         }
1321
1322 read_dir_items:
1323         btrfs_set_key_type(&key, key_type);
1324         key.offset = filp->f_pos;
1325
1326         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1327         if (ret < 0)
1328                 goto err;
1329         advance = 0;
1330         while(1) {
1331                 leaf = path->nodes[0];
1332                 nritems = btrfs_header_nritems(leaf);
1333                 slot = path->slots[0];
1334                 if (advance || slot >= nritems) {
1335                         if (slot >= nritems -1) {
1336                                 ret = btrfs_next_leaf(root, path);
1337                                 if (ret)
1338                                         break;
1339                                 leaf = path->nodes[0];
1340                                 nritems = btrfs_header_nritems(leaf);
1341                                 slot = path->slots[0];
1342                         } else {
1343                                 slot++;
1344                                 path->slots[0]++;
1345                         }
1346                 }
1347                 advance = 1;
1348                 item = btrfs_item_nr(leaf, slot);
1349                 btrfs_item_key_to_cpu(leaf, &found_key, slot);
1350
1351                 if (found_key.objectid != key.objectid)
1352                         break;
1353                 if (btrfs_key_type(&found_key) != key_type)
1354                         break;
1355                 if (found_key.offset < filp->f_pos)
1356                         continue;
1357
1358                 filp->f_pos = found_key.offset;
1359                 advance = 1;
1360                 di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
1361                 di_cur = 0;
1362                 di_total = btrfs_item_size(leaf, item);
1363                 while(di_cur < di_total) {
1364                         struct btrfs_key location;
1365
1366                         name_len = btrfs_dir_name_len(leaf, di);
1367                         if (name_len < 32) {
1368                                 name_ptr = tmp_name;
1369                         } else {
1370                                 name_ptr = kmalloc(name_len, GFP_NOFS);
1371                                 BUG_ON(!name_ptr);
1372                         }
1373                         read_extent_buffer(leaf, name_ptr,
1374                                            (unsigned long)(di + 1), name_len);
1375
1376                         d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)];
1377                         btrfs_dir_item_key_to_cpu(leaf, di, &location);
1378                         over = filldir(dirent, name_ptr, name_len,
1379                                        found_key.offset,
1380                                        location.objectid,
1381                                        d_type);
1382
1383                         if (name_ptr != tmp_name)
1384                                 kfree(name_ptr);
1385
1386                         if (over)
1387                                 goto nopos;
1388                         di_len = btrfs_dir_name_len(leaf, di) +
1389                                 btrfs_dir_data_len(leaf, di) +sizeof(*di);
1390                         di_cur += di_len;
1391                         di = (struct btrfs_dir_item *)((char *)di + di_len);
1392                 }
1393         }
1394         filp->f_pos++;
1395 nopos:
1396         ret = 0;
1397 err:
1398         btrfs_release_path(root, path);
1399         btrfs_free_path(path);
1400         mutex_unlock(&root->fs_info->fs_mutex);
1401         return ret;
1402 }
1403
1404 int btrfs_write_inode(struct inode *inode, int wait)
1405 {
1406         struct btrfs_root *root = BTRFS_I(inode)->root;
1407         struct btrfs_trans_handle *trans;
1408         int ret = 0;
1409
1410         if (wait) {
1411                 mutex_lock(&root->fs_info->fs_mutex);
1412                 trans = btrfs_start_transaction(root, 1);
1413                 btrfs_set_trans_block_group(trans, inode);
1414                 ret = btrfs_commit_transaction(trans, root);
1415                 mutex_unlock(&root->fs_info->fs_mutex);
1416         }
1417         return ret;
1418 }
1419
1420 /*
1421  * This is somewhat expensive, updating the tree every time the
1422  * inode changes.  But, it is most likely to find the inode in cache.
1423  * FIXME, needs more benchmarking...there are no reasons other than performance
1424  * to keep or drop this code.
1425  */
1426 void btrfs_dirty_inode(struct inode *inode)
1427 {
1428         struct btrfs_root *root = BTRFS_I(inode)->root;
1429         struct btrfs_trans_handle *trans;
1430
1431         mutex_lock(&root->fs_info->fs_mutex);
1432         trans = btrfs_start_transaction(root, 1);
1433         btrfs_set_trans_block_group(trans, inode);
1434         btrfs_update_inode(trans, root, inode);
1435         btrfs_end_transaction(trans, root);
1436         mutex_unlock(&root->fs_info->fs_mutex);
1437 }
1438
1439 static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
1440                                      struct btrfs_root *root,
1441                                      u64 objectid,
1442                                      struct btrfs_block_group_cache *group,
1443                                      int mode)
1444 {
1445         struct inode *inode;
1446         struct btrfs_inode_item *inode_item;
1447         struct btrfs_key *location;
1448         struct btrfs_path *path;
1449         int ret;
1450         int owner;
1451
1452         path = btrfs_alloc_path();
1453         BUG_ON(!path);
1454
1455         inode = new_inode(root->fs_info->sb);
1456         if (!inode)
1457                 return ERR_PTR(-ENOMEM);
1458
1459         extent_map_tree_init(&BTRFS_I(inode)->extent_tree,
1460                              inode->i_mapping, GFP_NOFS);
1461         BTRFS_I(inode)->root = root;
1462
1463         if (mode & S_IFDIR)
1464                 owner = 0;
1465         else
1466                 owner = 1;
1467         group = btrfs_find_block_group(root, group, 0, 0, owner);
1468         BTRFS_I(inode)->block_group = group;
1469         BTRFS_I(inode)->flags = 0;
1470         ret = btrfs_insert_empty_inode(trans, root, path, objectid);
1471         if (ret)
1472                 goto fail;
1473
1474         inode->i_uid = current->fsuid;
1475         inode->i_gid = current->fsgid;
1476         inode->i_mode = mode;
1477         inode->i_ino = objectid;
1478         inode->i_blocks = 0;
1479         inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
1480         inode_item = btrfs_item_ptr(path->nodes[0], path->slots[0],
1481                                   struct btrfs_inode_item);
1482         fill_inode_item(path->nodes[0], inode_item, inode);
1483         btrfs_mark_buffer_dirty(path->nodes[0]);
1484         btrfs_free_path(path);
1485
1486         location = &BTRFS_I(inode)->location;
1487         location->objectid = objectid;
1488         location->offset = 0;
1489         btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
1490
1491         insert_inode_hash(inode);
1492         return inode;
1493 fail:
1494         btrfs_free_path(path);
1495         return ERR_PTR(ret);
1496 }
1497
1498 static inline u8 btrfs_inode_type(struct inode *inode)
1499 {
1500         return btrfs_type_by_mode[(inode->i_mode & S_IFMT) >> S_SHIFT];
1501 }
1502
1503 static int btrfs_add_link(struct btrfs_trans_handle *trans,
1504                             struct dentry *dentry, struct inode *inode)
1505 {
1506         int ret;
1507         struct btrfs_key key;
1508         struct btrfs_root *root = BTRFS_I(dentry->d_parent->d_inode)->root;
1509         struct inode *parent_inode;
1510
1511         key.objectid = inode->i_ino;
1512         btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
1513         key.offset = 0;
1514
1515         ret = btrfs_insert_dir_item(trans, root,
1516                                     dentry->d_name.name, dentry->d_name.len,
1517                                     dentry->d_parent->d_inode->i_ino,
1518                                     &key, btrfs_inode_type(inode));
1519         if (ret == 0) {
1520                 ret = btrfs_insert_inode_ref(trans, root,
1521                                      dentry->d_name.name,
1522                                      dentry->d_name.len,
1523                                      inode->i_ino,
1524                                      dentry->d_parent->d_inode->i_ino);
1525                 parent_inode = dentry->d_parent->d_inode;
1526                 parent_inode->i_size += dentry->d_name.len * 2;
1527                 parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME;
1528                 ret = btrfs_update_inode(trans, root,
1529                                          dentry->d_parent->d_inode);
1530         }
1531         return ret;
1532 }
1533
1534 static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
1535                             struct dentry *dentry, struct inode *inode)
1536 {
1537         int err = btrfs_add_link(trans, dentry, inode);
1538         if (!err) {
1539                 d_instantiate(dentry, inode);
1540                 return 0;
1541         }
1542         if (err > 0)
1543                 err = -EEXIST;
1544         return err;
1545 }
1546
1547 static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
1548                         int mode, dev_t rdev)
1549 {
1550         struct btrfs_trans_handle *trans;
1551         struct btrfs_root *root = BTRFS_I(dir)->root;
1552         struct inode *inode = NULL;
1553         int err;
1554         int drop_inode = 0;
1555         u64 objectid;
1556         unsigned long nr = 0;
1557
1558         if (!new_valid_dev(rdev))
1559                 return -EINVAL;
1560
1561         mutex_lock(&root->fs_info->fs_mutex);
1562         err = btrfs_check_free_space(root, 1, 0);
1563         if (err)
1564                 goto fail;
1565
1566         trans = btrfs_start_transaction(root, 1);
1567         btrfs_set_trans_block_group(trans, dir);
1568
1569         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
1570         if (err) {
1571                 err = -ENOSPC;
1572                 goto out_unlock;
1573         }
1574
1575         inode = btrfs_new_inode(trans, root, objectid,
1576                                 BTRFS_I(dir)->block_group, mode);
1577         err = PTR_ERR(inode);
1578         if (IS_ERR(inode))
1579                 goto out_unlock;
1580
1581         btrfs_set_trans_block_group(trans, inode);
1582         err = btrfs_add_nondir(trans, dentry, inode);
1583         if (err)
1584                 drop_inode = 1;
1585         else {
1586                 inode->i_op = &btrfs_special_inode_operations;
1587                 init_special_inode(inode, inode->i_mode, rdev);
1588                 btrfs_update_inode(trans, root, inode);
1589         }
1590         dir->i_sb->s_dirt = 1;
1591         btrfs_update_inode_block_group(trans, inode);
1592         btrfs_update_inode_block_group(trans, dir);
1593 out_unlock:
1594         nr = trans->blocks_used;
1595         btrfs_end_transaction(trans, root);
1596 fail:
1597         mutex_unlock(&root->fs_info->fs_mutex);
1598
1599         if (drop_inode) {
1600                 inode_dec_link_count(inode);
1601                 iput(inode);
1602         }
1603         btrfs_btree_balance_dirty(root, nr);
1604         btrfs_throttle(root);
1605         return err;
1606 }
1607
1608 static int btrfs_create(struct inode *dir, struct dentry *dentry,
1609                         int mode, struct nameidata *nd)
1610 {
1611         struct btrfs_trans_handle *trans;
1612         struct btrfs_root *root = BTRFS_I(dir)->root;
1613         struct inode *inode = NULL;
1614         int err;
1615         int drop_inode = 0;
1616         unsigned long nr = 0;
1617         u64 objectid;
1618
1619         mutex_lock(&root->fs_info->fs_mutex);
1620         err = btrfs_check_free_space(root, 1, 0);
1621         if (err)
1622                 goto fail;
1623         trans = btrfs_start_transaction(root, 1);
1624         btrfs_set_trans_block_group(trans, dir);
1625
1626         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
1627         if (err) {
1628                 err = -ENOSPC;
1629                 goto out_unlock;
1630         }
1631
1632         inode = btrfs_new_inode(trans, root, objectid,
1633                                 BTRFS_I(dir)->block_group, mode);
1634         err = PTR_ERR(inode);
1635         if (IS_ERR(inode))
1636                 goto out_unlock;
1637
1638         btrfs_set_trans_block_group(trans, inode);
1639         err = btrfs_add_nondir(trans, dentry, inode);
1640         if (err)
1641                 drop_inode = 1;
1642         else {
1643                 inode->i_mapping->a_ops = &btrfs_aops;
1644                 inode->i_fop = &btrfs_file_operations;
1645                 inode->i_op = &btrfs_file_inode_operations;
1646                 extent_map_tree_init(&BTRFS_I(inode)->extent_tree,
1647                                      inode->i_mapping, GFP_NOFS);
1648                 BTRFS_I(inode)->extent_tree.ops = &btrfs_extent_map_ops;
1649         }
1650         dir->i_sb->s_dirt = 1;
1651         btrfs_update_inode_block_group(trans, inode);
1652         btrfs_update_inode_block_group(trans, dir);
1653 out_unlock:
1654         nr = trans->blocks_used;
1655         btrfs_end_transaction(trans, root);
1656 fail:
1657         mutex_unlock(&root->fs_info->fs_mutex);
1658
1659         if (drop_inode) {
1660                 inode_dec_link_count(inode);
1661                 iput(inode);
1662         }
1663         btrfs_btree_balance_dirty(root, nr);
1664         btrfs_throttle(root);
1665         return err;
1666 }
1667
1668 static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
1669                       struct dentry *dentry)
1670 {
1671         struct btrfs_trans_handle *trans;
1672         struct btrfs_root *root = BTRFS_I(dir)->root;
1673         struct inode *inode = old_dentry->d_inode;
1674         unsigned long nr = 0;
1675         int err;
1676         int drop_inode = 0;
1677
1678         if (inode->i_nlink == 0)
1679                 return -ENOENT;
1680
1681 #if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
1682         inode->i_nlink++;
1683 #else
1684         inc_nlink(inode);
1685 #endif
1686         mutex_lock(&root->fs_info->fs_mutex);
1687         err = btrfs_check_free_space(root, 1, 0);
1688         if (err)
1689                 goto fail;
1690         trans = btrfs_start_transaction(root, 1);
1691
1692         btrfs_set_trans_block_group(trans, dir);
1693         atomic_inc(&inode->i_count);
1694         err = btrfs_add_nondir(trans, dentry, inode);
1695
1696         if (err)
1697                 drop_inode = 1;
1698
1699         dir->i_sb->s_dirt = 1;
1700         btrfs_update_inode_block_group(trans, dir);
1701         err = btrfs_update_inode(trans, root, inode);
1702
1703         if (err)
1704                 drop_inode = 1;
1705
1706         nr = trans->blocks_used;
1707         btrfs_end_transaction(trans, root);
1708 fail:
1709         mutex_unlock(&root->fs_info->fs_mutex);
1710
1711         if (drop_inode) {
1712                 inode_dec_link_count(inode);
1713                 iput(inode);
1714         }
1715         btrfs_btree_balance_dirty(root, nr);
1716         btrfs_throttle(root);
1717         return err;
1718 }
1719
1720 static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1721 {
1722         struct inode *inode;
1723         struct btrfs_trans_handle *trans;
1724         struct btrfs_root *root = BTRFS_I(dir)->root;
1725         int err = 0;
1726         int drop_on_err = 0;
1727         u64 objectid;
1728         unsigned long nr = 1;
1729
1730         mutex_lock(&root->fs_info->fs_mutex);
1731         err = btrfs_check_free_space(root, 1, 0);
1732         if (err)
1733                 goto out_unlock;
1734
1735         trans = btrfs_start_transaction(root, 1);
1736         btrfs_set_trans_block_group(trans, dir);
1737
1738         if (IS_ERR(trans)) {
1739                 err = PTR_ERR(trans);
1740                 goto out_unlock;
1741         }
1742
1743         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
1744         if (err) {
1745                 err = -ENOSPC;
1746                 goto out_unlock;
1747         }
1748
1749         inode = btrfs_new_inode(trans, root, objectid,
1750                                 BTRFS_I(dir)->block_group, S_IFDIR | mode);
1751         if (IS_ERR(inode)) {
1752                 err = PTR_ERR(inode);
1753                 goto out_fail;
1754         }
1755
1756         drop_on_err = 1;
1757         inode->i_op = &btrfs_dir_inode_operations;
1758         inode->i_fop = &btrfs_dir_file_operations;
1759         btrfs_set_trans_block_group(trans, inode);
1760
1761         inode->i_size = 0;
1762         err = btrfs_update_inode(trans, root, inode);
1763         if (err)
1764                 goto out_fail;
1765
1766         err = btrfs_add_link(trans, dentry, inode);
1767         if (err)
1768                 goto out_fail;
1769
1770         d_instantiate(dentry, inode);
1771         drop_on_err = 0;
1772         dir->i_sb->s_dirt = 1;
1773         btrfs_update_inode_block_group(trans, inode);
1774         btrfs_update_inode_block_group(trans, dir);
1775
1776 out_fail:
1777         nr = trans->blocks_used;
1778         btrfs_end_transaction(trans, root);
1779
1780 out_unlock:
1781         mutex_unlock(&root->fs_info->fs_mutex);
1782         if (drop_on_err)
1783                 iput(inode);
1784         btrfs_btree_balance_dirty(root, nr);
1785         btrfs_throttle(root);
1786         return err;
1787 }
1788
1789 struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
1790                                     size_t page_offset, u64 start, u64 end,
1791                                     int create)
1792 {
1793         int ret;
1794         int err = 0;
1795         u64 bytenr;
1796         u64 extent_start = 0;
1797         u64 extent_end = 0;
1798         u64 objectid = inode->i_ino;
1799         u32 found_type;
1800         int failed_insert = 0;
1801         struct btrfs_path *path;
1802         struct btrfs_root *root = BTRFS_I(inode)->root;
1803         struct btrfs_file_extent_item *item;
1804         struct extent_buffer *leaf;
1805         struct btrfs_key found_key;
1806         struct extent_map *em = NULL;
1807         struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
1808         struct btrfs_trans_handle *trans = NULL;
1809
1810         path = btrfs_alloc_path();
1811         BUG_ON(!path);
1812         mutex_lock(&root->fs_info->fs_mutex);
1813
1814 again:
1815         em = lookup_extent_mapping(em_tree, start, end);
1816         if (em) {
1817                 if (em->start > start) {
1818                         printk("get_extent start %Lu em start %Lu\n",
1819                                start, em->start);
1820                         WARN_ON(1);
1821                 }
1822                 goto out;
1823         }
1824         if (!em) {
1825                 em = alloc_extent_map(GFP_NOFS);
1826                 if (!em) {
1827                         err = -ENOMEM;
1828                         goto out;
1829                 }
1830                 em->start = EXTENT_MAP_HOLE;
1831                 em->end = EXTENT_MAP_HOLE;
1832         }
1833         em->bdev = inode->i_sb->s_bdev;
1834         ret = btrfs_lookup_file_extent(trans, root, path,
1835                                        objectid, start, trans != NULL);
1836         if (ret < 0) {
1837                 err = ret;
1838                 goto out;
1839         }
1840
1841         if (ret != 0) {
1842                 if (path->slots[0] == 0)
1843                         goto not_found;
1844                 path->slots[0]--;
1845         }
1846
1847         leaf = path->nodes[0];
1848         item = btrfs_item_ptr(leaf, path->slots[0],
1849                               struct btrfs_file_extent_item);
1850         /* are we inside the extent that was found? */
1851         btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
1852         found_type = btrfs_key_type(&found_key);
1853         if (found_key.objectid != objectid ||
1854             found_type != BTRFS_EXTENT_DATA_KEY) {
1855                 goto not_found;
1856         }
1857
1858         found_type = btrfs_file_extent_type(leaf, item);
1859         extent_start = found_key.offset;
1860         if (found_type == BTRFS_FILE_EXTENT_REG) {
1861                 extent_end = extent_start +
1862                        btrfs_file_extent_num_bytes(leaf, item);
1863                 err = 0;
1864                 if (start < extent_start || start >= extent_end) {
1865                         em->start = start;
1866                         if (start < extent_start) {
1867                                 if (end < extent_start)
1868                                         goto not_found;
1869                                 em->end = extent_end - 1;
1870                         } else {
1871                                 em->end = end;
1872                         }
1873                         goto not_found_em;
1874                 }
1875                 bytenr = btrfs_file_extent_disk_bytenr(leaf, item);
1876                 if (bytenr == 0) {
1877                         em->start = extent_start;
1878                         em->end = extent_end - 1;
1879                         em->block_start = EXTENT_MAP_HOLE;
1880                         em->block_end = EXTENT_MAP_HOLE;
1881                         goto insert;
1882                 }
1883                 bytenr += btrfs_file_extent_offset(leaf, item);
1884                 em->block_start = bytenr;
1885                 em->block_end = em->block_start +
1886                         btrfs_file_extent_num_bytes(leaf, item) - 1;
1887                 em->start = extent_start;
1888                 em->end = extent_end - 1;
1889                 goto insert;
1890         } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
1891                 unsigned long ptr;
1892                 char *map;
1893                 size_t size;
1894                 size_t extent_offset;
1895                 size_t copy_size;
1896
1897                 size = btrfs_file_extent_inline_len(leaf, btrfs_item_nr(leaf,
1898                                                     path->slots[0]));
1899                 extent_end = (extent_start + size - 1) |
1900                         ((u64)root->sectorsize - 1);
1901                 if (start < extent_start || start >= extent_end) {
1902                         em->start = start;
1903                         if (start < extent_start) {
1904                                 if (end < extent_start)
1905                                         goto not_found;
1906                                 em->end = extent_end;
1907                         } else {
1908                                 em->end = end;
1909                         }
1910                         goto not_found_em;
1911                 }
1912                 em->block_start = EXTENT_MAP_INLINE;
1913                 em->block_end = EXTENT_MAP_INLINE;
1914
1915                 if (!page) {
1916                         em->start = extent_start;
1917                         em->end = extent_start + size - 1;
1918                         goto out;
1919                 }
1920
1921                 extent_offset = ((u64)page->index << PAGE_CACHE_SHIFT) -
1922                         extent_start + page_offset;
1923                 copy_size = min_t(u64, PAGE_CACHE_SIZE - page_offset,
1924                                 size - extent_offset);
1925                 em->start = extent_start + extent_offset;
1926                 em->end = (em->start + copy_size -1) |
1927                         ((u64)root->sectorsize -1);
1928                 map = kmap(page);
1929                 ptr = btrfs_file_extent_inline_start(item) + extent_offset;
1930                 if (create == 0 && !PageUptodate(page)) {
1931                         read_extent_buffer(leaf, map + page_offset, ptr,
1932                                            copy_size);
1933                         flush_dcache_page(page);
1934                 } else if (create && PageUptodate(page)) {
1935                         if (!trans) {
1936                                 kunmap(page);
1937                                 free_extent_map(em);
1938                                 em = NULL;
1939                                 btrfs_release_path(root, path);
1940                                 trans = btrfs_start_transaction(root, 1);
1941                                 goto again;
1942                         }
1943                         write_extent_buffer(leaf, map + page_offset, ptr,
1944                                             copy_size);
1945                         btrfs_mark_buffer_dirty(leaf);
1946                 }
1947                 kunmap(page);
1948                 set_extent_uptodate(em_tree, em->start, em->end, GFP_NOFS);
1949                 goto insert;
1950         } else {
1951                 printk("unkknown found_type %d\n", found_type);
1952                 WARN_ON(1);
1953         }
1954 not_found:
1955         em->start = start;
1956         em->end = end;
1957 not_found_em:
1958         em->block_start = EXTENT_MAP_HOLE;
1959         em->block_end = EXTENT_MAP_HOLE;
1960 insert:
1961         btrfs_release_path(root, path);
1962         if (em->start > start || em->end < start) {
1963                 printk("bad extent! em: [%Lu %Lu] passed [%Lu %Lu]\n", em->start, em->end, start, end);
1964                 err = -EIO;
1965                 goto out;
1966         }
1967         ret = add_extent_mapping(em_tree, em);
1968         if (ret == -EEXIST) {
1969                 free_extent_map(em);
1970                 em = NULL;
1971                 if (0 && failed_insert == 1) {
1972                         btrfs_drop_extent_cache(inode, start, end);
1973                 }
1974                 failed_insert++;
1975                 if (failed_insert > 5) {
1976                         printk("failing to insert %Lu %Lu\n", start, end);
1977                         err = -EIO;
1978                         goto out;
1979                 }
1980                 goto again;
1981         }
1982         err = 0;
1983 out:
1984         btrfs_free_path(path);
1985         if (trans) {
1986                 ret = btrfs_end_transaction(trans, root);
1987                 if (!err)
1988                         err = ret;
1989         }
1990         mutex_unlock(&root->fs_info->fs_mutex);
1991         if (err) {
1992                 free_extent_map(em);
1993                 WARN_ON(1);
1994                 return ERR_PTR(err);
1995         }
1996         return em;
1997 }
1998
1999 static sector_t btrfs_bmap(struct address_space *mapping, sector_t iblock)
2000 {
2001         return extent_bmap(mapping, iblock, btrfs_get_extent);
2002 }
2003
2004 int btrfs_readpage(struct file *file, struct page *page)
2005 {
2006         struct extent_map_tree *tree;
2007         tree = &BTRFS_I(page->mapping->host)->extent_tree;
2008         return extent_read_full_page(tree, page, btrfs_get_extent);
2009 }
2010
2011 static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
2012 {
2013         struct extent_map_tree *tree;
2014
2015
2016         if (current->flags & PF_MEMALLOC) {
2017                 redirty_page_for_writepage(wbc, page);
2018                 unlock_page(page);
2019                 return 0;
2020         }
2021         tree = &BTRFS_I(page->mapping->host)->extent_tree;
2022         return extent_write_full_page(tree, page, btrfs_get_extent, wbc);
2023 }
2024
2025 static int btrfs_writepages(struct address_space *mapping,
2026                             struct writeback_control *wbc)
2027 {
2028         struct extent_map_tree *tree;
2029         tree = &BTRFS_I(mapping->host)->extent_tree;
2030         return extent_writepages(tree, mapping, btrfs_get_extent, wbc);
2031 }
2032
2033 static int
2034 btrfs_readpages(struct file *file, struct address_space *mapping,
2035                 struct list_head *pages, unsigned nr_pages)
2036 {
2037         struct extent_map_tree *tree;
2038         tree = &BTRFS_I(mapping->host)->extent_tree;
2039         return extent_readpages(tree, mapping, pages, nr_pages,
2040                                 btrfs_get_extent);
2041 }
2042
2043 static int btrfs_releasepage(struct page *page, gfp_t unused_gfp_flags)
2044 {
2045         struct extent_map_tree *tree;
2046         int ret;
2047
2048         tree = &BTRFS_I(page->mapping->host)->extent_tree;
2049         ret = try_release_extent_mapping(tree, page);
2050         if (ret == 1) {
2051                 ClearPagePrivate(page);
2052                 set_page_private(page, 0);
2053                 page_cache_release(page);
2054         }
2055         return ret;
2056 }
2057
2058 static void btrfs_invalidatepage(struct page *page, unsigned long offset)
2059 {
2060         struct extent_map_tree *tree;
2061
2062         tree = &BTRFS_I(page->mapping->host)->extent_tree;
2063         extent_invalidatepage(tree, page, offset);
2064         btrfs_releasepage(page, GFP_NOFS);
2065 }
2066
2067 /*
2068  * btrfs_page_mkwrite() is not allowed to change the file size as it gets
2069  * called from a page fault handler when a page is first dirtied. Hence we must
2070  * be careful to check for EOF conditions here. We set the page up correctly
2071  * for a written page which means we get ENOSPC checking when writing into
2072  * holes and correct delalloc and unwritten extent mapping on filesystems that
2073  * support these features.
2074  *
2075  * We are not allowed to take the i_mutex here so we have to play games to
2076  * protect against truncate races as the page could now be beyond EOF.  Because
2077  * vmtruncate() writes the inode size before removing pages, once we have the
2078  * page lock we can determine safely if the page is beyond EOF. If it is not
2079  * beyond EOF, then the page is guaranteed safe against truncation until we
2080  * unlock the page.
2081  */
2082 int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page)
2083 {
2084         struct inode *inode = fdentry(vma->vm_file)->d_inode;
2085         struct btrfs_root *root = BTRFS_I(inode)->root;
2086         unsigned long end;
2087         loff_t size;
2088         int ret;
2089         u64 page_start;
2090
2091         mutex_lock(&root->fs_info->fs_mutex);
2092         ret = btrfs_check_free_space(root, PAGE_CACHE_SIZE, 0);
2093         mutex_unlock(&root->fs_info->fs_mutex);
2094         if (ret)
2095                 goto out;
2096
2097         ret = -EINVAL;
2098
2099         lock_page(page);
2100         wait_on_page_writeback(page);
2101         size = i_size_read(inode);
2102         page_start = (u64)page->index << PAGE_CACHE_SHIFT;
2103
2104         if ((page->mapping != inode->i_mapping) ||
2105             (page_start > size)) {
2106                 /* page got truncated out from underneath us */
2107                 goto out_unlock;
2108         }
2109
2110         /* page is wholly or partially inside EOF */
2111         if (page_start + PAGE_CACHE_SIZE > size)
2112                 end = size & ~PAGE_CACHE_MASK;
2113         else
2114                 end = PAGE_CACHE_SIZE;
2115
2116         ret = btrfs_cow_one_page(inode, page, end);
2117
2118 out_unlock:
2119         unlock_page(page);
2120 out:
2121         return ret;
2122 }
2123
2124 static void btrfs_truncate(struct inode *inode)
2125 {
2126         struct btrfs_root *root = BTRFS_I(inode)->root;
2127         int ret;
2128         struct btrfs_trans_handle *trans;
2129         unsigned long nr;
2130
2131         if (!S_ISREG(inode->i_mode))
2132                 return;
2133         if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
2134                 return;
2135
2136         btrfs_truncate_page(inode->i_mapping, inode->i_size);
2137
2138         mutex_lock(&root->fs_info->fs_mutex);
2139         trans = btrfs_start_transaction(root, 1);
2140         btrfs_set_trans_block_group(trans, inode);
2141
2142         /* FIXME, add redo link to tree so we don't leak on crash */
2143         ret = btrfs_truncate_in_trans(trans, root, inode);
2144         btrfs_update_inode(trans, root, inode);
2145         nr = trans->blocks_used;
2146
2147         ret = btrfs_end_transaction(trans, root);
2148         BUG_ON(ret);
2149         mutex_unlock(&root->fs_info->fs_mutex);
2150         btrfs_btree_balance_dirty(root, nr);
2151         btrfs_throttle(root);
2152 }
2153
2154 static int noinline create_subvol(struct btrfs_root *root, char *name,
2155                                   int namelen)
2156 {
2157         struct btrfs_trans_handle *trans;
2158         struct btrfs_key key;
2159         struct btrfs_root_item root_item;
2160         struct btrfs_inode_item *inode_item;
2161         struct extent_buffer *leaf;
2162         struct btrfs_root *new_root = root;
2163         struct inode *inode;
2164         struct inode *dir;
2165         int ret;
2166         int err;
2167         u64 objectid;
2168         u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
2169         unsigned long nr = 1;
2170
2171         mutex_lock(&root->fs_info->fs_mutex);
2172         ret = btrfs_check_free_space(root, 1, 0);
2173         if (ret)
2174                 goto fail_commit;
2175
2176         trans = btrfs_start_transaction(root, 1);
2177         BUG_ON(!trans);
2178
2179         ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root,
2180                                        0, &objectid);
2181         if (ret)
2182                 goto fail;
2183
2184         leaf = __btrfs_alloc_free_block(trans, root, root->leafsize,
2185                                         objectid, trans->transid, 0, 0,
2186                                         0, 0);
2187         if (IS_ERR(leaf))
2188                 return PTR_ERR(leaf);
2189
2190         btrfs_set_header_nritems(leaf, 0);
2191         btrfs_set_header_level(leaf, 0);
2192         btrfs_set_header_bytenr(leaf, leaf->start);
2193         btrfs_set_header_generation(leaf, trans->transid);
2194         btrfs_set_header_owner(leaf, objectid);
2195
2196         write_extent_buffer(leaf, root->fs_info->fsid,
2197                             (unsigned long)btrfs_header_fsid(leaf),
2198                             BTRFS_FSID_SIZE);
2199         btrfs_mark_buffer_dirty(leaf);
2200
2201         inode_item = &root_item.inode;
2202         memset(inode_item, 0, sizeof(*inode_item));
2203         inode_item->generation = cpu_to_le64(1);
2204         inode_item->size = cpu_to_le64(3);
2205         inode_item->nlink = cpu_to_le32(1);
2206         inode_item->nblocks = cpu_to_le64(1);
2207         inode_item->mode = cpu_to_le32(S_IFDIR | 0755);
2208
2209         btrfs_set_root_bytenr(&root_item, leaf->start);
2210         btrfs_set_root_level(&root_item, 0);
2211         btrfs_set_root_refs(&root_item, 1);
2212         btrfs_set_root_used(&root_item, 0);
2213
2214         memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress));
2215         root_item.drop_level = 0;
2216
2217         free_extent_buffer(leaf);
2218         leaf = NULL;
2219
2220         btrfs_set_root_dirid(&root_item, new_dirid);
2221
2222         key.objectid = objectid;
2223         key.offset = 1;
2224         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
2225         ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
2226                                 &root_item);
2227         if (ret)
2228                 goto fail;
2229
2230         /*
2231          * insert the directory item
2232          */
2233         key.offset = (u64)-1;
2234         dir = root->fs_info->sb->s_root->d_inode;
2235         ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root,
2236                                     name, namelen, dir->i_ino, &key,
2237                                     BTRFS_FT_DIR);
2238         if (ret)
2239                 goto fail;
2240
2241         ret = btrfs_insert_inode_ref(trans, root->fs_info->tree_root,
2242                              name, namelen, objectid,
2243                              root->fs_info->sb->s_root->d_inode->i_ino);
2244         if (ret)
2245                 goto fail;
2246
2247         ret = btrfs_commit_transaction(trans, root);
2248         if (ret)
2249                 goto fail_commit;
2250
2251         new_root = btrfs_read_fs_root(root->fs_info, &key, name, namelen);
2252         BUG_ON(!new_root);
2253
2254         trans = btrfs_start_transaction(new_root, 1);
2255         BUG_ON(!trans);
2256
2257         inode = btrfs_new_inode(trans, new_root, new_dirid,
2258                                 BTRFS_I(dir)->block_group, S_IFDIR | 0700);
2259         if (IS_ERR(inode))
2260                 goto fail;
2261         inode->i_op = &btrfs_dir_inode_operations;
2262         inode->i_fop = &btrfs_dir_file_operations;
2263         new_root->inode = inode;
2264
2265         ret = btrfs_insert_inode_ref(trans, new_root, "..", 2, new_dirid,
2266                                      new_dirid);
2267         inode->i_nlink = 1;
2268         inode->i_size = 0;
2269         ret = btrfs_update_inode(trans, new_root, inode);
2270         if (ret)
2271                 goto fail;
2272 fail:
2273         nr = trans->blocks_used;
2274         err = btrfs_commit_transaction(trans, new_root);
2275         if (err && !ret)
2276                 ret = err;
2277 fail_commit:
2278         mutex_unlock(&root->fs_info->fs_mutex);
2279         btrfs_btree_balance_dirty(root, nr);
2280         btrfs_throttle(root);
2281         return ret;
2282 }
2283
2284 static int create_snapshot(struct btrfs_root *root, char *name, int namelen)
2285 {
2286         struct btrfs_pending_snapshot *pending_snapshot;
2287         struct btrfs_trans_handle *trans;
2288         int ret;
2289         int err;
2290         unsigned long nr = 0;
2291
2292         if (!root->ref_cows)
2293                 return -EINVAL;
2294
2295         mutex_lock(&root->fs_info->fs_mutex);
2296         ret = btrfs_check_free_space(root, 1, 0);
2297         if (ret)
2298                 goto fail_unlock;
2299
2300         pending_snapshot = kmalloc(sizeof(*pending_snapshot), GFP_NOFS);
2301         if (!pending_snapshot) {
2302                 ret = -ENOMEM;
2303                 goto fail_unlock;
2304         }
2305         pending_snapshot->name = kstrndup(name, namelen, GFP_NOFS);
2306         if (!pending_snapshot->name) {
2307                 ret = -ENOMEM;
2308                 kfree(pending_snapshot);
2309                 goto fail_unlock;
2310         }
2311         trans = btrfs_start_transaction(root, 1);
2312         BUG_ON(!trans);
2313
2314         pending_snapshot->root = root;
2315         list_add(&pending_snapshot->list,
2316                  &trans->transaction->pending_snapshots);
2317         ret = btrfs_update_inode(trans, root, root->inode);
2318         err = btrfs_commit_transaction(trans, root);
2319
2320 fail_unlock:
2321         mutex_unlock(&root->fs_info->fs_mutex);
2322         btrfs_btree_balance_dirty(root, nr);
2323         btrfs_throttle(root);
2324         return ret;
2325 }
2326
2327 unsigned long btrfs_force_ra(struct address_space *mapping,
2328                               struct file_ra_state *ra, struct file *file,
2329                               pgoff_t offset, pgoff_t last_index)
2330 {
2331         pgoff_t req_size;
2332
2333 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)
2334         req_size = last_index - offset + 1;
2335         offset = page_cache_readahead(mapping, ra, file, offset, req_size);
2336         return offset;
2337 #else
2338         req_size = min(last_index - offset + 1, (pgoff_t)128);
2339         page_cache_sync_readahead(mapping, ra, file, offset, req_size);
2340         return offset + req_size;
2341 #endif
2342 }
2343
2344 int btrfs_defrag_file(struct file *file) {
2345         struct inode *inode = fdentry(file)->d_inode;
2346         struct btrfs_root *root = BTRFS_I(inode)->root;
2347         struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
2348         struct page *page;
2349         unsigned long last_index;
2350         unsigned long ra_index = 0;
2351         u64 page_start;
2352         u64 page_end;
2353         u64 delalloc_start;
2354         u64 existing_delalloc;
2355         unsigned long i;
2356         int ret;
2357
2358         mutex_lock(&root->fs_info->fs_mutex);
2359         ret = btrfs_check_free_space(root, inode->i_size, 0);
2360         mutex_unlock(&root->fs_info->fs_mutex);
2361         if (ret)
2362                 return -ENOSPC;
2363
2364         mutex_lock(&inode->i_mutex);
2365         last_index = inode->i_size >> PAGE_CACHE_SHIFT;
2366         for (i = 0; i <= last_index; i++) {
2367                 if (i == ra_index) {
2368                         ra_index = btrfs_force_ra(inode->i_mapping,
2369                                                   &file->f_ra,
2370                                                   file, ra_index, last_index);
2371                 }
2372                 page = grab_cache_page(inode->i_mapping, i);
2373                 if (!page)
2374                         goto out_unlock;
2375                 if (!PageUptodate(page)) {
2376                         btrfs_readpage(NULL, page);
2377                         lock_page(page);
2378                         if (!PageUptodate(page)) {
2379                                 unlock_page(page);
2380                                 page_cache_release(page);
2381                                 goto out_unlock;
2382                         }
2383                 }
2384                 page_start = (u64)page->index << PAGE_CACHE_SHIFT;
2385                 page_end = page_start + PAGE_CACHE_SIZE - 1;
2386
2387                 lock_extent(em_tree, page_start, page_end, GFP_NOFS);
2388                 delalloc_start = page_start;
2389                 existing_delalloc =
2390                         count_range_bits(&BTRFS_I(inode)->extent_tree,
2391                                          &delalloc_start, page_end,
2392                                          PAGE_CACHE_SIZE, EXTENT_DELALLOC);
2393                 set_extent_delalloc(em_tree, page_start,
2394                                     page_end, GFP_NOFS);
2395
2396                 spin_lock(&root->fs_info->delalloc_lock);
2397                 root->fs_info->delalloc_bytes += PAGE_CACHE_SIZE -
2398                                                  existing_delalloc;
2399                 spin_unlock(&root->fs_info->delalloc_lock);
2400
2401                 unlock_extent(em_tree, page_start, page_end, GFP_NOFS);
2402                 set_page_dirty(page);
2403                 unlock_page(page);
2404                 page_cache_release(page);
2405                 balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1);
2406         }
2407
2408 out_unlock:
2409         mutex_unlock(&inode->i_mutex);
2410         return 0;
2411 }
2412
2413 static int btrfs_ioctl_resize(struct btrfs_root *root, void __user *arg)
2414 {
2415         u64 new_size;
2416         u64 old_size;
2417         struct btrfs_ioctl_vol_args *vol_args;
2418         struct btrfs_trans_handle *trans;
2419         char *sizestr;
2420         int ret = 0;
2421         int namelen;
2422         int mod = 0;
2423
2424         vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS);
2425
2426         if (!vol_args)
2427                 return -ENOMEM;
2428
2429         if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
2430                 ret = -EFAULT;
2431                 goto out;
2432         }
2433         namelen = strlen(vol_args->name);
2434         if (namelen > BTRFS_VOL_NAME_MAX) {
2435                 ret = -EINVAL;
2436                 goto out;
2437         }
2438
2439         sizestr = vol_args->name;
2440         if (!strcmp(sizestr, "max"))
2441                 new_size = root->fs_info->sb->s_bdev->bd_inode->i_size;
2442         else {
2443                 if (sizestr[0] == '-') {
2444                         mod = -1;
2445                         sizestr++;
2446                 } else if (sizestr[0] == '+') {
2447                         mod = 1;
2448                         sizestr++;
2449                 }
2450                 new_size = btrfs_parse_size(sizestr);
2451                 if (new_size == 0) {
2452                         ret = -EINVAL;
2453                         goto out;
2454                 }
2455         }
2456
2457         mutex_lock(&root->fs_info->fs_mutex);
2458         old_size = btrfs_super_total_bytes(&root->fs_info->super_copy);
2459
2460         if (mod < 0) {
2461                 if (new_size > old_size) {
2462                         ret = -EINVAL;
2463                         goto out_unlock;
2464                 }
2465                 new_size = old_size - new_size;
2466         } else if (mod > 0) {
2467                 new_size = old_size + new_size;
2468         }
2469
2470         if (new_size < 256 * 1024 * 1024) {
2471                 ret = -EINVAL;
2472                 goto out_unlock;
2473         }
2474         if (new_size > root->fs_info->sb->s_bdev->bd_inode->i_size) {
2475                 ret = -EFBIG;
2476                 goto out_unlock;
2477         }
2478
2479         do_div(new_size, root->sectorsize);
2480         new_size *= root->sectorsize;
2481
2482 printk("new size is %Lu\n", new_size);
2483         if (new_size > old_size) {
2484                 trans = btrfs_start_transaction(root, 1);
2485                 ret = btrfs_grow_extent_tree(trans, root, new_size);
2486                 btrfs_commit_transaction(trans, root);
2487         } else {
2488                 ret = btrfs_shrink_extent_tree(root, new_size);
2489         }
2490
2491 out_unlock:
2492         mutex_unlock(&root->fs_info->fs_mutex);
2493 out:
2494         kfree(vol_args);
2495         return ret;
2496 }
2497
2498 static int noinline btrfs_ioctl_snap_create(struct btrfs_root *root,
2499                                             void __user *arg)
2500 {
2501         struct btrfs_ioctl_vol_args *vol_args;
2502         struct btrfs_dir_item *di;
2503         struct btrfs_path *path;
2504         u64 root_dirid;
2505         int namelen;
2506         int ret;
2507
2508         vol_args = kmalloc(sizeof(*vol_args), GFP_NOFS);
2509
2510         if (!vol_args)
2511                 return -ENOMEM;
2512
2513         if (copy_from_user(vol_args, arg, sizeof(*vol_args))) {
2514                 ret = -EFAULT;
2515                 goto out;
2516         }
2517
2518         namelen = strlen(vol_args->name);
2519         if (namelen > BTRFS_VOL_NAME_MAX) {
2520                 ret = -EINVAL;
2521                 goto out;
2522         }
2523         if (strchr(vol_args->name, '/')) {
2524                 ret = -EINVAL;
2525                 goto out;
2526         }
2527
2528         path = btrfs_alloc_path();
2529         if (!path) {
2530                 ret = -ENOMEM;
2531                 goto out;
2532         }
2533
2534         root_dirid = root->fs_info->sb->s_root->d_inode->i_ino,
2535         mutex_lock(&root->fs_info->fs_mutex);
2536         di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root,
2537                             path, root_dirid,
2538                             vol_args->name, namelen, 0);
2539         mutex_unlock(&root->fs_info->fs_mutex);
2540         btrfs_free_path(path);
2541
2542         if (di && !IS_ERR(di)) {
2543                 ret = -EEXIST;
2544                 goto out;
2545         }
2546
2547         if (IS_ERR(di)) {
2548                 ret = PTR_ERR(di);
2549                 goto out;
2550         }
2551
2552         if (root == root->fs_info->tree_root)
2553                 ret = create_subvol(root, vol_args->name, namelen);
2554         else
2555                 ret = create_snapshot(root, vol_args->name, namelen);
2556 out:
2557         kfree(vol_args);
2558         return ret;
2559 }
2560
2561 static int btrfs_ioctl_defrag(struct file *file)
2562 {
2563         struct inode *inode = fdentry(file)->d_inode;
2564         struct btrfs_root *root = BTRFS_I(inode)->root;
2565
2566         switch (inode->i_mode & S_IFMT) {
2567         case S_IFDIR:
2568                 mutex_lock(&root->fs_info->fs_mutex);
2569                 btrfs_defrag_root(root, 0);
2570                 btrfs_defrag_root(root->fs_info->extent_root, 0);
2571                 mutex_unlock(&root->fs_info->fs_mutex);
2572                 break;
2573         case S_IFREG:
2574                 btrfs_defrag_file(file);
2575                 break;
2576         }
2577
2578         return 0;
2579 }
2580
2581 long btrfs_ioctl(struct file *file, unsigned int
2582                 cmd, unsigned long arg)
2583 {
2584         struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
2585
2586         switch (cmd) {
2587         case BTRFS_IOC_SNAP_CREATE:
2588                 return btrfs_ioctl_snap_create(root, (void __user *)arg);
2589         case BTRFS_IOC_DEFRAG:
2590                 return btrfs_ioctl_defrag(file);
2591         case BTRFS_IOC_RESIZE:
2592                 return btrfs_ioctl_resize(root, (void __user *)arg);
2593         }
2594
2595         return -ENOTTY;
2596 }
2597
2598 /*
2599  * Called inside transaction, so use GFP_NOFS
2600  */
2601 struct inode *btrfs_alloc_inode(struct super_block *sb)
2602 {
2603         struct btrfs_inode *ei;
2604
2605         ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS);
2606         if (!ei)
2607                 return NULL;
2608         ei->last_trans = 0;
2609         ei->ordered_trans = 0;
2610         return &ei->vfs_inode;
2611 }
2612
2613 void btrfs_destroy_inode(struct inode *inode)
2614 {
2615         WARN_ON(!list_empty(&inode->i_dentry));
2616         WARN_ON(inode->i_data.nrpages);
2617
2618         btrfs_drop_extent_cache(inode, 0, (u64)-1);
2619         kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
2620 }
2621
2622 #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
2623 static void init_once(struct kmem_cache * cachep, void *foo)
2624 #else
2625 static void init_once(void * foo, struct kmem_cache * cachep,
2626                       unsigned long flags)
2627 #endif
2628 {
2629         struct btrfs_inode *ei = (struct btrfs_inode *) foo;
2630
2631         inode_init_once(&ei->vfs_inode);
2632 }
2633
2634 void btrfs_destroy_cachep(void)
2635 {
2636         if (btrfs_inode_cachep)
2637                 kmem_cache_destroy(btrfs_inode_cachep);
2638         if (btrfs_trans_handle_cachep)
2639                 kmem_cache_destroy(btrfs_trans_handle_cachep);
2640         if (btrfs_transaction_cachep)
2641                 kmem_cache_destroy(btrfs_transaction_cachep);
2642         if (btrfs_bit_radix_cachep)
2643                 kmem_cache_destroy(btrfs_bit_radix_cachep);
2644         if (btrfs_path_cachep)
2645                 kmem_cache_destroy(btrfs_path_cachep);
2646 }
2647
2648 struct kmem_cache *btrfs_cache_create(const char *name, size_t size,
2649                                        unsigned long extra_flags,
2650 #if LINUX_VERSION_CODE > KERNEL_VERSION(2,6,23)
2651                                        void (*ctor)(struct kmem_cache *, void *)
2652 #else
2653                                        void (*ctor)(void *, struct kmem_cache *,
2654                                                     unsigned long)
2655 #endif
2656                                      )
2657 {
2658         return kmem_cache_create(name, size, 0, (SLAB_RECLAIM_ACCOUNT |
2659                                  SLAB_MEM_SPREAD | extra_flags), ctor
2660 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)
2661                                  ,NULL
2662 #endif
2663                                 );
2664 }
2665
2666 int btrfs_init_cachep(void)
2667 {
2668         btrfs_inode_cachep = btrfs_cache_create("btrfs_inode_cache",
2669                                           sizeof(struct btrfs_inode),
2670                                           0, init_once);
2671         if (!btrfs_inode_cachep)
2672                 goto fail;
2673         btrfs_trans_handle_cachep =
2674                         btrfs_cache_create("btrfs_trans_handle_cache",
2675                                            sizeof(struct btrfs_trans_handle),
2676                                            0, NULL);
2677         if (!btrfs_trans_handle_cachep)
2678                 goto fail;
2679         btrfs_transaction_cachep = btrfs_cache_create("btrfs_transaction_cache",
2680                                              sizeof(struct btrfs_transaction),
2681                                              0, NULL);
2682         if (!btrfs_transaction_cachep)
2683                 goto fail;
2684         btrfs_path_cachep = btrfs_cache_create("btrfs_path_cache",
2685                                          sizeof(struct btrfs_path),
2686                                          0, NULL);
2687         if (!btrfs_path_cachep)
2688                 goto fail;
2689         btrfs_bit_radix_cachep = btrfs_cache_create("btrfs_radix", 256,
2690                                               SLAB_DESTROY_BY_RCU, NULL);
2691         if (!btrfs_bit_radix_cachep)
2692                 goto fail;
2693         return 0;
2694 fail:
2695         btrfs_destroy_cachep();
2696         return -ENOMEM;
2697 }
2698
2699 static int btrfs_getattr(struct vfsmount *mnt,
2700                          struct dentry *dentry, struct kstat *stat)
2701 {
2702         struct inode *inode = dentry->d_inode;
2703         generic_fillattr(inode, stat);
2704         stat->blksize = PAGE_CACHE_SIZE;
2705         return 0;
2706 }
2707
2708 static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry,
2709                            struct inode * new_dir,struct dentry *new_dentry)
2710 {
2711         struct btrfs_trans_handle *trans;
2712         struct btrfs_root *root = BTRFS_I(old_dir)->root;
2713         struct inode *new_inode = new_dentry->d_inode;
2714         struct inode *old_inode = old_dentry->d_inode;
2715         struct timespec ctime = CURRENT_TIME;
2716         struct btrfs_path *path;
2717         int ret;
2718
2719         if (S_ISDIR(old_inode->i_mode) && new_inode &&
2720             new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) {
2721                 return -ENOTEMPTY;
2722         }
2723
2724         mutex_lock(&root->fs_info->fs_mutex);
2725         ret = btrfs_check_free_space(root, 1, 0);
2726         if (ret)
2727                 goto out_unlock;
2728
2729         trans = btrfs_start_transaction(root, 1);
2730
2731         btrfs_set_trans_block_group(trans, new_dir);
2732         path = btrfs_alloc_path();
2733         if (!path) {
2734                 ret = -ENOMEM;
2735                 goto out_fail;
2736         }
2737
2738         old_dentry->d_inode->i_nlink++;
2739         old_dir->i_ctime = old_dir->i_mtime = ctime;
2740         new_dir->i_ctime = new_dir->i_mtime = ctime;
2741         old_inode->i_ctime = ctime;
2742
2743         ret = btrfs_unlink_trans(trans, root, old_dir, old_dentry);
2744         if (ret)
2745                 goto out_fail;
2746
2747         if (new_inode) {
2748                 new_inode->i_ctime = CURRENT_TIME;
2749                 ret = btrfs_unlink_trans(trans, root, new_dir, new_dentry);
2750                 if (ret)
2751                         goto out_fail;
2752         }
2753         ret = btrfs_add_link(trans, new_dentry, old_inode);
2754         if (ret)
2755                 goto out_fail;
2756
2757 out_fail:
2758         btrfs_free_path(path);
2759         btrfs_end_transaction(trans, root);
2760 out_unlock:
2761         mutex_unlock(&root->fs_info->fs_mutex);
2762         return ret;
2763 }
2764
2765 static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
2766                          const char *symname)
2767 {
2768         struct btrfs_trans_handle *trans;
2769         struct btrfs_root *root = BTRFS_I(dir)->root;
2770         struct btrfs_path *path;
2771         struct btrfs_key key;
2772         struct inode *inode = NULL;
2773         int err;
2774         int drop_inode = 0;
2775         u64 objectid;
2776         int name_len;
2777         int datasize;
2778         unsigned long ptr;
2779         struct btrfs_file_extent_item *ei;
2780         struct extent_buffer *leaf;
2781         unsigned long nr = 0;
2782
2783         name_len = strlen(symname) + 1;
2784         if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root))
2785                 return -ENAMETOOLONG;
2786
2787         mutex_lock(&root->fs_info->fs_mutex);
2788         err = btrfs_check_free_space(root, 1, 0);
2789         if (err)
2790                 goto out_fail;
2791
2792         trans = btrfs_start_transaction(root, 1);
2793         btrfs_set_trans_block_group(trans, dir);
2794
2795         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
2796         if (err) {
2797                 err = -ENOSPC;
2798                 goto out_unlock;
2799         }
2800
2801         inode = btrfs_new_inode(trans, root, objectid,
2802                                 BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO);
2803         err = PTR_ERR(inode);
2804         if (IS_ERR(inode))
2805                 goto out_unlock;
2806
2807         btrfs_set_trans_block_group(trans, inode);
2808         err = btrfs_add_nondir(trans, dentry, inode);
2809         if (err)
2810                 drop_inode = 1;
2811         else {
2812                 inode->i_mapping->a_ops = &btrfs_aops;
2813                 inode->i_fop = &btrfs_file_operations;
2814                 inode->i_op = &btrfs_file_inode_operations;
2815                 extent_map_tree_init(&BTRFS_I(inode)->extent_tree,
2816                                      inode->i_mapping, GFP_NOFS);
2817                 BTRFS_I(inode)->extent_tree.ops = &btrfs_extent_map_ops;
2818         }
2819         dir->i_sb->s_dirt = 1;
2820         btrfs_update_inode_block_group(trans, inode);
2821         btrfs_update_inode_block_group(trans, dir);
2822         if (drop_inode)
2823                 goto out_unlock;
2824
2825         path = btrfs_alloc_path();
2826         BUG_ON(!path);
2827         key.objectid = inode->i_ino;
2828         key.offset = 0;
2829         btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
2830         datasize = btrfs_file_extent_calc_inline_size(name_len);
2831         err = btrfs_insert_empty_item(trans, root, path, &key,
2832                                       datasize);
2833         if (err) {
2834                 drop_inode = 1;
2835                 goto out_unlock;
2836         }
2837         leaf = path->nodes[0];
2838         ei = btrfs_item_ptr(leaf, path->slots[0],
2839                             struct btrfs_file_extent_item);
2840         btrfs_set_file_extent_generation(leaf, ei, trans->transid);
2841         btrfs_set_file_extent_type(leaf, ei,
2842                                    BTRFS_FILE_EXTENT_INLINE);
2843         ptr = btrfs_file_extent_inline_start(ei);
2844         write_extent_buffer(leaf, symname, ptr, name_len);
2845         btrfs_mark_buffer_dirty(leaf);
2846         btrfs_free_path(path);
2847
2848         inode->i_op = &btrfs_symlink_inode_operations;
2849         inode->i_mapping->a_ops = &btrfs_symlink_aops;
2850         inode->i_size = name_len - 1;
2851         err = btrfs_update_inode(trans, root, inode);
2852         if (err)
2853                 drop_inode = 1;
2854
2855 out_unlock:
2856         nr = trans->blocks_used;
2857         btrfs_end_transaction(trans, root);
2858 out_fail:
2859         mutex_unlock(&root->fs_info->fs_mutex);
2860         if (drop_inode) {
2861                 inode_dec_link_count(inode);
2862                 iput(inode);
2863         }
2864         btrfs_btree_balance_dirty(root, nr);
2865         btrfs_throttle(root);
2866         return err;
2867 }
2868 static int btrfs_permission(struct inode *inode, int mask,
2869                             struct nameidata *nd)
2870 {
2871         if (btrfs_test_flag(inode, READONLY) && (mask & MAY_WRITE))
2872                 return -EACCES;
2873         return generic_permission(inode, mask, NULL);
2874 }
2875
2876 static struct inode_operations btrfs_dir_inode_operations = {
2877         .lookup         = btrfs_lookup,
2878         .create         = btrfs_create,
2879         .unlink         = btrfs_unlink,
2880         .link           = btrfs_link,
2881         .mkdir          = btrfs_mkdir,
2882         .rmdir          = btrfs_rmdir,
2883         .rename         = btrfs_rename,
2884         .symlink        = btrfs_symlink,
2885         .setattr        = btrfs_setattr,
2886         .mknod          = btrfs_mknod,
2887         .setxattr       = generic_setxattr,
2888         .getxattr       = generic_getxattr,
2889         .listxattr      = btrfs_listxattr,
2890         .removexattr    = generic_removexattr,
2891         .permission     = btrfs_permission,
2892 };
2893 static struct inode_operations btrfs_dir_ro_inode_operations = {
2894         .lookup         = btrfs_lookup,
2895         .permission     = btrfs_permission,
2896 };
2897 static struct file_operations btrfs_dir_file_operations = {
2898         .llseek         = generic_file_llseek,
2899         .read           = generic_read_dir,
2900         .readdir        = btrfs_readdir,
2901         .unlocked_ioctl = btrfs_ioctl,
2902 #ifdef CONFIG_COMPAT
2903         .compat_ioctl   = btrfs_ioctl,
2904 #endif
2905 };
2906
2907 static struct extent_map_ops btrfs_extent_map_ops = {
2908         .fill_delalloc = run_delalloc_range,
2909         .writepage_io_hook = btrfs_writepage_io_hook,
2910         .readpage_io_hook = btrfs_readpage_io_hook,
2911         .readpage_end_io_hook = btrfs_readpage_end_io_hook,
2912 };
2913
2914 static struct address_space_operations btrfs_aops = {
2915         .readpage       = btrfs_readpage,
2916         .writepage      = btrfs_writepage,
2917         .writepages     = btrfs_writepages,
2918         .readpages      = btrfs_readpages,
2919         .sync_page      = block_sync_page,
2920         .bmap           = btrfs_bmap,
2921         .invalidatepage = btrfs_invalidatepage,
2922         .releasepage    = btrfs_releasepage,
2923         .set_page_dirty = __set_page_dirty_nobuffers,
2924 };
2925
2926 static struct address_space_operations btrfs_symlink_aops = {
2927         .readpage       = btrfs_readpage,
2928         .writepage      = btrfs_writepage,
2929         .invalidatepage = btrfs_invalidatepage,
2930         .releasepage    = btrfs_releasepage,
2931 };
2932
2933 static struct inode_operations btrfs_file_inode_operations = {
2934         .truncate       = btrfs_truncate,
2935         .getattr        = btrfs_getattr,
2936         .setattr        = btrfs_setattr,
2937         .setxattr       = generic_setxattr,
2938         .getxattr       = generic_getxattr,
2939         .listxattr      = btrfs_listxattr,
2940         .removexattr    = generic_removexattr,
2941         .permission     = btrfs_permission,
2942 };
2943 static struct inode_operations btrfs_special_inode_operations = {
2944         .getattr        = btrfs_getattr,
2945         .setattr        = btrfs_setattr,
2946         .permission     = btrfs_permission,
2947 };
2948 static struct inode_operations btrfs_symlink_inode_operations = {
2949         .readlink       = generic_readlink,
2950         .follow_link    = page_follow_link_light,
2951         .put_link       = page_put_link,
2952         .permission     = btrfs_permission,
2953 };