]> git.karo-electronics.de Git - mv-sheeva.git/blob - fs/btrfs/inode.c
Btrfs: split up super.c
[mv-sheeva.git] / fs / btrfs / inode.c
1 #include <linux/module.h>
2 #include <linux/buffer_head.h>
3 #include <linux/fs.h>
4 #include <linux/pagemap.h>
5 #include <linux/highmem.h>
6 #include <linux/time.h>
7 #include <linux/init.h>
8 #include <linux/string.h>
9 #include <linux/smp_lock.h>
10 #include <linux/backing-dev.h>
11 #include <linux/mpage.h>
12 #include <linux/swap.h>
13 #include <linux/writeback.h>
14 #include <linux/statfs.h>
15 #include <linux/compat.h>
16 #include "ctree.h"
17 #include "disk-io.h"
18 #include "transaction.h"
19 #include "btrfs_inode.h"
20 #include "ioctl.h"
21 #include "print-tree.h"
22
/*
 * lookup key handed to iget5_locked() by btrfs_iget_locked().  An inode is
 * matched on both fields by btrfs_find_actor() and both are copied into a
 * new inode by btrfs_init_locked_inode().
 */
struct btrfs_iget_args {
        u64 ino;                        /* matched against inode->i_ino */
        struct btrfs_root *root;        /* matched against BTRFS_I(inode)->root */
};
27
/*
 * forward declarations of the operation tables assigned to inodes in
 * btrfs_read_locked_inode().  Their definitions are not in this part of
 * the file -- presumably further down; confirm.
 */
static struct inode_operations btrfs_dir_inode_operations;
static struct inode_operations btrfs_symlink_inode_operations;
static struct inode_operations btrfs_dir_ro_inode_operations;
static struct inode_operations btrfs_file_inode_operations;
static struct address_space_operations btrfs_aops;
static struct address_space_operations btrfs_symlink_aops;
static struct file_operations btrfs_dir_file_operations;

/*
 * slab cache pointers.  Only btrfs_inode_cachep is private to this file;
 * the others have external linkage.  NOTE(review): where they are created
 * and destroyed is not visible here.
 */
static struct kmem_cache *btrfs_inode_cachep;
struct kmem_cache *btrfs_trans_handle_cachep;
struct kmem_cache *btrfs_transaction_cachep;
struct kmem_cache *btrfs_bit_radix_cachep;
struct kmem_cache *btrfs_path_cachep;
41
/*
 * translation table from the file type bits of an inode mode to the
 * BTRFS_FT_* directory entry type.  The index is (mode & S_IFMT) >> S_SHIFT;
 * slots without an initializer are zero.
 */
#define S_SHIFT 12
static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
        [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
        [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
        [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
        [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
        [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
        [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
        [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
};
52
53 void btrfs_read_locked_inode(struct inode *inode)
54 {
55         struct btrfs_path *path;
56         struct btrfs_inode_item *inode_item;
57         struct btrfs_root *root = BTRFS_I(inode)->root;
58         struct btrfs_key location;
59         u64 alloc_group_block;
60         int ret;
61
62         path = btrfs_alloc_path();
63         BUG_ON(!path);
64         btrfs_init_path(path);
65         mutex_lock(&root->fs_info->fs_mutex);
66
67         memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
68         ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
69         if (ret) {
70                 btrfs_free_path(path);
71                 goto make_bad;
72         }
73         inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
74                                   path->slots[0],
75                                   struct btrfs_inode_item);
76
77         inode->i_mode = btrfs_inode_mode(inode_item);
78         inode->i_nlink = btrfs_inode_nlink(inode_item);
79         inode->i_uid = btrfs_inode_uid(inode_item);
80         inode->i_gid = btrfs_inode_gid(inode_item);
81         inode->i_size = btrfs_inode_size(inode_item);
82         inode->i_atime.tv_sec = btrfs_timespec_sec(&inode_item->atime);
83         inode->i_atime.tv_nsec = btrfs_timespec_nsec(&inode_item->atime);
84         inode->i_mtime.tv_sec = btrfs_timespec_sec(&inode_item->mtime);
85         inode->i_mtime.tv_nsec = btrfs_timespec_nsec(&inode_item->mtime);
86         inode->i_ctime.tv_sec = btrfs_timespec_sec(&inode_item->ctime);
87         inode->i_ctime.tv_nsec = btrfs_timespec_nsec(&inode_item->ctime);
88         inode->i_blocks = btrfs_inode_nblocks(inode_item);
89         inode->i_generation = btrfs_inode_generation(inode_item);
90         alloc_group_block = btrfs_inode_block_group(inode_item);
91         BTRFS_I(inode)->block_group = btrfs_lookup_block_group(root->fs_info,
92                                                        alloc_group_block);
93
94         btrfs_free_path(path);
95         inode_item = NULL;
96
97         mutex_unlock(&root->fs_info->fs_mutex);
98
99         switch (inode->i_mode & S_IFMT) {
100 #if 0
101         default:
102                 init_special_inode(inode, inode->i_mode,
103                                    btrfs_inode_rdev(inode_item));
104                 break;
105 #endif
106         case S_IFREG:
107                 inode->i_mapping->a_ops = &btrfs_aops;
108                 inode->i_fop = &btrfs_file_operations;
109                 inode->i_op = &btrfs_file_inode_operations;
110                 break;
111         case S_IFDIR:
112                 inode->i_fop = &btrfs_dir_file_operations;
113                 if (root == root->fs_info->tree_root)
114                         inode->i_op = &btrfs_dir_ro_inode_operations;
115                 else
116                         inode->i_op = &btrfs_dir_inode_operations;
117                 break;
118         case S_IFLNK:
119                 inode->i_op = &btrfs_symlink_inode_operations;
120                 inode->i_mapping->a_ops = &btrfs_symlink_aops;
121                 break;
122         }
123         return;
124
125 make_bad:
126         btrfs_release_path(root, path);
127         btrfs_free_path(path);
128         mutex_unlock(&root->fs_info->fs_mutex);
129         make_bad_inode(inode);
130 }
131
132 static void fill_inode_item(struct btrfs_inode_item *item,
133                             struct inode *inode)
134 {
135         btrfs_set_inode_uid(item, inode->i_uid);
136         btrfs_set_inode_gid(item, inode->i_gid);
137         btrfs_set_inode_size(item, inode->i_size);
138         btrfs_set_inode_mode(item, inode->i_mode);
139         btrfs_set_inode_nlink(item, inode->i_nlink);
140         btrfs_set_timespec_sec(&item->atime, inode->i_atime.tv_sec);
141         btrfs_set_timespec_nsec(&item->atime, inode->i_atime.tv_nsec);
142         btrfs_set_timespec_sec(&item->mtime, inode->i_mtime.tv_sec);
143         btrfs_set_timespec_nsec(&item->mtime, inode->i_mtime.tv_nsec);
144         btrfs_set_timespec_sec(&item->ctime, inode->i_ctime.tv_sec);
145         btrfs_set_timespec_nsec(&item->ctime, inode->i_ctime.tv_nsec);
146         btrfs_set_inode_nblocks(item, inode->i_blocks);
147         btrfs_set_inode_generation(item, inode->i_generation);
148         btrfs_set_inode_block_group(item,
149                                     BTRFS_I(inode)->block_group->key.objectid);
150 }
151
152 static int btrfs_update_inode(struct btrfs_trans_handle *trans,
153                               struct btrfs_root *root,
154                               struct inode *inode)
155 {
156         struct btrfs_inode_item *inode_item;
157         struct btrfs_path *path;
158         int ret;
159
160         path = btrfs_alloc_path();
161         BUG_ON(!path);
162         btrfs_init_path(path);
163         ret = btrfs_lookup_inode(trans, root, path,
164                                  &BTRFS_I(inode)->location, 1);
165         if (ret) {
166                 if (ret > 0)
167                         ret = -ENOENT;
168                 goto failed;
169         }
170
171         inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
172                                   path->slots[0],
173                                   struct btrfs_inode_item);
174
175         fill_inode_item(inode_item, inode);
176         btrfs_mark_buffer_dirty(path->nodes[0]);
177         ret = 0;
178 failed:
179         btrfs_release_path(root, path);
180         btrfs_free_path(path);
181         return ret;
182 }
183
184
185 static int btrfs_unlink_trans(struct btrfs_trans_handle *trans,
186                               struct btrfs_root *root,
187                               struct inode *dir,
188                               struct dentry *dentry)
189 {
190         struct btrfs_path *path;
191         const char *name = dentry->d_name.name;
192         int name_len = dentry->d_name.len;
193         int ret = 0;
194         u64 objectid;
195         struct btrfs_dir_item *di;
196
197         path = btrfs_alloc_path();
198         BUG_ON(!path);
199         btrfs_init_path(path);
200         di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino,
201                                     name, name_len, -1);
202         if (IS_ERR(di)) {
203                 ret = PTR_ERR(di);
204                 goto err;
205         }
206         if (!di) {
207                 ret = -ENOENT;
208                 goto err;
209         }
210         objectid = btrfs_disk_key_objectid(&di->location);
211         ret = btrfs_delete_one_dir_name(trans, root, path, di);
212         BUG_ON(ret);
213         btrfs_release_path(root, path);
214
215         di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino,
216                                          objectid, name, name_len, -1);
217         if (IS_ERR(di)) {
218                 ret = PTR_ERR(di);
219                 goto err;
220         }
221         if (!di) {
222                 ret = -ENOENT;
223                 goto err;
224         }
225         ret = btrfs_delete_one_dir_name(trans, root, path, di);
226         BUG_ON(ret);
227
228         dentry->d_inode->i_ctime = dir->i_ctime;
229 err:
230         btrfs_free_path(path);
231         if (!ret) {
232                 dir->i_size -= name_len * 2;
233                 btrfs_update_inode(trans, root, dir);
234                 drop_nlink(dentry->d_inode);
235                 btrfs_update_inode(trans, root, dentry->d_inode);
236                 dir->i_sb->s_dirt = 1;
237         }
238         return ret;
239 }
240
241 static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
242 {
243         struct btrfs_root *root;
244         struct btrfs_trans_handle *trans;
245         int ret;
246
247         root = BTRFS_I(dir)->root;
248         mutex_lock(&root->fs_info->fs_mutex);
249         trans = btrfs_start_transaction(root, 1);
250         btrfs_set_trans_block_group(trans, dir);
251         ret = btrfs_unlink_trans(trans, root, dir, dentry);
252         btrfs_end_transaction(trans, root);
253         mutex_unlock(&root->fs_info->fs_mutex);
254         btrfs_btree_balance_dirty(root);
255         return ret;
256 }
257
258 static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
259 {
260         struct inode *inode = dentry->d_inode;
261         int err;
262         int ret;
263         struct btrfs_root *root = BTRFS_I(dir)->root;
264         struct btrfs_path *path;
265         struct btrfs_key key;
266         struct btrfs_trans_handle *trans;
267         struct btrfs_key found_key;
268         int found_type;
269         struct btrfs_leaf *leaf;
270         char *goodnames = "..";
271
272         path = btrfs_alloc_path();
273         BUG_ON(!path);
274         btrfs_init_path(path);
275         mutex_lock(&root->fs_info->fs_mutex);
276         trans = btrfs_start_transaction(root, 1);
277         btrfs_set_trans_block_group(trans, dir);
278         key.objectid = inode->i_ino;
279         key.offset = (u64)-1;
280         key.flags = (u32)-1;
281         while(1) {
282                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
283                 if (ret < 0) {
284                         err = ret;
285                         goto out;
286                 }
287                 BUG_ON(ret == 0);
288                 if (path->slots[0] == 0) {
289                         err = -ENOENT;
290                         goto out;
291                 }
292                 path->slots[0]--;
293                 leaf = btrfs_buffer_leaf(path->nodes[0]);
294                 btrfs_disk_key_to_cpu(&found_key,
295                                       &leaf->items[path->slots[0]].key);
296                 found_type = btrfs_key_type(&found_key);
297                 if (found_key.objectid != inode->i_ino) {
298                         err = -ENOENT;
299                         goto out;
300                 }
301                 if ((found_type != BTRFS_DIR_ITEM_KEY &&
302                      found_type != BTRFS_DIR_INDEX_KEY) ||
303                     (!btrfs_match_dir_item_name(root, path, goodnames, 2) &&
304                     !btrfs_match_dir_item_name(root, path, goodnames, 1))) {
305                         err = -ENOTEMPTY;
306                         goto out;
307                 }
308                 ret = btrfs_del_item(trans, root, path);
309                 BUG_ON(ret);
310
311                 if (found_type == BTRFS_DIR_ITEM_KEY && found_key.offset == 1)
312                         break;
313                 btrfs_release_path(root, path);
314         }
315         ret = 0;
316         btrfs_release_path(root, path);
317
318         /* now the directory is empty */
319         err = btrfs_unlink_trans(trans, root, dir, dentry);
320         if (!err) {
321                 inode->i_size = 0;
322         }
323 out:
324         btrfs_release_path(root, path);
325         btrfs_free_path(path);
326         mutex_unlock(&root->fs_info->fs_mutex);
327         ret = btrfs_end_transaction(trans, root);
328         btrfs_btree_balance_dirty(root);
329         if (ret && !err)
330                 err = ret;
331         return err;
332 }
333
334 static int btrfs_free_inode(struct btrfs_trans_handle *trans,
335                             struct btrfs_root *root,
336                             struct inode *inode)
337 {
338         struct btrfs_path *path;
339         int ret;
340
341         clear_inode(inode);
342
343         path = btrfs_alloc_path();
344         BUG_ON(!path);
345         btrfs_init_path(path);
346         ret = btrfs_lookup_inode(trans, root, path,
347                                  &BTRFS_I(inode)->location, -1);
348         BUG_ON(ret);
349         ret = btrfs_del_item(trans, root, path);
350         BUG_ON(ret);
351         btrfs_free_path(path);
352         return ret;
353 }
354
355 /*
356  * truncates go from a high offset to a low offset.  So, walk
357  * from hi to lo in the node and issue readas.  Stop when you find
358  * keys from a different objectid
359  */
360 static void reada_truncate(struct btrfs_root *root, struct btrfs_path *path,
361                            u64 objectid)
362 {
363         struct btrfs_node *node;
364         int i;
365         int nritems;
366         u64 item_objectid;
367         u64 blocknr;
368         int slot;
369         int ret;
370
371         if (!path->nodes[1])
372                 return;
373         node = btrfs_buffer_node(path->nodes[1]);
374         slot = path->slots[1];
375         if (slot == 0)
376                 return;
377         nritems = btrfs_header_nritems(&node->header);
378         for (i = slot - 1; i >= 0; i--) {
379                 item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key);
380                 if (item_objectid != objectid)
381                         break;
382                 blocknr = btrfs_node_blockptr(node, i);
383                 ret = readahead_tree_block(root, blocknr);
384                 if (ret)
385                         break;
386         }
387 }
388
/*
 * this can truncate away extent items, csum items and directory items.
 * It starts at a high offset and removes keys until it can't find
 * any higher than i_size.
 *
 * csum items that cross the new i_size are truncated to the new size
 * as well.
 *
 * The caller sets inode->i_size to the new size before calling and must
 * supply a running transaction.  Returns 0, or the negative error from
 * btrfs_search_slot().  The superblock is marked dirty in either case.
 */
static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans,
                                   struct btrfs_root *root,
                                   struct inode *inode)
{
        int ret;
        struct btrfs_path *path;
        struct btrfs_key key;
        struct btrfs_disk_key *found_key;
        u32 found_type;
        struct btrfs_leaf *leaf;
        struct btrfs_file_extent_item *fi;
        u64 extent_start = 0;
        u64 extent_num_blocks = 0;
        u64 item_end = 0;
        int found_extent;
        int del_item;

        path = btrfs_alloc_path();
        BUG_ON(!path);
        /* FIXME, add redo link to tree so we don't leak on crash */
        /* start the search beyond any key this inode can have */
        key.objectid = inode->i_ino;
        key.offset = (u64)-1;
        key.flags = (u32)-1;
        while(1) {
                btrfs_init_path(path);
                fi = NULL;
                ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
                if (ret < 0) {
                        goto error;
                }
                if (ret > 0) {
                        /* no exact match, look at the item before the slot */
                        BUG_ON(path->slots[0] == 0);
                        path->slots[0]--;
                }
                reada_truncate(root, path, inode->i_ino);
                leaf = btrfs_buffer_leaf(path->nodes[0]);
                found_key = &leaf->items[path->slots[0]].key;
                found_type = btrfs_disk_key_type(found_key);

                /* done once we leave this inode or hit any other item type */
                if (btrfs_disk_key_objectid(found_key) != inode->i_ino)
                        break;
                if (found_type != BTRFS_CSUM_ITEM_KEY &&
                    found_type != BTRFS_DIR_ITEM_KEY &&
                    found_type != BTRFS_DIR_INDEX_KEY &&
                    found_type != BTRFS_EXTENT_DATA_KEY)
                        break;

                /*
                 * item_end is the key offset, extended by the extent
                 * length for extents that are not inline
                 */
                item_end = btrfs_disk_key_offset(found_key);
                if (found_type == BTRFS_EXTENT_DATA_KEY) {
                        fi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
                                            path->slots[0],
                                            struct btrfs_file_extent_item);
                        if (btrfs_file_extent_type(fi) !=
                            BTRFS_FILE_EXTENT_INLINE) {
                                item_end += btrfs_file_extent_num_blocks(fi) <<
                                                inode->i_blkbits;
                        }
                }
                if (found_type == BTRFS_CSUM_ITEM_KEY) {
                        ret = btrfs_csum_truncate(trans, root, path,
                                                  inode->i_size);
                        BUG_ON(ret);
                }
                if (item_end < inode->i_size) {
                        /*
                         * this item ends below the new size: retry the
                         * search with the next lower key type, or stop if
                         * the type is already 0
                         */
                        if (found_type) {
                                btrfs_set_key_type(&key, found_type - 1);
                                continue;
                        }
                        break;
                }
                /* items starting at or beyond i_size are deleted whole */
                if (btrfs_disk_key_offset(found_key) >= inode->i_size)
                        del_item = 1;
                else
                        del_item = 0;
                found_extent = 0;

                /* FIXME, shrink the extent if the ref count is only 1 */
                if (found_type == BTRFS_EXTENT_DATA_KEY &&
                           btrfs_file_extent_type(fi) !=
                           BTRFS_FILE_EXTENT_INLINE) {
                        u64 num_dec;
                        if (!del_item) {
                                /*
                                 * the extent straddles i_size: shrink its
                                 * block count to cover up to i_size, rounded
                                 * up to a full block
                                 */
                                u64 orig_num_blocks =
                                        btrfs_file_extent_num_blocks(fi);
                                extent_num_blocks = inode->i_size -
                                        btrfs_disk_key_offset(found_key) +
                                        root->blocksize - 1;
                                extent_num_blocks >>= inode->i_blkbits;
                                btrfs_set_file_extent_num_blocks(fi,
                                                         extent_num_blocks);
                                inode->i_blocks -= (orig_num_blocks -
                                        extent_num_blocks) << 3;
                                /*
                                 * NOTE(review): btrfs_update_inode() dirties
                                 * tree buffers with btrfs_mark_buffer_dirty();
                                 * confirm the plain helper is intended here
                                 */
                                mark_buffer_dirty(path->nodes[0]);
                        } else {
                                /*
                                 * remember the disk extent; it is freed
                                 * after the item is deleted and the path
                                 * released
                                 */
                                extent_start =
                                        btrfs_file_extent_disk_blocknr(fi);
                                extent_num_blocks =
                                        btrfs_file_extent_disk_num_blocks(fi);
                                /* FIXME blocksize != 4096 */
                                num_dec = btrfs_file_extent_num_blocks(fi) << 3;
                                /*
                                 * a disk blocknr of 0 is skipped -- presumably
                                 * a hole with nothing allocated; confirm
                                 */
                                if (extent_start != 0) {
                                        found_extent = 1;
                                        inode->i_blocks -= num_dec;
                                }
                        }
                }
                if (del_item) {
                        ret = btrfs_del_item(trans, root, path);
                        BUG_ON(ret);
                } else {
                        /* the item was kept, so nothing lower can go */
                        break;
                }
                btrfs_release_path(root, path);
                if (found_extent) {
                        ret = btrfs_free_extent(trans, root, extent_start,
                                                extent_num_blocks, 0);
                        BUG_ON(ret);
                }
        }
        ret = 0;
error:
        btrfs_release_path(root, path);
        btrfs_free_path(path);
        inode->i_sb->s_dirt = 1;
        return ret;
}
523
524 /*
525  * taken from block_truncate_page, but does cow as it zeros out
526  * any bytes left in the last page in the file.
527  */
528 static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
529 {
530         struct inode *inode = mapping->host;
531         unsigned blocksize = 1 << inode->i_blkbits;
532         pgoff_t index = from >> PAGE_CACHE_SHIFT;
533         unsigned offset = from & (PAGE_CACHE_SIZE-1);
534         struct page *page;
535         char *kaddr;
536         int ret = 0;
537         struct btrfs_root *root = BTRFS_I(inode)->root;
538         u64 alloc_hint = 0;
539         struct btrfs_key ins;
540         struct btrfs_trans_handle *trans;
541
542         if ((offset & (blocksize - 1)) == 0)
543                 goto out;
544
545         ret = -ENOMEM;
546         page = grab_cache_page(mapping, index);
547         if (!page)
548                 goto out;
549
550         if (!PageUptodate(page)) {
551                 ret = mpage_readpage(page, btrfs_get_block);
552                 lock_page(page);
553                 if (!PageUptodate(page)) {
554                         ret = -EIO;
555                         goto out;
556                 }
557         }
558         mutex_lock(&root->fs_info->fs_mutex);
559         trans = btrfs_start_transaction(root, 1);
560         btrfs_set_trans_block_group(trans, inode);
561
562         ret = btrfs_drop_extents(trans, root, inode,
563                                  page->index << PAGE_CACHE_SHIFT,
564                                  (page->index + 1) << PAGE_CACHE_SHIFT,
565                                  &alloc_hint);
566         BUG_ON(ret);
567         ret = btrfs_alloc_extent(trans, root, inode->i_ino, 1,
568                                  alloc_hint, (u64)-1, &ins, 1);
569         BUG_ON(ret);
570         ret = btrfs_insert_file_extent(trans, root, inode->i_ino,
571                                        page->index << PAGE_CACHE_SHIFT,
572                                        ins.objectid, 1, 1);
573         BUG_ON(ret);
574         SetPageChecked(page);
575         kaddr = kmap(page);
576         memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
577         flush_dcache_page(page);
578         btrfs_csum_file_block(trans, root, inode->i_ino,
579                               page->index << PAGE_CACHE_SHIFT,
580                               kaddr, PAGE_CACHE_SIZE);
581         kunmap(page);
582         btrfs_end_transaction(trans, root);
583         mutex_unlock(&root->fs_info->fs_mutex);
584
585         set_page_dirty(page);
586         unlock_page(page);
587         page_cache_release(page);
588 out:
589         return ret;
590 }
591
592 static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
593 {
594         struct inode *inode = dentry->d_inode;
595         int err;
596
597         err = inode_change_ok(inode, attr);
598         if (err)
599                 return err;
600
601         if (S_ISREG(inode->i_mode) &&
602             attr->ia_valid & ATTR_SIZE && attr->ia_size > inode->i_size) {
603                 struct btrfs_trans_handle *trans;
604                 struct btrfs_root *root = BTRFS_I(inode)->root;
605                 u64 mask = root->blocksize - 1;
606                 u64 pos = (inode->i_size + mask) & ~mask;
607                 u64 hole_size;
608
609                 if (attr->ia_size <= pos)
610                         goto out;
611
612                 btrfs_truncate_page(inode->i_mapping, inode->i_size);
613
614                 hole_size = (attr->ia_size - pos + mask) & ~mask;
615                 hole_size >>= inode->i_blkbits;
616
617                 mutex_lock(&root->fs_info->fs_mutex);
618                 trans = btrfs_start_transaction(root, 1);
619                 btrfs_set_trans_block_group(trans, inode);
620                 err = btrfs_insert_file_extent(trans, root, inode->i_ino,
621                                                pos, 0, 0, hole_size);
622                 BUG_ON(err);
623                 btrfs_end_transaction(trans, root);
624                 mutex_unlock(&root->fs_info->fs_mutex);
625         }
626 out:
627         err = inode_setattr(inode, attr);
628
629         return err;
630 }
631 void btrfs_delete_inode(struct inode *inode)
632 {
633         struct btrfs_trans_handle *trans;
634         struct btrfs_root *root = BTRFS_I(inode)->root;
635         int ret;
636
637         truncate_inode_pages(&inode->i_data, 0);
638         if (is_bad_inode(inode)) {
639                 goto no_delete;
640         }
641         inode->i_size = 0;
642         mutex_lock(&root->fs_info->fs_mutex);
643         trans = btrfs_start_transaction(root, 1);
644         btrfs_set_trans_block_group(trans, inode);
645         ret = btrfs_truncate_in_trans(trans, root, inode);
646         BUG_ON(ret);
647         btrfs_free_inode(trans, root, inode);
648         btrfs_end_transaction(trans, root);
649         mutex_unlock(&root->fs_info->fs_mutex);
650         btrfs_btree_balance_dirty(root);
651         return;
652 no_delete:
653         clear_inode(inode);
654 }
655
656 /*
657  * this returns the key found in the dir entry in the location pointer.
658  * If no dir entries were found, location->objectid is 0.
659  */
660 static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
661                                struct btrfs_key *location)
662 {
663         const char *name = dentry->d_name.name;
664         int namelen = dentry->d_name.len;
665         struct btrfs_dir_item *di;
666         struct btrfs_path *path;
667         struct btrfs_root *root = BTRFS_I(dir)->root;
668         int ret;
669
670         path = btrfs_alloc_path();
671         BUG_ON(!path);
672         btrfs_init_path(path);
673         di = btrfs_lookup_dir_item(NULL, root, path, dir->i_ino, name,
674                                     namelen, 0);
675         if (!di || IS_ERR(di)) {
676                 location->objectid = 0;
677                 ret = 0;
678                 goto out;
679         }
680         btrfs_disk_key_to_cpu(location, &di->location);
681 out:
682         btrfs_release_path(root, path);
683         btrfs_free_path(path);
684         return ret;
685 }
686
687 /*
688  * when we hit a tree root in a directory, the btrfs part of the inode
689  * needs to be changed to reflect the root directory of the tree root.  This
690  * is kind of like crossing a mount point.
691  */
692 static int fixup_tree_root_location(struct btrfs_root *root,
693                              struct btrfs_key *location,
694                              struct btrfs_root **sub_root)
695 {
696         struct btrfs_path *path;
697         struct btrfs_root_item *ri;
698
699         if (btrfs_key_type(location) != BTRFS_ROOT_ITEM_KEY)
700                 return 0;
701         if (location->objectid == BTRFS_ROOT_TREE_OBJECTID)
702                 return 0;
703
704         path = btrfs_alloc_path();
705         BUG_ON(!path);
706         mutex_lock(&root->fs_info->fs_mutex);
707
708         *sub_root = btrfs_read_fs_root(root->fs_info, location);
709         if (IS_ERR(*sub_root))
710                 return PTR_ERR(*sub_root);
711
712         ri = &(*sub_root)->root_item;
713         location->objectid = btrfs_root_dirid(ri);
714         location->flags = 0;
715         btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
716         location->offset = 0;
717
718         btrfs_free_path(path);
719         mutex_unlock(&root->fs_info->fs_mutex);
720         return 0;
721 }
722
723 static int btrfs_init_locked_inode(struct inode *inode, void *p)
724 {
725         struct btrfs_iget_args *args = p;
726         inode->i_ino = args->ino;
727         BTRFS_I(inode)->root = args->root;
728         return 0;
729 }
730
731 static int btrfs_find_actor(struct inode *inode, void *opaque)
732 {
733         struct btrfs_iget_args *args = opaque;
734         return (args->ino == inode->i_ino &&
735                 args->root == BTRFS_I(inode)->root);
736 }
737
738 struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid,
739                                 struct btrfs_root *root)
740 {
741         struct inode *inode;
742         struct btrfs_iget_args args;
743         args.ino = objectid;
744         args.root = root;
745
746         inode = iget5_locked(s, objectid, btrfs_find_actor,
747                              btrfs_init_locked_inode,
748                              (void *)&args);
749         return inode;
750 }
751
/*
 * lookup entry point: resolve dentry inside dir.
 *
 * The name is looked up under fs_mutex.  If an entry exists, its location
 * is passed through fixup_tree_root_location() (which may switch to a
 * different root) and the inode is fetched with btrfs_iget_locked(); a new
 * inode is filled in from disk before it is unlocked.  A missing name ends
 * up as d_splice_alias(NULL, dentry).
 *
 * Returns the result of d_splice_alias() or an ERR_PTR:
 * -ENAMETOOLONG, -ENOENT, -EACCES or the error of a helper.
 */
static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
                                   struct nameidata *nd)
{
        struct inode * inode;
        struct btrfs_inode *bi = BTRFS_I(dir);
        struct btrfs_root *root = bi->root;
        /* stays equal to root unless the entry points at another tree root */
        struct btrfs_root *sub_root = root;
        struct btrfs_key location;
        int ret;

        if (dentry->d_name.len > BTRFS_NAME_LEN)
                return ERR_PTR(-ENAMETOOLONG);
        mutex_lock(&root->fs_info->fs_mutex);
        ret = btrfs_inode_by_name(dir, dentry, &location);
        mutex_unlock(&root->fs_info->fs_mutex);
        if (ret < 0)
                return ERR_PTR(ret);
        inode = NULL;
        /* an objectid of 0 means the name was not found */
        if (location.objectid) {
                ret = fixup_tree_root_location(root, &location, &sub_root);
                if (ret < 0)
                        return ERR_PTR(ret);
                if (ret > 0)
                        return ERR_PTR(-ENOENT);
                inode = btrfs_iget_locked(dir->i_sb, location.objectid,
                                          sub_root);
                if (!inode)
                        return ERR_PTR(-EACCES);
                if (inode->i_state & I_NEW) {
                        /* the inode and parent dir are two different roots */
                        if (sub_root != root) {
                                /* extra reference for sub_root->inode */
                                igrab(inode);
                                sub_root->inode = inode;
                        }
                        BTRFS_I(inode)->root = sub_root;
                        memcpy(&BTRFS_I(inode)->location, &location,
                               sizeof(location));
                        /*
                         * NOTE(review): this can leave a bad inode behind
                         * (make_bad_inode) and the result is not checked
                         * here
                         */
                        btrfs_read_locked_inode(inode);
                        unlock_new_inode(inode);
                }
        }
        return d_splice_alias(inode, dentry);
}
795
796 /*
797  * readahead one full node of leaves as long as their keys include
798  * the objectid supplied
799  */
800 static void reada_leaves(struct btrfs_root *root, struct btrfs_path *path,
801                          u64 objectid)
802 {
803         struct btrfs_node *node;
804         int i;
805         u32 nritems;
806         u64 item_objectid;
807         u64 blocknr;
808         int slot;
809         int ret;
810
811         if (!path->nodes[1])
812                 return;
813         node = btrfs_buffer_node(path->nodes[1]);
814         slot = path->slots[1];
815         nritems = btrfs_header_nritems(&node->header);
816         for (i = slot + 1; i < nritems; i++) {
817                 item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key);
818                 if (item_objectid != objectid)
819                         break;
820                 blocknr = btrfs_node_blockptr(node, i);
821                 ret = readahead_tree_block(root, blocknr);
822                 if (ret)
823                         break;
824         }
825 }
826 static unsigned char btrfs_filetype_table[] = {
827         DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
828 };
829
830 static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
831 {
832         struct inode *inode = filp->f_path.dentry->d_inode;
833         struct btrfs_root *root = BTRFS_I(inode)->root;
834         struct btrfs_item *item;
835         struct btrfs_dir_item *di;
836         struct btrfs_key key;
837         struct btrfs_path *path;
838         int ret;
839         u32 nritems;
840         struct btrfs_leaf *leaf;
841         int slot;
842         int advance;
843         unsigned char d_type;
844         int over = 0;
845         u32 di_cur;
846         u32 di_total;
847         u32 di_len;
848         int key_type = BTRFS_DIR_INDEX_KEY;
849
850         /* FIXME, use a real flag for deciding about the key type */
851         if (root->fs_info->tree_root == root)
852                 key_type = BTRFS_DIR_ITEM_KEY;
853         mutex_lock(&root->fs_info->fs_mutex);
854         key.objectid = inode->i_ino;
855         key.flags = 0;
856         btrfs_set_key_type(&key, key_type);
857         key.offset = filp->f_pos;
858         path = btrfs_alloc_path();
859         btrfs_init_path(path);
860         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
861         if (ret < 0)
862                 goto err;
863         advance = 0;
864         reada_leaves(root, path, inode->i_ino);
865         while(1) {
866                 leaf = btrfs_buffer_leaf(path->nodes[0]);
867                 nritems = btrfs_header_nritems(&leaf->header);
868                 slot = path->slots[0];
869                 if (advance || slot >= nritems) {
870                         if (slot >= nritems -1) {
871                                 reada_leaves(root, path, inode->i_ino);
872                                 ret = btrfs_next_leaf(root, path);
873                                 if (ret)
874                                         break;
875                                 leaf = btrfs_buffer_leaf(path->nodes[0]);
876                                 nritems = btrfs_header_nritems(&leaf->header);
877                                 slot = path->slots[0];
878                         } else {
879                                 slot++;
880                                 path->slots[0]++;
881                         }
882                 }
883                 advance = 1;
884                 item = leaf->items + slot;
885                 if (btrfs_disk_key_objectid(&item->key) != key.objectid)
886                         break;
887                 if (btrfs_disk_key_type(&item->key) != key_type)
888                         break;
889                 if (btrfs_disk_key_offset(&item->key) < filp->f_pos)
890                         continue;
891                 filp->f_pos = btrfs_disk_key_offset(&item->key);
892                 advance = 1;
893                 di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
894                 di_cur = 0;
895                 di_total = btrfs_item_size(leaf->items + slot);
896                 while(di_cur < di_total) {
897                         d_type = btrfs_filetype_table[btrfs_dir_type(di)];
898                         over = filldir(dirent, (const char *)(di + 1),
899                                        btrfs_dir_name_len(di),
900                                        btrfs_disk_key_offset(&item->key),
901                                        btrfs_disk_key_objectid(&di->location),
902                                        d_type);
903                         if (over)
904                                 goto nopos;
905                         di_len = btrfs_dir_name_len(di) + sizeof(*di);
906                         di_cur += di_len;
907                         di = (struct btrfs_dir_item *)((char *)di + di_len);
908                 }
909         }
910         filp->f_pos++;
911 nopos:
912         ret = 0;
913 err:
914         btrfs_release_path(root, path);
915         btrfs_free_path(path);
916         mutex_unlock(&root->fs_info->fs_mutex);
917         return ret;
918 }
919
920 int btrfs_write_inode(struct inode *inode, int wait)
921 {
922         struct btrfs_root *root = BTRFS_I(inode)->root;
923         struct btrfs_trans_handle *trans;
924         int ret = 0;
925
926         if (wait) {
927                 mutex_lock(&root->fs_info->fs_mutex);
928                 trans = btrfs_start_transaction(root, 1);
929                 btrfs_set_trans_block_group(trans, inode);
930                 ret = btrfs_commit_transaction(trans, root);
931                 mutex_unlock(&root->fs_info->fs_mutex);
932         }
933         return ret;
934 }
935
936 /*
937  * This is somewhat expense, updating the tree every time the
938  * inode changes.  But, it is most likely to find the inode in cache.
939  * FIXME, needs more benchmarking...there are no reasons other than performance
940  * to keep or drop this code.
941  */
942 void btrfs_dirty_inode(struct inode *inode)
943 {
944         struct btrfs_root *root = BTRFS_I(inode)->root;
945         struct btrfs_trans_handle *trans;
946
947         mutex_lock(&root->fs_info->fs_mutex);
948         trans = btrfs_start_transaction(root, 1);
949         btrfs_set_trans_block_group(trans, inode);
950         btrfs_update_inode(trans, root, inode);
951         btrfs_end_transaction(trans, root);
952         mutex_unlock(&root->fs_info->fs_mutex);
953         btrfs_btree_balance_dirty(root);
954 }
955
956 static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
957                                      struct btrfs_root *root,
958                                      u64 objectid,
959                                      struct btrfs_block_group_cache *group,
960                                      int mode)
961 {
962         struct inode *inode;
963         struct btrfs_inode_item inode_item;
964         struct btrfs_key *location;
965         int ret;
966         int owner;
967
968         inode = new_inode(root->fs_info->sb);
969         if (!inode)
970                 return ERR_PTR(-ENOMEM);
971
972         BTRFS_I(inode)->root = root;
973         if (mode & S_IFDIR)
974                 owner = 0;
975         else
976                 owner = 1;
977         group = btrfs_find_block_group(root, group, 0, 0, owner);
978         BTRFS_I(inode)->block_group = group;
979
980         inode->i_uid = current->fsuid;
981         inode->i_gid = current->fsgid;
982         inode->i_mode = mode;
983         inode->i_ino = objectid;
984         inode->i_blocks = 0;
985         inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
986         fill_inode_item(&inode_item, inode);
987         location = &BTRFS_I(inode)->location;
988         location->objectid = objectid;
989         location->flags = 0;
990         location->offset = 0;
991         btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
992
993         ret = btrfs_insert_inode(trans, root, objectid, &inode_item);
994         BUG_ON(ret);
995
996         insert_inode_hash(inode);
997         return inode;
998 }
999
1000 static inline u8 btrfs_inode_type(struct inode *inode)
1001 {
1002         return btrfs_type_by_mode[(inode->i_mode & S_IFMT) >> S_SHIFT];
1003 }
1004
1005 static int btrfs_add_link(struct btrfs_trans_handle *trans,
1006                             struct dentry *dentry, struct inode *inode)
1007 {
1008         int ret;
1009         struct btrfs_key key;
1010         struct btrfs_root *root = BTRFS_I(dentry->d_parent->d_inode)->root;
1011         key.objectid = inode->i_ino;
1012         key.flags = 0;
1013         btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
1014         key.offset = 0;
1015
1016         ret = btrfs_insert_dir_item(trans, root,
1017                                     dentry->d_name.name, dentry->d_name.len,
1018                                     dentry->d_parent->d_inode->i_ino,
1019                                     &key, btrfs_inode_type(inode));
1020         if (ret == 0) {
1021                 dentry->d_parent->d_inode->i_size += dentry->d_name.len * 2;
1022                 ret = btrfs_update_inode(trans, root,
1023                                          dentry->d_parent->d_inode);
1024         }
1025         return ret;
1026 }
1027
1028 static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
1029                             struct dentry *dentry, struct inode *inode)
1030 {
1031         int err = btrfs_add_link(trans, dentry, inode);
1032         if (!err) {
1033                 d_instantiate(dentry, inode);
1034                 return 0;
1035         }
1036         if (err > 0)
1037                 err = -EEXIST;
1038         return err;
1039 }
1040
1041 static int btrfs_create(struct inode *dir, struct dentry *dentry,
1042                         int mode, struct nameidata *nd)
1043 {
1044         struct btrfs_trans_handle *trans;
1045         struct btrfs_root *root = BTRFS_I(dir)->root;
1046         struct inode *inode;
1047         int err;
1048         int drop_inode = 0;
1049         u64 objectid;
1050
1051         mutex_lock(&root->fs_info->fs_mutex);
1052         trans = btrfs_start_transaction(root, 1);
1053         btrfs_set_trans_block_group(trans, dir);
1054
1055         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
1056         if (err) {
1057                 err = -ENOSPC;
1058                 goto out_unlock;
1059         }
1060
1061         inode = btrfs_new_inode(trans, root, objectid,
1062                                 BTRFS_I(dir)->block_group, mode);
1063         err = PTR_ERR(inode);
1064         if (IS_ERR(inode))
1065                 goto out_unlock;
1066
1067         btrfs_set_trans_block_group(trans, inode);
1068         err = btrfs_add_nondir(trans, dentry, inode);
1069         if (err)
1070                 drop_inode = 1;
1071         else {
1072                 inode->i_mapping->a_ops = &btrfs_aops;
1073                 inode->i_fop = &btrfs_file_operations;
1074                 inode->i_op = &btrfs_file_inode_operations;
1075         }
1076         dir->i_sb->s_dirt = 1;
1077         btrfs_update_inode_block_group(trans, inode);
1078         btrfs_update_inode_block_group(trans, dir);
1079 out_unlock:
1080         btrfs_end_transaction(trans, root);
1081         mutex_unlock(&root->fs_info->fs_mutex);
1082
1083         if (drop_inode) {
1084                 inode_dec_link_count(inode);
1085                 iput(inode);
1086         }
1087         btrfs_btree_balance_dirty(root);
1088         return err;
1089 }
1090
1091 static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
1092                       struct dentry *dentry)
1093 {
1094         struct btrfs_trans_handle *trans;
1095         struct btrfs_root *root = BTRFS_I(dir)->root;
1096         struct inode *inode = old_dentry->d_inode;
1097         int err;
1098         int drop_inode = 0;
1099
1100         if (inode->i_nlink == 0)
1101                 return -ENOENT;
1102
1103         inc_nlink(inode);
1104         mutex_lock(&root->fs_info->fs_mutex);
1105         trans = btrfs_start_transaction(root, 1);
1106         btrfs_set_trans_block_group(trans, dir);
1107         atomic_inc(&inode->i_count);
1108         err = btrfs_add_nondir(trans, dentry, inode);
1109         if (err)
1110                 drop_inode = 1;
1111         dir->i_sb->s_dirt = 1;
1112         btrfs_update_inode_block_group(trans, dir);
1113         btrfs_update_inode(trans, root, inode);
1114
1115         btrfs_end_transaction(trans, root);
1116         mutex_unlock(&root->fs_info->fs_mutex);
1117
1118         if (drop_inode) {
1119                 inode_dec_link_count(inode);
1120                 iput(inode);
1121         }
1122         btrfs_btree_balance_dirty(root);
1123         return err;
1124 }
1125
1126 static int btrfs_make_empty_dir(struct btrfs_trans_handle *trans,
1127                                 struct btrfs_root *root,
1128                                 u64 objectid, u64 dirid)
1129 {
1130         int ret;
1131         char buf[2];
1132         struct btrfs_key key;
1133
1134         buf[0] = '.';
1135         buf[1] = '.';
1136
1137         key.objectid = objectid;
1138         key.offset = 0;
1139         key.flags = 0;
1140         btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
1141
1142         ret = btrfs_insert_dir_item(trans, root, buf, 1, objectid,
1143                                     &key, BTRFS_FT_DIR);
1144         if (ret)
1145                 goto error;
1146         key.objectid = dirid;
1147         ret = btrfs_insert_dir_item(trans, root, buf, 2, objectid,
1148                                     &key, BTRFS_FT_DIR);
1149         if (ret)
1150                 goto error;
1151 error:
1152         return ret;
1153 }
1154
1155 static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1156 {
1157         struct inode *inode;
1158         struct btrfs_trans_handle *trans;
1159         struct btrfs_root *root = BTRFS_I(dir)->root;
1160         int err = 0;
1161         int drop_on_err = 0;
1162         u64 objectid;
1163
1164         mutex_lock(&root->fs_info->fs_mutex);
1165         trans = btrfs_start_transaction(root, 1);
1166         btrfs_set_trans_block_group(trans, dir);
1167         if (IS_ERR(trans)) {
1168                 err = PTR_ERR(trans);
1169                 goto out_unlock;
1170         }
1171
1172         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
1173         if (err) {
1174                 err = -ENOSPC;
1175                 goto out_unlock;
1176         }
1177
1178         inode = btrfs_new_inode(trans, root, objectid,
1179                                 BTRFS_I(dir)->block_group, S_IFDIR | mode);
1180         if (IS_ERR(inode)) {
1181                 err = PTR_ERR(inode);
1182                 goto out_fail;
1183         }
1184         drop_on_err = 1;
1185         inode->i_op = &btrfs_dir_inode_operations;
1186         inode->i_fop = &btrfs_dir_file_operations;
1187         btrfs_set_trans_block_group(trans, inode);
1188
1189         err = btrfs_make_empty_dir(trans, root, inode->i_ino, dir->i_ino);
1190         if (err)
1191                 goto out_fail;
1192
1193         inode->i_size = 6;
1194         err = btrfs_update_inode(trans, root, inode);
1195         if (err)
1196                 goto out_fail;
1197         err = btrfs_add_link(trans, dentry, inode);
1198         if (err)
1199                 goto out_fail;
1200         d_instantiate(dentry, inode);
1201         drop_on_err = 0;
1202         dir->i_sb->s_dirt = 1;
1203         btrfs_update_inode_block_group(trans, inode);
1204         btrfs_update_inode_block_group(trans, dir);
1205
1206 out_fail:
1207         btrfs_end_transaction(trans, root);
1208 out_unlock:
1209         mutex_unlock(&root->fs_info->fs_mutex);
1210         if (drop_on_err)
1211                 iput(inode);
1212         btrfs_btree_balance_dirty(root);
1213         return err;
1214 }
1215
/*
 * Flag bits for the 'create' argument of btrfs_get_block_lock().
 *
 * FIBMAP and other callers pass in a fake buffer head.  They have to set
 * BTRFS_GET_BLOCK_NO_DIRECT so that packed file data is never memcpy'd
 * into that fake bh.
 */
#define BTRFS_GET_BLOCK_NO_CREATE 0
#define BTRFS_GET_BLOCK_CREATE (1 << 0)
#define BTRFS_GET_BLOCK_NO_DIRECT (1 << 1)
1224
1225 /*
1226  * FIXME create==1 doe not work.
1227  */
1228 static int btrfs_get_block_lock(struct inode *inode, sector_t iblock,
1229                                 struct buffer_head *result, int create)
1230 {
1231         int ret;
1232         int err = 0;
1233         u64 blocknr;
1234         u64 extent_start = 0;
1235         u64 extent_end = 0;
1236         u64 objectid = inode->i_ino;
1237         u32 found_type;
1238         u64 alloc_hint = 0;
1239         struct btrfs_path *path;
1240         struct btrfs_root *root = BTRFS_I(inode)->root;
1241         struct btrfs_file_extent_item *item;
1242         struct btrfs_leaf *leaf;
1243         struct btrfs_disk_key *found_key;
1244         struct btrfs_trans_handle *trans = NULL;
1245
1246         path = btrfs_alloc_path();
1247         BUG_ON(!path);
1248         btrfs_init_path(path);
1249         if (create & BTRFS_GET_BLOCK_CREATE) {
1250                 WARN_ON(1);
1251                 /* this almost but not quite works */
1252                 trans = btrfs_start_transaction(root, 1);
1253                 if (!trans) {
1254                         err = -ENOMEM;
1255                         goto out;
1256                 }
1257                 ret = btrfs_drop_extents(trans, root, inode,
1258                                          iblock << inode->i_blkbits,
1259                                          (iblock + 1) << inode->i_blkbits,
1260                                          &alloc_hint);
1261                 BUG_ON(ret);
1262         }
1263
1264         ret = btrfs_lookup_file_extent(NULL, root, path,
1265                                        inode->i_ino,
1266                                        iblock << inode->i_blkbits, 0);
1267         if (ret < 0) {
1268                 err = ret;
1269                 goto out;
1270         }
1271
1272         if (ret != 0) {
1273                 if (path->slots[0] == 0) {
1274                         btrfs_release_path(root, path);
1275                         goto not_found;
1276                 }
1277                 path->slots[0]--;
1278         }
1279
1280         item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0],
1281                               struct btrfs_file_extent_item);
1282         leaf = btrfs_buffer_leaf(path->nodes[0]);
1283         blocknr = btrfs_file_extent_disk_blocknr(item);
1284         blocknr += btrfs_file_extent_offset(item);
1285
1286         /* are we inside the extent that was found? */
1287         found_key = &leaf->items[path->slots[0]].key;
1288         found_type = btrfs_disk_key_type(found_key);
1289         if (btrfs_disk_key_objectid(found_key) != objectid ||
1290             found_type != BTRFS_EXTENT_DATA_KEY) {
1291                 extent_end = 0;
1292                 extent_start = 0;
1293                 goto not_found;
1294         }
1295         found_type = btrfs_file_extent_type(item);
1296         extent_start = btrfs_disk_key_offset(&leaf->items[path->slots[0]].key);
1297         if (found_type == BTRFS_FILE_EXTENT_REG) {
1298                 extent_start = extent_start >> inode->i_blkbits;
1299                 extent_end = extent_start + btrfs_file_extent_num_blocks(item);
1300                 err = 0;
1301                 if (btrfs_file_extent_disk_blocknr(item) == 0)
1302                         goto out;
1303                 if (iblock >= extent_start && iblock < extent_end) {
1304                         btrfs_map_bh_to_logical(root, result, blocknr +
1305                                                 iblock - extent_start);
1306                         goto out;
1307                 }
1308         } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
1309                 char *ptr;
1310                 char *map;
1311                 u32 size;
1312
1313                 if (create & BTRFS_GET_BLOCK_NO_DIRECT) {
1314                         err = -EINVAL;
1315                         goto out;
1316                 }
1317                 size = btrfs_file_extent_inline_len(leaf->items +
1318                                                     path->slots[0]);
1319                 extent_end = (extent_start + size) >> inode->i_blkbits;
1320                 extent_start >>= inode->i_blkbits;
1321                 if (iblock < extent_start || iblock > extent_end) {
1322                         goto not_found;
1323                 }
1324                 ptr = btrfs_file_extent_inline_start(item);
1325                 map = kmap(result->b_page);
1326                 memcpy(map, ptr, size);
1327                 memset(map + size, 0, PAGE_CACHE_SIZE - size);
1328                 flush_dcache_page(result->b_page);
1329                 kunmap(result->b_page);
1330                 set_buffer_uptodate(result);
1331                 SetPageChecked(result->b_page);
1332                 btrfs_map_bh_to_logical(root, result, 0);
1333         }
1334 not_found:
1335         if (create & BTRFS_GET_BLOCK_CREATE) {
1336                 struct btrfs_key ins;
1337                 ret = btrfs_alloc_extent(trans, root, inode->i_ino,
1338                                          1, alloc_hint, (u64)-1,
1339                                          &ins, 1);
1340                 BUG_ON(ret);
1341                 ret = btrfs_insert_file_extent(trans, root, inode->i_ino,
1342                                                iblock << inode->i_blkbits,
1343                                                ins.objectid, ins.offset,
1344                                                ins.offset);
1345                 BUG_ON(ret);
1346                 SetPageChecked(result->b_page);
1347                 btrfs_map_bh_to_logical(root, result, ins.objectid);
1348         }
1349 out:
1350         if (trans)
1351                 err = btrfs_end_transaction(trans, root);
1352         btrfs_free_path(path);
1353         return err;
1354 }
1355
1356 int btrfs_get_block(struct inode *inode, sector_t iblock,
1357                     struct buffer_head *result, int create)
1358 {
1359         int err;
1360         struct btrfs_root *root = BTRFS_I(inode)->root;
1361         mutex_lock(&root->fs_info->fs_mutex);
1362         err = btrfs_get_block_lock(inode, iblock, result, create);
1363         mutex_unlock(&root->fs_info->fs_mutex);
1364         return err;
1365 }
1366
1367 static int btrfs_get_block_bmap(struct inode *inode, sector_t iblock,
1368                            struct buffer_head *result, int create)
1369 {
1370         struct btrfs_root *root = BTRFS_I(inode)->root;
1371         mutex_lock(&root->fs_info->fs_mutex);
1372         btrfs_get_block_lock(inode, iblock, result, BTRFS_GET_BLOCK_NO_DIRECT);
1373         mutex_unlock(&root->fs_info->fs_mutex);
1374         return 0;
1375 }
1376
1377 static sector_t btrfs_bmap(struct address_space *as, sector_t block)
1378 {
1379         return generic_block_bmap(as, block, btrfs_get_block_bmap);
1380 }
1381
1382 static int btrfs_prepare_write(struct file *file, struct page *page,
1383                                unsigned from, unsigned to)
1384 {
1385         return block_prepare_write(page, from, to, btrfs_get_block);
1386 }
1387
1388 static int btrfs_readpage(struct file *file, struct page *page)
1389 {
1390         return mpage_readpage(page, btrfs_get_block);
1391 }
1392
1393 /*
1394  * Aside from a tiny bit of packed file data handling, this is the
1395  * same as the generic code.
1396  *
1397  * While block_write_full_page is writing back the dirty buffers under
1398  * the page lock, whoever dirtied the buffers may decide to clean them
1399  * again at any time.  We handle that by only looking at the buffer
1400  * state inside lock_buffer().
1401  *
1402  * If block_write_full_page() is called for regular writeback
1403  * (wbc->sync_mode == WB_SYNC_NONE) then it will redirty a page which has a
1404  * locked buffer.   This only can happen if someone has written the buffer
1405  * directly, with submit_bh().  At the address_space level PageWriteback
1406  * prevents this contention from occurring.
1407  */
1408 static int __btrfs_write_full_page(struct inode *inode, struct page *page,
1409                                    struct writeback_control *wbc)
1410 {
1411         int err;
1412         sector_t block;
1413         sector_t last_block;
1414         struct buffer_head *bh, *head;
1415         const unsigned blocksize = 1 << inode->i_blkbits;
1416         int nr_underway = 0;
1417
1418         BUG_ON(!PageLocked(page));
1419
1420         last_block = (i_size_read(inode) - 1) >> inode->i_blkbits;
1421
1422         if (!page_has_buffers(page)) {
1423                 create_empty_buffers(page, blocksize,
1424                                         (1 << BH_Dirty)|(1 << BH_Uptodate));
1425         }
1426
1427         /*
1428          * Be very careful.  We have no exclusion from __set_page_dirty_buffers
1429          * here, and the (potentially unmapped) buffers may become dirty at
1430          * any time.  If a buffer becomes dirty here after we've inspected it
1431          * then we just miss that fact, and the page stays dirty.
1432          *
1433          * Buffers outside i_size may be dirtied by __set_page_dirty_buffers;
1434          * handle that here by just cleaning them.
1435          */
1436
1437         block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
1438         head = page_buffers(page);
1439         bh = head;
1440
1441         /*
1442          * Get all the dirty buffers mapped to disk addresses and
1443          * handle any aliases from the underlying blockdev's mapping.
1444          */
1445         do {
1446                 if (block > last_block) {
1447                         /*
1448                          * mapped buffers outside i_size will occur, because
1449                          * this page can be outside i_size when there is a
1450                          * truncate in progress.
1451                          */
1452                         /*
1453                          * The buffer was zeroed by block_write_full_page()
1454                          */
1455                         clear_buffer_dirty(bh);
1456                         set_buffer_uptodate(bh);
1457                 } else if (!buffer_mapped(bh) && buffer_dirty(bh)) {
1458                         WARN_ON(bh->b_size != blocksize);
1459                         err = btrfs_get_block(inode, block, bh, 0);
1460                         if (err) {
1461                                 goto recover;
1462                         }
1463                         if (buffer_new(bh)) {
1464                                 /* blockdev mappings never come here */
1465                                 clear_buffer_new(bh);
1466                         }
1467                 }
1468                 bh = bh->b_this_page;
1469                 block++;
1470         } while (bh != head);
1471
1472         do {
1473                 if (!buffer_mapped(bh))
1474                         continue;
1475                 /*
1476                  * If it's a fully non-blocking write attempt and we cannot
1477                  * lock the buffer then redirty the page.  Note that this can
1478                  * potentially cause a busy-wait loop from pdflush and kswapd
1479                  * activity, but those code paths have their own higher-level
1480                  * throttling.
1481                  */
1482                 if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) {
1483                         lock_buffer(bh);
1484                 } else if (test_set_buffer_locked(bh)) {
1485                         redirty_page_for_writepage(wbc, page);
1486                         continue;
1487                 }
1488                 if (test_clear_buffer_dirty(bh) && bh->b_blocknr != 0) {
1489                         mark_buffer_async_write(bh);
1490                 } else {
1491                         unlock_buffer(bh);
1492                 }
1493         } while ((bh = bh->b_this_page) != head);
1494
1495         /*
1496          * The page and its buffers are protected by PageWriteback(), so we can
1497          * drop the bh refcounts early.
1498          */
1499         BUG_ON(PageWriteback(page));
1500         set_page_writeback(page);
1501
1502         do {
1503                 struct buffer_head *next = bh->b_this_page;
1504                 if (buffer_async_write(bh)) {
1505                         submit_bh(WRITE, bh);
1506                         nr_underway++;
1507                 }
1508                 bh = next;
1509         } while (bh != head);
1510         unlock_page(page);
1511
1512         err = 0;
1513 done:
1514         if (nr_underway == 0) {
1515                 /*
1516                  * The page was marked dirty, but the buffers were
1517                  * clean.  Someone wrote them back by hand with
1518                  * ll_rw_block/submit_bh.  A rare case.
1519                  */
1520                 int uptodate = 1;
1521                 do {
1522                         if (!buffer_uptodate(bh)) {
1523                                 uptodate = 0;
1524                                 break;
1525                         }
1526                         bh = bh->b_this_page;
1527                 } while (bh != head);
1528                 if (uptodate)
1529                         SetPageUptodate(page);
1530                 end_page_writeback(page);
1531         }
1532         return err;
1533
1534 recover:
1535         /*
1536          * ENOSPC, or some other error.  We may already have added some
1537          * blocks to the file, so we need to write these out to avoid
1538          * exposing stale data.
1539          * The page is currently locked and not marked for writeback
1540          */
1541         bh = head;
1542         /* Recovery: lock and submit the mapped buffers */
1543         do {
1544                 if (buffer_mapped(bh) && buffer_dirty(bh)) {
1545                         lock_buffer(bh);
1546                         mark_buffer_async_write(bh);
1547                 } else {
1548                         /*
1549                          * The buffer may have been set dirty during
1550                          * attachment to a dirty page.
1551                          */
1552                         clear_buffer_dirty(bh);
1553                 }
1554         } while ((bh = bh->b_this_page) != head);
1555         SetPageError(page);
1556         BUG_ON(PageWriteback(page));
1557         set_page_writeback(page);
1558         do {
1559                 struct buffer_head *next = bh->b_this_page;
1560                 if (buffer_async_write(bh)) {
1561                         clear_buffer_dirty(bh);
1562                         submit_bh(WRITE, bh);
1563                         nr_underway++;
1564                 }
1565                 bh = next;
1566         } while (bh != head);
1567         unlock_page(page);
1568         goto done;
1569 }
1570
1571 static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
1572 {
1573         struct inode * const inode = page->mapping->host;
1574         loff_t i_size = i_size_read(inode);
1575         const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
1576         unsigned offset;
1577         void *kaddr;
1578
1579         /* Is the page fully inside i_size? */
1580         if (page->index < end_index)
1581                 return __btrfs_write_full_page(inode, page, wbc);
1582
1583         /* Is the page fully outside i_size? (truncate in progress) */
1584         offset = i_size & (PAGE_CACHE_SIZE-1);
1585         if (page->index >= end_index+1 || !offset) {
1586                 /*
1587                  * The page may have dirty, unmapped buffers.  For example,
1588                  * they may have been added in ext3_writepage().  Make them
1589                  * freeable here, so the page does not leak.
1590                  */
1591                 block_invalidatepage(page, 0);
1592                 unlock_page(page);
1593                 return 0; /* don't care */
1594         }
1595
1596         /*
1597          * The page straddles i_size.  It must be zeroed out on each and every
1598          * writepage invokation because it may be mmapped.  "A file is mapped
1599          * in multiples of the page size.  For a file that is not a multiple of
1600          * the  page size, the remaining memory is zeroed when mapped, and
1601          * writes to that region are not written out to the file."
1602          */
1603         kaddr = kmap_atomic(page, KM_USER0);
1604         memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
1605         flush_dcache_page(page);
1606         kunmap_atomic(kaddr, KM_USER0);
1607         return __btrfs_write_full_page(inode, page, wbc);
1608 }
1609
1610 static void btrfs_truncate(struct inode *inode)
1611 {
1612         struct btrfs_root *root = BTRFS_I(inode)->root;
1613         int ret;
1614         struct btrfs_trans_handle *trans;
1615
1616         if (!S_ISREG(inode->i_mode))
1617                 return;
1618         if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
1619                 return;
1620
1621         btrfs_truncate_page(inode->i_mapping, inode->i_size);
1622
1623         mutex_lock(&root->fs_info->fs_mutex);
1624         trans = btrfs_start_transaction(root, 1);
1625         btrfs_set_trans_block_group(trans, inode);
1626
1627         /* FIXME, add redo link to tree so we don't leak on crash */
1628         ret = btrfs_truncate_in_trans(trans, root, inode);
1629         BUG_ON(ret);
1630         btrfs_update_inode(trans, root, inode);
1631         ret = btrfs_end_transaction(trans, root);
1632         BUG_ON(ret);
1633         mutex_unlock(&root->fs_info->fs_mutex);
1634         btrfs_btree_balance_dirty(root);
1635 }
1636
1637 int btrfs_commit_write(struct file *file, struct page *page,
1638                        unsigned from, unsigned to)
1639 {
1640         struct inode *inode = page->mapping->host;
1641         struct buffer_head *bh;
1642         loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
1643
1644         SetPageUptodate(page);
1645         bh = page_buffers(page);
1646         set_buffer_uptodate(bh);
1647         if (buffer_mapped(bh) && bh->b_blocknr != 0) {
1648                 set_page_dirty(page);
1649         }
1650         if (pos > inode->i_size) {
1651                 i_size_write(inode, pos);
1652                 mark_inode_dirty(inode);
1653         }
1654         return 0;
1655 }
1656
1657 static int create_subvol(struct btrfs_root *root, char *name, int namelen)
1658 {
1659         struct btrfs_trans_handle *trans;
1660         struct btrfs_key key;
1661         struct btrfs_root_item root_item;
1662         struct btrfs_inode_item *inode_item;
1663         struct buffer_head *subvol;
1664         struct btrfs_leaf *leaf;
1665         struct btrfs_root *new_root;
1666         struct inode *inode;
1667         struct inode *dir;
1668         int ret;
1669         u64 objectid;
1670         u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
1671
1672         mutex_lock(&root->fs_info->fs_mutex);
1673         trans = btrfs_start_transaction(root, 1);
1674         BUG_ON(!trans);
1675
1676         subvol = btrfs_alloc_free_block(trans, root, 0);
1677         if (subvol == NULL)
1678                 return -ENOSPC;
1679         leaf = btrfs_buffer_leaf(subvol);
1680         btrfs_set_header_nritems(&leaf->header, 0);
1681         btrfs_set_header_level(&leaf->header, 0);
1682         btrfs_set_header_blocknr(&leaf->header, bh_blocknr(subvol));
1683         btrfs_set_header_generation(&leaf->header, trans->transid);
1684         btrfs_set_header_owner(&leaf->header, root->root_key.objectid);
1685         memcpy(leaf->header.fsid, root->fs_info->disk_super->fsid,
1686                sizeof(leaf->header.fsid));
1687         mark_buffer_dirty(subvol);
1688
1689         inode_item = &root_item.inode;
1690         memset(inode_item, 0, sizeof(*inode_item));
1691         btrfs_set_inode_generation(inode_item, 1);
1692         btrfs_set_inode_size(inode_item, 3);
1693         btrfs_set_inode_nlink(inode_item, 1);
1694         btrfs_set_inode_nblocks(inode_item, 1);
1695         btrfs_set_inode_mode(inode_item, S_IFDIR | 0755);
1696
1697         btrfs_set_root_blocknr(&root_item, bh_blocknr(subvol));
1698         btrfs_set_root_refs(&root_item, 1);
1699         brelse(subvol);
1700         subvol = NULL;
1701
1702         ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root,
1703                                        0, &objectid);
1704         BUG_ON(ret);
1705
1706         btrfs_set_root_dirid(&root_item, new_dirid);
1707
1708         key.objectid = objectid;
1709         key.offset = 1;
1710         key.flags = 0;
1711         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
1712         ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
1713                                 &root_item);
1714         BUG_ON(ret);
1715
1716         /*
1717          * insert the directory item
1718          */
1719         key.offset = (u64)-1;
1720         dir = root->fs_info->sb->s_root->d_inode;
1721         ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root,
1722                                     name, namelen, dir->i_ino, &key,
1723                                     BTRFS_FT_DIR);
1724         BUG_ON(ret);
1725
1726         ret = btrfs_commit_transaction(trans, root);
1727         BUG_ON(ret);
1728
1729         new_root = btrfs_read_fs_root(root->fs_info, &key);
1730         BUG_ON(!new_root);
1731
1732         trans = btrfs_start_transaction(new_root, 1);
1733         BUG_ON(!trans);
1734
1735         inode = btrfs_new_inode(trans, new_root, new_dirid,
1736                                 BTRFS_I(dir)->block_group, S_IFDIR | 0700);
1737         inode->i_op = &btrfs_dir_inode_operations;
1738         inode->i_fop = &btrfs_dir_file_operations;
1739
1740         ret = btrfs_make_empty_dir(trans, new_root, new_dirid, new_dirid);
1741         BUG_ON(ret);
1742
1743         inode->i_nlink = 1;
1744         inode->i_size = 6;
1745         ret = btrfs_update_inode(trans, new_root, inode);
1746         BUG_ON(ret);
1747
1748         ret = btrfs_commit_transaction(trans, new_root);
1749         BUG_ON(ret);
1750
1751         iput(inode);
1752
1753         mutex_unlock(&root->fs_info->fs_mutex);
1754         btrfs_btree_balance_dirty(root);
1755         return 0;
1756 }
1757
1758 static int create_snapshot(struct btrfs_root *root, char *name, int namelen)
1759 {
1760         struct btrfs_trans_handle *trans;
1761         struct btrfs_key key;
1762         struct btrfs_root_item new_root_item;
1763         int ret;
1764         u64 objectid;
1765
1766         if (!root->ref_cows)
1767                 return -EINVAL;
1768
1769         mutex_lock(&root->fs_info->fs_mutex);
1770         trans = btrfs_start_transaction(root, 1);
1771         BUG_ON(!trans);
1772
1773         ret = btrfs_update_inode(trans, root, root->inode);
1774         BUG_ON(ret);
1775
1776         ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root,
1777                                        0, &objectid);
1778         BUG_ON(ret);
1779
1780         memcpy(&new_root_item, &root->root_item,
1781                sizeof(new_root_item));
1782
1783         key.objectid = objectid;
1784         key.offset = 1;
1785         key.flags = 0;
1786         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
1787         btrfs_set_root_blocknr(&new_root_item, bh_blocknr(root->node));
1788
1789         ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
1790                                 &new_root_item);
1791         BUG_ON(ret);
1792
1793         /*
1794          * insert the directory item
1795          */
1796         key.offset = (u64)-1;
1797         ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root,
1798                                     name, namelen,
1799                                     root->fs_info->sb->s_root->d_inode->i_ino,
1800                                     &key, BTRFS_FT_DIR);
1801
1802         BUG_ON(ret);
1803
1804         ret = btrfs_inc_root_ref(trans, root);
1805         BUG_ON(ret);
1806
1807         ret = btrfs_commit_transaction(trans, root);
1808         BUG_ON(ret);
1809         mutex_unlock(&root->fs_info->fs_mutex);
1810         btrfs_btree_balance_dirty(root);
1811         return 0;
1812 }
1813
1814 int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int
1815                 cmd, unsigned long arg)
1816 {
1817         struct btrfs_root *root = BTRFS_I(inode)->root;
1818         struct btrfs_ioctl_vol_args vol_args;
1819         int ret = 0;
1820         struct btrfs_dir_item *di;
1821         int namelen;
1822         struct btrfs_path *path;
1823         u64 root_dirid;
1824
1825         switch (cmd) {
1826         case BTRFS_IOC_SNAP_CREATE:
1827                 if (copy_from_user(&vol_args,
1828                                    (struct btrfs_ioctl_vol_args __user *)arg,
1829                                    sizeof(vol_args)))
1830                         return -EFAULT;
1831                 namelen = strlen(vol_args.name);
1832                 if (namelen > BTRFS_VOL_NAME_MAX)
1833                         return -EINVAL;
1834                 path = btrfs_alloc_path();
1835                 if (!path)
1836                         return -ENOMEM;
1837                 root_dirid = root->fs_info->sb->s_root->d_inode->i_ino,
1838                 mutex_lock(&root->fs_info->fs_mutex);
1839                 di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root,
1840                                     path, root_dirid,
1841                                     vol_args.name, namelen, 0);
1842                 mutex_unlock(&root->fs_info->fs_mutex);
1843                 btrfs_free_path(path);
1844                 if (di && !IS_ERR(di))
1845                         return -EEXIST;
1846
1847                 if (root == root->fs_info->tree_root)
1848                         ret = create_subvol(root, vol_args.name, namelen);
1849                 else
1850                         ret = create_snapshot(root, vol_args.name, namelen);
1851                 WARN_ON(ret);
1852                 break;
1853         default:
1854                 return -ENOTTY;
1855         }
1856         return ret;
1857 }
1858
1859 #ifdef CONFIG_COMPAT
1860 long btrfs_compat_ioctl(struct file *file, unsigned int cmd,
1861                                unsigned long arg)
1862 {
1863         struct inode *inode = file->f_path.dentry->d_inode;
1864         int ret;
1865         lock_kernel();
1866         ret = btrfs_ioctl(inode, file, cmd, (unsigned long) compat_ptr(arg));
1867         unlock_kernel();
1868         return ret;
1869
1870 }
1871 #endif
1872
1873 /*
1874  * Called inside transaction, so use GFP_NOFS
1875  */
1876 struct inode *btrfs_alloc_inode(struct super_block *sb)
1877 {
1878         struct btrfs_inode *ei;
1879
1880         ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS);
1881         if (!ei)
1882                 return NULL;
1883         return &ei->vfs_inode;
1884 }
1885
1886 void btrfs_destroy_inode(struct inode *inode)
1887 {
1888         WARN_ON(!list_empty(&inode->i_dentry));
1889         WARN_ON(inode->i_data.nrpages);
1890
1891         kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
1892 }
1893
1894 static void init_once(void * foo, struct kmem_cache * cachep,
1895                       unsigned long flags)
1896 {
1897         struct btrfs_inode *ei = (struct btrfs_inode *) foo;
1898
1899         inode_init_once(&ei->vfs_inode);
1900 }
1901
1902 void btrfs_destroy_cachep(void)
1903 {
1904         if (btrfs_inode_cachep)
1905                 kmem_cache_destroy(btrfs_inode_cachep);
1906         if (btrfs_trans_handle_cachep)
1907                 kmem_cache_destroy(btrfs_trans_handle_cachep);
1908         if (btrfs_transaction_cachep)
1909                 kmem_cache_destroy(btrfs_transaction_cachep);
1910         if (btrfs_bit_radix_cachep)
1911                 kmem_cache_destroy(btrfs_bit_radix_cachep);
1912         if (btrfs_path_cachep)
1913                 kmem_cache_destroy(btrfs_path_cachep);
1914 }
1915
1916 int btrfs_init_cachep(void)
1917 {
1918         btrfs_inode_cachep = kmem_cache_create("btrfs_inode_cache",
1919                                              sizeof(struct btrfs_inode),
1920                                              0, (SLAB_RECLAIM_ACCOUNT|
1921                                                 SLAB_MEM_SPREAD),
1922                                              init_once, NULL);
1923         if (!btrfs_inode_cachep)
1924                 goto fail;
1925         btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle_cache",
1926                                              sizeof(struct btrfs_trans_handle),
1927                                              0, (SLAB_RECLAIM_ACCOUNT|
1928                                                 SLAB_MEM_SPREAD),
1929                                              NULL, NULL);
1930         if (!btrfs_trans_handle_cachep)
1931                 goto fail;
1932         btrfs_transaction_cachep = kmem_cache_create("btrfs_transaction_cache",
1933                                              sizeof(struct btrfs_transaction),
1934                                              0, (SLAB_RECLAIM_ACCOUNT|
1935                                                 SLAB_MEM_SPREAD),
1936                                              NULL, NULL);
1937         if (!btrfs_transaction_cachep)
1938                 goto fail;
1939         btrfs_path_cachep = kmem_cache_create("btrfs_path_cache",
1940                                              sizeof(struct btrfs_transaction),
1941                                              0, (SLAB_RECLAIM_ACCOUNT|
1942                                                 SLAB_MEM_SPREAD),
1943                                              NULL, NULL);
1944         if (!btrfs_path_cachep)
1945                 goto fail;
1946         btrfs_bit_radix_cachep = kmem_cache_create("btrfs_radix",
1947                                              256,
1948                                              0, (SLAB_RECLAIM_ACCOUNT|
1949                                                 SLAB_MEM_SPREAD |
1950                                                 SLAB_DESTROY_BY_RCU),
1951                                              NULL, NULL);
1952         if (!btrfs_bit_radix_cachep)
1953                 goto fail;
1954         return 0;
1955 fail:
1956         btrfs_destroy_cachep();
1957         return -ENOMEM;
1958 }
1959
1960 static int btrfs_getattr(struct vfsmount *mnt,
1961                          struct dentry *dentry, struct kstat *stat)
1962 {
1963         struct inode *inode = dentry->d_inode;
1964         generic_fillattr(inode, stat);
1965         stat->blksize = 256 * 1024;
1966         return 0;
1967 }
1968
1969 static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry,
1970                            struct inode * new_dir,struct dentry *new_dentry)
1971 {
1972         struct btrfs_trans_handle *trans;
1973         struct btrfs_root *root = BTRFS_I(old_dir)->root;
1974         struct inode *new_inode = new_dentry->d_inode;
1975         struct inode *old_inode = old_dentry->d_inode;
1976         struct timespec ctime = CURRENT_TIME;
1977         struct btrfs_path *path;
1978         struct btrfs_dir_item *di;
1979         int ret;
1980
1981         if (S_ISDIR(old_inode->i_mode) && new_inode &&
1982             new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) {
1983                 return -ENOTEMPTY;
1984         }
1985         mutex_lock(&root->fs_info->fs_mutex);
1986         trans = btrfs_start_transaction(root, 1);
1987         btrfs_set_trans_block_group(trans, new_dir);
1988         path = btrfs_alloc_path();
1989         if (!path) {
1990                 ret = -ENOMEM;
1991                 goto out_fail;
1992         }
1993
1994         old_dentry->d_inode->i_nlink++;
1995         old_dir->i_ctime = old_dir->i_mtime = ctime;
1996         new_dir->i_ctime = new_dir->i_mtime = ctime;
1997         old_inode->i_ctime = ctime;
1998         if (S_ISDIR(old_inode->i_mode) && old_dir != new_dir) {
1999                 struct btrfs_key *location = &BTRFS_I(new_dir)->location;
2000                 u64 old_parent_oid;
2001                 di = btrfs_lookup_dir_item(trans, root, path, old_inode->i_ino,
2002                                            "..", 2, -1);
2003                 if (IS_ERR(di)) {
2004                         ret = PTR_ERR(di);
2005                         goto out_fail;
2006                 }
2007                 if (!di) {
2008                         ret = -ENOENT;
2009                         goto out_fail;
2010                 }
2011                 old_parent_oid = btrfs_disk_key_objectid(&di->location);
2012                 ret = btrfs_del_item(trans, root, path);
2013                 if (ret) {
2014                         ret = -EIO;
2015                         goto out_fail;
2016                 }
2017                 btrfs_release_path(root, path);
2018
2019                 di = btrfs_lookup_dir_index_item(trans, root, path,
2020                                                  old_inode->i_ino,
2021                                                  old_parent_oid,
2022                                                  "..", 2, -1);
2023                 if (IS_ERR(di)) {
2024                         ret = PTR_ERR(di);
2025                         goto out_fail;
2026                 }
2027                 if (!di) {
2028                         ret = -ENOENT;
2029                         goto out_fail;
2030                 }
2031                 ret = btrfs_del_item(trans, root, path);
2032                 if (ret) {
2033                         ret = -EIO;
2034                         goto out_fail;
2035                 }
2036                 btrfs_release_path(root, path);
2037
2038                 ret = btrfs_insert_dir_item(trans, root, "..", 2,
2039                                             old_inode->i_ino, location,
2040                                             BTRFS_FT_DIR);
2041                 if (ret)
2042                         goto out_fail;
2043         }
2044
2045
2046         ret = btrfs_unlink_trans(trans, root, old_dir, old_dentry);
2047         if (ret)
2048                 goto out_fail;
2049
2050         if (new_inode) {
2051                 new_inode->i_ctime = CURRENT_TIME;
2052                 ret = btrfs_unlink_trans(trans, root, new_dir, new_dentry);
2053                 if (ret)
2054                         goto out_fail;
2055                 if (S_ISDIR(new_inode->i_mode))
2056                         clear_nlink(new_inode);
2057                 else
2058                         drop_nlink(new_inode);
2059                 btrfs_update_inode(trans, root, new_inode);
2060         }
2061         ret = btrfs_add_link(trans, new_dentry, old_inode);
2062         if (ret)
2063                 goto out_fail;
2064
2065 out_fail:
2066         btrfs_free_path(path);
2067         btrfs_end_transaction(trans, root);
2068         mutex_unlock(&root->fs_info->fs_mutex);
2069         return ret;
2070 }
2071
2072 static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
2073                          const char *symname)
2074 {
2075         struct btrfs_trans_handle *trans;
2076         struct btrfs_root *root = BTRFS_I(dir)->root;
2077         struct btrfs_path *path;
2078         struct btrfs_key key;
2079         struct inode *inode;
2080         int err;
2081         int drop_inode = 0;
2082         u64 objectid;
2083         int name_len;
2084         int datasize;
2085         char *ptr;
2086         struct btrfs_file_extent_item *ei;
2087
2088         name_len = strlen(symname) + 1;
2089         if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root))
2090                 return -ENAMETOOLONG;
2091         mutex_lock(&root->fs_info->fs_mutex);
2092         trans = btrfs_start_transaction(root, 1);
2093         btrfs_set_trans_block_group(trans, dir);
2094
2095         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
2096         if (err) {
2097                 err = -ENOSPC;
2098                 goto out_unlock;
2099         }
2100
2101         inode = btrfs_new_inode(trans, root, objectid,
2102                                 BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO);
2103         err = PTR_ERR(inode);
2104         if (IS_ERR(inode))
2105                 goto out_unlock;
2106
2107         btrfs_set_trans_block_group(trans, inode);
2108         err = btrfs_add_nondir(trans, dentry, inode);
2109         if (err)
2110                 drop_inode = 1;
2111         else {
2112                 inode->i_mapping->a_ops = &btrfs_aops;
2113                 inode->i_fop = &btrfs_file_operations;
2114                 inode->i_op = &btrfs_file_inode_operations;
2115         }
2116         dir->i_sb->s_dirt = 1;
2117         btrfs_update_inode_block_group(trans, inode);
2118         btrfs_update_inode_block_group(trans, dir);
2119         if (drop_inode)
2120                 goto out_unlock;
2121
2122         path = btrfs_alloc_path();
2123         BUG_ON(!path);
2124         key.objectid = inode->i_ino;
2125         key.offset = 0;
2126         key.flags = 0;
2127         btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
2128         datasize = btrfs_file_extent_calc_inline_size(name_len);
2129         err = btrfs_insert_empty_item(trans, root, path, &key,
2130                                       datasize);
2131         BUG_ON(err);
2132         ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
2133                path->slots[0], struct btrfs_file_extent_item);
2134         btrfs_set_file_extent_generation(ei, trans->transid);
2135         btrfs_set_file_extent_type(ei,
2136                                    BTRFS_FILE_EXTENT_INLINE);
2137         ptr = btrfs_file_extent_inline_start(ei);
2138         btrfs_memcpy(root, path->nodes[0]->b_data,
2139                      ptr, symname, name_len);
2140         mark_buffer_dirty(path->nodes[0]);
2141         btrfs_free_path(path);
2142         inode->i_op = &btrfs_symlink_inode_operations;
2143         inode->i_mapping->a_ops = &btrfs_symlink_aops;
2144         inode->i_size = name_len - 1;
2145         btrfs_update_inode(trans, root, inode);
2146         err = 0;
2147
2148 out_unlock:
2149         btrfs_end_transaction(trans, root);
2150         mutex_unlock(&root->fs_info->fs_mutex);
2151
2152         if (drop_inode) {
2153                 inode_dec_link_count(inode);
2154                 iput(inode);
2155         }
2156         btrfs_btree_balance_dirty(root);
2157         return err;
2158 }
2159
2160 static struct inode_operations btrfs_dir_inode_operations = {
2161         .lookup         = btrfs_lookup,
2162         .create         = btrfs_create,
2163         .unlink         = btrfs_unlink,
2164         .link           = btrfs_link,
2165         .mkdir          = btrfs_mkdir,
2166         .rmdir          = btrfs_rmdir,
2167         .rename         = btrfs_rename,
2168         .symlink        = btrfs_symlink,
2169         .setattr        = btrfs_setattr,
2170 };
2171
2172 static struct inode_operations btrfs_dir_ro_inode_operations = {
2173         .lookup         = btrfs_lookup,
2174 };
2175
2176 static struct file_operations btrfs_dir_file_operations = {
2177         .llseek         = generic_file_llseek,
2178         .read           = generic_read_dir,
2179         .readdir        = btrfs_readdir,
2180         .ioctl          = btrfs_ioctl,
2181 #ifdef CONFIG_COMPAT
2182         .compat_ioctl   = btrfs_compat_ioctl,
2183 #endif
2184 };
2185
2186 static struct address_space_operations btrfs_aops = {
2187         .readpage       = btrfs_readpage,
2188         .writepage      = btrfs_writepage,
2189         .sync_page      = block_sync_page,
2190         .prepare_write  = btrfs_prepare_write,
2191         .commit_write   = btrfs_commit_write,
2192         .bmap           = btrfs_bmap,
2193 };
2194
2195 static struct address_space_operations btrfs_symlink_aops = {
2196         .readpage       = btrfs_readpage,
2197         .writepage      = btrfs_writepage,
2198 };
2199
2200 static struct inode_operations btrfs_file_inode_operations = {
2201         .truncate       = btrfs_truncate,
2202         .getattr        = btrfs_getattr,
2203         .setattr        = btrfs_setattr,
2204 };
2205
2206 static struct inode_operations btrfs_symlink_inode_operations = {
2207         .readlink       = generic_readlink,
2208         .follow_link    = page_follow_link_light,
2209         .put_link       = page_put_link,
2210 };