]> git.karo-electronics.de Git - mv-sheeva.git/blob - fs/btrfs/inode.c
btrfs: Code cleanup
[mv-sheeva.git] / fs / btrfs / inode.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
17  */
18
19 #include <linux/module.h>
20 #include <linux/buffer_head.h>
21 #include <linux/fs.h>
22 #include <linux/pagemap.h>
23 #include <linux/highmem.h>
24 #include <linux/time.h>
25 #include <linux/init.h>
26 #include <linux/string.h>
27 #include <linux/smp_lock.h>
28 #include <linux/backing-dev.h>
29 #include <linux/mpage.h>
30 #include <linux/swap.h>
31 #include <linux/writeback.h>
32 #include <linux/statfs.h>
33 #include <linux/compat.h>
34 #include "ctree.h"
35 #include "disk-io.h"
36 #include "transaction.h"
37 #include "btrfs_inode.h"
38 #include "ioctl.h"
39 #include "print-tree.h"
40
/*
 * Lookup key passed through iget5_locked() to btrfs_find_actor() and
 * btrfs_init_locked_inode(): an in-memory inode is matched on its
 * inode number together with the tree root it lives in.
 */
struct btrfs_iget_args {
        u64 ino;                        /* matched against / stored in inode->i_ino */
        struct btrfs_root *root;        /* matched against / stored in BTRFS_I(inode)->root */
};
45
/*
 * Tentative definitions of the operation tables, so that the inode setup
 * code below can take their addresses.
 * NOTE(review): the initialised definitions are expected further down in
 * this file; they are not visible from this part of it.
 */
static struct inode_operations btrfs_dir_inode_operations;
static struct inode_operations btrfs_symlink_inode_operations;
static struct inode_operations btrfs_dir_ro_inode_operations;
static struct inode_operations btrfs_file_inode_operations;
static struct address_space_operations btrfs_aops;
static struct address_space_operations btrfs_symlink_aops;
static struct file_operations btrfs_dir_file_operations;
53
/*
 * Slab cache pointers.  btrfs_inode_cachep is private to this file; the
 * other four have external linkage.
 * NOTE(review): where these caches are created and destroyed is not
 * visible from this part of the file.
 */
static struct kmem_cache *btrfs_inode_cachep;
struct kmem_cache *btrfs_trans_handle_cachep;
struct kmem_cache *btrfs_transaction_cachep;
struct kmem_cache *btrfs_bit_radix_cachep;
struct kmem_cache *btrfs_path_cachep;
59
/*
 * Table translating the file-type bits of an inode mode into a
 * BTRFS_FT_* code.  Each entry is placed at (S_IFxxx >> S_SHIFT), so the
 * table is meant to be indexed with (mode & S_IFMT) >> S_SHIFT.
 * Slots without an explicit initialiser are zero.
 * NOTE(review): presumably the BTRFS_FT_* value is what gets stored in
 * directory items -- confirm against the users of this table.
 */
#define S_SHIFT 12
static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
        [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
        [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
        [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
        [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
        [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
        [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
        [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
};
70
71 void btrfs_read_locked_inode(struct inode *inode)
72 {
73         struct btrfs_path *path;
74         struct btrfs_inode_item *inode_item;
75         struct btrfs_root *root = BTRFS_I(inode)->root;
76         struct btrfs_key location;
77         u64 alloc_group_block;
78         int ret;
79
80         path = btrfs_alloc_path();
81         BUG_ON(!path);
82         mutex_lock(&root->fs_info->fs_mutex);
83
84         memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
85         ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
86         if (ret) {
87                 btrfs_free_path(path);
88                 goto make_bad;
89         }
90         inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
91                                   path->slots[0],
92                                   struct btrfs_inode_item);
93
94         inode->i_mode = btrfs_inode_mode(inode_item);
95         inode->i_nlink = btrfs_inode_nlink(inode_item);
96         inode->i_uid = btrfs_inode_uid(inode_item);
97         inode->i_gid = btrfs_inode_gid(inode_item);
98         inode->i_size = btrfs_inode_size(inode_item);
99         inode->i_atime.tv_sec = btrfs_timespec_sec(&inode_item->atime);
100         inode->i_atime.tv_nsec = btrfs_timespec_nsec(&inode_item->atime);
101         inode->i_mtime.tv_sec = btrfs_timespec_sec(&inode_item->mtime);
102         inode->i_mtime.tv_nsec = btrfs_timespec_nsec(&inode_item->mtime);
103         inode->i_ctime.tv_sec = btrfs_timespec_sec(&inode_item->ctime);
104         inode->i_ctime.tv_nsec = btrfs_timespec_nsec(&inode_item->ctime);
105         inode->i_blocks = btrfs_inode_nblocks(inode_item);
106         inode->i_generation = btrfs_inode_generation(inode_item);
107         alloc_group_block = btrfs_inode_block_group(inode_item);
108         BTRFS_I(inode)->block_group = btrfs_lookup_block_group(root->fs_info,
109                                                        alloc_group_block);
110
111         btrfs_free_path(path);
112         inode_item = NULL;
113
114         mutex_unlock(&root->fs_info->fs_mutex);
115
116         switch (inode->i_mode & S_IFMT) {
117 #if 0
118         default:
119                 init_special_inode(inode, inode->i_mode,
120                                    btrfs_inode_rdev(inode_item));
121                 break;
122 #endif
123         case S_IFREG:
124                 inode->i_mapping->a_ops = &btrfs_aops;
125                 inode->i_fop = &btrfs_file_operations;
126                 inode->i_op = &btrfs_file_inode_operations;
127                 break;
128         case S_IFDIR:
129                 inode->i_fop = &btrfs_dir_file_operations;
130                 if (root == root->fs_info->tree_root)
131                         inode->i_op = &btrfs_dir_ro_inode_operations;
132                 else
133                         inode->i_op = &btrfs_dir_inode_operations;
134                 break;
135         case S_IFLNK:
136                 inode->i_op = &btrfs_symlink_inode_operations;
137                 inode->i_mapping->a_ops = &btrfs_symlink_aops;
138                 break;
139         }
140         return;
141
142 make_bad:
143         btrfs_release_path(root, path);
144         btrfs_free_path(path);
145         mutex_unlock(&root->fs_info->fs_mutex);
146         make_bad_inode(inode);
147 }
148
149 static void fill_inode_item(struct btrfs_inode_item *item,
150                             struct inode *inode)
151 {
152         btrfs_set_inode_uid(item, inode->i_uid);
153         btrfs_set_inode_gid(item, inode->i_gid);
154         btrfs_set_inode_size(item, inode->i_size);
155         btrfs_set_inode_mode(item, inode->i_mode);
156         btrfs_set_inode_nlink(item, inode->i_nlink);
157         btrfs_set_timespec_sec(&item->atime, inode->i_atime.tv_sec);
158         btrfs_set_timespec_nsec(&item->atime, inode->i_atime.tv_nsec);
159         btrfs_set_timespec_sec(&item->mtime, inode->i_mtime.tv_sec);
160         btrfs_set_timespec_nsec(&item->mtime, inode->i_mtime.tv_nsec);
161         btrfs_set_timespec_sec(&item->ctime, inode->i_ctime.tv_sec);
162         btrfs_set_timespec_nsec(&item->ctime, inode->i_ctime.tv_nsec);
163         btrfs_set_inode_nblocks(item, inode->i_blocks);
164         btrfs_set_inode_generation(item, inode->i_generation);
165         btrfs_set_inode_block_group(item,
166                                     BTRFS_I(inode)->block_group->key.objectid);
167 }
168
169 static int btrfs_update_inode(struct btrfs_trans_handle *trans,
170                               struct btrfs_root *root,
171                               struct inode *inode)
172 {
173         struct btrfs_inode_item *inode_item;
174         struct btrfs_path *path;
175         int ret;
176
177         path = btrfs_alloc_path();
178         BUG_ON(!path);
179         ret = btrfs_lookup_inode(trans, root, path,
180                                  &BTRFS_I(inode)->location, 1);
181         if (ret) {
182                 if (ret > 0)
183                         ret = -ENOENT;
184                 goto failed;
185         }
186
187         inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
188                                   path->slots[0],
189                                   struct btrfs_inode_item);
190
191         fill_inode_item(inode_item, inode);
192         btrfs_mark_buffer_dirty(path->nodes[0]);
193         ret = 0;
194 failed:
195         btrfs_release_path(root, path);
196         btrfs_free_path(path);
197         return ret;
198 }
199
200
201 static int btrfs_unlink_trans(struct btrfs_trans_handle *trans,
202                               struct btrfs_root *root,
203                               struct inode *dir,
204                               struct dentry *dentry)
205 {
206         struct btrfs_path *path;
207         const char *name = dentry->d_name.name;
208         int name_len = dentry->d_name.len;
209         int ret = 0;
210         u64 objectid;
211         struct btrfs_dir_item *di;
212
213         path = btrfs_alloc_path();
214         BUG_ON(!path);
215         di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino,
216                                     name, name_len, -1);
217         if (IS_ERR(di)) {
218                 ret = PTR_ERR(di);
219                 goto err;
220         }
221         if (!di) {
222                 ret = -ENOENT;
223                 goto err;
224         }
225         objectid = btrfs_disk_key_objectid(&di->location);
226         ret = btrfs_delete_one_dir_name(trans, root, path, di);
227         BUG_ON(ret);
228         btrfs_release_path(root, path);
229
230         di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino,
231                                          objectid, name, name_len, -1);
232         if (IS_ERR(di)) {
233                 ret = PTR_ERR(di);
234                 goto err;
235         }
236         if (!di) {
237                 ret = -ENOENT;
238                 goto err;
239         }
240         ret = btrfs_delete_one_dir_name(trans, root, path, di);
241         BUG_ON(ret);
242
243         dentry->d_inode->i_ctime = dir->i_ctime;
244 err:
245         btrfs_free_path(path);
246         if (!ret) {
247                 dir->i_size -= name_len * 2;
248                 btrfs_update_inode(trans, root, dir);
249                 drop_nlink(dentry->d_inode);
250                 btrfs_update_inode(trans, root, dentry->d_inode);
251                 dir->i_sb->s_dirt = 1;
252         }
253         return ret;
254 }
255
256 static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
257 {
258         struct btrfs_root *root;
259         struct btrfs_trans_handle *trans;
260         int ret;
261
262         root = BTRFS_I(dir)->root;
263         mutex_lock(&root->fs_info->fs_mutex);
264         trans = btrfs_start_transaction(root, 1);
265         btrfs_set_trans_block_group(trans, dir);
266         ret = btrfs_unlink_trans(trans, root, dir, dentry);
267         btrfs_end_transaction(trans, root);
268         mutex_unlock(&root->fs_info->fs_mutex);
269         btrfs_btree_balance_dirty(root);
270         return ret;
271 }
272
273 static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
274 {
275         struct inode *inode = dentry->d_inode;
276         int err;
277         int ret;
278         struct btrfs_root *root = BTRFS_I(dir)->root;
279         struct btrfs_path *path;
280         struct btrfs_key key;
281         struct btrfs_trans_handle *trans;
282         struct btrfs_key found_key;
283         int found_type;
284         struct btrfs_leaf *leaf;
285         char *goodnames = "..";
286
287         path = btrfs_alloc_path();
288         BUG_ON(!path);
289         mutex_lock(&root->fs_info->fs_mutex);
290         trans = btrfs_start_transaction(root, 1);
291         btrfs_set_trans_block_group(trans, dir);
292         key.objectid = inode->i_ino;
293         key.offset = (u64)-1;
294         key.flags = (u32)-1;
295         while(1) {
296                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
297                 if (ret < 0) {
298                         err = ret;
299                         goto out;
300                 }
301                 BUG_ON(ret == 0);
302                 if (path->slots[0] == 0) {
303                         err = -ENOENT;
304                         goto out;
305                 }
306                 path->slots[0]--;
307                 leaf = btrfs_buffer_leaf(path->nodes[0]);
308                 btrfs_disk_key_to_cpu(&found_key,
309                                       &leaf->items[path->slots[0]].key);
310                 found_type = btrfs_key_type(&found_key);
311                 if (found_key.objectid != inode->i_ino) {
312                         err = -ENOENT;
313                         goto out;
314                 }
315                 if ((found_type != BTRFS_DIR_ITEM_KEY &&
316                      found_type != BTRFS_DIR_INDEX_KEY) ||
317                     (!btrfs_match_dir_item_name(root, path, goodnames, 2) &&
318                     !btrfs_match_dir_item_name(root, path, goodnames, 1))) {
319                         err = -ENOTEMPTY;
320                         goto out;
321                 }
322                 ret = btrfs_del_item(trans, root, path);
323                 BUG_ON(ret);
324
325                 if (found_type == BTRFS_DIR_ITEM_KEY && found_key.offset == 1)
326                         break;
327                 btrfs_release_path(root, path);
328         }
329         ret = 0;
330         btrfs_release_path(root, path);
331
332         /* now the directory is empty */
333         err = btrfs_unlink_trans(trans, root, dir, dentry);
334         if (!err) {
335                 inode->i_size = 0;
336         }
337 out:
338         btrfs_release_path(root, path);
339         btrfs_free_path(path);
340         mutex_unlock(&root->fs_info->fs_mutex);
341         ret = btrfs_end_transaction(trans, root);
342         btrfs_btree_balance_dirty(root);
343         if (ret && !err)
344                 err = ret;
345         return err;
346 }
347
348 static int btrfs_free_inode(struct btrfs_trans_handle *trans,
349                             struct btrfs_root *root,
350                             struct inode *inode)
351 {
352         struct btrfs_path *path;
353         int ret;
354
355         clear_inode(inode);
356
357         path = btrfs_alloc_path();
358         BUG_ON(!path);
359         ret = btrfs_lookup_inode(trans, root, path,
360                                  &BTRFS_I(inode)->location, -1);
361         BUG_ON(ret);
362         ret = btrfs_del_item(trans, root, path);
363         BUG_ON(ret);
364         btrfs_free_path(path);
365         return ret;
366 }
367
368 /*
369  * truncates go from a high offset to a low offset.  So, walk
370  * from hi to lo in the node and issue readas.  Stop when you find
371  * keys from a different objectid
372  */
373 static void reada_truncate(struct btrfs_root *root, struct btrfs_path *path,
374                            u64 objectid)
375 {
376         struct btrfs_node *node;
377         int i;
378         int nritems;
379         u64 item_objectid;
380         u64 blocknr;
381         int slot;
382         int ret;
383
384         if (!path->nodes[1])
385                 return;
386         node = btrfs_buffer_node(path->nodes[1]);
387         slot = path->slots[1];
388         if (slot == 0)
389                 return;
390         nritems = btrfs_header_nritems(&node->header);
391         for (i = slot - 1; i >= 0; i--) {
392                 item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key);
393                 if (item_objectid != objectid)
394                         break;
395                 blocknr = btrfs_node_blockptr(node, i);
396                 ret = readahead_tree_block(root, blocknr);
397                 if (ret)
398                         break;
399         }
400 }
401
402 /*
403  * this can truncate away extent items, csum items and directory items.
404  * It starts at a high offset and removes keys until it can't find
405  * any higher than i_size.
406  *
407  * csum items that cross the new i_size are truncated to the new size
408  * as well.
409  */
410 static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans,
411                                    struct btrfs_root *root,
412                                    struct inode *inode)
413 {
414         int ret;
415         struct btrfs_path *path;
416         struct btrfs_key key;
417         struct btrfs_disk_key *found_key;
418         u32 found_type;
419         struct btrfs_leaf *leaf;
420         struct btrfs_file_extent_item *fi;
421         u64 extent_start = 0;
422         u64 extent_num_blocks = 0;
423         u64 item_end = 0;
424         int found_extent;
425         int del_item;
426
427         path = btrfs_alloc_path();
428         BUG_ON(!path);
429         /* FIXME, add redo link to tree so we don't leak on crash */
430         key.objectid = inode->i_ino;
431         key.offset = (u64)-1;
432         key.flags = (u32)-1;
433         while(1) {
434                 btrfs_init_path(path);
435                 fi = NULL;
436                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
437                 if (ret < 0) {
438                         goto error;
439                 }
440                 if (ret > 0) {
441                         BUG_ON(path->slots[0] == 0);
442                         path->slots[0]--;
443                 }
444                 reada_truncate(root, path, inode->i_ino);
445                 leaf = btrfs_buffer_leaf(path->nodes[0]);
446                 found_key = &leaf->items[path->slots[0]].key;
447                 found_type = btrfs_disk_key_type(found_key);
448
449                 if (btrfs_disk_key_objectid(found_key) != inode->i_ino)
450                         break;
451                 if (found_type != BTRFS_CSUM_ITEM_KEY &&
452                     found_type != BTRFS_DIR_ITEM_KEY &&
453                     found_type != BTRFS_DIR_INDEX_KEY &&
454                     found_type != BTRFS_EXTENT_DATA_KEY)
455                         break;
456
457                 item_end = btrfs_disk_key_offset(found_key);
458                 if (found_type == BTRFS_EXTENT_DATA_KEY) {
459                         fi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
460                                             path->slots[0],
461                                             struct btrfs_file_extent_item);
462                         if (btrfs_file_extent_type(fi) !=
463                             BTRFS_FILE_EXTENT_INLINE) {
464                                 item_end += btrfs_file_extent_num_blocks(fi) <<
465                                                 inode->i_blkbits;
466                         }
467                 }
468                 if (found_type == BTRFS_CSUM_ITEM_KEY) {
469                         ret = btrfs_csum_truncate(trans, root, path,
470                                                   inode->i_size);
471                         BUG_ON(ret);
472                 }
473                 if (item_end < inode->i_size) {
474                         if (found_type) {
475                                 btrfs_set_key_type(&key, found_type - 1);
476                                 continue;
477                         }
478                         break;
479                 }
480                 if (btrfs_disk_key_offset(found_key) >= inode->i_size)
481                         del_item = 1;
482                 else
483                         del_item = 0;
484                 found_extent = 0;
485
486                 /* FIXME, shrink the extent if the ref count is only 1 */
487                 if (found_type == BTRFS_EXTENT_DATA_KEY &&
488                            btrfs_file_extent_type(fi) !=
489                            BTRFS_FILE_EXTENT_INLINE) {
490                         u64 num_dec;
491                         if (!del_item) {
492                                 u64 orig_num_blocks =
493                                         btrfs_file_extent_num_blocks(fi);
494                                 extent_num_blocks = inode->i_size -
495                                         btrfs_disk_key_offset(found_key) +
496                                         root->blocksize - 1;
497                                 extent_num_blocks >>= inode->i_blkbits;
498                                 btrfs_set_file_extent_num_blocks(fi,
499                                                          extent_num_blocks);
500                                 inode->i_blocks -= (orig_num_blocks -
501                                         extent_num_blocks) << 3;
502                                 mark_buffer_dirty(path->nodes[0]);
503                         } else {
504                                 extent_start =
505                                         btrfs_file_extent_disk_blocknr(fi);
506                                 extent_num_blocks =
507                                         btrfs_file_extent_disk_num_blocks(fi);
508                                 /* FIXME blocksize != 4096 */
509                                 num_dec = btrfs_file_extent_num_blocks(fi) << 3;
510                                 if (extent_start != 0) {
511                                         found_extent = 1;
512                                         inode->i_blocks -= num_dec;
513                                 }
514                         }
515                 }
516                 if (del_item) {
517                         ret = btrfs_del_item(trans, root, path);
518                         BUG_ON(ret);
519                 } else {
520                         break;
521                 }
522                 btrfs_release_path(root, path);
523                 if (found_extent) {
524                         ret = btrfs_free_extent(trans, root, extent_start,
525                                                 extent_num_blocks, 0);
526                         BUG_ON(ret);
527                 }
528         }
529         ret = 0;
530 error:
531         btrfs_release_path(root, path);
532         btrfs_free_path(path);
533         inode->i_sb->s_dirt = 1;
534         return ret;
535 }
536
537 /*
538  * taken from block_truncate_page, but does cow as it zeros out
539  * any bytes left in the last page in the file.
540  */
541 static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
542 {
543         struct inode *inode = mapping->host;
544         unsigned blocksize = 1 << inode->i_blkbits;
545         pgoff_t index = from >> PAGE_CACHE_SHIFT;
546         unsigned offset = from & (PAGE_CACHE_SIZE-1);
547         struct page *page;
548         char *kaddr;
549         int ret = 0;
550         struct btrfs_root *root = BTRFS_I(inode)->root;
551         u64 alloc_hint = 0;
552         struct btrfs_key ins;
553         struct btrfs_trans_handle *trans;
554
555         if ((offset & (blocksize - 1)) == 0)
556                 goto out;
557
558         ret = -ENOMEM;
559         page = grab_cache_page(mapping, index);
560         if (!page)
561                 goto out;
562
563         if (!PageUptodate(page)) {
564                 ret = mpage_readpage(page, btrfs_get_block);
565                 lock_page(page);
566                 if (!PageUptodate(page)) {
567                         ret = -EIO;
568                         goto out;
569                 }
570         }
571         mutex_lock(&root->fs_info->fs_mutex);
572         trans = btrfs_start_transaction(root, 1);
573         btrfs_set_trans_block_group(trans, inode);
574
575         ret = btrfs_drop_extents(trans, root, inode,
576                                  page->index << PAGE_CACHE_SHIFT,
577                                  (page->index + 1) << PAGE_CACHE_SHIFT,
578                                  &alloc_hint);
579         BUG_ON(ret);
580         ret = btrfs_alloc_extent(trans, root, inode->i_ino, 1,
581                                  alloc_hint, (u64)-1, &ins, 1);
582         BUG_ON(ret);
583         ret = btrfs_insert_file_extent(trans, root, inode->i_ino,
584                                        page->index << PAGE_CACHE_SHIFT,
585                                        ins.objectid, 1, 1);
586         BUG_ON(ret);
587         SetPageChecked(page);
588         kaddr = kmap(page);
589         memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
590         flush_dcache_page(page);
591         btrfs_csum_file_block(trans, root, inode->i_ino,
592                               page->index << PAGE_CACHE_SHIFT,
593                               kaddr, PAGE_CACHE_SIZE);
594         kunmap(page);
595         btrfs_end_transaction(trans, root);
596         mutex_unlock(&root->fs_info->fs_mutex);
597
598         set_page_dirty(page);
599         unlock_page(page);
600         page_cache_release(page);
601 out:
602         return ret;
603 }
604
605 static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
606 {
607         struct inode *inode = dentry->d_inode;
608         int err;
609
610         err = inode_change_ok(inode, attr);
611         if (err)
612                 return err;
613
614         if (S_ISREG(inode->i_mode) &&
615             attr->ia_valid & ATTR_SIZE && attr->ia_size > inode->i_size) {
616                 struct btrfs_trans_handle *trans;
617                 struct btrfs_root *root = BTRFS_I(inode)->root;
618                 u64 mask = root->blocksize - 1;
619                 u64 pos = (inode->i_size + mask) & ~mask;
620                 u64 hole_size;
621
622                 if (attr->ia_size <= pos)
623                         goto out;
624
625                 btrfs_truncate_page(inode->i_mapping, inode->i_size);
626
627                 hole_size = (attr->ia_size - pos + mask) & ~mask;
628                 hole_size >>= inode->i_blkbits;
629
630                 mutex_lock(&root->fs_info->fs_mutex);
631                 trans = btrfs_start_transaction(root, 1);
632                 btrfs_set_trans_block_group(trans, inode);
633                 err = btrfs_insert_file_extent(trans, root, inode->i_ino,
634                                                pos, 0, 0, hole_size);
635                 BUG_ON(err);
636                 btrfs_end_transaction(trans, root);
637                 mutex_unlock(&root->fs_info->fs_mutex);
638         }
639 out:
640         err = inode_setattr(inode, attr);
641
642         return err;
643 }
644 void btrfs_delete_inode(struct inode *inode)
645 {
646         struct btrfs_trans_handle *trans;
647         struct btrfs_root *root = BTRFS_I(inode)->root;
648         int ret;
649
650         truncate_inode_pages(&inode->i_data, 0);
651         if (is_bad_inode(inode)) {
652                 goto no_delete;
653         }
654         inode->i_size = 0;
655         mutex_lock(&root->fs_info->fs_mutex);
656         trans = btrfs_start_transaction(root, 1);
657         btrfs_set_trans_block_group(trans, inode);
658         ret = btrfs_truncate_in_trans(trans, root, inode);
659         BUG_ON(ret);
660         btrfs_free_inode(trans, root, inode);
661         btrfs_end_transaction(trans, root);
662         mutex_unlock(&root->fs_info->fs_mutex);
663         btrfs_btree_balance_dirty(root);
664         return;
665 no_delete:
666         clear_inode(inode);
667 }
668
669 /*
670  * this returns the key found in the dir entry in the location pointer.
671  * If no dir entries were found, location->objectid is 0.
672  */
673 static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
674                                struct btrfs_key *location)
675 {
676         const char *name = dentry->d_name.name;
677         int namelen = dentry->d_name.len;
678         struct btrfs_dir_item *di;
679         struct btrfs_path *path;
680         struct btrfs_root *root = BTRFS_I(dir)->root;
681         int ret;
682
683         path = btrfs_alloc_path();
684         BUG_ON(!path);
685         di = btrfs_lookup_dir_item(NULL, root, path, dir->i_ino, name,
686                                     namelen, 0);
687         if (!di || IS_ERR(di)) {
688                 location->objectid = 0;
689                 ret = 0;
690                 goto out;
691         }
692         btrfs_disk_key_to_cpu(location, &di->location);
693 out:
694         btrfs_release_path(root, path);
695         btrfs_free_path(path);
696         return ret;
697 }
698
699 /*
700  * when we hit a tree root in a directory, the btrfs part of the inode
701  * needs to be changed to reflect the root directory of the tree root.  This
702  * is kind of like crossing a mount point.
703  */
704 static int fixup_tree_root_location(struct btrfs_root *root,
705                              struct btrfs_key *location,
706                              struct btrfs_root **sub_root)
707 {
708         struct btrfs_path *path;
709         struct btrfs_root_item *ri;
710
711         if (btrfs_key_type(location) != BTRFS_ROOT_ITEM_KEY)
712                 return 0;
713         if (location->objectid == BTRFS_ROOT_TREE_OBJECTID)
714                 return 0;
715
716         path = btrfs_alloc_path();
717         BUG_ON(!path);
718         mutex_lock(&root->fs_info->fs_mutex);
719
720         *sub_root = btrfs_read_fs_root(root->fs_info, location);
721         if (IS_ERR(*sub_root))
722                 return PTR_ERR(*sub_root);
723
724         ri = &(*sub_root)->root_item;
725         location->objectid = btrfs_root_dirid(ri);
726         location->flags = 0;
727         btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
728         location->offset = 0;
729
730         btrfs_free_path(path);
731         mutex_unlock(&root->fs_info->fs_mutex);
732         return 0;
733 }
734
735 static int btrfs_init_locked_inode(struct inode *inode, void *p)
736 {
737         struct btrfs_iget_args *args = p;
738         inode->i_ino = args->ino;
739         BTRFS_I(inode)->root = args->root;
740         return 0;
741 }
742
743 static int btrfs_find_actor(struct inode *inode, void *opaque)
744 {
745         struct btrfs_iget_args *args = opaque;
746         return (args->ino == inode->i_ino &&
747                 args->root == BTRFS_I(inode)->root);
748 }
749
750 struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid,
751                                 struct btrfs_root *root)
752 {
753         struct inode *inode;
754         struct btrfs_iget_args args;
755         args.ino = objectid;
756         args.root = root;
757
758         inode = iget5_locked(s, objectid, btrfs_find_actor,
759                              btrfs_init_locked_inode,
760                              (void *)&args);
761         return inode;
762 }
763
764 static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
765                                    struct nameidata *nd)
766 {
767         struct inode * inode;
768         struct btrfs_inode *bi = BTRFS_I(dir);
769         struct btrfs_root *root = bi->root;
770         struct btrfs_root *sub_root = root;
771         struct btrfs_key location;
772         int ret;
773
774         if (dentry->d_name.len > BTRFS_NAME_LEN)
775                 return ERR_PTR(-ENAMETOOLONG);
776         mutex_lock(&root->fs_info->fs_mutex);
777         ret = btrfs_inode_by_name(dir, dentry, &location);
778         mutex_unlock(&root->fs_info->fs_mutex);
779         if (ret < 0)
780                 return ERR_PTR(ret);
781         inode = NULL;
782         if (location.objectid) {
783                 ret = fixup_tree_root_location(root, &location, &sub_root);
784                 if (ret < 0)
785                         return ERR_PTR(ret);
786                 if (ret > 0)
787                         return ERR_PTR(-ENOENT);
788                 inode = btrfs_iget_locked(dir->i_sb, location.objectid,
789                                           sub_root);
790                 if (!inode)
791                         return ERR_PTR(-EACCES);
792                 if (inode->i_state & I_NEW) {
793                         /* the inode and parent dir are two different roots */
794                         if (sub_root != root) {
795                                 igrab(inode);
796                                 sub_root->inode = inode;
797                         }
798                         BTRFS_I(inode)->root = sub_root;
799                         memcpy(&BTRFS_I(inode)->location, &location,
800                                sizeof(location));
801                         btrfs_read_locked_inode(inode);
802                         unlock_new_inode(inode);
803                 }
804         }
805         return d_splice_alias(inode, dentry);
806 }
807
808 /*
809  * readahead one full node of leaves as long as their keys include
810  * the objectid supplied
811  */
812 static void reada_leaves(struct btrfs_root *root, struct btrfs_path *path,
813                          u64 objectid)
814 {
815         struct btrfs_node *node;
816         int i;
817         u32 nritems;
818         u64 item_objectid;
819         u64 blocknr;
820         int slot;
821         int ret;
822
823         if (!path->nodes[1])
824                 return;
825         node = btrfs_buffer_node(path->nodes[1]);
826         slot = path->slots[1];
827         nritems = btrfs_header_nritems(&node->header);
828         for (i = slot + 1; i < nritems; i++) {
829                 item_objectid = btrfs_disk_key_objectid(&node->ptrs[i].key);
830                 if (item_objectid != objectid)
831                         break;
832                 blocknr = btrfs_node_blockptr(node, i);
833                 ret = readahead_tree_block(root, blocknr);
834                 if (ret)
835                         break;
836         }
837 }
838 static unsigned char btrfs_filetype_table[] = {
839         DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
840 };
841
842 static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
843 {
844         struct inode *inode = filp->f_path.dentry->d_inode;
845         struct btrfs_root *root = BTRFS_I(inode)->root;
846         struct btrfs_item *item;
847         struct btrfs_dir_item *di;
848         struct btrfs_key key;
849         struct btrfs_path *path;
850         int ret;
851         u32 nritems;
852         struct btrfs_leaf *leaf;
853         int slot;
854         int advance;
855         unsigned char d_type;
856         int over = 0;
857         u32 di_cur;
858         u32 di_total;
859         u32 di_len;
860         int key_type = BTRFS_DIR_INDEX_KEY;
861
862         /* FIXME, use a real flag for deciding about the key type */
863         if (root->fs_info->tree_root == root)
864                 key_type = BTRFS_DIR_ITEM_KEY;
865         mutex_lock(&root->fs_info->fs_mutex);
866         key.objectid = inode->i_ino;
867         key.flags = 0;
868         btrfs_set_key_type(&key, key_type);
869         key.offset = filp->f_pos;
870         path = btrfs_alloc_path();
871         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
872         if (ret < 0)
873                 goto err;
874         advance = 0;
875         reada_leaves(root, path, inode->i_ino);
876         while(1) {
877                 leaf = btrfs_buffer_leaf(path->nodes[0]);
878                 nritems = btrfs_header_nritems(&leaf->header);
879                 slot = path->slots[0];
880                 if (advance || slot >= nritems) {
881                         if (slot >= nritems -1) {
882                                 reada_leaves(root, path, inode->i_ino);
883                                 ret = btrfs_next_leaf(root, path);
884                                 if (ret)
885                                         break;
886                                 leaf = btrfs_buffer_leaf(path->nodes[0]);
887                                 nritems = btrfs_header_nritems(&leaf->header);
888                                 slot = path->slots[0];
889                         } else {
890                                 slot++;
891                                 path->slots[0]++;
892                         }
893                 }
894                 advance = 1;
895                 item = leaf->items + slot;
896                 if (btrfs_disk_key_objectid(&item->key) != key.objectid)
897                         break;
898                 if (btrfs_disk_key_type(&item->key) != key_type)
899                         break;
900                 if (btrfs_disk_key_offset(&item->key) < filp->f_pos)
901                         continue;
902                 filp->f_pos = btrfs_disk_key_offset(&item->key);
903                 advance = 1;
904                 di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
905                 di_cur = 0;
906                 di_total = btrfs_item_size(leaf->items + slot);
907                 while(di_cur < di_total) {
908                         d_type = btrfs_filetype_table[btrfs_dir_type(di)];
909                         over = filldir(dirent, (const char *)(di + 1),
910                                        btrfs_dir_name_len(di),
911                                        btrfs_disk_key_offset(&item->key),
912                                        btrfs_disk_key_objectid(&di->location),
913                                        d_type);
914                         if (over)
915                                 goto nopos;
916                         di_len = btrfs_dir_name_len(di) + sizeof(*di);
917                         di_cur += di_len;
918                         di = (struct btrfs_dir_item *)((char *)di + di_len);
919                 }
920         }
921         filp->f_pos++;
922 nopos:
923         ret = 0;
924 err:
925         btrfs_release_path(root, path);
926         btrfs_free_path(path);
927         mutex_unlock(&root->fs_info->fs_mutex);
928         return ret;
929 }
930
931 int btrfs_write_inode(struct inode *inode, int wait)
932 {
933         struct btrfs_root *root = BTRFS_I(inode)->root;
934         struct btrfs_trans_handle *trans;
935         int ret = 0;
936
937         if (wait) {
938                 mutex_lock(&root->fs_info->fs_mutex);
939                 trans = btrfs_start_transaction(root, 1);
940                 btrfs_set_trans_block_group(trans, inode);
941                 ret = btrfs_commit_transaction(trans, root);
942                 mutex_unlock(&root->fs_info->fs_mutex);
943         }
944         return ret;
945 }
946
947 /*
948  * This is somewhat expense, updating the tree every time the
949  * inode changes.  But, it is most likely to find the inode in cache.
950  * FIXME, needs more benchmarking...there are no reasons other than performance
951  * to keep or drop this code.
952  */
953 void btrfs_dirty_inode(struct inode *inode)
954 {
955         struct btrfs_root *root = BTRFS_I(inode)->root;
956         struct btrfs_trans_handle *trans;
957
958         mutex_lock(&root->fs_info->fs_mutex);
959         trans = btrfs_start_transaction(root, 1);
960         btrfs_set_trans_block_group(trans, inode);
961         btrfs_update_inode(trans, root, inode);
962         btrfs_end_transaction(trans, root);
963         mutex_unlock(&root->fs_info->fs_mutex);
964         btrfs_btree_balance_dirty(root);
965 }
966
967 static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
968                                      struct btrfs_root *root,
969                                      u64 objectid,
970                                      struct btrfs_block_group_cache *group,
971                                      int mode)
972 {
973         struct inode *inode;
974         struct btrfs_inode_item inode_item;
975         struct btrfs_key *location;
976         int ret;
977         int owner;
978
979         inode = new_inode(root->fs_info->sb);
980         if (!inode)
981                 return ERR_PTR(-ENOMEM);
982
983         BTRFS_I(inode)->root = root;
984         if (mode & S_IFDIR)
985                 owner = 0;
986         else
987                 owner = 1;
988         group = btrfs_find_block_group(root, group, 0, 0, owner);
989         BTRFS_I(inode)->block_group = group;
990
991         inode->i_uid = current->fsuid;
992         inode->i_gid = current->fsgid;
993         inode->i_mode = mode;
994         inode->i_ino = objectid;
995         inode->i_blocks = 0;
996         inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
997         fill_inode_item(&inode_item, inode);
998         location = &BTRFS_I(inode)->location;
999         location->objectid = objectid;
1000         location->flags = 0;
1001         location->offset = 0;
1002         btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
1003
1004         ret = btrfs_insert_inode(trans, root, objectid, &inode_item);
1005         BUG_ON(ret);
1006
1007         insert_inode_hash(inode);
1008         return inode;
1009 }
1010
1011 static inline u8 btrfs_inode_type(struct inode *inode)
1012 {
1013         return btrfs_type_by_mode[(inode->i_mode & S_IFMT) >> S_SHIFT];
1014 }
1015
1016 static int btrfs_add_link(struct btrfs_trans_handle *trans,
1017                             struct dentry *dentry, struct inode *inode)
1018 {
1019         int ret;
1020         struct btrfs_key key;
1021         struct btrfs_root *root = BTRFS_I(dentry->d_parent->d_inode)->root;
1022         key.objectid = inode->i_ino;
1023         key.flags = 0;
1024         btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
1025         key.offset = 0;
1026
1027         ret = btrfs_insert_dir_item(trans, root,
1028                                     dentry->d_name.name, dentry->d_name.len,
1029                                     dentry->d_parent->d_inode->i_ino,
1030                                     &key, btrfs_inode_type(inode));
1031         if (ret == 0) {
1032                 dentry->d_parent->d_inode->i_size += dentry->d_name.len * 2;
1033                 ret = btrfs_update_inode(trans, root,
1034                                          dentry->d_parent->d_inode);
1035         }
1036         return ret;
1037 }
1038
1039 static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
1040                             struct dentry *dentry, struct inode *inode)
1041 {
1042         int err = btrfs_add_link(trans, dentry, inode);
1043         if (!err) {
1044                 d_instantiate(dentry, inode);
1045                 return 0;
1046         }
1047         if (err > 0)
1048                 err = -EEXIST;
1049         return err;
1050 }
1051
1052 static int btrfs_create(struct inode *dir, struct dentry *dentry,
1053                         int mode, struct nameidata *nd)
1054 {
1055         struct btrfs_trans_handle *trans;
1056         struct btrfs_root *root = BTRFS_I(dir)->root;
1057         struct inode *inode;
1058         int err;
1059         int drop_inode = 0;
1060         u64 objectid;
1061
1062         mutex_lock(&root->fs_info->fs_mutex);
1063         trans = btrfs_start_transaction(root, 1);
1064         btrfs_set_trans_block_group(trans, dir);
1065
1066         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
1067         if (err) {
1068                 err = -ENOSPC;
1069                 goto out_unlock;
1070         }
1071
1072         inode = btrfs_new_inode(trans, root, objectid,
1073                                 BTRFS_I(dir)->block_group, mode);
1074         err = PTR_ERR(inode);
1075         if (IS_ERR(inode))
1076                 goto out_unlock;
1077
1078         btrfs_set_trans_block_group(trans, inode);
1079         err = btrfs_add_nondir(trans, dentry, inode);
1080         if (err)
1081                 drop_inode = 1;
1082         else {
1083                 inode->i_mapping->a_ops = &btrfs_aops;
1084                 inode->i_fop = &btrfs_file_operations;
1085                 inode->i_op = &btrfs_file_inode_operations;
1086         }
1087         dir->i_sb->s_dirt = 1;
1088         btrfs_update_inode_block_group(trans, inode);
1089         btrfs_update_inode_block_group(trans, dir);
1090 out_unlock:
1091         btrfs_end_transaction(trans, root);
1092         mutex_unlock(&root->fs_info->fs_mutex);
1093
1094         if (drop_inode) {
1095                 inode_dec_link_count(inode);
1096                 iput(inode);
1097         }
1098         btrfs_btree_balance_dirty(root);
1099         return err;
1100 }
1101
1102 static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
1103                       struct dentry *dentry)
1104 {
1105         struct btrfs_trans_handle *trans;
1106         struct btrfs_root *root = BTRFS_I(dir)->root;
1107         struct inode *inode = old_dentry->d_inode;
1108         int err;
1109         int drop_inode = 0;
1110
1111         if (inode->i_nlink == 0)
1112                 return -ENOENT;
1113
1114         inc_nlink(inode);
1115         mutex_lock(&root->fs_info->fs_mutex);
1116         trans = btrfs_start_transaction(root, 1);
1117         btrfs_set_trans_block_group(trans, dir);
1118         atomic_inc(&inode->i_count);
1119         err = btrfs_add_nondir(trans, dentry, inode);
1120         if (err)
1121                 drop_inode = 1;
1122         dir->i_sb->s_dirt = 1;
1123         btrfs_update_inode_block_group(trans, dir);
1124         btrfs_update_inode(trans, root, inode);
1125
1126         btrfs_end_transaction(trans, root);
1127         mutex_unlock(&root->fs_info->fs_mutex);
1128
1129         if (drop_inode) {
1130                 inode_dec_link_count(inode);
1131                 iput(inode);
1132         }
1133         btrfs_btree_balance_dirty(root);
1134         return err;
1135 }
1136
1137 static int btrfs_make_empty_dir(struct btrfs_trans_handle *trans,
1138                                 struct btrfs_root *root,
1139                                 u64 objectid, u64 dirid)
1140 {
1141         int ret;
1142         char buf[2];
1143         struct btrfs_key key;
1144
1145         buf[0] = '.';
1146         buf[1] = '.';
1147
1148         key.objectid = objectid;
1149         key.offset = 0;
1150         key.flags = 0;
1151         btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
1152
1153         ret = btrfs_insert_dir_item(trans, root, buf, 1, objectid,
1154                                     &key, BTRFS_FT_DIR);
1155         if (ret)
1156                 goto error;
1157         key.objectid = dirid;
1158         ret = btrfs_insert_dir_item(trans, root, buf, 2, objectid,
1159                                     &key, BTRFS_FT_DIR);
1160         if (ret)
1161                 goto error;
1162 error:
1163         return ret;
1164 }
1165
1166 static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1167 {
1168         struct inode *inode;
1169         struct btrfs_trans_handle *trans;
1170         struct btrfs_root *root = BTRFS_I(dir)->root;
1171         int err = 0;
1172         int drop_on_err = 0;
1173         u64 objectid;
1174
1175         mutex_lock(&root->fs_info->fs_mutex);
1176         trans = btrfs_start_transaction(root, 1);
1177         btrfs_set_trans_block_group(trans, dir);
1178         if (IS_ERR(trans)) {
1179                 err = PTR_ERR(trans);
1180                 goto out_unlock;
1181         }
1182
1183         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
1184         if (err) {
1185                 err = -ENOSPC;
1186                 goto out_unlock;
1187         }
1188
1189         inode = btrfs_new_inode(trans, root, objectid,
1190                                 BTRFS_I(dir)->block_group, S_IFDIR | mode);
1191         if (IS_ERR(inode)) {
1192                 err = PTR_ERR(inode);
1193                 goto out_fail;
1194         }
1195         drop_on_err = 1;
1196         inode->i_op = &btrfs_dir_inode_operations;
1197         inode->i_fop = &btrfs_dir_file_operations;
1198         btrfs_set_trans_block_group(trans, inode);
1199
1200         err = btrfs_make_empty_dir(trans, root, inode->i_ino, dir->i_ino);
1201         if (err)
1202                 goto out_fail;
1203
1204         inode->i_size = 6;
1205         err = btrfs_update_inode(trans, root, inode);
1206         if (err)
1207                 goto out_fail;
1208         err = btrfs_add_link(trans, dentry, inode);
1209         if (err)
1210                 goto out_fail;
1211         d_instantiate(dentry, inode);
1212         drop_on_err = 0;
1213         dir->i_sb->s_dirt = 1;
1214         btrfs_update_inode_block_group(trans, inode);
1215         btrfs_update_inode_block_group(trans, dir);
1216
1217 out_fail:
1218         btrfs_end_transaction(trans, root);
1219 out_unlock:
1220         mutex_unlock(&root->fs_info->fs_mutex);
1221         if (drop_on_err)
1222                 iput(inode);
1223         btrfs_btree_balance_dirty(root);
1224         return err;
1225 }
1226
1227 /*
1228  * FIBMAP and others want to pass in a fake buffer head.  They need to
1229  * use BTRFS_GET_BLOCK_NO_DIRECT to make sure we don't try to memcpy
1230  * any packed file data into the fake bh
1231  */
1232 #define BTRFS_GET_BLOCK_NO_CREATE 0
1233 #define BTRFS_GET_BLOCK_CREATE 1
1234 #define BTRFS_GET_BLOCK_NO_DIRECT 2
1235
1236 /*
1237  * FIXME create==1 doe not work.
1238  */
1239 static int btrfs_get_block_lock(struct inode *inode, sector_t iblock,
1240                                 struct buffer_head *result, int create)
1241 {
1242         int ret;
1243         int err = 0;
1244         u64 blocknr;
1245         u64 extent_start = 0;
1246         u64 extent_end = 0;
1247         u64 objectid = inode->i_ino;
1248         u32 found_type;
1249         u64 alloc_hint = 0;
1250         struct btrfs_path *path;
1251         struct btrfs_root *root = BTRFS_I(inode)->root;
1252         struct btrfs_file_extent_item *item;
1253         struct btrfs_leaf *leaf;
1254         struct btrfs_disk_key *found_key;
1255         struct btrfs_trans_handle *trans = NULL;
1256
1257         path = btrfs_alloc_path();
1258         BUG_ON(!path);
1259         if (create & BTRFS_GET_BLOCK_CREATE) {
1260                 WARN_ON(1);
1261                 /* this almost but not quite works */
1262                 trans = btrfs_start_transaction(root, 1);
1263                 if (!trans) {
1264                         err = -ENOMEM;
1265                         goto out;
1266                 }
1267                 ret = btrfs_drop_extents(trans, root, inode,
1268                                          iblock << inode->i_blkbits,
1269                                          (iblock + 1) << inode->i_blkbits,
1270                                          &alloc_hint);
1271                 BUG_ON(ret);
1272         }
1273
1274         ret = btrfs_lookup_file_extent(NULL, root, path,
1275                                        objectid,
1276                                        iblock << inode->i_blkbits, 0);
1277         if (ret < 0) {
1278                 err = ret;
1279                 goto out;
1280         }
1281
1282         if (ret != 0) {
1283                 if (path->slots[0] == 0) {
1284                         btrfs_release_path(root, path);
1285                         goto not_found;
1286                 }
1287                 path->slots[0]--;
1288         }
1289
1290         item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0],
1291                               struct btrfs_file_extent_item);
1292         leaf = btrfs_buffer_leaf(path->nodes[0]);
1293         blocknr = btrfs_file_extent_disk_blocknr(item);
1294         blocknr += btrfs_file_extent_offset(item);
1295
1296         /* are we inside the extent that was found? */
1297         found_key = &leaf->items[path->slots[0]].key;
1298         found_type = btrfs_disk_key_type(found_key);
1299         if (btrfs_disk_key_objectid(found_key) != objectid ||
1300             found_type != BTRFS_EXTENT_DATA_KEY) {
1301                 extent_end = 0;
1302                 extent_start = 0;
1303                 goto not_found;
1304         }
1305         found_type = btrfs_file_extent_type(item);
1306         extent_start = btrfs_disk_key_offset(&leaf->items[path->slots[0]].key);
1307         if (found_type == BTRFS_FILE_EXTENT_REG) {
1308                 extent_start = extent_start >> inode->i_blkbits;
1309                 extent_end = extent_start + btrfs_file_extent_num_blocks(item);
1310                 err = 0;
1311                 if (btrfs_file_extent_disk_blocknr(item) == 0)
1312                         goto out;
1313                 if (iblock >= extent_start && iblock < extent_end) {
1314                         btrfs_map_bh_to_logical(root, result, blocknr +
1315                                                 iblock - extent_start);
1316                         goto out;
1317                 }
1318         } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
1319                 char *ptr;
1320                 char *map;
1321                 u32 size;
1322
1323                 if (create & BTRFS_GET_BLOCK_NO_DIRECT) {
1324                         err = -EINVAL;
1325                         goto out;
1326                 }
1327                 size = btrfs_file_extent_inline_len(leaf->items +
1328                                                     path->slots[0]);
1329                 extent_end = (extent_start + size) >> inode->i_blkbits;
1330                 extent_start >>= inode->i_blkbits;
1331                 if (iblock < extent_start || iblock > extent_end) {
1332                         goto not_found;
1333                 }
1334                 ptr = btrfs_file_extent_inline_start(item);
1335                 map = kmap(result->b_page);
1336                 memcpy(map, ptr, size);
1337                 memset(map + size, 0, PAGE_CACHE_SIZE - size);
1338                 flush_dcache_page(result->b_page);
1339                 kunmap(result->b_page);
1340                 set_buffer_uptodate(result);
1341                 SetPageChecked(result->b_page);
1342                 btrfs_map_bh_to_logical(root, result, 0);
1343         }
1344 not_found:
1345         if (create & BTRFS_GET_BLOCK_CREATE) {
1346                 struct btrfs_key ins;
1347                 ret = btrfs_alloc_extent(trans, root, inode->i_ino,
1348                                          1, alloc_hint, (u64)-1,
1349                                          &ins, 1);
1350                 BUG_ON(ret);
1351                 ret = btrfs_insert_file_extent(trans, root, inode->i_ino,
1352                                                iblock << inode->i_blkbits,
1353                                                ins.objectid, ins.offset,
1354                                                ins.offset);
1355                 BUG_ON(ret);
1356                 SetPageChecked(result->b_page);
1357                 btrfs_map_bh_to_logical(root, result, ins.objectid);
1358         }
1359 out:
1360         if (trans)
1361                 err = btrfs_end_transaction(trans, root);
1362         btrfs_free_path(path);
1363         return err;
1364 }
1365
1366 int btrfs_get_block(struct inode *inode, sector_t iblock,
1367                     struct buffer_head *result, int create)
1368 {
1369         int err;
1370         struct btrfs_root *root = BTRFS_I(inode)->root;
1371         mutex_lock(&root->fs_info->fs_mutex);
1372         err = btrfs_get_block_lock(inode, iblock, result, create);
1373         mutex_unlock(&root->fs_info->fs_mutex);
1374         return err;
1375 }
1376
1377 static int btrfs_get_block_bmap(struct inode *inode, sector_t iblock,
1378                            struct buffer_head *result, int create)
1379 {
1380         struct btrfs_root *root = BTRFS_I(inode)->root;
1381         mutex_lock(&root->fs_info->fs_mutex);
1382         btrfs_get_block_lock(inode, iblock, result, BTRFS_GET_BLOCK_NO_DIRECT);
1383         mutex_unlock(&root->fs_info->fs_mutex);
1384         return 0;
1385 }
1386
1387 static sector_t btrfs_bmap(struct address_space *as, sector_t block)
1388 {
1389         return generic_block_bmap(as, block, btrfs_get_block_bmap);
1390 }
1391
1392 static int btrfs_prepare_write(struct file *file, struct page *page,
1393                                unsigned from, unsigned to)
1394 {
1395         return block_prepare_write(page, from, to, btrfs_get_block);
1396 }
1397
1398 static int btrfs_readpage(struct file *file, struct page *page)
1399 {
1400         return mpage_readpage(page, btrfs_get_block);
1401 }
1402
1403 /*
1404  * Aside from a tiny bit of packed file data handling, this is the
1405  * same as the generic code.
1406  *
1407  * While block_write_full_page is writing back the dirty buffers under
1408  * the page lock, whoever dirtied the buffers may decide to clean them
1409  * again at any time.  We handle that by only looking at the buffer
1410  * state inside lock_buffer().
1411  *
1412  * If block_write_full_page() is called for regular writeback
1413  * (wbc->sync_mode == WB_SYNC_NONE) then it will redirty a page which has a
1414  * locked buffer.   This only can happen if someone has written the buffer
1415  * directly, with submit_bh().  At the address_space level PageWriteback
1416  * prevents this contention from occurring.
1417  */
1418 static int __btrfs_write_full_page(struct inode *inode, struct page *page,
1419                                    struct writeback_control *wbc)
1420 {
1421         int err;
1422         sector_t block;
1423         sector_t last_block;
1424         struct buffer_head *bh, *head;
1425         const unsigned blocksize = 1 << inode->i_blkbits;
1426         int nr_underway = 0;
1427
1428         BUG_ON(!PageLocked(page));
1429
1430         last_block = (i_size_read(inode) - 1) >> inode->i_blkbits;
1431
1432         if (!page_has_buffers(page)) {
1433                 create_empty_buffers(page, blocksize,
1434                                         (1 << BH_Dirty)|(1 << BH_Uptodate));
1435         }
1436
1437         /*
1438          * Be very careful.  We have no exclusion from __set_page_dirty_buffers
1439          * here, and the (potentially unmapped) buffers may become dirty at
1440          * any time.  If a buffer becomes dirty here after we've inspected it
1441          * then we just miss that fact, and the page stays dirty.
1442          *
1443          * Buffers outside i_size may be dirtied by __set_page_dirty_buffers;
1444          * handle that here by just cleaning them.
1445          */
1446
1447         block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
1448         head = page_buffers(page);
1449         bh = head;
1450
1451         /*
1452          * Get all the dirty buffers mapped to disk addresses and
1453          * handle any aliases from the underlying blockdev's mapping.
1454          */
1455         do {
1456                 if (block > last_block) {
1457                         /*
1458                          * mapped buffers outside i_size will occur, because
1459                          * this page can be outside i_size when there is a
1460                          * truncate in progress.
1461                          */
1462                         /*
1463                          * The buffer was zeroed by block_write_full_page()
1464                          */
1465                         clear_buffer_dirty(bh);
1466                         set_buffer_uptodate(bh);
1467                 } else if (!buffer_mapped(bh) && buffer_dirty(bh)) {
1468                         WARN_ON(bh->b_size != blocksize);
1469                         err = btrfs_get_block(inode, block, bh, 0);
1470                         if (err) {
1471                                 goto recover;
1472                         }
1473                         if (buffer_new(bh)) {
1474                                 /* blockdev mappings never come here */
1475                                 clear_buffer_new(bh);
1476                         }
1477                 }
1478                 bh = bh->b_this_page;
1479                 block++;
1480         } while (bh != head);
1481
1482         do {
1483                 if (!buffer_mapped(bh))
1484                         continue;
1485                 /*
1486                  * If it's a fully non-blocking write attempt and we cannot
1487                  * lock the buffer then redirty the page.  Note that this can
1488                  * potentially cause a busy-wait loop from pdflush and kswapd
1489                  * activity, but those code paths have their own higher-level
1490                  * throttling.
1491                  */
1492                 if (wbc->sync_mode != WB_SYNC_NONE || !wbc->nonblocking) {
1493                         lock_buffer(bh);
1494                 } else if (test_set_buffer_locked(bh)) {
1495                         redirty_page_for_writepage(wbc, page);
1496                         continue;
1497                 }
1498                 if (test_clear_buffer_dirty(bh) && bh->b_blocknr != 0) {
1499                         mark_buffer_async_write(bh);
1500                 } else {
1501                         unlock_buffer(bh);
1502                 }
1503         } while ((bh = bh->b_this_page) != head);
1504
1505         /*
1506          * The page and its buffers are protected by PageWriteback(), so we can
1507          * drop the bh refcounts early.
1508          */
1509         BUG_ON(PageWriteback(page));
1510         set_page_writeback(page);
1511
1512         do {
1513                 struct buffer_head *next = bh->b_this_page;
1514                 if (buffer_async_write(bh)) {
1515                         submit_bh(WRITE, bh);
1516                         nr_underway++;
1517                 }
1518                 bh = next;
1519         } while (bh != head);
1520         unlock_page(page);
1521
1522         err = 0;
1523 done:
1524         if (nr_underway == 0) {
1525                 /*
1526                  * The page was marked dirty, but the buffers were
1527                  * clean.  Someone wrote them back by hand with
1528                  * ll_rw_block/submit_bh.  A rare case.
1529                  */
1530                 int uptodate = 1;
1531                 do {
1532                         if (!buffer_uptodate(bh)) {
1533                                 uptodate = 0;
1534                                 break;
1535                         }
1536                         bh = bh->b_this_page;
1537                 } while (bh != head);
1538                 if (uptodate)
1539                         SetPageUptodate(page);
1540                 end_page_writeback(page);
1541         }
1542         return err;
1543
1544 recover:
1545         /*
1546          * ENOSPC, or some other error.  We may already have added some
1547          * blocks to the file, so we need to write these out to avoid
1548          * exposing stale data.
1549          * The page is currently locked and not marked for writeback
1550          */
1551         bh = head;
1552         /* Recovery: lock and submit the mapped buffers */
1553         do {
1554                 if (buffer_mapped(bh) && buffer_dirty(bh)) {
1555                         lock_buffer(bh);
1556                         mark_buffer_async_write(bh);
1557                 } else {
1558                         /*
1559                          * The buffer may have been set dirty during
1560                          * attachment to a dirty page.
1561                          */
1562                         clear_buffer_dirty(bh);
1563                 }
1564         } while ((bh = bh->b_this_page) != head);
1565         SetPageError(page);
1566         BUG_ON(PageWriteback(page));
1567         set_page_writeback(page);
1568         do {
1569                 struct buffer_head *next = bh->b_this_page;
1570                 if (buffer_async_write(bh)) {
1571                         clear_buffer_dirty(bh);
1572                         submit_bh(WRITE, bh);
1573                         nr_underway++;
1574                 }
1575                 bh = next;
1576         } while (bh != head);
1577         unlock_page(page);
1578         goto done;
1579 }
1580
1581 static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
1582 {
1583         struct inode * const inode = page->mapping->host;
1584         loff_t i_size = i_size_read(inode);
1585         const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
1586         unsigned offset;
1587         void *kaddr;
1588
1589         /* Is the page fully inside i_size? */
1590         if (page->index < end_index)
1591                 return __btrfs_write_full_page(inode, page, wbc);
1592
1593         /* Is the page fully outside i_size? (truncate in progress) */
1594         offset = i_size & (PAGE_CACHE_SIZE-1);
1595         if (page->index >= end_index+1 || !offset) {
1596                 /*
1597                  * The page may have dirty, unmapped buffers.  For example,
1598                  * they may have been added in ext3_writepage().  Make them
1599                  * freeable here, so the page does not leak.
1600                  */
1601                 block_invalidatepage(page, 0);
1602                 unlock_page(page);
1603                 return 0; /* don't care */
1604         }
1605
1606         /*
1607          * The page straddles i_size.  It must be zeroed out on each and every
1608          * writepage invokation because it may be mmapped.  "A file is mapped
1609          * in multiples of the page size.  For a file that is not a multiple of
1610          * the  page size, the remaining memory is zeroed when mapped, and
1611          * writes to that region are not written out to the file."
1612          */
1613         kaddr = kmap_atomic(page, KM_USER0);
1614         memset(kaddr + offset, 0, PAGE_CACHE_SIZE - offset);
1615         flush_dcache_page(page);
1616         kunmap_atomic(kaddr, KM_USER0);
1617         return __btrfs_write_full_page(inode, page, wbc);
1618 }
1619
1620 static void btrfs_truncate(struct inode *inode)
1621 {
1622         struct btrfs_root *root = BTRFS_I(inode)->root;
1623         int ret;
1624         struct btrfs_trans_handle *trans;
1625
1626         if (!S_ISREG(inode->i_mode))
1627                 return;
1628         if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
1629                 return;
1630
1631         btrfs_truncate_page(inode->i_mapping, inode->i_size);
1632
1633         mutex_lock(&root->fs_info->fs_mutex);
1634         trans = btrfs_start_transaction(root, 1);
1635         btrfs_set_trans_block_group(trans, inode);
1636
1637         /* FIXME, add redo link to tree so we don't leak on crash */
1638         ret = btrfs_truncate_in_trans(trans, root, inode);
1639         BUG_ON(ret);
1640         btrfs_update_inode(trans, root, inode);
1641         ret = btrfs_end_transaction(trans, root);
1642         BUG_ON(ret);
1643         mutex_unlock(&root->fs_info->fs_mutex);
1644         btrfs_btree_balance_dirty(root);
1645 }
1646
1647 int btrfs_commit_write(struct file *file, struct page *page,
1648                        unsigned from, unsigned to)
1649 {
1650         struct inode *inode = page->mapping->host;
1651         struct buffer_head *bh;
1652         loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
1653
1654         SetPageUptodate(page);
1655         bh = page_buffers(page);
1656         set_buffer_uptodate(bh);
1657         if (buffer_mapped(bh) && bh->b_blocknr != 0) {
1658                 set_page_dirty(page);
1659         }
1660         if (pos > inode->i_size) {
1661                 i_size_write(inode, pos);
1662                 mark_inode_dirty(inode);
1663         }
1664         return 0;
1665 }
1666
1667 static int create_subvol(struct btrfs_root *root, char *name, int namelen)
1668 {
1669         struct btrfs_trans_handle *trans;
1670         struct btrfs_key key;
1671         struct btrfs_root_item root_item;
1672         struct btrfs_inode_item *inode_item;
1673         struct buffer_head *subvol;
1674         struct btrfs_leaf *leaf;
1675         struct btrfs_root *new_root;
1676         struct inode *inode;
1677         struct inode *dir;
1678         int ret;
1679         u64 objectid;
1680         u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
1681
1682         mutex_lock(&root->fs_info->fs_mutex);
1683         trans = btrfs_start_transaction(root, 1);
1684         BUG_ON(!trans);
1685
1686         subvol = btrfs_alloc_free_block(trans, root, 0);
1687         if (subvol == NULL)
1688                 return -ENOSPC;
1689         leaf = btrfs_buffer_leaf(subvol);
1690         btrfs_set_header_nritems(&leaf->header, 0);
1691         btrfs_set_header_level(&leaf->header, 0);
1692         btrfs_set_header_blocknr(&leaf->header, bh_blocknr(subvol));
1693         btrfs_set_header_generation(&leaf->header, trans->transid);
1694         btrfs_set_header_owner(&leaf->header, root->root_key.objectid);
1695         memcpy(leaf->header.fsid, root->fs_info->disk_super->fsid,
1696                sizeof(leaf->header.fsid));
1697         mark_buffer_dirty(subvol);
1698
1699         inode_item = &root_item.inode;
1700         memset(inode_item, 0, sizeof(*inode_item));
1701         btrfs_set_inode_generation(inode_item, 1);
1702         btrfs_set_inode_size(inode_item, 3);
1703         btrfs_set_inode_nlink(inode_item, 1);
1704         btrfs_set_inode_nblocks(inode_item, 1);
1705         btrfs_set_inode_mode(inode_item, S_IFDIR | 0755);
1706
1707         btrfs_set_root_blocknr(&root_item, bh_blocknr(subvol));
1708         btrfs_set_root_refs(&root_item, 1);
1709         brelse(subvol);
1710         subvol = NULL;
1711
1712         ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root,
1713                                        0, &objectid);
1714         BUG_ON(ret);
1715
1716         btrfs_set_root_dirid(&root_item, new_dirid);
1717
1718         key.objectid = objectid;
1719         key.offset = 1;
1720         key.flags = 0;
1721         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
1722         ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
1723                                 &root_item);
1724         BUG_ON(ret);
1725
1726         /*
1727          * insert the directory item
1728          */
1729         key.offset = (u64)-1;
1730         dir = root->fs_info->sb->s_root->d_inode;
1731         ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root,
1732                                     name, namelen, dir->i_ino, &key,
1733                                     BTRFS_FT_DIR);
1734         BUG_ON(ret);
1735
1736         ret = btrfs_commit_transaction(trans, root);
1737         BUG_ON(ret);
1738
1739         new_root = btrfs_read_fs_root(root->fs_info, &key);
1740         BUG_ON(!new_root);
1741
1742         trans = btrfs_start_transaction(new_root, 1);
1743         BUG_ON(!trans);
1744
1745         inode = btrfs_new_inode(trans, new_root, new_dirid,
1746                                 BTRFS_I(dir)->block_group, S_IFDIR | 0700);
1747         inode->i_op = &btrfs_dir_inode_operations;
1748         inode->i_fop = &btrfs_dir_file_operations;
1749         new_root->inode = inode;
1750
1751         ret = btrfs_make_empty_dir(trans, new_root, new_dirid, new_dirid);
1752         BUG_ON(ret);
1753
1754         inode->i_nlink = 1;
1755         inode->i_size = 6;
1756         ret = btrfs_update_inode(trans, new_root, inode);
1757         BUG_ON(ret);
1758
1759         ret = btrfs_commit_transaction(trans, new_root);
1760         BUG_ON(ret);
1761
1762         mutex_unlock(&root->fs_info->fs_mutex);
1763         btrfs_btree_balance_dirty(root);
1764         return 0;
1765 }
1766
1767 static int create_snapshot(struct btrfs_root *root, char *name, int namelen)
1768 {
1769         struct btrfs_trans_handle *trans;
1770         struct btrfs_key key;
1771         struct btrfs_root_item new_root_item;
1772         int ret;
1773         u64 objectid;
1774
1775         if (!root->ref_cows)
1776                 return -EINVAL;
1777
1778         mutex_lock(&root->fs_info->fs_mutex);
1779         trans = btrfs_start_transaction(root, 1);
1780         BUG_ON(!trans);
1781
1782         ret = btrfs_update_inode(trans, root, root->inode);
1783         BUG_ON(ret);
1784
1785         ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root,
1786                                        0, &objectid);
1787         BUG_ON(ret);
1788
1789         memcpy(&new_root_item, &root->root_item,
1790                sizeof(new_root_item));
1791
1792         key.objectid = objectid;
1793         key.offset = 1;
1794         key.flags = 0;
1795         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
1796         btrfs_set_root_blocknr(&new_root_item, bh_blocknr(root->node));
1797
1798         ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
1799                                 &new_root_item);
1800         BUG_ON(ret);
1801
1802         /*
1803          * insert the directory item
1804          */
1805         key.offset = (u64)-1;
1806         ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root,
1807                                     name, namelen,
1808                                     root->fs_info->sb->s_root->d_inode->i_ino,
1809                                     &key, BTRFS_FT_DIR);
1810
1811         BUG_ON(ret);
1812
1813         ret = btrfs_inc_root_ref(trans, root);
1814         BUG_ON(ret);
1815
1816         ret = btrfs_commit_transaction(trans, root);
1817         BUG_ON(ret);
1818         mutex_unlock(&root->fs_info->fs_mutex);
1819         btrfs_btree_balance_dirty(root);
1820         return 0;
1821 }
1822
1823 int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int
1824                 cmd, unsigned long arg)
1825 {
1826         struct btrfs_root *root = BTRFS_I(inode)->root;
1827         struct btrfs_ioctl_vol_args vol_args;
1828         int ret = 0;
1829         struct btrfs_dir_item *di;
1830         int namelen;
1831         struct btrfs_path *path;
1832         u64 root_dirid;
1833
1834         switch (cmd) {
1835         case BTRFS_IOC_SNAP_CREATE:
1836                 if (copy_from_user(&vol_args,
1837                                    (struct btrfs_ioctl_vol_args __user *)arg,
1838                                    sizeof(vol_args)))
1839                         return -EFAULT;
1840                 namelen = strlen(vol_args.name);
1841                 if (namelen > BTRFS_VOL_NAME_MAX)
1842                         return -EINVAL;
1843                 if (strchr(vol_args.name, '/'))
1844                         return -EINVAL;
1845                 path = btrfs_alloc_path();
1846                 if (!path)
1847                         return -ENOMEM;
1848                 root_dirid = root->fs_info->sb->s_root->d_inode->i_ino,
1849                 mutex_lock(&root->fs_info->fs_mutex);
1850                 di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root,
1851                                     path, root_dirid,
1852                                     vol_args.name, namelen, 0);
1853                 mutex_unlock(&root->fs_info->fs_mutex);
1854                 btrfs_free_path(path);
1855                 if (di && !IS_ERR(di))
1856                         return -EEXIST;
1857
1858                 if (root == root->fs_info->tree_root)
1859                         ret = create_subvol(root, vol_args.name, namelen);
1860                 else
1861                         ret = create_snapshot(root, vol_args.name, namelen);
1862                 WARN_ON(ret);
1863                 break;
1864         default:
1865                 return -ENOTTY;
1866         }
1867         return ret;
1868 }
1869
#ifdef CONFIG_COMPAT
/*
 * compat ioctl entry point: converts the compat user pointer with
 * compat_ptr() and forwards the call to btrfs_ioctl() while holding the big
 * kernel lock.  Returns whatever btrfs_ioctl() returns.
 */
long btrfs_compat_ioctl(struct file *file, unsigned int cmd,
                               unsigned long arg)
{
        struct inode *inode = file->f_path.dentry->d_inode;
        unsigned long native_arg = (unsigned long) compat_ptr(arg);
        int err;

        lock_kernel();
        err = btrfs_ioctl(inode, file, cmd, native_arg);
        unlock_kernel();

        return err;
}
#endif
1883
1884 /*
1885  * Called inside transaction, so use GFP_NOFS
1886  */
1887 struct inode *btrfs_alloc_inode(struct super_block *sb)
1888 {
1889         struct btrfs_inode *ei;
1890
1891         ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS);
1892         if (!ei)
1893                 return NULL;
1894         return &ei->vfs_inode;
1895 }
1896
1897 void btrfs_destroy_inode(struct inode *inode)
1898 {
1899         WARN_ON(!list_empty(&inode->i_dentry));
1900         WARN_ON(inode->i_data.nrpages);
1901
1902         kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
1903 }
1904
1905 static void init_once(void * foo, struct kmem_cache * cachep,
1906                       unsigned long flags)
1907 {
1908         struct btrfs_inode *ei = (struct btrfs_inode *) foo;
1909
1910         inode_init_once(&ei->vfs_inode);
1911 }
1912
1913 void btrfs_destroy_cachep(void)
1914 {
1915         if (btrfs_inode_cachep)
1916                 kmem_cache_destroy(btrfs_inode_cachep);
1917         if (btrfs_trans_handle_cachep)
1918                 kmem_cache_destroy(btrfs_trans_handle_cachep);
1919         if (btrfs_transaction_cachep)
1920                 kmem_cache_destroy(btrfs_transaction_cachep);
1921         if (btrfs_bit_radix_cachep)
1922                 kmem_cache_destroy(btrfs_bit_radix_cachep);
1923         if (btrfs_path_cachep)
1924                 kmem_cache_destroy(btrfs_path_cachep);
1925 }
1926
1927 int btrfs_init_cachep(void)
1928 {
1929         btrfs_inode_cachep = kmem_cache_create("btrfs_inode_cache",
1930                                              sizeof(struct btrfs_inode),
1931                                              0, (SLAB_RECLAIM_ACCOUNT|
1932                                                 SLAB_MEM_SPREAD),
1933                                              init_once, NULL);
1934         if (!btrfs_inode_cachep)
1935                 goto fail;
1936         btrfs_trans_handle_cachep = kmem_cache_create("btrfs_trans_handle_cache",
1937                                              sizeof(struct btrfs_trans_handle),
1938                                              0, (SLAB_RECLAIM_ACCOUNT|
1939                                                 SLAB_MEM_SPREAD),
1940                                              NULL, NULL);
1941         if (!btrfs_trans_handle_cachep)
1942                 goto fail;
1943         btrfs_transaction_cachep = kmem_cache_create("btrfs_transaction_cache",
1944                                              sizeof(struct btrfs_transaction),
1945                                              0, (SLAB_RECLAIM_ACCOUNT|
1946                                                 SLAB_MEM_SPREAD),
1947                                              NULL, NULL);
1948         if (!btrfs_transaction_cachep)
1949                 goto fail;
1950         btrfs_path_cachep = kmem_cache_create("btrfs_path_cache",
1951                                              sizeof(struct btrfs_transaction),
1952                                              0, (SLAB_RECLAIM_ACCOUNT|
1953                                                 SLAB_MEM_SPREAD),
1954                                              NULL, NULL);
1955         if (!btrfs_path_cachep)
1956                 goto fail;
1957         btrfs_bit_radix_cachep = kmem_cache_create("btrfs_radix",
1958                                              256,
1959                                              0, (SLAB_RECLAIM_ACCOUNT|
1960                                                 SLAB_MEM_SPREAD |
1961                                                 SLAB_DESTROY_BY_RCU),
1962                                              NULL, NULL);
1963         if (!btrfs_bit_radix_cachep)
1964                 goto fail;
1965         return 0;
1966 fail:
1967         btrfs_destroy_cachep();
1968         return -ENOMEM;
1969 }
1970
1971 static int btrfs_getattr(struct vfsmount *mnt,
1972                          struct dentry *dentry, struct kstat *stat)
1973 {
1974         struct inode *inode = dentry->d_inode;
1975         generic_fillattr(inode, stat);
1976         stat->blksize = 256 * 1024;
1977         return 0;
1978 }
1979
1980 static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry,
1981                            struct inode * new_dir,struct dentry *new_dentry)
1982 {
1983         struct btrfs_trans_handle *trans;
1984         struct btrfs_root *root = BTRFS_I(old_dir)->root;
1985         struct inode *new_inode = new_dentry->d_inode;
1986         struct inode *old_inode = old_dentry->d_inode;
1987         struct timespec ctime = CURRENT_TIME;
1988         struct btrfs_path *path;
1989         struct btrfs_dir_item *di;
1990         int ret;
1991
1992         if (S_ISDIR(old_inode->i_mode) && new_inode &&
1993             new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) {
1994                 return -ENOTEMPTY;
1995         }
1996         mutex_lock(&root->fs_info->fs_mutex);
1997         trans = btrfs_start_transaction(root, 1);
1998         btrfs_set_trans_block_group(trans, new_dir);
1999         path = btrfs_alloc_path();
2000         if (!path) {
2001                 ret = -ENOMEM;
2002                 goto out_fail;
2003         }
2004
2005         old_dentry->d_inode->i_nlink++;
2006         old_dir->i_ctime = old_dir->i_mtime = ctime;
2007         new_dir->i_ctime = new_dir->i_mtime = ctime;
2008         old_inode->i_ctime = ctime;
2009         if (S_ISDIR(old_inode->i_mode) && old_dir != new_dir) {
2010                 struct btrfs_key *location = &BTRFS_I(new_dir)->location;
2011                 u64 old_parent_oid;
2012                 di = btrfs_lookup_dir_item(trans, root, path, old_inode->i_ino,
2013                                            "..", 2, -1);
2014                 if (IS_ERR(di)) {
2015                         ret = PTR_ERR(di);
2016                         goto out_fail;
2017                 }
2018                 if (!di) {
2019                         ret = -ENOENT;
2020                         goto out_fail;
2021                 }
2022                 old_parent_oid = btrfs_disk_key_objectid(&di->location);
2023                 ret = btrfs_del_item(trans, root, path);
2024                 if (ret) {
2025                         ret = -EIO;
2026                         goto out_fail;
2027                 }
2028                 btrfs_release_path(root, path);
2029
2030                 di = btrfs_lookup_dir_index_item(trans, root, path,
2031                                                  old_inode->i_ino,
2032                                                  old_parent_oid,
2033                                                  "..", 2, -1);
2034                 if (IS_ERR(di)) {
2035                         ret = PTR_ERR(di);
2036                         goto out_fail;
2037                 }
2038                 if (!di) {
2039                         ret = -ENOENT;
2040                         goto out_fail;
2041                 }
2042                 ret = btrfs_del_item(trans, root, path);
2043                 if (ret) {
2044                         ret = -EIO;
2045                         goto out_fail;
2046                 }
2047                 btrfs_release_path(root, path);
2048
2049                 ret = btrfs_insert_dir_item(trans, root, "..", 2,
2050                                             old_inode->i_ino, location,
2051                                             BTRFS_FT_DIR);
2052                 if (ret)
2053                         goto out_fail;
2054         }
2055
2056
2057         ret = btrfs_unlink_trans(trans, root, old_dir, old_dentry);
2058         if (ret)
2059                 goto out_fail;
2060
2061         if (new_inode) {
2062                 new_inode->i_ctime = CURRENT_TIME;
2063                 ret = btrfs_unlink_trans(trans, root, new_dir, new_dentry);
2064                 if (ret)
2065                         goto out_fail;
2066                 if (S_ISDIR(new_inode->i_mode))
2067                         clear_nlink(new_inode);
2068                 else
2069                         drop_nlink(new_inode);
2070                 btrfs_update_inode(trans, root, new_inode);
2071         }
2072         ret = btrfs_add_link(trans, new_dentry, old_inode);
2073         if (ret)
2074                 goto out_fail;
2075
2076 out_fail:
2077         btrfs_free_path(path);
2078         btrfs_end_transaction(trans, root);
2079         mutex_unlock(&root->fs_info->fs_mutex);
2080         return ret;
2081 }
2082
2083 static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
2084                          const char *symname)
2085 {
2086         struct btrfs_trans_handle *trans;
2087         struct btrfs_root *root = BTRFS_I(dir)->root;
2088         struct btrfs_path *path;
2089         struct btrfs_key key;
2090         struct inode *inode;
2091         int err;
2092         int drop_inode = 0;
2093         u64 objectid;
2094         int name_len;
2095         int datasize;
2096         char *ptr;
2097         struct btrfs_file_extent_item *ei;
2098
2099         name_len = strlen(symname) + 1;
2100         if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root))
2101                 return -ENAMETOOLONG;
2102         mutex_lock(&root->fs_info->fs_mutex);
2103         trans = btrfs_start_transaction(root, 1);
2104         btrfs_set_trans_block_group(trans, dir);
2105
2106         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
2107         if (err) {
2108                 err = -ENOSPC;
2109                 goto out_unlock;
2110         }
2111
2112         inode = btrfs_new_inode(trans, root, objectid,
2113                                 BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO);
2114         err = PTR_ERR(inode);
2115         if (IS_ERR(inode))
2116                 goto out_unlock;
2117
2118         btrfs_set_trans_block_group(trans, inode);
2119         err = btrfs_add_nondir(trans, dentry, inode);
2120         if (err)
2121                 drop_inode = 1;
2122         else {
2123                 inode->i_mapping->a_ops = &btrfs_aops;
2124                 inode->i_fop = &btrfs_file_operations;
2125                 inode->i_op = &btrfs_file_inode_operations;
2126         }
2127         dir->i_sb->s_dirt = 1;
2128         btrfs_update_inode_block_group(trans, inode);
2129         btrfs_update_inode_block_group(trans, dir);
2130         if (drop_inode)
2131                 goto out_unlock;
2132
2133         path = btrfs_alloc_path();
2134         BUG_ON(!path);
2135         key.objectid = inode->i_ino;
2136         key.offset = 0;
2137         key.flags = 0;
2138         btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
2139         datasize = btrfs_file_extent_calc_inline_size(name_len);
2140         err = btrfs_insert_empty_item(trans, root, path, &key,
2141                                       datasize);
2142         BUG_ON(err);
2143         ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
2144                path->slots[0], struct btrfs_file_extent_item);
2145         btrfs_set_file_extent_generation(ei, trans->transid);
2146         btrfs_set_file_extent_type(ei,
2147                                    BTRFS_FILE_EXTENT_INLINE);
2148         ptr = btrfs_file_extent_inline_start(ei);
2149         btrfs_memcpy(root, path->nodes[0]->b_data,
2150                      ptr, symname, name_len);
2151         mark_buffer_dirty(path->nodes[0]);
2152         btrfs_free_path(path);
2153         inode->i_op = &btrfs_symlink_inode_operations;
2154         inode->i_mapping->a_ops = &btrfs_symlink_aops;
2155         inode->i_size = name_len - 1;
2156         btrfs_update_inode(trans, root, inode);
2157         err = 0;
2158
2159 out_unlock:
2160         btrfs_end_transaction(trans, root);
2161         mutex_unlock(&root->fs_info->fs_mutex);
2162
2163         if (drop_inode) {
2164                 inode_dec_link_count(inode);
2165                 iput(inode);
2166         }
2167         btrfs_btree_balance_dirty(root);
2168         return err;
2169 }
2170
2171 static struct inode_operations btrfs_dir_inode_operations = {
2172         .lookup         = btrfs_lookup,
2173         .create         = btrfs_create,
2174         .unlink         = btrfs_unlink,
2175         .link           = btrfs_link,
2176         .mkdir          = btrfs_mkdir,
2177         .rmdir          = btrfs_rmdir,
2178         .rename         = btrfs_rename,
2179         .symlink        = btrfs_symlink,
2180         .setattr        = btrfs_setattr,
2181 };
2182
2183 static struct inode_operations btrfs_dir_ro_inode_operations = {
2184         .lookup         = btrfs_lookup,
2185 };
2186
2187 static struct file_operations btrfs_dir_file_operations = {
2188         .llseek         = generic_file_llseek,
2189         .read           = generic_read_dir,
2190         .readdir        = btrfs_readdir,
2191         .ioctl          = btrfs_ioctl,
2192 #ifdef CONFIG_COMPAT
2193         .compat_ioctl   = btrfs_compat_ioctl,
2194 #endif
2195 };
2196
2197 static struct address_space_operations btrfs_aops = {
2198         .readpage       = btrfs_readpage,
2199         .writepage      = btrfs_writepage,
2200         .sync_page      = block_sync_page,
2201         .prepare_write  = btrfs_prepare_write,
2202         .commit_write   = btrfs_commit_write,
2203         .bmap           = btrfs_bmap,
2204 };
2205
2206 static struct address_space_operations btrfs_symlink_aops = {
2207         .readpage       = btrfs_readpage,
2208         .writepage      = btrfs_writepage,
2209 };
2210
2211 static struct inode_operations btrfs_file_inode_operations = {
2212         .truncate       = btrfs_truncate,
2213         .getattr        = btrfs_getattr,
2214         .setattr        = btrfs_setattr,
2215 };
2216
2217 static struct inode_operations btrfs_symlink_inode_operations = {
2218         .readlink       = generic_readlink,
2219         .follow_link    = page_follow_link_light,
2220         .put_link       = page_put_link,
2221 };