]> git.karo-electronics.de Git - mv-sheeva.git/blob - fs/btrfs/inode.c
Btrfs: Extent based page cache code. This uses an rbtree of extents and tests
[mv-sheeva.git] / fs / btrfs / inode.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <linux/buffer_head.h>
20 #include <linux/fs.h>
21 #include <linux/pagemap.h>
22 #include <linux/highmem.h>
23 #include <linux/time.h>
24 #include <linux/init.h>
25 #include <linux/string.h>
26 #include <linux/smp_lock.h>
27 #include <linux/backing-dev.h>
28 #include <linux/mpage.h>
29 #include <linux/swap.h>
30 #include <linux/writeback.h>
31 #include <linux/statfs.h>
32 #include <linux/compat.h>
33 #include <linux/bit_spinlock.h>
34 #include <linux/version.h>
35 #include "ctree.h"
36 #include "disk-io.h"
37 #include "transaction.h"
38 #include "btrfs_inode.h"
39 #include "ioctl.h"
40 #include "print-tree.h"
41
42 struct btrfs_iget_args {
43         u64 ino;
44         struct btrfs_root *root;
45 };
46
47 static struct inode_operations btrfs_dir_inode_operations;
48 static struct inode_operations btrfs_symlink_inode_operations;
49 static struct inode_operations btrfs_dir_ro_inode_operations;
50 static struct inode_operations btrfs_special_inode_operations;
51 static struct inode_operations btrfs_file_inode_operations;
52 static struct address_space_operations btrfs_aops;
53 static struct address_space_operations btrfs_symlink_aops;
54 static struct file_operations btrfs_dir_file_operations;
55
56 static struct kmem_cache *btrfs_inode_cachep;
57 struct kmem_cache *btrfs_trans_handle_cachep;
58 struct kmem_cache *btrfs_transaction_cachep;
59 struct kmem_cache *btrfs_bit_radix_cachep;
60 struct kmem_cache *btrfs_path_cachep;
61
62 #define S_SHIFT 12
63 static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
64         [S_IFREG >> S_SHIFT]    = BTRFS_FT_REG_FILE,
65         [S_IFDIR >> S_SHIFT]    = BTRFS_FT_DIR,
66         [S_IFCHR >> S_SHIFT]    = BTRFS_FT_CHRDEV,
67         [S_IFBLK >> S_SHIFT]    = BTRFS_FT_BLKDEV,
68         [S_IFIFO >> S_SHIFT]    = BTRFS_FT_FIFO,
69         [S_IFSOCK >> S_SHIFT]   = BTRFS_FT_SOCK,
70         [S_IFLNK >> S_SHIFT]    = BTRFS_FT_SYMLINK,
71 };
72
73 void btrfs_read_locked_inode(struct inode *inode)
74 {
75         struct btrfs_path *path;
76         struct btrfs_inode_item *inode_item;
77         struct btrfs_root *root = BTRFS_I(inode)->root;
78         struct btrfs_key location;
79         u64 alloc_group_block;
80         u32 rdev;
81         int ret;
82
83         path = btrfs_alloc_path();
84         BUG_ON(!path);
85         mutex_lock(&root->fs_info->fs_mutex);
86
87         memcpy(&location, &BTRFS_I(inode)->location, sizeof(location));
88         ret = btrfs_lookup_inode(NULL, root, path, &location, 0);
89         if (ret) {
90                 btrfs_free_path(path);
91                 goto make_bad;
92         }
93         inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
94                                   path->slots[0],
95                                   struct btrfs_inode_item);
96
97         inode->i_mode = btrfs_inode_mode(inode_item);
98         inode->i_nlink = btrfs_inode_nlink(inode_item);
99         inode->i_uid = btrfs_inode_uid(inode_item);
100         inode->i_gid = btrfs_inode_gid(inode_item);
101         inode->i_size = btrfs_inode_size(inode_item);
102         inode->i_atime.tv_sec = btrfs_timespec_sec(&inode_item->atime);
103         inode->i_atime.tv_nsec = btrfs_timespec_nsec(&inode_item->atime);
104         inode->i_mtime.tv_sec = btrfs_timespec_sec(&inode_item->mtime);
105         inode->i_mtime.tv_nsec = btrfs_timespec_nsec(&inode_item->mtime);
106         inode->i_ctime.tv_sec = btrfs_timespec_sec(&inode_item->ctime);
107         inode->i_ctime.tv_nsec = btrfs_timespec_nsec(&inode_item->ctime);
108         inode->i_blocks = btrfs_inode_nblocks(inode_item);
109         inode->i_generation = btrfs_inode_generation(inode_item);
110         inode->i_rdev = 0;
111         rdev = btrfs_inode_rdev(inode_item);
112         alloc_group_block = btrfs_inode_block_group(inode_item);
113         BTRFS_I(inode)->block_group = btrfs_lookup_block_group(root->fs_info,
114                                                        alloc_group_block);
115
116         btrfs_free_path(path);
117         inode_item = NULL;
118
119         mutex_unlock(&root->fs_info->fs_mutex);
120
121         switch (inode->i_mode & S_IFMT) {
122         case S_IFREG:
123                 inode->i_mapping->a_ops = &btrfs_aops;
124                 extent_map_tree_init(&BTRFS_I(inode)->extent_tree,
125                                      inode->i_mapping, GFP_NOFS);
126                 inode->i_fop = &btrfs_file_operations;
127                 inode->i_op = &btrfs_file_inode_operations;
128                 break;
129         case S_IFDIR:
130                 inode->i_fop = &btrfs_dir_file_operations;
131                 if (root == root->fs_info->tree_root)
132                         inode->i_op = &btrfs_dir_ro_inode_operations;
133                 else
134                         inode->i_op = &btrfs_dir_inode_operations;
135                 break;
136         case S_IFLNK:
137                 inode->i_op = &btrfs_symlink_inode_operations;
138                 inode->i_mapping->a_ops = &btrfs_symlink_aops;
139                 break;
140         default:
141                 init_special_inode(inode, inode->i_mode, rdev);
142                 break;
143         }
144         return;
145
146 make_bad:
147         btrfs_release_path(root, path);
148         btrfs_free_path(path);
149         mutex_unlock(&root->fs_info->fs_mutex);
150         make_bad_inode(inode);
151 }
152
153 static void fill_inode_item(struct btrfs_inode_item *item,
154                             struct inode *inode)
155 {
156         btrfs_set_inode_uid(item, inode->i_uid);
157         btrfs_set_inode_gid(item, inode->i_gid);
158         btrfs_set_inode_size(item, inode->i_size);
159         btrfs_set_inode_mode(item, inode->i_mode);
160         btrfs_set_inode_nlink(item, inode->i_nlink);
161         btrfs_set_timespec_sec(&item->atime, inode->i_atime.tv_sec);
162         btrfs_set_timespec_nsec(&item->atime, inode->i_atime.tv_nsec);
163         btrfs_set_timespec_sec(&item->mtime, inode->i_mtime.tv_sec);
164         btrfs_set_timespec_nsec(&item->mtime, inode->i_mtime.tv_nsec);
165         btrfs_set_timespec_sec(&item->ctime, inode->i_ctime.tv_sec);
166         btrfs_set_timespec_nsec(&item->ctime, inode->i_ctime.tv_nsec);
167         btrfs_set_inode_nblocks(item, inode->i_blocks);
168         btrfs_set_inode_generation(item, inode->i_generation);
169         btrfs_set_inode_rdev(item, inode->i_rdev);
170         btrfs_set_inode_block_group(item,
171                                     BTRFS_I(inode)->block_group->key.objectid);
172 }
173
174 int btrfs_update_inode(struct btrfs_trans_handle *trans,
175                               struct btrfs_root *root,
176                               struct inode *inode)
177 {
178         struct btrfs_inode_item *inode_item;
179         struct btrfs_path *path;
180         int ret;
181
182         path = btrfs_alloc_path();
183         BUG_ON(!path);
184         ret = btrfs_lookup_inode(trans, root, path,
185                                  &BTRFS_I(inode)->location, 1);
186         if (ret) {
187                 if (ret > 0)
188                         ret = -ENOENT;
189                 goto failed;
190         }
191
192         inode_item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
193                                   path->slots[0],
194                                   struct btrfs_inode_item);
195
196         fill_inode_item(inode_item, inode);
197         btrfs_mark_buffer_dirty(path->nodes[0]);
198         btrfs_set_inode_last_trans(trans, inode);
199         ret = 0;
200 failed:
201         btrfs_release_path(root, path);
202         btrfs_free_path(path);
203         return ret;
204 }
205
206
207 static int btrfs_unlink_trans(struct btrfs_trans_handle *trans,
208                               struct btrfs_root *root,
209                               struct inode *dir,
210                               struct dentry *dentry)
211 {
212         struct btrfs_path *path;
213         const char *name = dentry->d_name.name;
214         int name_len = dentry->d_name.len;
215         int ret = 0;
216         u64 objectid;
217         struct btrfs_dir_item *di;
218
219         path = btrfs_alloc_path();
220         if (!path) {
221                 ret = -ENOMEM;
222                 goto err;
223         }
224
225         di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino,
226                                     name, name_len, -1);
227         if (IS_ERR(di)) {
228                 ret = PTR_ERR(di);
229                 goto err;
230         }
231         if (!di) {
232                 ret = -ENOENT;
233                 goto err;
234         }
235         objectid = btrfs_disk_key_objectid(&di->location);
236         ret = btrfs_delete_one_dir_name(trans, root, path, di);
237         if (ret)
238                 goto err;
239         btrfs_release_path(root, path);
240
241         di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino,
242                                          objectid, name, name_len, -1);
243         if (IS_ERR(di)) {
244                 ret = PTR_ERR(di);
245                 goto err;
246         }
247         if (!di) {
248                 ret = -ENOENT;
249                 goto err;
250         }
251         ret = btrfs_delete_one_dir_name(trans, root, path, di);
252
253         dentry->d_inode->i_ctime = dir->i_ctime;
254 err:
255         btrfs_free_path(path);
256         if (!ret) {
257                 dir->i_size -= name_len * 2;
258                 dir->i_mtime = dir->i_ctime = CURRENT_TIME;
259                 btrfs_update_inode(trans, root, dir);
260                 drop_nlink(dentry->d_inode);
261                 ret = btrfs_update_inode(trans, root, dentry->d_inode);
262                 dir->i_sb->s_dirt = 1;
263         }
264         return ret;
265 }
266
267 static int btrfs_unlink(struct inode *dir, struct dentry *dentry)
268 {
269         struct btrfs_root *root;
270         struct btrfs_trans_handle *trans;
271         int ret;
272
273         root = BTRFS_I(dir)->root;
274         mutex_lock(&root->fs_info->fs_mutex);
275         trans = btrfs_start_transaction(root, 1);
276         btrfs_set_trans_block_group(trans, dir);
277         ret = btrfs_unlink_trans(trans, root, dir, dentry);
278         btrfs_end_transaction(trans, root);
279         mutex_unlock(&root->fs_info->fs_mutex);
280         btrfs_btree_balance_dirty(root);
281         return ret;
282 }
283
284 static int btrfs_rmdir(struct inode *dir, struct dentry *dentry)
285 {
286         struct inode *inode = dentry->d_inode;
287         int err;
288         int ret;
289         struct btrfs_root *root = BTRFS_I(dir)->root;
290         struct btrfs_path *path;
291         struct btrfs_key key;
292         struct btrfs_trans_handle *trans;
293         struct btrfs_key found_key;
294         int found_type;
295         struct btrfs_leaf *leaf;
296         char *goodnames = "..";
297
298         path = btrfs_alloc_path();
299         BUG_ON(!path);
300         mutex_lock(&root->fs_info->fs_mutex);
301         trans = btrfs_start_transaction(root, 1);
302         btrfs_set_trans_block_group(trans, dir);
303         key.objectid = inode->i_ino;
304         key.offset = (u64)-1;
305         key.flags = (u32)-1;
306         while(1) {
307                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
308                 if (ret < 0) {
309                         err = ret;
310                         goto out;
311                 }
312                 BUG_ON(ret == 0);
313                 if (path->slots[0] == 0) {
314                         err = -ENOENT;
315                         goto out;
316                 }
317                 path->slots[0]--;
318                 leaf = btrfs_buffer_leaf(path->nodes[0]);
319                 btrfs_disk_key_to_cpu(&found_key,
320                                       &leaf->items[path->slots[0]].key);
321                 found_type = btrfs_key_type(&found_key);
322                 if (found_key.objectid != inode->i_ino) {
323                         err = -ENOENT;
324                         goto out;
325                 }
326                 if ((found_type != BTRFS_DIR_ITEM_KEY &&
327                      found_type != BTRFS_DIR_INDEX_KEY) ||
328                     (!btrfs_match_dir_item_name(root, path, goodnames, 2) &&
329                     !btrfs_match_dir_item_name(root, path, goodnames, 1))) {
330                         err = -ENOTEMPTY;
331                         goto out;
332                 }
333                 ret = btrfs_del_item(trans, root, path);
334                 BUG_ON(ret);
335
336                 if (found_type == BTRFS_DIR_ITEM_KEY && found_key.offset == 1)
337                         break;
338                 btrfs_release_path(root, path);
339         }
340         ret = 0;
341         btrfs_release_path(root, path);
342
343         /* now the directory is empty */
344         err = btrfs_unlink_trans(trans, root, dir, dentry);
345         if (!err) {
346                 inode->i_size = 0;
347         }
348 out:
349         btrfs_release_path(root, path);
350         btrfs_free_path(path);
351         mutex_unlock(&root->fs_info->fs_mutex);
352         ret = btrfs_end_transaction(trans, root);
353         btrfs_btree_balance_dirty(root);
354         if (ret && !err)
355                 err = ret;
356         return err;
357 }
358
359 static int btrfs_free_inode(struct btrfs_trans_handle *trans,
360                             struct btrfs_root *root,
361                             struct inode *inode)
362 {
363         struct btrfs_path *path;
364         int ret;
365
366         clear_inode(inode);
367
368         path = btrfs_alloc_path();
369         BUG_ON(!path);
370         ret = btrfs_lookup_inode(trans, root, path,
371                                  &BTRFS_I(inode)->location, -1);
372         if (ret > 0)
373                 ret = -ENOENT;
374         if (!ret)
375                 ret = btrfs_del_item(trans, root, path);
376         btrfs_free_path(path);
377         return ret;
378 }
379
380 /*
381  * this can truncate away extent items, csum items and directory items.
382  * It starts at a high offset and removes keys until it can't find
383  * any higher than i_size.
384  *
385  * csum items that cross the new i_size are truncated to the new size
386  * as well.
387  */
388 static int btrfs_truncate_in_trans(struct btrfs_trans_handle *trans,
389                                    struct btrfs_root *root,
390                                    struct inode *inode)
391 {
392         int ret;
393         struct btrfs_path *path;
394         struct btrfs_key key;
395         struct btrfs_disk_key *found_key;
396         u32 found_type;
397         struct btrfs_leaf *leaf;
398         struct btrfs_file_extent_item *fi;
399         u64 extent_start = 0;
400         u64 extent_num_blocks = 0;
401         u64 item_end = 0;
402         int found_extent;
403         int del_item;
404
405         btrfs_drop_extent_cache(inode, inode->i_size, (u64)-1);
406         path = btrfs_alloc_path();
407         path->reada = -1;
408         BUG_ON(!path);
409         /* FIXME, add redo link to tree so we don't leak on crash */
410         key.objectid = inode->i_ino;
411         key.offset = (u64)-1;
412         key.flags = (u32)-1;
413         while(1) {
414                 btrfs_init_path(path);
415                 fi = NULL;
416                 ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
417                 if (ret < 0) {
418                         goto error;
419                 }
420                 if (ret > 0) {
421                         BUG_ON(path->slots[0] == 0);
422                         path->slots[0]--;
423                 }
424                 leaf = btrfs_buffer_leaf(path->nodes[0]);
425                 found_key = &leaf->items[path->slots[0]].key;
426                 found_type = btrfs_disk_key_type(found_key);
427
428                 if (btrfs_disk_key_objectid(found_key) != inode->i_ino)
429                         break;
430                 if (found_type != BTRFS_CSUM_ITEM_KEY &&
431                     found_type != BTRFS_DIR_ITEM_KEY &&
432                     found_type != BTRFS_DIR_INDEX_KEY &&
433                     found_type != BTRFS_EXTENT_DATA_KEY)
434                         break;
435
436                 item_end = btrfs_disk_key_offset(found_key);
437                 if (found_type == BTRFS_EXTENT_DATA_KEY) {
438                         fi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
439                                             path->slots[0],
440                                             struct btrfs_file_extent_item);
441                         if (btrfs_file_extent_type(fi) !=
442                             BTRFS_FILE_EXTENT_INLINE) {
443                                 item_end += btrfs_file_extent_num_blocks(fi) <<
444                                                 inode->i_blkbits;
445                         }
446                 }
447                 if (found_type == BTRFS_CSUM_ITEM_KEY) {
448                         ret = btrfs_csum_truncate(trans, root, path,
449                                                   inode->i_size);
450                         BUG_ON(ret);
451                 }
452                 if (item_end < inode->i_size) {
453                         if (found_type) {
454                                 btrfs_set_key_type(&key, found_type - 1);
455                                 continue;
456                         }
457                         break;
458                 }
459                 if (btrfs_disk_key_offset(found_key) >= inode->i_size)
460                         del_item = 1;
461                 else
462                         del_item = 0;
463                 found_extent = 0;
464
465                 /* FIXME, shrink the extent if the ref count is only 1 */
466                 if (found_type == BTRFS_EXTENT_DATA_KEY &&
467                            btrfs_file_extent_type(fi) !=
468                            BTRFS_FILE_EXTENT_INLINE) {
469                         u64 num_dec;
470                         if (!del_item) {
471                                 u64 orig_num_blocks =
472                                         btrfs_file_extent_num_blocks(fi);
473                                 extent_num_blocks = inode->i_size -
474                                         btrfs_disk_key_offset(found_key) +
475                                         root->blocksize - 1;
476                                 extent_num_blocks >>= inode->i_blkbits;
477                                 btrfs_set_file_extent_num_blocks(fi,
478                                                          extent_num_blocks);
479                                 inode->i_blocks -= (orig_num_blocks -
480                                         extent_num_blocks) << 3;
481                                 btrfs_mark_buffer_dirty(path->nodes[0]);
482                         } else {
483                                 extent_start =
484                                         btrfs_file_extent_disk_blocknr(fi);
485                                 extent_num_blocks =
486                                         btrfs_file_extent_disk_num_blocks(fi);
487                                 /* FIXME blocksize != 4096 */
488                                 num_dec = btrfs_file_extent_num_blocks(fi) << 3;
489                                 if (extent_start != 0) {
490                                         found_extent = 1;
491                                         inode->i_blocks -= num_dec;
492                                 }
493                         }
494                 }
495                 if (del_item) {
496                         ret = btrfs_del_item(trans, root, path);
497                         if (ret)
498                                 goto error;
499                 } else {
500                         break;
501                 }
502                 btrfs_release_path(root, path);
503                 if (found_extent) {
504                         ret = btrfs_free_extent(trans, root, extent_start,
505                                                 extent_num_blocks, 0);
506                         BUG_ON(ret);
507                 }
508         }
509         ret = 0;
510 error:
511         btrfs_release_path(root, path);
512         btrfs_free_path(path);
513         inode->i_sb->s_dirt = 1;
514         return ret;
515 }
516
517 static int btrfs_cow_one_page(struct btrfs_trans_handle *trans,
518                               struct inode *inode, struct page *page,
519                               size_t zero_start)
520 {
521         char *kaddr;
522         int ret = 0;
523         struct btrfs_root *root = BTRFS_I(inode)->root;
524         u64 alloc_hint = 0;
525         u64 page_start = page->index << PAGE_CACHE_SHIFT;
526         struct btrfs_key ins;
527
528         if (!PagePrivate(page)) {
529                 SetPagePrivate(page);
530                 set_page_private(page, 1);
531                 page_cache_get(page);
532         }
533
534         btrfs_set_trans_block_group(trans, inode);
535
536         ret = btrfs_drop_extents(trans, root, inode,
537                                  page_start, page_start + PAGE_CACHE_SIZE,
538                                  &alloc_hint);
539         if (ret)
540                 goto out;
541         ret = btrfs_alloc_extent(trans, root, inode->i_ino, 1, 0,
542                                  alloc_hint, (u64)-1, &ins, 1);
543         if (ret)
544                 goto out;
545         ret = btrfs_insert_file_extent(trans, root, inode->i_ino,
546                                        page_start, ins.objectid, 1, 1);
547         if (ret)
548                 goto out;
549         SetPageChecked(page);
550         kaddr = kmap(page);
551         if (zero_start != PAGE_CACHE_SIZE) {
552                 memset(kaddr + zero_start, 0, PAGE_CACHE_SIZE - zero_start);
553                 flush_dcache_page(page);
554         }
555         kunmap(page);
556
557 out:
558         return ret;
559 }
560
561 /*
562  * taken from block_truncate_page, but does cow as it zeros out
563  * any bytes left in the last page in the file.
564  */
565 static int btrfs_truncate_page(struct address_space *mapping, loff_t from)
566 {
567         struct inode *inode = mapping->host;
568         struct btrfs_root *root = BTRFS_I(inode)->root;
569         struct btrfs_trans_handle *trans;
570         unsigned blocksize = 1 << inode->i_blkbits;
571         pgoff_t index = from >> PAGE_CACHE_SHIFT;
572         unsigned offset = from & (PAGE_CACHE_SIZE-1);
573         struct page *page;
574         int ret = 0;
575         u64 page_start;
576
577         if ((offset & (blocksize - 1)) == 0)
578                 goto out;
579
580         ret = -ENOMEM;
581         page = grab_cache_page(mapping, index);
582         if (!page)
583                 goto out;
584         if (!PageUptodate(page)) {
585                 ret = btrfs_readpage(NULL, page);
586                 lock_page(page);
587                 if (!PageUptodate(page)) {
588                         ret = -EIO;
589                         goto out;
590                 }
591         }
592         page_start = page->index << PAGE_CACHE_SHIFT;
593
594         mutex_lock(&root->fs_info->fs_mutex);
595         trans = btrfs_start_transaction(root, 1);
596         ret = btrfs_cow_one_page(trans, inode, page, offset);
597         if (!ret) {
598                 char *kaddr = kmap(page);
599                 ret = btrfs_csum_file_block(trans, root, inode->i_ino,
600                                             page_start, kaddr, PAGE_CACHE_SIZE);
601                 kunmap(page);
602         }
603         set_extent_dirty(&BTRFS_I(inode)->extent_tree,
604                          page_start, page_start + PAGE_CACHE_SIZE - 1,
605                          GFP_NOFS);
606         set_page_dirty(page);
607         btrfs_end_transaction(trans, root);
608         mutex_unlock(&root->fs_info->fs_mutex);
609
610         unlock_page(page);
611         page_cache_release(page);
612 out:
613         return ret;
614 }
615
616 static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
617 {
618         struct inode *inode = dentry->d_inode;
619         int err;
620
621         err = inode_change_ok(inode, attr);
622         if (err)
623                 return err;
624
625         if (S_ISREG(inode->i_mode) &&
626             attr->ia_valid & ATTR_SIZE && attr->ia_size > inode->i_size) {
627                 struct btrfs_trans_handle *trans;
628                 struct btrfs_root *root = BTRFS_I(inode)->root;
629                 u64 mask = root->blocksize - 1;
630                 u64 pos = (inode->i_size + mask) & ~mask;
631                 u64 hole_size;
632
633                 if (attr->ia_size <= pos)
634                         goto out;
635
636                 btrfs_truncate_page(inode->i_mapping, inode->i_size);
637
638                 hole_size = (attr->ia_size - pos + mask) & ~mask;
639                 hole_size >>= inode->i_blkbits;
640
641                 mutex_lock(&root->fs_info->fs_mutex);
642                 trans = btrfs_start_transaction(root, 1);
643                 btrfs_set_trans_block_group(trans, inode);
644                 err = btrfs_insert_file_extent(trans, root, inode->i_ino,
645                                                pos, 0, 0, hole_size);
646                 btrfs_end_transaction(trans, root);
647                 mutex_unlock(&root->fs_info->fs_mutex);
648                 if (err)
649                         return err;
650         }
651 out:
652         err = inode_setattr(inode, attr);
653
654         return err;
655 }
656 void btrfs_delete_inode(struct inode *inode)
657 {
658         struct btrfs_trans_handle *trans;
659         struct btrfs_root *root = BTRFS_I(inode)->root;
660         int ret;
661
662         truncate_inode_pages(&inode->i_data, 0);
663         if (is_bad_inode(inode)) {
664                 goto no_delete;
665         }
666         inode->i_size = 0;
667         mutex_lock(&root->fs_info->fs_mutex);
668         trans = btrfs_start_transaction(root, 1);
669         btrfs_set_trans_block_group(trans, inode);
670         ret = btrfs_truncate_in_trans(trans, root, inode);
671         if (ret)
672                 goto no_delete_lock;
673         ret = btrfs_free_inode(trans, root, inode);
674         if (ret)
675                 goto no_delete_lock;
676         btrfs_end_transaction(trans, root);
677         mutex_unlock(&root->fs_info->fs_mutex);
678         btrfs_btree_balance_dirty(root);
679         return;
680
681 no_delete_lock:
682         btrfs_end_transaction(trans, root);
683         mutex_unlock(&root->fs_info->fs_mutex);
684         btrfs_btree_balance_dirty(root);
685 no_delete:
686         clear_inode(inode);
687 }
688
689 /*
690  * this returns the key found in the dir entry in the location pointer.
691  * If no dir entries were found, location->objectid is 0.
692  */
693 static int btrfs_inode_by_name(struct inode *dir, struct dentry *dentry,
694                                struct btrfs_key *location)
695 {
696         const char *name = dentry->d_name.name;
697         int namelen = dentry->d_name.len;
698         struct btrfs_dir_item *di;
699         struct btrfs_path *path;
700         struct btrfs_root *root = BTRFS_I(dir)->root;
701         int ret;
702
703         path = btrfs_alloc_path();
704         BUG_ON(!path);
705         di = btrfs_lookup_dir_item(NULL, root, path, dir->i_ino, name,
706                                     namelen, 0);
707         if (!di || IS_ERR(di)) {
708                 location->objectid = 0;
709                 ret = 0;
710                 goto out;
711         }
712         btrfs_disk_key_to_cpu(location, &di->location);
713 out:
714         btrfs_release_path(root, path);
715         btrfs_free_path(path);
716         return ret;
717 }
718
719 /*
720  * when we hit a tree root in a directory, the btrfs part of the inode
721  * needs to be changed to reflect the root directory of the tree root.  This
722  * is kind of like crossing a mount point.
723  */
724 static int fixup_tree_root_location(struct btrfs_root *root,
725                              struct btrfs_key *location,
726                              struct btrfs_root **sub_root)
727 {
728         struct btrfs_path *path;
729         struct btrfs_root_item *ri;
730
731         if (btrfs_key_type(location) != BTRFS_ROOT_ITEM_KEY)
732                 return 0;
733         if (location->objectid == BTRFS_ROOT_TREE_OBJECTID)
734                 return 0;
735
736         path = btrfs_alloc_path();
737         BUG_ON(!path);
738         mutex_lock(&root->fs_info->fs_mutex);
739
740         *sub_root = btrfs_read_fs_root(root->fs_info, location);
741         if (IS_ERR(*sub_root))
742                 return PTR_ERR(*sub_root);
743
744         ri = &(*sub_root)->root_item;
745         location->objectid = btrfs_root_dirid(ri);
746         location->flags = 0;
747         btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
748         location->offset = 0;
749
750         btrfs_free_path(path);
751         mutex_unlock(&root->fs_info->fs_mutex);
752         return 0;
753 }
754
755 static int btrfs_init_locked_inode(struct inode *inode, void *p)
756 {
757         struct btrfs_iget_args *args = p;
758         inode->i_ino = args->ino;
759         BTRFS_I(inode)->root = args->root;
760         return 0;
761 }
762
763 static int btrfs_find_actor(struct inode *inode, void *opaque)
764 {
765         struct btrfs_iget_args *args = opaque;
766         return (args->ino == inode->i_ino &&
767                 args->root == BTRFS_I(inode)->root);
768 }
769
770 struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid,
771                                 struct btrfs_root *root)
772 {
773         struct inode *inode;
774         struct btrfs_iget_args args;
775         args.ino = objectid;
776         args.root = root;
777
778         inode = iget5_locked(s, objectid, btrfs_find_actor,
779                              btrfs_init_locked_inode,
780                              (void *)&args);
781         return inode;
782 }
783
784 static struct dentry *btrfs_lookup(struct inode *dir, struct dentry *dentry,
785                                    struct nameidata *nd)
786 {
787         struct inode * inode;
788         struct btrfs_inode *bi = BTRFS_I(dir);
789         struct btrfs_root *root = bi->root;
790         struct btrfs_root *sub_root = root;
791         struct btrfs_key location;
792         int ret;
793
794         if (dentry->d_name.len > BTRFS_NAME_LEN)
795                 return ERR_PTR(-ENAMETOOLONG);
796         mutex_lock(&root->fs_info->fs_mutex);
797         ret = btrfs_inode_by_name(dir, dentry, &location);
798         mutex_unlock(&root->fs_info->fs_mutex);
799         if (ret < 0)
800                 return ERR_PTR(ret);
801         inode = NULL;
802         if (location.objectid) {
803                 ret = fixup_tree_root_location(root, &location, &sub_root);
804                 if (ret < 0)
805                         return ERR_PTR(ret);
806                 if (ret > 0)
807                         return ERR_PTR(-ENOENT);
808                 inode = btrfs_iget_locked(dir->i_sb, location.objectid,
809                                           sub_root);
810                 if (!inode)
811                         return ERR_PTR(-EACCES);
812                 if (inode->i_state & I_NEW) {
813                         /* the inode and parent dir are two different roots */
814                         if (sub_root != root) {
815                                 igrab(inode);
816                                 sub_root->inode = inode;
817                         }
818                         BTRFS_I(inode)->root = sub_root;
819                         memcpy(&BTRFS_I(inode)->location, &location,
820                                sizeof(location));
821                         btrfs_read_locked_inode(inode);
822                         unlock_new_inode(inode);
823                 }
824         }
825         return d_splice_alias(inode, dentry);
826 }
827
828 static unsigned char btrfs_filetype_table[] = {
829         DT_UNKNOWN, DT_REG, DT_DIR, DT_CHR, DT_BLK, DT_FIFO, DT_SOCK, DT_LNK
830 };
831
832 static int btrfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
833 {
834         struct inode *inode = filp->f_path.dentry->d_inode;
835         struct btrfs_root *root = BTRFS_I(inode)->root;
836         struct btrfs_item *item;
837         struct btrfs_dir_item *di;
838         struct btrfs_key key;
839         struct btrfs_path *path;
840         int ret;
841         u32 nritems;
842         struct btrfs_leaf *leaf;
843         int slot;
844         int advance;
845         unsigned char d_type;
846         int over = 0;
847         u32 di_cur;
848         u32 di_total;
849         u32 di_len;
850         int key_type = BTRFS_DIR_INDEX_KEY;
851
852         /* FIXME, use a real flag for deciding about the key type */
853         if (root->fs_info->tree_root == root)
854                 key_type = BTRFS_DIR_ITEM_KEY;
855         mutex_lock(&root->fs_info->fs_mutex);
856         key.objectid = inode->i_ino;
857         key.flags = 0;
858         btrfs_set_key_type(&key, key_type);
859         key.offset = filp->f_pos;
860         path = btrfs_alloc_path();
861         path->reada = 2;
862         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
863         if (ret < 0)
864                 goto err;
865         advance = 0;
866         while(1) {
867                 leaf = btrfs_buffer_leaf(path->nodes[0]);
868                 nritems = btrfs_header_nritems(&leaf->header);
869                 slot = path->slots[0];
870                 if (advance || slot >= nritems) {
871                         if (slot >= nritems -1) {
872                                 ret = btrfs_next_leaf(root, path);
873                                 if (ret)
874                                         break;
875                                 leaf = btrfs_buffer_leaf(path->nodes[0]);
876                                 nritems = btrfs_header_nritems(&leaf->header);
877                                 slot = path->slots[0];
878                         } else {
879                                 slot++;
880                                 path->slots[0]++;
881                         }
882                 }
883                 advance = 1;
884                 item = leaf->items + slot;
885                 if (btrfs_disk_key_objectid(&item->key) != key.objectid)
886                         break;
887                 if (btrfs_disk_key_type(&item->key) != key_type)
888                         break;
889                 if (btrfs_disk_key_offset(&item->key) < filp->f_pos)
890                         continue;
891                 filp->f_pos = btrfs_disk_key_offset(&item->key);
892                 advance = 1;
893                 di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
894                 di_cur = 0;
895                 di_total = btrfs_item_size(leaf->items + slot);
896                 while(di_cur < di_total) {
897                         d_type = btrfs_filetype_table[btrfs_dir_type(di)];
898                         over = filldir(dirent, (const char *)(di + 1),
899                                        btrfs_dir_name_len(di),
900                                        btrfs_disk_key_offset(&item->key),
901                                        btrfs_disk_key_objectid(&di->location),
902                                        d_type);
903                         if (over)
904                                 goto nopos;
905                         di_len = btrfs_dir_name_len(di) + sizeof(*di);
906                         di_cur += di_len;
907                         di = (struct btrfs_dir_item *)((char *)di + di_len);
908                 }
909         }
910         filp->f_pos++;
911 nopos:
912         ret = 0;
913 err:
914         btrfs_release_path(root, path);
915         btrfs_free_path(path);
916         mutex_unlock(&root->fs_info->fs_mutex);
917         return ret;
918 }
919
920 int btrfs_write_inode(struct inode *inode, int wait)
921 {
922         struct btrfs_root *root = BTRFS_I(inode)->root;
923         struct btrfs_trans_handle *trans;
924         int ret = 0;
925
926         if (wait) {
927                 mutex_lock(&root->fs_info->fs_mutex);
928                 trans = btrfs_start_transaction(root, 1);
929                 btrfs_set_trans_block_group(trans, inode);
930                 ret = btrfs_commit_transaction(trans, root);
931                 mutex_unlock(&root->fs_info->fs_mutex);
932         }
933         return ret;
934 }
935
936 /*
937  * This is somewhat expensive, updating the tree every time the
938  * inode changes.  But, it is most likely to find the inode in cache.
939  * FIXME, needs more benchmarking...there are no reasons other than performance
940  * to keep or drop this code.
941  */
942 void btrfs_dirty_inode(struct inode *inode)
943 {
944         struct btrfs_root *root = BTRFS_I(inode)->root;
945         struct btrfs_trans_handle *trans;
946
947         mutex_lock(&root->fs_info->fs_mutex);
948         trans = btrfs_start_transaction(root, 1);
949         btrfs_set_trans_block_group(trans, inode);
950         btrfs_update_inode(trans, root, inode);
951         btrfs_end_transaction(trans, root);
952         mutex_unlock(&root->fs_info->fs_mutex);
953 }
954
955 static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
956                                      struct btrfs_root *root,
957                                      u64 objectid,
958                                      struct btrfs_block_group_cache *group,
959                                      int mode)
960 {
961         struct inode *inode;
962         struct btrfs_inode_item inode_item;
963         struct btrfs_key *location;
964         int ret;
965         int owner;
966
967         inode = new_inode(root->fs_info->sb);
968         if (!inode)
969                 return ERR_PTR(-ENOMEM);
970
971         BTRFS_I(inode)->root = root;
972         if (mode & S_IFDIR)
973                 owner = 0;
974         else
975                 owner = 1;
976         group = btrfs_find_block_group(root, group, 0, 0, owner);
977         BTRFS_I(inode)->block_group = group;
978
979         inode->i_uid = current->fsuid;
980         inode->i_gid = current->fsgid;
981         inode->i_mode = mode;
982         inode->i_ino = objectid;
983         inode->i_blocks = 0;
984         inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
985         fill_inode_item(&inode_item, inode);
986         location = &BTRFS_I(inode)->location;
987         location->objectid = objectid;
988         location->flags = 0;
989         location->offset = 0;
990         btrfs_set_key_type(location, BTRFS_INODE_ITEM_KEY);
991
992         ret = btrfs_insert_inode(trans, root, objectid, &inode_item);
993         if (ret)
994                 return ERR_PTR(ret);
995         insert_inode_hash(inode);
996         return inode;
997 }
998
999 static inline u8 btrfs_inode_type(struct inode *inode)
1000 {
1001         return btrfs_type_by_mode[(inode->i_mode & S_IFMT) >> S_SHIFT];
1002 }
1003
1004 static int btrfs_add_link(struct btrfs_trans_handle *trans,
1005                             struct dentry *dentry, struct inode *inode)
1006 {
1007         int ret;
1008         struct btrfs_key key;
1009         struct btrfs_root *root = BTRFS_I(dentry->d_parent->d_inode)->root;
1010         struct inode *parent_inode;
1011         key.objectid = inode->i_ino;
1012         key.flags = 0;
1013         btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
1014         key.offset = 0;
1015
1016         ret = btrfs_insert_dir_item(trans, root,
1017                                     dentry->d_name.name, dentry->d_name.len,
1018                                     dentry->d_parent->d_inode->i_ino,
1019                                     &key, btrfs_inode_type(inode));
1020         if (ret == 0) {
1021                 parent_inode = dentry->d_parent->d_inode;
1022                 parent_inode->i_size += dentry->d_name.len * 2;
1023                 parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME;
1024                 ret = btrfs_update_inode(trans, root,
1025                                          dentry->d_parent->d_inode);
1026         }
1027         return ret;
1028 }
1029
1030 static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
1031                             struct dentry *dentry, struct inode *inode)
1032 {
1033         int err = btrfs_add_link(trans, dentry, inode);
1034         if (!err) {
1035                 d_instantiate(dentry, inode);
1036                 return 0;
1037         }
1038         if (err > 0)
1039                 err = -EEXIST;
1040         return err;
1041 }
1042
1043 static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
1044                         int mode, dev_t rdev)
1045 {
1046         struct btrfs_trans_handle *trans;
1047         struct btrfs_root *root = BTRFS_I(dir)->root;
1048         struct inode *inode;
1049         int err;
1050         int drop_inode = 0;
1051         u64 objectid;
1052
1053         if (!new_valid_dev(rdev))
1054                 return -EINVAL;
1055
1056         mutex_lock(&root->fs_info->fs_mutex);
1057         trans = btrfs_start_transaction(root, 1);
1058         btrfs_set_trans_block_group(trans, dir);
1059
1060         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
1061         if (err) {
1062                 err = -ENOSPC;
1063                 goto out_unlock;
1064         }
1065
1066         inode = btrfs_new_inode(trans, root, objectid,
1067                                 BTRFS_I(dir)->block_group, mode);
1068         err = PTR_ERR(inode);
1069         if (IS_ERR(inode))
1070                 goto out_unlock;
1071
1072         btrfs_set_trans_block_group(trans, inode);
1073         err = btrfs_add_nondir(trans, dentry, inode);
1074         if (err)
1075                 drop_inode = 1;
1076         else {
1077                 inode->i_op = &btrfs_special_inode_operations;
1078                 init_special_inode(inode, inode->i_mode, rdev);
1079         }
1080         dir->i_sb->s_dirt = 1;
1081         btrfs_update_inode_block_group(trans, inode);
1082         btrfs_update_inode_block_group(trans, dir);
1083 out_unlock:
1084         btrfs_end_transaction(trans, root);
1085         mutex_unlock(&root->fs_info->fs_mutex);
1086
1087         if (drop_inode) {
1088                 inode_dec_link_count(inode);
1089                 iput(inode);
1090         }
1091         btrfs_btree_balance_dirty(root);
1092         return err;
1093 }
1094
1095 static int btrfs_create(struct inode *dir, struct dentry *dentry,
1096                         int mode, struct nameidata *nd)
1097 {
1098         struct btrfs_trans_handle *trans;
1099         struct btrfs_root *root = BTRFS_I(dir)->root;
1100         struct inode *inode;
1101         int err;
1102         int drop_inode = 0;
1103         u64 objectid;
1104
1105         mutex_lock(&root->fs_info->fs_mutex);
1106         trans = btrfs_start_transaction(root, 1);
1107         btrfs_set_trans_block_group(trans, dir);
1108
1109         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
1110         if (err) {
1111                 err = -ENOSPC;
1112                 goto out_unlock;
1113         }
1114
1115         inode = btrfs_new_inode(trans, root, objectid,
1116                                 BTRFS_I(dir)->block_group, mode);
1117         err = PTR_ERR(inode);
1118         if (IS_ERR(inode))
1119                 goto out_unlock;
1120
1121         btrfs_set_trans_block_group(trans, inode);
1122         err = btrfs_add_nondir(trans, dentry, inode);
1123         if (err)
1124                 drop_inode = 1;
1125         else {
1126                 inode->i_mapping->a_ops = &btrfs_aops;
1127                 inode->i_fop = &btrfs_file_operations;
1128                 inode->i_op = &btrfs_file_inode_operations;
1129                 extent_map_tree_init(&BTRFS_I(inode)->extent_tree,
1130                                      inode->i_mapping, GFP_NOFS);
1131         }
1132         dir->i_sb->s_dirt = 1;
1133         btrfs_update_inode_block_group(trans, inode);
1134         btrfs_update_inode_block_group(trans, dir);
1135 out_unlock:
1136         btrfs_end_transaction(trans, root);
1137         mutex_unlock(&root->fs_info->fs_mutex);
1138
1139         if (drop_inode) {
1140                 inode_dec_link_count(inode);
1141                 iput(inode);
1142         }
1143         btrfs_btree_balance_dirty(root);
1144         return err;
1145 }
1146
1147 static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
1148                       struct dentry *dentry)
1149 {
1150         struct btrfs_trans_handle *trans;
1151         struct btrfs_root *root = BTRFS_I(dir)->root;
1152         struct inode *inode = old_dentry->d_inode;
1153         int err;
1154         int drop_inode = 0;
1155
1156         if (inode->i_nlink == 0)
1157                 return -ENOENT;
1158
1159         inc_nlink(inode);
1160         mutex_lock(&root->fs_info->fs_mutex);
1161         trans = btrfs_start_transaction(root, 1);
1162         btrfs_set_trans_block_group(trans, dir);
1163         atomic_inc(&inode->i_count);
1164         err = btrfs_add_nondir(trans, dentry, inode);
1165         if (err)
1166                 drop_inode = 1;
1167         dir->i_sb->s_dirt = 1;
1168         btrfs_update_inode_block_group(trans, dir);
1169         err = btrfs_update_inode(trans, root, inode);
1170         if (err)
1171                 drop_inode = 1;
1172
1173         btrfs_end_transaction(trans, root);
1174         mutex_unlock(&root->fs_info->fs_mutex);
1175
1176         if (drop_inode) {
1177                 inode_dec_link_count(inode);
1178                 iput(inode);
1179         }
1180         btrfs_btree_balance_dirty(root);
1181         return err;
1182 }
1183
1184 static int btrfs_make_empty_dir(struct btrfs_trans_handle *trans,
1185                                 struct btrfs_root *root,
1186                                 u64 objectid, u64 dirid)
1187 {
1188         int ret;
1189         char buf[2];
1190         struct btrfs_key key;
1191
1192         buf[0] = '.';
1193         buf[1] = '.';
1194
1195         key.objectid = objectid;
1196         key.offset = 0;
1197         key.flags = 0;
1198         btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
1199
1200         ret = btrfs_insert_dir_item(trans, root, buf, 1, objectid,
1201                                     &key, BTRFS_FT_DIR);
1202         if (ret)
1203                 goto error;
1204         key.objectid = dirid;
1205         ret = btrfs_insert_dir_item(trans, root, buf, 2, objectid,
1206                                     &key, BTRFS_FT_DIR);
1207         if (ret)
1208                 goto error;
1209 error:
1210         return ret;
1211 }
1212
1213 static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1214 {
1215         struct inode *inode;
1216         struct btrfs_trans_handle *trans;
1217         struct btrfs_root *root = BTRFS_I(dir)->root;
1218         int err = 0;
1219         int drop_on_err = 0;
1220         u64 objectid;
1221
1222         mutex_lock(&root->fs_info->fs_mutex);
1223         trans = btrfs_start_transaction(root, 1);
1224         btrfs_set_trans_block_group(trans, dir);
1225         if (IS_ERR(trans)) {
1226                 err = PTR_ERR(trans);
1227                 goto out_unlock;
1228         }
1229
1230         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
1231         if (err) {
1232                 err = -ENOSPC;
1233                 goto out_unlock;
1234         }
1235
1236         inode = btrfs_new_inode(trans, root, objectid,
1237                                 BTRFS_I(dir)->block_group, S_IFDIR | mode);
1238         if (IS_ERR(inode)) {
1239                 err = PTR_ERR(inode);
1240                 goto out_fail;
1241         }
1242         drop_on_err = 1;
1243         inode->i_op = &btrfs_dir_inode_operations;
1244         inode->i_fop = &btrfs_dir_file_operations;
1245         btrfs_set_trans_block_group(trans, inode);
1246
1247         err = btrfs_make_empty_dir(trans, root, inode->i_ino, dir->i_ino);
1248         if (err)
1249                 goto out_fail;
1250
1251         inode->i_size = 6;
1252         err = btrfs_update_inode(trans, root, inode);
1253         if (err)
1254                 goto out_fail;
1255         err = btrfs_add_link(trans, dentry, inode);
1256         if (err)
1257                 goto out_fail;
1258         d_instantiate(dentry, inode);
1259         drop_on_err = 0;
1260         dir->i_sb->s_dirt = 1;
1261         btrfs_update_inode_block_group(trans, inode);
1262         btrfs_update_inode_block_group(trans, dir);
1263
1264 out_fail:
1265         btrfs_end_transaction(trans, root);
1266 out_unlock:
1267         mutex_unlock(&root->fs_info->fs_mutex);
1268         if (drop_on_err)
1269                 iput(inode);
1270         btrfs_btree_balance_dirty(root);
1271         return err;
1272 }
1273
1274 struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
1275                                     size_t page_offset, u64 start, u64 end,
1276                                     int create)
1277 {
1278         int ret;
1279         int err = 0;
1280         u64 blocknr;
1281         u64 extent_start = 0;
1282         u64 extent_end = 0;
1283         u64 objectid = inode->i_ino;
1284         u32 found_type;
1285         int failed_insert = 0;
1286         struct btrfs_path *path;
1287         struct btrfs_root *root = BTRFS_I(inode)->root;
1288         struct btrfs_file_extent_item *item;
1289         struct btrfs_leaf *leaf;
1290         struct btrfs_disk_key *found_key;
1291         struct extent_map *em = NULL;
1292         struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
1293         struct btrfs_trans_handle *trans = NULL;
1294
1295         path = btrfs_alloc_path();
1296         BUG_ON(!path);
1297         mutex_lock(&root->fs_info->fs_mutex);
1298
1299 again:
1300         em = lookup_extent_mapping(em_tree, start, end);
1301         if (em) {
1302                 goto out;
1303         }
1304         if (!em) {
1305                 em = alloc_extent_map(GFP_NOFS);
1306                 if (!em) {
1307                         err = -ENOMEM;
1308                         goto out;
1309                 }
1310                 em->start = 0;
1311                 em->end = 0;
1312         }
1313         em->bdev = inode->i_sb->s_bdev;
1314         ret = btrfs_lookup_file_extent(NULL, root, path,
1315                                        objectid, start, 0);
1316         if (ret < 0) {
1317                 err = ret;
1318                 goto out;
1319         }
1320
1321         if (ret != 0) {
1322                 if (path->slots[0] == 0)
1323                         goto not_found;
1324                 path->slots[0]--;
1325         }
1326
1327         item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0],
1328                               struct btrfs_file_extent_item);
1329         leaf = btrfs_buffer_leaf(path->nodes[0]);
1330         blocknr = btrfs_file_extent_disk_blocknr(item);
1331         blocknr += btrfs_file_extent_offset(item);
1332
1333         /* are we inside the extent that was found? */
1334         found_key = &leaf->items[path->slots[0]].key;
1335         found_type = btrfs_disk_key_type(found_key);
1336         if (btrfs_disk_key_objectid(found_key) != objectid ||
1337             found_type != BTRFS_EXTENT_DATA_KEY) {
1338                 goto not_found;
1339         }
1340
1341         found_type = btrfs_file_extent_type(item);
1342         extent_start = btrfs_disk_key_offset(&leaf->items[path->slots[0]].key);
1343         if (found_type == BTRFS_FILE_EXTENT_REG) {
1344                 extent_end = extent_start +
1345                        (btrfs_file_extent_num_blocks(item) << inode->i_blkbits);
1346                 err = 0;
1347                 if (start < extent_start || start > extent_end) {
1348                         em->start = start;
1349                         if (start < extent_start) {
1350                                 em->end = extent_end - 1;
1351                         } else {
1352                                 em->end = end;
1353                         }
1354                         goto not_found_em;
1355                 }
1356                 if (btrfs_file_extent_disk_blocknr(item) == 0) {
1357                         em->start = extent_start;
1358                         em->end = extent_end - 1;
1359                         em->block_start = 0;
1360                         em->block_end = 0;
1361                         goto insert;
1362                 }
1363                 em->block_start = blocknr << inode->i_blkbits;
1364                 em->block_end = em->block_start +
1365                         (btrfs_file_extent_num_blocks(item) <<
1366                          inode->i_blkbits) - 1;
1367                 em->start = extent_start;
1368                 em->end = extent_end - 1;
1369                 goto insert;
1370         } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
1371                 char *ptr;
1372                 char *map;
1373                 u32 size;
1374
1375                 size = btrfs_file_extent_inline_len(leaf->items +
1376                                                     path->slots[0]);
1377                 extent_end = extent_start + size;
1378                 if (start < extent_start || start > extent_end) {
1379                         em->start = start;
1380                         if (start < extent_start) {
1381                                 em->end = extent_end - 1;
1382                         } else {
1383                                 em->end = end;
1384                         }
1385                         goto not_found_em;
1386                 }
1387                 em->block_start = EXTENT_MAP_INLINE;
1388                 em->block_end = EXTENT_MAP_INLINE;
1389                 em->start = extent_start;
1390                 em->end = extent_end - 1;
1391                 if (!page) {
1392                         goto insert;
1393                 }
1394                 ptr = btrfs_file_extent_inline_start(item);
1395                 map = kmap(page);
1396                 memcpy(map + page_offset, ptr, size);
1397                 flush_dcache_page(result->b_page);
1398                 kunmap(page);
1399                 set_extent_uptodate(em_tree, extent_start,
1400                                     extent_end, GFP_NOFS);
1401                 goto insert;
1402         } else {
1403                 printk("unkknown found_type %d\n", found_type);
1404                 WARN_ON(1);
1405         }
1406 not_found:
1407         em->start = start;
1408         em->end = end;
1409 not_found_em:
1410         em->block_start = 0;
1411         em->block_end = 0;
1412 insert:
1413         btrfs_release_path(root, path);
1414         if (em->start > start || em->end < start) {
1415                 printk("bad extent! %Lu %Lu start %Lu end %Lu\n", em->start, em->end, start, end);
1416                 WARN_ON(1);
1417                 err = -EIO;
1418                 goto out;
1419         }
1420         ret = add_extent_mapping(em_tree, em);
1421         if (ret == -EEXIST) {
1422                 free_extent_map(em);
1423                 failed_insert++;
1424                 if (failed_insert > 5) {
1425                         printk("failing to insert %Lu %Lu\n", start, end);
1426                         err = -EIO;
1427                         goto out;
1428                 }
1429                 em = NULL;
1430                 goto again;
1431         }
1432         err = 0;
1433 out:
1434         btrfs_free_path(path);
1435         if (trans) {
1436                 ret = btrfs_end_transaction(trans, root);
1437                 if (!err)
1438                         err = ret;
1439         }
1440         mutex_unlock(&root->fs_info->fs_mutex);
1441         if (err) {
1442                 free_extent_map(em);
1443                 WARN_ON(1);
1444                 return ERR_PTR(err);
1445         }
1446         return em;
1447 }
1448
1449
/*
 * Values for the 'create' argument of the get_block helpers.
 *
 * FIBMAP and other callers that pass in a fake buffer head must use
 * BTRFS_GET_BLOCK_NO_DIRECT so that no packed (inline) file data is
 * memcpy'd into that fake bh.
 */
#define BTRFS_GET_BLOCK_NO_CREATE	0
#define BTRFS_GET_BLOCK_CREATE		1
#define BTRFS_GET_BLOCK_NO_DIRECT	2
1458
1459 /*
1460  * FIXME create==1 doe not work.
1461  */
1462 static int btrfs_get_block_lock(struct inode *inode, sector_t iblock,
1463                                 struct buffer_head *result, int create)
1464 {
1465         int ret;
1466         int err = 0;
1467         u64 blocknr;
1468         u64 extent_start = 0;
1469         u64 extent_end = 0;
1470         u64 objectid = inode->i_ino;
1471         u32 found_type;
1472         u64 alloc_hint = 0;
1473         struct btrfs_path *path;
1474         struct btrfs_root *root = BTRFS_I(inode)->root;
1475         struct btrfs_file_extent_item *item;
1476         struct btrfs_leaf *leaf;
1477         struct btrfs_disk_key *found_key;
1478         struct btrfs_trans_handle *trans = NULL;
1479
1480         path = btrfs_alloc_path();
1481         BUG_ON(!path);
1482         if (create & BTRFS_GET_BLOCK_CREATE) {
1483                 /*
1484                  * danger!, this only works if the page is properly up
1485                  * to date somehow
1486                  */
1487                 trans = btrfs_start_transaction(root, 1);
1488                 if (!trans) {
1489                         err = -ENOMEM;
1490                         goto out;
1491                 }
1492                 ret = btrfs_drop_extents(trans, root, inode,
1493                                          iblock << inode->i_blkbits,
1494                                          (iblock + 1) << inode->i_blkbits,
1495                                          &alloc_hint);
1496                 BUG_ON(ret);
1497         }
1498
1499         ret = btrfs_lookup_file_extent(NULL, root, path,
1500                                        objectid,
1501                                        iblock << inode->i_blkbits, 0);
1502         if (ret < 0) {
1503                 err = ret;
1504                 goto out;
1505         }
1506
1507         if (ret != 0) {
1508                 if (path->slots[0] == 0) {
1509                         btrfs_release_path(root, path);
1510                         goto not_found;
1511                 }
1512                 path->slots[0]--;
1513         }
1514
1515         item = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0],
1516                               struct btrfs_file_extent_item);
1517         leaf = btrfs_buffer_leaf(path->nodes[0]);
1518         blocknr = btrfs_file_extent_disk_blocknr(item);
1519         blocknr += btrfs_file_extent_offset(item);
1520
1521         /* are we inside the extent that was found? */
1522         found_key = &leaf->items[path->slots[0]].key;
1523         found_type = btrfs_disk_key_type(found_key);
1524         if (btrfs_disk_key_objectid(found_key) != objectid ||
1525             found_type != BTRFS_EXTENT_DATA_KEY) {
1526                 extent_end = 0;
1527                 extent_start = 0;
1528                 goto not_found;
1529         }
1530         found_type = btrfs_file_extent_type(item);
1531         extent_start = btrfs_disk_key_offset(&leaf->items[path->slots[0]].key);
1532         if (found_type == BTRFS_FILE_EXTENT_REG) {
1533                 extent_start = extent_start >> inode->i_blkbits;
1534                 extent_end = extent_start + btrfs_file_extent_num_blocks(item);
1535                 err = 0;
1536                 if (btrfs_file_extent_disk_blocknr(item) == 0)
1537                         goto out;
1538                 if (iblock >= extent_start && iblock < extent_end) {
1539                         btrfs_map_bh_to_logical(root, result, blocknr +
1540                                                 iblock - extent_start);
1541                         goto out;
1542                 }
1543         } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
1544                 char *ptr;
1545                 char *map;
1546                 u32 size;
1547
1548                 if (create & BTRFS_GET_BLOCK_NO_DIRECT) {
1549                         err = -EINVAL;
1550                         goto out;
1551                 }
1552                 size = btrfs_file_extent_inline_len(leaf->items +
1553                                                     path->slots[0]);
1554                 extent_end = (extent_start + size) >> inode->i_blkbits;
1555                 extent_start >>= inode->i_blkbits;
1556                 if (iblock < extent_start || iblock > extent_end) {
1557                         goto not_found;
1558                 }
1559                 ptr = btrfs_file_extent_inline_start(item);
1560                 map = kmap(result->b_page);
1561                 memcpy(map, ptr, size);
1562                 memset(map + size, 0, PAGE_CACHE_SIZE - size);
1563                 flush_dcache_page(result->b_page);
1564                 kunmap(result->b_page);
1565                 set_buffer_uptodate(result);
1566                 SetPageChecked(result->b_page);
1567                 btrfs_map_bh_to_logical(root, result, 0);
1568         }
1569 not_found:
1570         if (create & BTRFS_GET_BLOCK_CREATE) {
1571                 struct btrfs_key ins;
1572                 ret = btrfs_alloc_extent(trans, root, inode->i_ino,
1573                                          1, 0, alloc_hint, (u64)-1,
1574                                          &ins, 1);
1575                 if (ret) {
1576                         err = ret;
1577                         goto out;
1578                 }
1579                 ret = btrfs_insert_file_extent(trans, root, inode->i_ino,
1580                                                iblock << inode->i_blkbits,
1581                                                ins.objectid, ins.offset,
1582                                                ins.offset);
1583                 if (ret) {
1584                         err = ret;
1585                         goto out;
1586                 }
1587                 btrfs_map_bh_to_logical(root, result, ins.objectid);
1588         }
1589 out:
1590         if (trans) {
1591                 ret = btrfs_end_transaction(trans, root);
1592                 if (!err)
1593                         err = ret;
1594         }
1595         btrfs_free_path(path);
1596         return err;
1597 }
1598
1599 int btrfs_get_block(struct inode *inode, sector_t iblock,
1600                     struct buffer_head *result, int create)
1601 {
1602         int err;
1603         struct btrfs_root *root = BTRFS_I(inode)->root;
1604         mutex_lock(&root->fs_info->fs_mutex);
1605         err = btrfs_get_block_lock(inode, iblock, result, create);
1606         mutex_unlock(&root->fs_info->fs_mutex);
1607         return err;
1608 }
1609
1610 static int btrfs_get_block_bmap(struct inode *inode, sector_t iblock,
1611                            struct buffer_head *result, int create)
1612 {
1613         struct btrfs_root *root = BTRFS_I(inode)->root;
1614         u64 start = iblock << inode->i_blkbits;
1615         u64 end = start + root->blocksize -1;
1616         struct extent_map *em;
1617
1618         em = btrfs_get_extent(inode, NULL, 0, start, end, 0);
1619         if (em && !IS_ERR(em) && em->block_start != EXTENT_MAP_INLINE &&
1620             em->block_start != 0) {
1621                 u64 offset;
1622                 offset = start - em->start;
1623                 start = (em->block_start + offset) >> inode->i_blkbits;
1624                 btrfs_map_bh_to_logical(root, result, start);
1625         }
1626         return 0;
1627 }
1628
1629 static sector_t btrfs_bmap(struct address_space *as, sector_t block)
1630 {
1631         return generic_block_bmap(as, block, btrfs_get_block_bmap);
1632 }
1633
1634 static int btrfs_prepare_write(struct file *file, struct page *page,
1635                                unsigned from, unsigned to)
1636 {
1637         return extent_prepare_write(&BTRFS_I(page->mapping->host)->extent_tree,
1638                                     page->mapping->host, page, from, to,
1639                                     btrfs_get_extent);
1640 }
1641
1642 int btrfs_readpage(struct file *file, struct page *page)
1643 {
1644         struct extent_map_tree *tree;
1645         tree = &BTRFS_I(page->mapping->host)->extent_tree;
1646         return extent_read_full_page(tree, page, btrfs_get_extent);
1647 }
1648 static int btrfs_writepage(struct page *page, struct writeback_control *wbc)
1649 {
1650         struct extent_map_tree *tree;
1651         tree = &BTRFS_I(page->mapping->host)->extent_tree;
1652         return extent_write_full_page(tree, page, btrfs_get_extent, wbc);
1653 }
1654
1655 static int btrfs_releasepage(struct page *page, gfp_t unused_gfp_flags)
1656 {
1657         struct extent_map_tree *tree;
1658         int ret;
1659
1660         if (page->private != 1) {
1661                 WARN_ON(1);
1662                 return try_to_free_buffers(page);
1663         }
1664         tree = &BTRFS_I(page->mapping->host)->extent_tree;
1665         ret = try_release_extent_mapping(tree, page);
1666         if (ret == 1) {
1667                 ClearPagePrivate(page);
1668                 set_page_private(page, 0);
1669                 page_cache_release(page);
1670         }
1671         return ret;
1672 }
1673
1674 static void btrfs_invalidatepage(struct page *page, unsigned long offset)
1675 {
1676         struct extent_map_tree *tree;
1677
1678         tree = &BTRFS_I(page->mapping->host)->extent_tree;
1679         extent_invalidatepage(tree, page, offset);
1680         btrfs_releasepage(page, GFP_NOFS);
1681 }
1682
1683 /*
1684  * btrfs_page_mkwrite() is not allowed to change the file size as it gets
1685  * called from a page fault handler when a page is first dirtied. Hence we must
1686  * be careful to check for EOF conditions here. We set the page up correctly
1687  * for a written page which means we get ENOSPC checking when writing into
1688  * holes and correct delalloc and unwritten extent mapping on filesystems that
1689  * support these features.
1690  *
1691  * We are not allowed to take the i_mutex here so we have to play games to
1692  * protect against truncate races as the page could now be beyond EOF.  Because
1693  * vmtruncate() writes the inode size before removing pages, once we have the
1694  * page lock we can determine safely if the page is beyond EOF. If it is not
1695  * beyond EOF, then the page is guaranteed safe against truncation until we
1696  * unlock the page.
1697  */
1698 int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page)
1699 {
1700         struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
1701         struct btrfs_root *root = BTRFS_I(inode)->root;
1702         struct btrfs_trans_handle *trans;
1703         unsigned long end;
1704         loff_t size;
1705         int ret = -EINVAL;
1706         u64 page_start;
1707
1708         lock_page(page);
1709         wait_on_page_writeback(page);
1710         size = i_size_read(inode);
1711         page_start = page->index << PAGE_CACHE_SHIFT;
1712
1713         if ((page->mapping != inode->i_mapping) ||
1714             (page_start > size)) {
1715                 /* page got truncated out from underneath us */
1716                 goto out_unlock;
1717         }
1718
1719         /* page is wholly or partially inside EOF */
1720         if (page_start + PAGE_CACHE_SIZE > size)
1721                 end = size & ~PAGE_CACHE_MASK;
1722         else
1723                 end = PAGE_CACHE_SIZE;
1724
1725         mutex_lock(&root->fs_info->fs_mutex);
1726         trans = btrfs_start_transaction(root, 1);
1727         ret = btrfs_cow_one_page(trans, inode, page, end);
1728         btrfs_end_transaction(trans, root);
1729         mutex_unlock(&root->fs_info->fs_mutex);
1730         set_extent_dirty(&BTRFS_I(inode)->extent_tree,
1731                          page_start, page_start + PAGE_CACHE_SIZE - 1,
1732                          GFP_NOFS);
1733         set_page_dirty(page);
1734
1735 out_unlock:
1736         unlock_page(page);
1737         return ret;
1738 }
1739
1740 static void btrfs_truncate(struct inode *inode)
1741 {
1742         struct btrfs_root *root = BTRFS_I(inode)->root;
1743         int ret;
1744         struct btrfs_trans_handle *trans;
1745
1746         if (!S_ISREG(inode->i_mode))
1747                 return;
1748         if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
1749                 return;
1750
1751         btrfs_truncate_page(inode->i_mapping, inode->i_size);
1752
1753         mutex_lock(&root->fs_info->fs_mutex);
1754         trans = btrfs_start_transaction(root, 1);
1755         btrfs_set_trans_block_group(trans, inode);
1756
1757         /* FIXME, add redo link to tree so we don't leak on crash */
1758         ret = btrfs_truncate_in_trans(trans, root, inode);
1759         btrfs_update_inode(trans, root, inode);
1760         ret = btrfs_end_transaction(trans, root);
1761         BUG_ON(ret);
1762         mutex_unlock(&root->fs_info->fs_mutex);
1763         btrfs_btree_balance_dirty(root);
1764 }
1765
1766 int btrfs_commit_write(struct file *file, struct page *page,
1767                        unsigned from, unsigned to)
1768 {
1769         return extent_commit_write(&BTRFS_I(page->mapping->host)->extent_tree,
1770                                    page->mapping->host, page, from, to);
1771 }
1772
1773 static int create_subvol(struct btrfs_root *root, char *name, int namelen)
1774 {
1775         struct btrfs_trans_handle *trans;
1776         struct btrfs_key key;
1777         struct btrfs_root_item root_item;
1778         struct btrfs_inode_item *inode_item;
1779         struct buffer_head *subvol;
1780         struct btrfs_leaf *leaf;
1781         struct btrfs_root *new_root;
1782         struct inode *inode;
1783         struct inode *dir;
1784         int ret;
1785         int err;
1786         u64 objectid;
1787         u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
1788
1789         mutex_lock(&root->fs_info->fs_mutex);
1790         trans = btrfs_start_transaction(root, 1);
1791         BUG_ON(!trans);
1792
1793         subvol = btrfs_alloc_free_block(trans, root, 0, 0);
1794         if (IS_ERR(subvol))
1795                 return PTR_ERR(subvol);
1796         leaf = btrfs_buffer_leaf(subvol);
1797         btrfs_set_header_nritems(&leaf->header, 0);
1798         btrfs_set_header_level(&leaf->header, 0);
1799         btrfs_set_header_blocknr(&leaf->header, bh_blocknr(subvol));
1800         btrfs_set_header_generation(&leaf->header, trans->transid);
1801         btrfs_set_header_owner(&leaf->header, root->root_key.objectid);
1802         memcpy(leaf->header.fsid, root->fs_info->disk_super->fsid,
1803                sizeof(leaf->header.fsid));
1804         btrfs_mark_buffer_dirty(subvol);
1805
1806         inode_item = &root_item.inode;
1807         memset(inode_item, 0, sizeof(*inode_item));
1808         btrfs_set_inode_generation(inode_item, 1);
1809         btrfs_set_inode_size(inode_item, 3);
1810         btrfs_set_inode_nlink(inode_item, 1);
1811         btrfs_set_inode_nblocks(inode_item, 1);
1812         btrfs_set_inode_mode(inode_item, S_IFDIR | 0755);
1813
1814         btrfs_set_root_blocknr(&root_item, bh_blocknr(subvol));
1815         btrfs_set_root_refs(&root_item, 1);
1816         memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress));
1817         root_item.drop_level = 0;
1818         brelse(subvol);
1819         subvol = NULL;
1820
1821         ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root,
1822                                        0, &objectid);
1823         if (ret)
1824                 goto fail;
1825
1826         btrfs_set_root_dirid(&root_item, new_dirid);
1827
1828         key.objectid = objectid;
1829         key.offset = 1;
1830         key.flags = 0;
1831         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
1832         ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
1833                                 &root_item);
1834         if (ret)
1835                 goto fail;
1836
1837         /*
1838          * insert the directory item
1839          */
1840         key.offset = (u64)-1;
1841         dir = root->fs_info->sb->s_root->d_inode;
1842         ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root,
1843                                     name, namelen, dir->i_ino, &key,
1844                                     BTRFS_FT_DIR);
1845         if (ret)
1846                 goto fail;
1847
1848         ret = btrfs_commit_transaction(trans, root);
1849         if (ret)
1850                 goto fail_commit;
1851
1852         new_root = btrfs_read_fs_root(root->fs_info, &key);
1853         BUG_ON(!new_root);
1854
1855         trans = btrfs_start_transaction(new_root, 1);
1856         BUG_ON(!trans);
1857
1858         inode = btrfs_new_inode(trans, new_root, new_dirid,
1859                                 BTRFS_I(dir)->block_group, S_IFDIR | 0700);
1860         if (IS_ERR(inode))
1861                 goto fail;
1862         inode->i_op = &btrfs_dir_inode_operations;
1863         inode->i_fop = &btrfs_dir_file_operations;
1864         new_root->inode = inode;
1865
1866         ret = btrfs_make_empty_dir(trans, new_root, new_dirid, new_dirid);
1867         if (ret)
1868                 goto fail;
1869
1870         inode->i_nlink = 1;
1871         inode->i_size = 6;
1872         ret = btrfs_update_inode(trans, new_root, inode);
1873         if (ret)
1874                 goto fail;
1875 fail:
1876         err = btrfs_commit_transaction(trans, root);
1877         if (err && !ret)
1878                 ret = err;
1879 fail_commit:
1880         mutex_unlock(&root->fs_info->fs_mutex);
1881         btrfs_btree_balance_dirty(root);
1882         return ret;
1883 }
1884
1885 static int create_snapshot(struct btrfs_root *root, char *name, int namelen)
1886 {
1887         struct btrfs_trans_handle *trans;
1888         struct btrfs_key key;
1889         struct btrfs_root_item new_root_item;
1890         struct buffer_head *tmp;
1891         int ret;
1892         int err;
1893         u64 objectid;
1894
1895         if (!root->ref_cows)
1896                 return -EINVAL;
1897
1898         mutex_lock(&root->fs_info->fs_mutex);
1899         trans = btrfs_start_transaction(root, 1);
1900         BUG_ON(!trans);
1901
1902         ret = btrfs_update_inode(trans, root, root->inode);
1903         if (ret)
1904                 goto fail;
1905
1906         ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root,
1907                                        0, &objectid);
1908         if (ret)
1909                 goto fail;
1910
1911         memcpy(&new_root_item, &root->root_item,
1912                sizeof(new_root_item));
1913
1914         key.objectid = objectid;
1915         key.offset = 1;
1916         key.flags = 0;
1917         btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
1918         btrfs_cow_block(trans, root, root->node, NULL, 0, &tmp);
1919         btrfs_set_root_blocknr(&new_root_item, bh_blocknr(root->node));
1920
1921         ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key,
1922                                 &new_root_item);
1923         if (ret)
1924                 goto fail;
1925
1926         /*
1927          * insert the directory item
1928          */
1929         key.offset = (u64)-1;
1930         ret = btrfs_insert_dir_item(trans, root->fs_info->tree_root,
1931                                     name, namelen,
1932                                     root->fs_info->sb->s_root->d_inode->i_ino,
1933                                     &key, BTRFS_FT_DIR);
1934
1935         if (ret)
1936                 goto fail;
1937
1938         ret = btrfs_inc_root_ref(trans, root);
1939         if (ret)
1940                 goto fail;
1941
1942 fail:
1943         err = btrfs_commit_transaction(trans, root);
1944         if (err && !ret)
1945                 ret = err;
1946         mutex_unlock(&root->fs_info->fs_mutex);
1947         btrfs_btree_balance_dirty(root);
1948         return ret;
1949 }
1950
1951 int btrfs_ioctl(struct inode *inode, struct file *filp, unsigned int
1952                 cmd, unsigned long arg)
1953 {
1954         struct btrfs_root *root = BTRFS_I(inode)->root;
1955         struct btrfs_ioctl_vol_args vol_args;
1956         int ret = 0;
1957         struct btrfs_dir_item *di;
1958         int namelen;
1959         struct btrfs_path *path;
1960         u64 root_dirid;
1961
1962         switch (cmd) {
1963         case BTRFS_IOC_SNAP_CREATE:
1964                 if (copy_from_user(&vol_args,
1965                                    (struct btrfs_ioctl_vol_args __user *)arg,
1966                                    sizeof(vol_args)))
1967                         return -EFAULT;
1968                 namelen = strlen(vol_args.name);
1969                 if (namelen > BTRFS_VOL_NAME_MAX)
1970                         return -EINVAL;
1971                 if (strchr(vol_args.name, '/'))
1972                         return -EINVAL;
1973                 path = btrfs_alloc_path();
1974                 if (!path)
1975                         return -ENOMEM;
1976                 root_dirid = root->fs_info->sb->s_root->d_inode->i_ino,
1977                 mutex_lock(&root->fs_info->fs_mutex);
1978                 di = btrfs_lookup_dir_item(NULL, root->fs_info->tree_root,
1979                                     path, root_dirid,
1980                                     vol_args.name, namelen, 0);
1981                 mutex_unlock(&root->fs_info->fs_mutex);
1982                 btrfs_free_path(path);
1983                 if (di && !IS_ERR(di))
1984                         return -EEXIST;
1985                 if (IS_ERR(di))
1986                         return PTR_ERR(di);
1987
1988                 if (root == root->fs_info->tree_root)
1989                         ret = create_subvol(root, vol_args.name, namelen);
1990                 else
1991                         ret = create_snapshot(root, vol_args.name, namelen);
1992                 break;
1993
1994         case BTRFS_IOC_DEFRAG:
1995                 mutex_lock(&root->fs_info->fs_mutex);
1996                 btrfs_defrag_root(root, 0);
1997                 btrfs_defrag_root(root->fs_info->extent_root, 0);
1998                 mutex_unlock(&root->fs_info->fs_mutex);
1999                 ret = 0;
2000                 break;
2001         default:
2002                 return -ENOTTY;
2003         }
2004         return ret;
2005 }
2006
2007 #ifdef CONFIG_COMPAT
2008 long btrfs_compat_ioctl(struct file *file, unsigned int cmd,
2009                                unsigned long arg)
2010 {
2011         struct inode *inode = file->f_path.dentry->d_inode;
2012         int ret;
2013         lock_kernel();
2014         ret = btrfs_ioctl(inode, file, cmd, (unsigned long) compat_ptr(arg));
2015         unlock_kernel();
2016         return ret;
2017
2018 }
2019 #endif
2020
2021 /*
2022  * Called inside transaction, so use GFP_NOFS
2023  */
2024 struct inode *btrfs_alloc_inode(struct super_block *sb)
2025 {
2026         struct btrfs_inode *ei;
2027
2028         ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS);
2029         if (!ei)
2030                 return NULL;
2031         ei->last_trans = 0;
2032         return &ei->vfs_inode;
2033 }
2034
2035 void btrfs_destroy_inode(struct inode *inode)
2036 {
2037         WARN_ON(!list_empty(&inode->i_dentry));
2038         WARN_ON(inode->i_data.nrpages);
2039
2040         kmem_cache_free(btrfs_inode_cachep, BTRFS_I(inode));
2041 }
2042
2043 static void init_once(void * foo, struct kmem_cache * cachep,
2044                       unsigned long flags)
2045 {
2046         struct btrfs_inode *ei = (struct btrfs_inode *) foo;
2047
2048         inode_init_once(&ei->vfs_inode);
2049 }
2050
2051 void btrfs_destroy_cachep(void)
2052 {
2053         if (btrfs_inode_cachep)
2054                 kmem_cache_destroy(btrfs_inode_cachep);
2055         if (btrfs_trans_handle_cachep)
2056                 kmem_cache_destroy(btrfs_trans_handle_cachep);
2057         if (btrfs_transaction_cachep)
2058                 kmem_cache_destroy(btrfs_transaction_cachep);
2059         if (btrfs_bit_radix_cachep)
2060                 kmem_cache_destroy(btrfs_bit_radix_cachep);
2061         if (btrfs_path_cachep)
2062                 kmem_cache_destroy(btrfs_path_cachep);
2063 }
2064
2065 static struct kmem_cache *cache_create(const char *name, size_t size,
2066                                        unsigned long extra_flags,
2067                                        void (*ctor)(void *, struct kmem_cache *,
2068                                                     unsigned long))
2069 {
2070         return kmem_cache_create(name, size, 0, (SLAB_RECLAIM_ACCOUNT |
2071                                  SLAB_MEM_SPREAD | extra_flags), ctor
2072 #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)
2073                                  ,NULL
2074 #endif
2075                                 );
2076 }
2077
2078 int btrfs_init_cachep(void)
2079 {
2080         btrfs_inode_cachep = cache_create("btrfs_inode_cache",
2081                                           sizeof(struct btrfs_inode),
2082                                           0, init_once);
2083         if (!btrfs_inode_cachep)
2084                 goto fail;
2085         btrfs_trans_handle_cachep = cache_create("btrfs_trans_handle_cache",
2086                                              sizeof(struct btrfs_trans_handle),
2087                                              0, NULL);
2088         if (!btrfs_trans_handle_cachep)
2089                 goto fail;
2090         btrfs_transaction_cachep = cache_create("btrfs_transaction_cache",
2091                                              sizeof(struct btrfs_transaction),
2092                                              0, NULL);
2093         if (!btrfs_transaction_cachep)
2094                 goto fail;
2095         btrfs_path_cachep = cache_create("btrfs_path_cache",
2096                                          sizeof(struct btrfs_transaction),
2097                                          0, NULL);
2098         if (!btrfs_path_cachep)
2099                 goto fail;
2100         btrfs_bit_radix_cachep = cache_create("btrfs_radix", 256,
2101                                               SLAB_DESTROY_BY_RCU, NULL);
2102         if (!btrfs_bit_radix_cachep)
2103                 goto fail;
2104         return 0;
2105 fail:
2106         btrfs_destroy_cachep();
2107         return -ENOMEM;
2108 }
2109
2110 static int btrfs_getattr(struct vfsmount *mnt,
2111                          struct dentry *dentry, struct kstat *stat)
2112 {
2113         struct inode *inode = dentry->d_inode;
2114         generic_fillattr(inode, stat);
2115         stat->blksize = 256 * 1024;
2116         return 0;
2117 }
2118
2119 static int btrfs_rename(struct inode * old_dir, struct dentry *old_dentry,
2120                            struct inode * new_dir,struct dentry *new_dentry)
2121 {
2122         struct btrfs_trans_handle *trans;
2123         struct btrfs_root *root = BTRFS_I(old_dir)->root;
2124         struct inode *new_inode = new_dentry->d_inode;
2125         struct inode *old_inode = old_dentry->d_inode;
2126         struct timespec ctime = CURRENT_TIME;
2127         struct btrfs_path *path;
2128         struct btrfs_dir_item *di;
2129         int ret;
2130
2131         if (S_ISDIR(old_inode->i_mode) && new_inode &&
2132             new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) {
2133                 return -ENOTEMPTY;
2134         }
2135         mutex_lock(&root->fs_info->fs_mutex);
2136         trans = btrfs_start_transaction(root, 1);
2137         btrfs_set_trans_block_group(trans, new_dir);
2138         path = btrfs_alloc_path();
2139         if (!path) {
2140                 ret = -ENOMEM;
2141                 goto out_fail;
2142         }
2143
2144         old_dentry->d_inode->i_nlink++;
2145         old_dir->i_ctime = old_dir->i_mtime = ctime;
2146         new_dir->i_ctime = new_dir->i_mtime = ctime;
2147         old_inode->i_ctime = ctime;
2148         if (S_ISDIR(old_inode->i_mode) && old_dir != new_dir) {
2149                 struct btrfs_key *location = &BTRFS_I(new_dir)->location;
2150                 u64 old_parent_oid;
2151                 di = btrfs_lookup_dir_item(trans, root, path, old_inode->i_ino,
2152                                            "..", 2, -1);
2153                 if (IS_ERR(di)) {
2154                         ret = PTR_ERR(di);
2155                         goto out_fail;
2156                 }
2157                 if (!di) {
2158                         ret = -ENOENT;
2159                         goto out_fail;
2160                 }
2161                 old_parent_oid = btrfs_disk_key_objectid(&di->location);
2162                 ret = btrfs_del_item(trans, root, path);
2163                 if (ret) {
2164                         goto out_fail;
2165                 }
2166                 btrfs_release_path(root, path);
2167
2168                 di = btrfs_lookup_dir_index_item(trans, root, path,
2169                                                  old_inode->i_ino,
2170                                                  old_parent_oid,
2171                                                  "..", 2, -1);
2172                 if (IS_ERR(di)) {
2173                         ret = PTR_ERR(di);
2174                         goto out_fail;
2175                 }
2176                 if (!di) {
2177                         ret = -ENOENT;
2178                         goto out_fail;
2179                 }
2180                 ret = btrfs_del_item(trans, root, path);
2181                 if (ret) {
2182                         goto out_fail;
2183                 }
2184                 btrfs_release_path(root, path);
2185
2186                 ret = btrfs_insert_dir_item(trans, root, "..", 2,
2187                                             old_inode->i_ino, location,
2188                                             BTRFS_FT_DIR);
2189                 if (ret)
2190                         goto out_fail;
2191         }
2192
2193
2194         ret = btrfs_unlink_trans(trans, root, old_dir, old_dentry);
2195         if (ret)
2196                 goto out_fail;
2197
2198         if (new_inode) {
2199                 new_inode->i_ctime = CURRENT_TIME;
2200                 ret = btrfs_unlink_trans(trans, root, new_dir, new_dentry);
2201                 if (ret)
2202                         goto out_fail;
2203                 if (S_ISDIR(new_inode->i_mode))
2204                         clear_nlink(new_inode);
2205                 else
2206                         drop_nlink(new_inode);
2207                 ret = btrfs_update_inode(trans, root, new_inode);
2208                 if (ret)
2209                         goto out_fail;
2210         }
2211         ret = btrfs_add_link(trans, new_dentry, old_inode);
2212         if (ret)
2213                 goto out_fail;
2214
2215 out_fail:
2216         btrfs_free_path(path);
2217         btrfs_end_transaction(trans, root);
2218         mutex_unlock(&root->fs_info->fs_mutex);
2219         return ret;
2220 }
2221
2222 static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
2223                          const char *symname)
2224 {
2225         struct btrfs_trans_handle *trans;
2226         struct btrfs_root *root = BTRFS_I(dir)->root;
2227         struct btrfs_path *path;
2228         struct btrfs_key key;
2229         struct inode *inode;
2230         int err;
2231         int drop_inode = 0;
2232         u64 objectid;
2233         int name_len;
2234         int datasize;
2235         char *ptr;
2236         struct btrfs_file_extent_item *ei;
2237
2238         name_len = strlen(symname) + 1;
2239         if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root))
2240                 return -ENAMETOOLONG;
2241         mutex_lock(&root->fs_info->fs_mutex);
2242         trans = btrfs_start_transaction(root, 1);
2243         btrfs_set_trans_block_group(trans, dir);
2244
2245         err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid);
2246         if (err) {
2247                 err = -ENOSPC;
2248                 goto out_unlock;
2249         }
2250
2251         inode = btrfs_new_inode(trans, root, objectid,
2252                                 BTRFS_I(dir)->block_group, S_IFLNK|S_IRWXUGO);
2253         err = PTR_ERR(inode);
2254         if (IS_ERR(inode))
2255                 goto out_unlock;
2256
2257         btrfs_set_trans_block_group(trans, inode);
2258         err = btrfs_add_nondir(trans, dentry, inode);
2259         if (err)
2260                 drop_inode = 1;
2261         else {
2262                 inode->i_mapping->a_ops = &btrfs_aops;
2263                 inode->i_fop = &btrfs_file_operations;
2264                 inode->i_op = &btrfs_file_inode_operations;
2265                 extent_map_tree_init(&BTRFS_I(inode)->extent_tree,
2266                                      inode->i_mapping, GFP_NOFS);
2267         }
2268         dir->i_sb->s_dirt = 1;
2269         btrfs_update_inode_block_group(trans, inode);
2270         btrfs_update_inode_block_group(trans, dir);
2271         if (drop_inode)
2272                 goto out_unlock;
2273
2274         path = btrfs_alloc_path();
2275         BUG_ON(!path);
2276         key.objectid = inode->i_ino;
2277         key.offset = 0;
2278         key.flags = 0;
2279         btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
2280         datasize = btrfs_file_extent_calc_inline_size(name_len);
2281         err = btrfs_insert_empty_item(trans, root, path, &key,
2282                                       datasize);
2283         if (err) {
2284                 drop_inode = 1;
2285                 goto out_unlock;
2286         }
2287         ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]),
2288                path->slots[0], struct btrfs_file_extent_item);
2289         btrfs_set_file_extent_generation(ei, trans->transid);
2290         btrfs_set_file_extent_type(ei,
2291                                    BTRFS_FILE_EXTENT_INLINE);
2292         ptr = btrfs_file_extent_inline_start(ei);
2293         btrfs_memcpy(root, path->nodes[0]->b_data,
2294                      ptr, symname, name_len);
2295         btrfs_mark_buffer_dirty(path->nodes[0]);
2296         btrfs_free_path(path);
2297         inode->i_op = &btrfs_symlink_inode_operations;
2298         inode->i_mapping->a_ops = &btrfs_symlink_aops;
2299         inode->i_size = name_len - 1;
2300         err = btrfs_update_inode(trans, root, inode);
2301         if (err)
2302                 drop_inode = 1;
2303
2304 out_unlock:
2305         btrfs_end_transaction(trans, root);
2306         mutex_unlock(&root->fs_info->fs_mutex);
2307         if (drop_inode) {
2308                 inode_dec_link_count(inode);
2309                 iput(inode);
2310         }
2311         btrfs_btree_balance_dirty(root);
2312         return err;
2313 }
2314
2315 static struct inode_operations btrfs_dir_inode_operations = {
2316         .lookup         = btrfs_lookup,
2317         .create         = btrfs_create,
2318         .unlink         = btrfs_unlink,
2319         .link           = btrfs_link,
2320         .mkdir          = btrfs_mkdir,
2321         .rmdir          = btrfs_rmdir,
2322         .rename         = btrfs_rename,
2323         .symlink        = btrfs_symlink,
2324         .setattr        = btrfs_setattr,
2325         .mknod          = btrfs_mknod,
2326 };
2327
2328 static struct inode_operations btrfs_dir_ro_inode_operations = {
2329         .lookup         = btrfs_lookup,
2330 };
2331
2332 static struct file_operations btrfs_dir_file_operations = {
2333         .llseek         = generic_file_llseek,
2334         .read           = generic_read_dir,
2335         .readdir        = btrfs_readdir,
2336         .ioctl          = btrfs_ioctl,
2337 #ifdef CONFIG_COMPAT
2338         .compat_ioctl   = btrfs_compat_ioctl,
2339 #endif
2340 };
2341
2342 static struct address_space_operations btrfs_aops = {
2343         .readpage       = btrfs_readpage,
2344         .writepage      = btrfs_writepage,
2345         .sync_page      = block_sync_page,
2346         .prepare_write  = btrfs_prepare_write,
2347         .commit_write   = btrfs_commit_write,
2348         .bmap           = btrfs_bmap,
2349         .invalidatepage = btrfs_invalidatepage,
2350         .releasepage    = btrfs_releasepage,
2351         .set_page_dirty = __set_page_dirty_nobuffers,
2352 };
2353
2354 static struct address_space_operations btrfs_symlink_aops = {
2355         .readpage       = btrfs_readpage,
2356         .writepage      = btrfs_writepage,
2357 };
2358
2359 static struct inode_operations btrfs_file_inode_operations = {
2360         .truncate       = btrfs_truncate,
2361         .getattr        = btrfs_getattr,
2362         .setattr        = btrfs_setattr,
2363 };
2364
2365 static struct inode_operations btrfs_special_inode_operations = {
2366         .getattr        = btrfs_getattr,
2367         .setattr        = btrfs_setattr,
2368 };
2369
2370 static struct inode_operations btrfs_symlink_inode_operations = {
2371         .readlink       = generic_readlink,
2372         .follow_link    = page_follow_link_light,
2373         .put_link       = page_put_link,
2374 };