2 * Copyright (c) 2012 Taobao.
3 * Written by Tao Ma <boyu.mt@taobao.com>
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of version 2.1 of the GNU Lesser General Public License
7 * as published by the Free Software Foundation.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 #include "ext4_jbd2.h"
/*
 * Name of the xattr that holds inline data; this file always looks it up
 * together with EXT4_XATTR_INDEX_SYSTEM.
 */
19 #define EXT4_XATTR_SYSTEM_DATA "data"
/*
 * Number of inline-data bytes kept in the raw inode i_block area: one
 * __le32 for each of the EXT4_N_BLOCKS slots. The read/write helpers in
 * this file place data beyond this size in the xattr value instead.
 */
20 #define EXT4_MIN_INLINE_DATA_SIZE ((sizeof(__le32) * EXT4_N_BLOCKS))
/*
 * Return the cached inline data size (i_inline_size) when the inode has a
 * recorded inline-data xattr offset (i_inline_off is non-zero).
 * NOTE(review): the return for the i_inline_off == 0 case is not visible
 * in this view of the file.
 */
22 int ext4_get_inline_size(struct inode *inode)
24 if (EXT4_I(inode)->i_inline_off)
25 return EXT4_I(inode)->i_inline_size;
/*
 * Work out how many bytes the inline-data xattr value may occupy inside
 * the in-inode xattr area of the inode located by iloc.
 *
 * The starting budget (min_offs) is the on-disk inode size minus the
 * classic inode size, the extra inode fields and the xattr body header.
 * When EXT4_STATE_XATTR is not set the result is derived from that budget
 * directly; otherwise the existing entries are walked first.
 *
 * NOTE(review): several lines of this function (local declarations, the
 * loop body tail, the start of the free-space computation and the final
 * return) are not visible in this view; the comments below cover only the
 * visible statements.
 */
30 static int get_max_inline_xattr_value_size(struct inode *inode,
31 struct ext4_iloc *iloc)
33 struct ext4_xattr_ibody_header *header;
34 struct ext4_xattr_entry *entry;
35 struct ext4_inode *raw_inode;
38 min_offs = EXT4_SB(inode->i_sb)->s_inode_size -
39 EXT4_GOOD_OLD_INODE_SIZE -
40 EXT4_I(inode)->i_extra_isize -
41 sizeof(struct ext4_xattr_ibody_header);
44 * We need to subtract another sizeof(__u32) since an in-inode xattr
45 * needs an empty 4 bytes to indicate the gap between the xattr entry
46 * and the name/value pair.
48 if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR))
49 return EXT4_XATTR_SIZE(min_offs -
50 EXT4_XATTR_LEN(strlen(EXT4_XATTR_SYSTEM_DATA)) -
51 EXT4_XATTR_ROUND - sizeof(__u32));
/* Walk the in-inode xattr entries starting at the first one. */
53 raw_inode = ext4_raw_inode(iloc);
54 header = IHDR(inode, raw_inode);
55 entry = IFIRST(header);
57 /* Compute min_offs. */
58 for (; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry)) {
/* Only entries with an in-inode, non-empty value are considered. */
59 if (!entry->e_value_block && entry->e_value_size) {
60 size_t offs = le16_to_cpu(entry->e_value_offs);
66 ((void *)entry - (void *)IFIRST(header)) - sizeof(__u32);
/*
 * An inline-data entry already exists: the bytes its value currently
 * occupies are added back to the free count.
 */
68 if (EXT4_I(inode)->i_inline_off) {
69 entry = (struct ext4_xattr_entry *)
70 ((void *)raw_inode + EXT4_I(inode)->i_inline_off);
72 free += le32_to_cpu(entry->e_value_size);
/* presumably the space the new entry itself needs - TODO confirm */
76 free -= EXT4_XATTR_LEN(strlen(EXT4_XATTR_SYSTEM_DATA));
78 if (free > EXT4_XATTR_ROUND)
79 free = EXT4_XATTR_SIZE(free - EXT4_XATTR_ROUND);
88 * Get the maximum size we now can store in an inode.
89 * If we can't find the space for a xattr entry, don't use the space
90 * of the extents since we have no space to indicate the inline data.
/*
 * Return the largest inline data size the inode can hold right now: the
 * value computed by get_max_inline_xattr_value_size() plus
 * EXT4_MIN_INLINE_DATA_SIZE.
 *
 * NOTE(review): the values returned on the early exits (i_extra_isize of
 * zero, failure of ext4_get_inode_loc(), max_inline_size of zero) are not
 * visible in this view.
 */
92 int ext4_get_max_inline_size(struct inode *inode)
94 int error, max_inline_size;
95 struct ext4_iloc iloc;
97 if (EXT4_I(inode)->i_extra_isize == 0)
100 error = ext4_get_inode_loc(inode, &iloc);
102 ext4_error_inode(inode, __func__, __LINE__, 0,
103 "can't get inode location %lu",
/* The xattr area is inspected with xattr_sem held for read. */
108 down_read(&EXT4_I(inode)->xattr_sem);
109 max_inline_size = get_max_inline_xattr_value_size(inode, &iloc);
110 up_read(&EXT4_I(inode)->xattr_sem);
114 if (!max_inline_size)
117 return max_inline_size + EXT4_MIN_INLINE_DATA_SIZE;
/*
 * Return non-zero when the inode both carries the EXT4_INODE_INLINE_DATA
 * flag and has a non-zero cached inline xattr offset (i_inline_off).
 */
120 int ext4_has_inline_data(struct inode *inode)
122 return ext4_test_inode_flag(inode, EXT4_INODE_INLINE_DATA) &&
123 EXT4_I(inode)->i_inline_off;
127 * this function does not take xattr_sem, which is OK because it is
128 * currently only used in a code path coming from ext4_iget, before
129 * the new inode has been unlocked
/*
 * Look for the system inline-data xattr in the inode body and, when it is
 * present, cache its location and size in the in-memory inode.
 *
 * NOTE(review): the early returns, error checks and cleanup of this
 * function are not visible in this view.
 */
131 int ext4_find_inline_data_nolock(struct inode *inode)
133 struct ext4_xattr_ibody_find is = {
134 .s = { .not_found = -ENODATA, },
136 struct ext4_xattr_info i = {
137 .name_index = EXT4_XATTR_INDEX_SYSTEM,
138 .name = EXT4_XATTR_SYSTEM_DATA,
142 if (EXT4_I(inode)->i_extra_isize == 0)
145 error = ext4_get_inode_loc(inode, &is.iloc);
149 error = ext4_xattr_ibody_find(inode, &i, &is);
/*
 * Entry found: remember its byte offset from the start of the raw inode
 * and the total inline size (i_block part plus the xattr value size).
 */
153 if (!is.s.not_found) {
154 EXT4_I(inode)->i_inline_off = (u16)((void *)is.s.here -
155 (void *)ext4_raw_inode(&is.iloc));
156 EXT4_I(inode)->i_inline_size = EXT4_MIN_INLINE_DATA_SIZE +
157 le32_to_cpu(is.s.here->e_value_size);
158 ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
/*
 * Copy inline data of the inode into buffer.
 *
 * The first part (at most EXT4_MIN_INLINE_DATA_SIZE bytes) comes from the
 * raw inode i_block area. The visible remainder locates the inline xattr
 * entry at i_inline_off, clamps len to the entry value size and reads from
 * the value stored at IFIRST(header) + e_value_offs.
 *
 * It is a BUG for len to exceed the cached i_inline_size.
 *
 * NOTE(review): the len parameter line, the local declarations, the length
 * bookkeeping between the two copies and the return statement are not
 * visible in this view.
 */
165 static int ext4_read_inline_data(struct inode *inode, void *buffer,
167 struct ext4_iloc *iloc)
169 struct ext4_xattr_entry *entry;
170 struct ext4_xattr_ibody_header *header;
172 struct ext4_inode *raw_inode;
177 BUG_ON(len > EXT4_I(inode)->i_inline_size);
/* Bytes to take from i_block: the smaller of len and the i_block size. */
179 cp_len = len < EXT4_MIN_INLINE_DATA_SIZE ?
180 len : EXT4_MIN_INLINE_DATA_SIZE;
182 raw_inode = ext4_raw_inode(iloc);
183 memcpy(buffer, (void *)(raw_inode->i_block), cp_len);
/* Locate the inline xattr entry and never read past its value. */
191 header = IHDR(inode, raw_inode);
192 entry = (struct ext4_xattr_entry *)((void *)raw_inode +
193 EXT4_I(inode)->i_inline_off);
194 len = min_t(unsigned int, len,
195 (unsigned int)le32_to_cpu(entry->e_value_size));
198 (void *)IFIRST(header) + le16_to_cpu(entry->e_value_offs), len);
206 * write the buffer to the inline inode.
207 * If 'create' is set, we don't need to do the extra copy in the xattr
208 * value since it is already handled by ext4_xattr_ibody_set. That saves
/*
 * Write len bytes from buffer into the inline data of the inode, starting
 * at byte offset pos.
 *
 * Bytes that fall below EXT4_MIN_INLINE_DATA_SIZE go into the raw inode
 * i_block area; the visible remainder rebases pos onto the xattr value and
 * copies into IFIRST(header) + e_value_offs + pos.
 *
 * It is a BUG to call this without an inline xattr entry (i_inline_off of
 * zero) or with pos + len beyond the cached i_inline_size.
 *
 * NOTE(review): the bookkeeping after the first copy and the remaining
 * arguments of the second memcpy are not visible in this view.
 */
211 void ext4_write_inline_data(struct inode *inode, struct ext4_iloc *iloc,
212 void *buffer, loff_t pos, unsigned int len)
214 struct ext4_xattr_entry *entry;
215 struct ext4_xattr_ibody_header *header;
216 struct ext4_inode *raw_inode;
219 BUG_ON(!EXT4_I(inode)->i_inline_off);
220 BUG_ON(pos + len > EXT4_I(inode)->i_inline_size);
222 raw_inode = ext4_raw_inode(iloc);
/* Part of the write lands in i_block: copy only up to its end. */
225 if (pos < EXT4_MIN_INLINE_DATA_SIZE) {
226 cp_len = pos + len > EXT4_MIN_INLINE_DATA_SIZE ?
227 EXT4_MIN_INLINE_DATA_SIZE - pos : len;
228 memcpy((void *)raw_inode->i_block + pos, buffer, cp_len);
/* From here on pos is relative to the start of the xattr value. */
238 pos -= EXT4_MIN_INLINE_DATA_SIZE;
239 header = IHDR(inode, raw_inode);
240 entry = (struct ext4_xattr_entry *)((void *)raw_inode +
241 EXT4_I(inode)->i_inline_off);
243 memcpy((void *)IFIRST(header) + le16_to_cpu(entry->e_value_offs) + pos,
/*
 * Create the inline-data xattr entry for an inode that does not have one
 * yet and switch the inode over to inline data.
 *
 * Visible steps: get the inode location and journal write access to its
 * buffer, size the xattr value as len minus EXT4_MIN_INLINE_DATA_SIZE when
 * len exceeds that minimum (using empty_zero_page as the value), insert
 * the entry, zero i_block, cache i_inline_off / i_inline_size, clear
 * EXT4_INODE_EXTENTS, set EXT4_INODE_INLINE_DATA and mark the inode dirty.
 *
 * NOTE(review): local declarations, error checks, the else branch of the
 * size computation, the setup of the xattr info value fields and the
 * function exit are not visible in this view.
 */
247 static int ext4_create_inline_data(handle_t *handle,
248 struct inode *inode, unsigned len)
252 struct ext4_xattr_ibody_find is = {
253 .s = { .not_found = -ENODATA, },
255 struct ext4_xattr_info i = {
256 .name_index = EXT4_XATTR_INDEX_SYSTEM,
257 .name = EXT4_XATTR_SYSTEM_DATA,
260 error = ext4_get_inode_loc(inode, &is.iloc);
264 error = ext4_journal_get_write_access(handle, is.iloc.bh);
/* Only the part beyond the i_block area needs room in the xattr value. */
268 if (len > EXT4_MIN_INLINE_DATA_SIZE) {
269 value = (void *)empty_zero_page;
270 len -= EXT4_MIN_INLINE_DATA_SIZE;
276 /* Insert the xattr entry. */
280 error = ext4_xattr_ibody_find(inode, &i, &is);
/* The entry must not exist yet when creating. */
284 BUG_ON(!is.s.not_found);
286 error = ext4_xattr_ibody_set(handle, inode, &i, &is);
/* No room for the entry: stop treating this inode as an inline candidate. */
288 if (error == -ENOSPC)
289 ext4_clear_inode_state(inode,
290 EXT4_STATE_MAY_INLINE_DATA);
294 memset((void *)ext4_raw_inode(&is.iloc)->i_block,
295 0, EXT4_MIN_INLINE_DATA_SIZE);
/* Cache the entry offset within the raw inode and the total inline size. */
297 EXT4_I(inode)->i_inline_off = (u16)((void *)is.s.here -
298 (void *)ext4_raw_inode(&is.iloc));
299 EXT4_I(inode)->i_inline_size = len + EXT4_MIN_INLINE_DATA_SIZE;
300 ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS);
301 ext4_set_inode_flag(inode, EXT4_INODE_INLINE_DATA);
303 error = ext4_mark_iloc_dirty(handle, inode, &is.iloc);
/*
 * Grow the existing inline-data xattr value so that the inode can hold
 * len bytes of inline data.
 *
 * Visible steps: nothing needs resizing when len already fits in
 * i_inline_size; otherwise locate the existing entry, allocate a zeroed
 * buffer of the new value size (len minus EXT4_MIN_INLINE_DATA_SIZE),
 * fetch the current value with ext4_xattr_ibody_get(), get journal write
 * access, store the entry again, refresh the cached i_inline_off /
 * i_inline_size and mark the inode dirty.
 *
 * NOTE(review): the len parameter line, local declarations, error checks,
 * the handling of -ENODATA, the setup of the xattr info value fields and
 * the cleanup/exit path are not visible in this view.
 */
310 static int ext4_update_inline_data(handle_t *handle, struct inode *inode,
315 struct ext4_xattr_ibody_find is = {
316 .s = { .not_found = -ENODATA, },
318 struct ext4_xattr_info i = {
319 .name_index = EXT4_XATTR_INDEX_SYSTEM,
320 .name = EXT4_XATTR_SYSTEM_DATA,
323 /* If the old space is ok, write the data directly. */
324 if (len <= EXT4_I(inode)->i_inline_size)
327 error = ext4_get_inode_loc(inode, &is.iloc);
331 error = ext4_xattr_ibody_find(inode, &i, &is);
/* Updating requires that the entry already exists. */
335 BUG_ON(is.s.not_found);
/* New xattr value size; allocated with GFP_NOFS and zero-filled. */
337 len -= EXT4_MIN_INLINE_DATA_SIZE;
338 value = kzalloc(len, GFP_NOFS);
342 error = ext4_xattr_ibody_get(inode, i.name_index, i.name,
344 if (error == -ENODATA)
347 error = ext4_journal_get_write_access(handle, is.iloc.bh);
351 /* Update the xattr entry. */
355 error = ext4_xattr_ibody_set(handle, inode, &i, &is);
/* Re-read offset and size from the entry as it is after the set. */
359 EXT4_I(inode)->i_inline_off = (u16)((void *)is.s.here -
360 (void *)ext4_raw_inode(&is.iloc));
361 EXT4_I(inode)->i_inline_size = EXT4_MIN_INLINE_DATA_SIZE +
362 le32_to_cpu(is.s.here->e_value_size);
363 ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
365 error = ext4_mark_iloc_dirty(handle, inode, &is.iloc);
/*
 * Make sure the inode has inline space prepared for a write: with
 * xattr_sem held for write, update the existing inline-data entry when
 * i_inline_off is set, or create a new one.
 *
 * NOTE(review): the len parameter line, the local declarations, the
 * returns taken when EXT4_STATE_MAY_INLINE_DATA is clear or when the
 * requested size does not fit, the else keyword between the two calls and
 * the final return are not visible in this view.
 */
373 int ext4_prepare_inline_data(handle_t *handle, struct inode *inode,
377 struct ext4_inode_info *ei = EXT4_I(inode);
379 if (!ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA))
382 size = ext4_get_max_inline_size(inode);
386 down_write(&EXT4_I(inode)->xattr_sem);
388 if (ei->i_inline_off)
389 ret = ext4_update_inline_data(handle, inode, len);
391 ret = ext4_create_inline_data(handle, inode, len);
393 up_write(&EXT4_I(inode)->xattr_sem);
/*
 * Remove the inline data from an inode; the caller handles locking (this
 * function itself does not touch xattr_sem).
 *
 * Visible steps: bail out when there is no inline entry, locate the entry,
 * get journal write access, call ext4_xattr_ibody_set() on it, zero the
 * i_block area, re-enable extents for directories, regular files and
 * symlinks when the filesystem has the extents feature, clear
 * EXT4_INODE_INLINE_DATA, mark the inode dirty and reset the cached inline
 * offset, size and EXT4_STATE_MAY_INLINE_DATA.
 *
 * NOTE(review): the inode parameter line, the value/size fields handed to
 * ext4_xattr_ibody_set(), all error checks, the -ENODATA handling and the
 * return are not visible in this view.
 */
398 static int ext4_destroy_inline_data_nolock(handle_t *handle,
401 struct ext4_inode_info *ei = EXT4_I(inode);
402 struct ext4_xattr_ibody_find is = {
403 .s = { .not_found = 0, },
405 struct ext4_xattr_info i = {
406 .name_index = EXT4_XATTR_INDEX_SYSTEM,
407 .name = EXT4_XATTR_SYSTEM_DATA,
413 if (!ei->i_inline_off)
416 error = ext4_get_inode_loc(inode, &is.iloc);
420 error = ext4_xattr_ibody_find(inode, &i, &is);
424 error = ext4_journal_get_write_access(handle, is.iloc.bh);
428 error = ext4_xattr_ibody_set(handle, inode, &i, &is);
/* Wipe the inline bytes that lived in i_block. */
432 memset((void *)ext4_raw_inode(&is.iloc)->i_block,
433 0, EXT4_MIN_INLINE_DATA_SIZE);
/* Switch the inode back to an (empty) extent tree where applicable. */
435 if (EXT4_HAS_INCOMPAT_FEATURE(inode->i_sb,
436 EXT4_FEATURE_INCOMPAT_EXTENTS)) {
437 if (S_ISDIR(inode->i_mode) ||
438 S_ISREG(inode->i_mode) || S_ISLNK(inode->i_mode)) {
439 ext4_set_inode_flag(inode, EXT4_INODE_EXTENTS);
440 ext4_ext_tree_init(handle, inode);
443 ext4_clear_inode_flag(inode, EXT4_INODE_INLINE_DATA);
446 error = ext4_mark_iloc_dirty(handle, inode, &is.iloc);
/* Forget the cached inline location. */
448 EXT4_I(inode)->i_inline_off = 0;
449 EXT4_I(inode)->i_inline_size = 0;
450 ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
453 if (error == -ENODATA)
/*
 * Fill a locked page from the inline data of the inode and mark it
 * uptodate.
 *
 * The number of bytes read is the smaller of the inline size and i_size;
 * the rest of the page up to PAGE_CACHE_SIZE is zeroed.
 *
 * It is a BUG to call this with an unlocked page or on an inode without
 * inline data.
 *
 * NOTE(review): local declarations, the remainder of the warning call,
 * error handling and the return are not visible in this view.
 */
458 static int ext4_read_inline_page(struct inode *inode, struct page *page)
463 struct ext4_iloc iloc;
465 BUG_ON(!PageLocked(page));
466 BUG_ON(!ext4_has_inline_data(inode));
469 if (!EXT4_I(inode)->i_inline_off) {
470 ext4_warning(inode->i_sb, "inode %lu doesn't have inline data.",
475 ret = ext4_get_inode_loc(inode, &iloc);
479 len = min_t(size_t, ext4_get_inline_size(inode), i_size_read(inode));
/* Copy through a temporary atomic mapping of the page. */
480 kaddr = kmap_atomic(page);
481 ret = ext4_read_inline_data(inode, kaddr, len, &iloc);
482 flush_dcache_page(page);
483 kunmap_atomic(kaddr);
484 zero_user_segment(page, len, PAGE_CACHE_SIZE);
485 SetPageUptodate(page);
/*
 * readpage helper for inodes with inline data, run with xattr_sem held
 * for read.
 *
 * Returns 0 when the visible ret value is non-negative, otherwise the
 * negative error in ret.
 *
 * NOTE(review): the local declarations, the return taken when the inode
 * has no inline data, the condition selecting ext4_read_inline_page() and
 * the code between the unlock and the return are not visible in this view.
 */
492 int ext4_readpage_inline(struct inode *inode, struct page *page)
496 down_read(&EXT4_I(inode)->xattr_sem);
497 if (!ext4_has_inline_data(inode)) {
498 up_read(&EXT4_I(inode)->xattr_sem);
503 * Current inline data can only exist in the 1st page,
504 * So for all the other pages, just set them uptodate.
507 ret = ext4_read_inline_page(inode, page);
/* Other pages that are not uptodate are zero-filled and marked uptodate. */
508 else if (!PageUptodate(page)) {
509 zero_user_segment(page, 0, PAGE_CACHE_SIZE);
510 SetPageUptodate(page);
513 up_read(&EXT4_I(inode)->xattr_sem);
516 return ret >= 0 ? 0 : ret;
/*
 * Convert an inode from inline data to block-mapped data as part of a
 * write.
 *
 * Visible steps: clear EXT4_STATE_MAY_INLINE_DATA when the inode has no
 * inline data, start a journal handle sized by
 * ext4_writepage_trans_blocks(), grab page 0 of the mapping, take
 * xattr_sem for write, re-check that inline data still exists, read the
 * inline data into the page when it is not uptodate, destroy the inline
 * data, and map the range with __block_write_begin() using
 * ext4_get_block_write or ext4_get_block depending on
 * ext4_should_dioread_nolock(). On the visible failure path the page is
 * released, the inode is added to the orphan list, the failed write is
 * truncated, and -ENOSPC may be retried via ext4_should_retry_alloc().
 *
 * NOTE(review): most of the parameter list, several local declarations,
 * all labels/gotos, error checks and the return are not visible in this
 * view, so the exact control flow between the visible statements cannot
 * be confirmed from here.
 */
519 static int ext4_convert_inline_data_to_extent(struct address_space *mapping,
523 int ret, needed_blocks;
524 handle_t *handle = NULL;
525 int retries = 0, sem_held = 0;
526 struct page *page = NULL;
528 struct ext4_iloc iloc;
530 if (!ext4_has_inline_data(inode)) {
532 * clear the flag so that no new write
533 * will trap here again.
535 ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
539 needed_blocks = ext4_writepage_trans_blocks(inode);
541 ret = ext4_get_inode_loc(inode, &iloc);
546 handle = ext4_journal_start(inode, needed_blocks);
547 if (IS_ERR(handle)) {
548 ret = PTR_ERR(handle);
553 /* We cannot recurse into the filesystem as the transaction is already
555 flags |= AOP_FLAG_NOFS;
557 page = grab_cache_page_write_begin(mapping, 0, flags);
563 down_write(&EXT4_I(inode)->xattr_sem);
565 /* If some one has already done this for us, just exit. */
566 if (!ext4_has_inline_data(inode)) {
/* The range to map ends at the current inline data size. */
572 to = ext4_get_inline_size(inode);
573 if (!PageUptodate(page)) {
574 ret = ext4_read_inline_page(inode, page);
579 ret = ext4_destroy_inline_data_nolock(handle, inode);
583 if (ext4_should_dioread_nolock(inode))
584 ret = __block_write_begin(page, from, to, ext4_get_block_write);
586 ret = __block_write_begin(page, from, to, ext4_get_block);
/* In data-journalling mode each page buffer also gets journal write access. */
588 if (!ret && ext4_should_journal_data(inode)) {
589 ret = ext4_walk_page_buffers(handle, page_buffers(page),
591 do_journal_get_write_access);
596 page_cache_release(page);
597 ext4_orphan_add(handle, inode);
598 up_write(&EXT4_I(inode)->xattr_sem);
600 ext4_journal_stop(handle);
602 ext4_truncate_failed_write(inode);
604 * If truncate failed early the inode might
605 * still be on the orphan list; we need to
606 * make sure the inode is removed from the
607 * orphan list in that case.
610 ext4_orphan_del(NULL, inode);
613 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
616 block_commit_write(page, from, to);
620 page_cache_release(page);
623 up_write(&EXT4_I(inode)->xattr_sem);
625 ext4_journal_stop(handle);
631 * Try to write data in the inode.
632 * If the inode has inline data, check whether the new write can be
633 * in the inode also. If not, create the page and the handle, move the data
634 * to the page, make it uptodate, and let the later code create extents for it.
/*
 * write_begin-side helper that tries to keep a write of len bytes at pos
 * inside the inode.
 *
 * Visible steps: compare pos + len with ext4_get_max_inline_size(), get
 * the inode location, start a journal handle (second argument 1), reserve
 * inline space for pos + len with ext4_prepare_inline_data(), stop the
 * handle when that fails with -ENOSPC, otherwise grab page 0 with
 * AOP_FLAG_NOFS added to flags and, under xattr_sem held for read, fill it
 * from the inline data when it is not uptodate. The last visible statement
 * hands over to ext4_convert_inline_data_to_extent().
 *
 * NOTE(review): part of the parameter list, local declarations, every
 * goto/label, the success return and the arguments of the final call are
 * not visible in this view, so which paths reach the conversion cannot be
 * confirmed from here.
 */
636 int ext4_try_to_write_inline_data(struct address_space *mapping,
638 loff_t pos, unsigned len,
645 struct ext4_iloc iloc;
647 if (pos + len > ext4_get_max_inline_size(inode))
650 ret = ext4_get_inode_loc(inode, &iloc);
655 * The possible write could happen in the inode,
656 * so try to reserve the space in inode first.
658 handle = ext4_journal_start(inode, 1);
659 if (IS_ERR(handle)) {
660 ret = PTR_ERR(handle);
665 ret = ext4_prepare_inline_data(handle, inode, pos + len);
/* Any failure other than -ENOSPC is treated separately from the no-space case. */
666 if (ret && ret != -ENOSPC)
669 /* We don't have space in inline inode, so convert it to extent. */
670 if (ret == -ENOSPC) {
671 ext4_journal_stop(handle);
676 flags |= AOP_FLAG_NOFS;
678 page = grab_cache_page_write_begin(mapping, 0, flags);
685 down_read(&EXT4_I(inode)->xattr_sem);
686 if (!ext4_has_inline_data(inode)) {
689 page_cache_release(page);
693 if (!PageUptodate(page)) {
694 ret = ext4_read_inline_page(inode, page);
702 up_read(&EXT4_I(inode)->xattr_sem);
705 ext4_journal_stop(handle);
709 return ext4_convert_inline_data_to_extent(mapping,
/*
 * write_end-side helper: copy the bytes written into the page back into
 * the inline data of the inode.
 *
 * Visible steps: special-case a short copy (copied < len) on a page that
 * is not uptodate, get the inode location (reporting failure through
 * ext4_std_error()), then with xattr_sem held for write copy len bytes at
 * pos from the mapped page into the inline data, mark the page uptodate
 * and clear its dirty bit.
 *
 * It is a BUG for the inode to have no inline data at this point.
 *
 * NOTE(review): local declarations, the body of the short-copy branch,
 * the error returns and the final return value are not visible in this
 * view.
 */
713 int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len,
714 unsigned copied, struct page *page)
718 struct ext4_iloc iloc;
720 if (unlikely(copied < len)) {
721 if (!PageUptodate(page)) {
727 ret = ext4_get_inode_loc(inode, &iloc);
729 ext4_std_error(inode->i_sb, ret);
734 down_write(&EXT4_I(inode)->xattr_sem);
735 BUG_ON(!ext4_has_inline_data(inode));
/* The page is mapped atomically only for the duration of the copy. */
737 kaddr = kmap_atomic(page);
738 ext4_write_inline_data(inode, &iloc, kaddr, pos, len);
739 kunmap_atomic(kaddr);
740 SetPageUptodate(page);
741 /* clear page dirty so that writepages wouldn't work for us. */
742 ClearPageDirty(page);
744 up_write(&EXT4_I(inode)->xattr_sem);
/*
 * Locked wrapper around ext4_destroy_inline_data_nolock(): the call is
 * made with the inode xattr_sem held for write.
 * NOTE(review): the local declaration and the return statement are not
 * visible in this view.
 */
751 int ext4_destroy_inline_data(handle_t *handle, struct inode *inode)
755 down_write(&EXT4_I(inode)->xattr_sem);
756 ret = ext4_destroy_inline_data_nolock(handle, inode);
757 up_write(&EXT4_I(inode)->xattr_sem);