/*
 * fs/logfs/dev_bdev.c  - Device access methods for block devices
 *
 * As should be obvious for Linux kernel code, license is GPLv2
 *
 * Copyright (c) 2005-2008 Joern Engel <joern@logfs.org>
 */
#include "logfs.h"
#include <linux/bio.h>
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/gfp.h>
#include <linux/prefetch.h>

#define PAGE_OFS(ofs) ((ofs) & (PAGE_SIZE-1))

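/* Completion callback for sync_request(): wake the waiter parked on bi_private. */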
static void request_complete(struct bio *bio, int err,
			     struct batch_complete *batch)
{
	complete((struct completion *)bio->bi_private);
}

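/*
 * Synchronously read or write a single page using an on-stack bio and
 * wait for it to complete.  Returns 0 on success, -EIO otherwise.
 */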
static int sync_request(struct page *page, struct block_device *bdev, int rw)
{
	struct bio bio;
	struct bio_vec bio_vec;
	struct completion complete;

	bio_init(&bio);
	bio.bi_max_vecs = 1;
	bio.bi_io_vec = &bio_vec;
	bio_vec.bv_page = page;
	bio_vec.bv_len = PAGE_SIZE;
	bio_vec.bv_offset = 0;
	bio.bi_vcnt = 1;
	bio.bi_size = PAGE_SIZE;
	bio.bi_bdev = bdev;
	bio.bi_sector = page->index * (PAGE_SIZE >> 9);
	init_completion(&complete);
	bio.bi_private = &complete;
	bio.bi_end_io = request_complete;

	submit_bio(rw, &bio);
	wait_for_completion(&complete);
	return test_bit(BIO_UPTODATE, &bio.bi_flags) ? 0 : -EIO;
}

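/*
 * Fill @page from the device and update its uptodate/error bits to match
 * the result, then unlock it.  Also used as the filler for read_cache_page().
 */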
static int bdev_readpage(void *_sb, struct page *page)
{
	struct super_block *sb = _sb;
	struct block_device *bdev = logfs_super(sb)->s_bdev;
	int err;

	err = sync_request(page, bdev, READ);
	if (err) {
		ClearPageUptodate(page);
		SetPageError(page);
	} else {
		SetPageUptodate(page);
		ClearPageError(page);
	}
	unlock_page(page);
	return err;
}

static DECLARE_WAIT_QUEUE_HEAD(wq);

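/*
 * Completion handler for segment writes: end writeback and drop the page
 * reference for every page in the bio, then wake bdev_sync() waiters once
 * the last pending write has finished.
 */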
static void writeseg_end_io(struct bio *bio, int err,
			    struct batch_complete *batch)
{
	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
	struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
	struct super_block *sb = bio->bi_private;
	struct logfs_super *super = logfs_super(sb);
	struct page *page;

	BUG_ON(!uptodate); /* FIXME: Retry io or write elsewhere */
	BUG_ON(err);
	BUG_ON(bio->bi_vcnt == 0);
	do {
		page = bvec->bv_page;
		if (--bvec >= bio->bi_io_vec)
			prefetchw(&bvec->bv_page->flags);

		end_page_writeback(page);
		page_cache_release(page);
	} while (bvec >= bio->bi_io_vec);
	bio_put(bio);
	if (atomic_dec_and_test(&super->s_pending_writes))
		wake_up(&wq);
}

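/*
 * Write @nr_pages pages of the mapping inode, starting at @index, to the
 * device at offset @ofs.  If more pages are requested than fit into a
 * single bio, full bios are submitted along the way and a fresh one is
 * started for the remainder.
 */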
static int __bdev_writeseg(struct super_block *sb, u64 ofs, pgoff_t index,
		size_t nr_pages)
{
	struct logfs_super *super = logfs_super(sb);
	struct address_space *mapping = super->s_mapping_inode->i_mapping;
	struct bio *bio;
	struct page *page;
	unsigned int max_pages;
	int i;

	max_pages = min(nr_pages, (size_t) bio_get_nr_vecs(super->s_bdev));

	bio = bio_alloc(GFP_NOFS, max_pages);
	BUG_ON(!bio);

	for (i = 0; i < nr_pages; i++) {
		if (i >= max_pages) {
			/* Block layer cannot split bios :( */
			bio->bi_vcnt = i;
			bio->bi_size = i * PAGE_SIZE;
			bio->bi_bdev = super->s_bdev;
			bio->bi_sector = ofs >> 9;
			bio->bi_private = sb;
			bio->bi_end_io = writeseg_end_io;
			atomic_inc(&super->s_pending_writes);
			submit_bio(WRITE, bio);

			ofs += i * PAGE_SIZE;
			index += i;
			nr_pages -= i;
			i = 0;

			bio = bio_alloc(GFP_NOFS, max_pages);
			BUG_ON(!bio);
		}
		page = find_lock_page(mapping, index + i);
		BUG_ON(!page);
		bio->bi_io_vec[i].bv_page = page;
		bio->bi_io_vec[i].bv_len = PAGE_SIZE;
		bio->bi_io_vec[i].bv_offset = 0;

		BUG_ON(PageWriteback(page));
		set_page_writeback(page);
		unlock_page(page);
	}
	bio->bi_vcnt = nr_pages;
	bio->bi_size = nr_pages * PAGE_SIZE;
	bio->bi_bdev = super->s_bdev;
	bio->bi_sector = ofs >> 9;
	bio->bi_private = sb;
	bio->bi_end_io = writeseg_end_io;
	atomic_inc(&super->s_pending_writes);
	submit_bio(WRITE, bio);
	return 0;
}

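/*
 * Page-align the requested range and pass it on to __bdev_writeseg().
 * A zero-length request is silently ignored.
 */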
static void bdev_writeseg(struct super_block *sb, u64 ofs, size_t len)
{
	struct logfs_super *super = logfs_super(sb);
	int head;

	BUG_ON(super->s_flags & LOGFS_SB_FLAG_RO);

	if (len == 0) {
		/* This can happen when the object fit perfectly into a
		 * segment, the segment gets written per sync and subsequently
		 * closed.
		 */
		return;
	}
	head = ofs & (PAGE_SIZE - 1);
	if (head) {
		ofs -= head;
		len += head;
	}
	len = PAGE_ALIGN(len);
	__bdev_writeseg(sb, ofs, ofs >> PAGE_SHIFT, len >> PAGE_SHIFT);
}


static void erase_end_io(struct bio *bio, int err, struct batch_complete *batch)
{
	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
	struct super_block *sb = bio->bi_private;
	struct logfs_super *super = logfs_super(sb);

	BUG_ON(!uptodate); /* FIXME: Retry io or write elsewhere */
	BUG_ON(err);
	BUG_ON(bio->bi_vcnt == 0);
	bio_put(bio);
	if (atomic_dec_and_test(&super->s_pending_writes))
		wake_up(&wq);
}

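/*
 * "Erase" a range by overwriting every page with the preallocated
 * s_erase_page, chunked into bios the same way as __bdev_writeseg().
 * Completion is tracked through s_pending_writes like a regular write.
 */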
static int do_erase(struct super_block *sb, u64 ofs, pgoff_t index,
		size_t nr_pages)
{
	struct logfs_super *super = logfs_super(sb);
	struct bio *bio;
	unsigned int max_pages;
	int i;

	max_pages = min(nr_pages, (size_t) bio_get_nr_vecs(super->s_bdev));

	bio = bio_alloc(GFP_NOFS, max_pages);
	BUG_ON(!bio);

	for (i = 0; i < nr_pages; i++) {
		if (i >= max_pages) {
			/* Block layer cannot split bios :( */
			bio->bi_vcnt = i;
			bio->bi_size = i * PAGE_SIZE;
			bio->bi_bdev = super->s_bdev;
			bio->bi_sector = ofs >> 9;
			bio->bi_private = sb;
			bio->bi_end_io = erase_end_io;
			atomic_inc(&super->s_pending_writes);
			submit_bio(WRITE, bio);

			ofs += i * PAGE_SIZE;
			index += i;
			nr_pages -= i;
			i = 0;

			bio = bio_alloc(GFP_NOFS, max_pages);
			BUG_ON(!bio);
		}
		bio->bi_io_vec[i].bv_page = super->s_erase_page;
		bio->bi_io_vec[i].bv_len = PAGE_SIZE;
		bio->bi_io_vec[i].bv_offset = 0;
	}
	bio->bi_vcnt = nr_pages;
	bio->bi_size = nr_pages * PAGE_SIZE;
	bio->bi_bdev = super->s_bdev;
	bio->bi_sector = ofs >> 9;
	bio->bi_private = sb;
	bio->bi_end_io = erase_end_io;
	atomic_inc(&super->s_pending_writes);
	submit_bio(WRITE, bio);
	return 0;
}

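/*
 * Erase @len bytes at @to.  The overwrite is only carried out when the
 * caller insists on it (ensure_write), which the journal does to avoid
 * ever re-reading a stale commit entry.
 */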
static int bdev_erase(struct super_block *sb, loff_t to, size_t len,
		int ensure_write)
{
	struct logfs_super *super = logfs_super(sb);

	BUG_ON(to & (PAGE_SIZE - 1));
	BUG_ON(len & (PAGE_SIZE - 1));

	if (super->s_flags & LOGFS_SB_FLAG_RO)
		return -EROFS;

	if (ensure_write) {
		/*
		 * Object store doesn't care whether erases happen or not.
		 * But for the journal they are required.  Otherwise a scan
		 * can find an old commit entry and assume it is the current
		 * one, travelling back in time.
		 */
		do_erase(sb, to, to >> PAGE_SHIFT, len >> PAGE_SHIFT);
	}

	return 0;
}

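/* Wait for all writes submitted via writeseg/erase to complete. */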
static void bdev_sync(struct super_block *sb)
{
	struct logfs_super *super = logfs_super(sb);

	wait_event(wq, atomic_read(&super->s_pending_writes) == 0);
}

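/* The first superblock is located at offset 0 of the device. */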
static struct page *bdev_find_first_sb(struct super_block *sb, u64 *ofs)
{
	struct logfs_super *super = logfs_super(sb);
	struct address_space *mapping = super->s_mapping_inode->i_mapping;
	filler_t *filler = bdev_readpage;

	*ofs = 0;
	return read_cache_page(mapping, 0, filler, sb);
}

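/*
 * The last superblock sits in the last complete 4KiB block of the device:
 * round the device size down to 4KiB and step back one block.
 */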
static struct page *bdev_find_last_sb(struct super_block *sb, u64 *ofs)
{
	struct logfs_super *super = logfs_super(sb);
	struct address_space *mapping = super->s_mapping_inode->i_mapping;
	filler_t *filler = bdev_readpage;
	u64 pos = (super->s_bdev->bd_inode->i_size & ~0xfffULL) - 0x1000;
	pgoff_t index = pos >> PAGE_SHIFT;

	*ofs = pos;
	return read_cache_page(mapping, index, filler, sb);
}

static int bdev_write_sb(struct super_block *sb, struct page *page)
{
	struct block_device *bdev = logfs_super(sb)->s_bdev;

	/* Nothing special to do for block devices. */
	return sync_request(page, bdev, WRITE);
}

static void bdev_put_device(struct logfs_super *s)
{
	blkdev_put(s->s_bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
}

static int bdev_can_write_buf(struct super_block *sb, u64 ofs)
{
	return 0;
}

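/* Device operations used when logfs is mounted on a plain block device. */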
static const struct logfs_device_ops bd_devops = {
	.find_first_sb	= bdev_find_first_sb,
	.find_last_sb	= bdev_find_last_sb,
	.write_sb	= bdev_write_sb,
	.readpage	= bdev_readpage,
	.writeseg	= bdev_writeseg,
	.erase		= bdev_erase,
	.can_write_buf	= bdev_can_write_buf,
	.sync		= bdev_sync,
	.put_device	= bdev_put_device,
};

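/*
 * Open the backing device by path.  mtdblock devices are handed over to
 * the MTD backend; everything else is driven through bd_devops.
 */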
int logfs_get_sb_bdev(struct logfs_super *p, struct file_system_type *type,
		const char *devname)
{
	struct block_device *bdev;

	bdev = blkdev_get_by_path(devname, FMODE_READ|FMODE_WRITE|FMODE_EXCL,
				  type);
	if (IS_ERR(bdev))
		return PTR_ERR(bdev);

	if (MAJOR(bdev->bd_dev) == MTD_BLOCK_MAJOR) {
		int mtdnr = MINOR(bdev->bd_dev);
		blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
		return logfs_get_sb_mtd(p, mtdnr);
	}

	p->s_bdev = bdev;
	p->s_mtd = NULL;
	p->s_devops = &bd_devops;
	return 0;
}