#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/highmem.h>
#include <linux/pagemap.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/bio.h>
#include <linux/wait.h>
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/uio.h>
#include <linux/prefetch.h>

/* backing cache for struct dio, created in dio_init() below */
static struct kmem_cache *dio_cache __read_mostly;
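/*
 * NOTE: the struct dio and struct dio_submit definitions are elided
 * from this excerpt. What matters below: struct dio carries per-request
 * state shared with interrupt context (refcount, bio_list, waiter),
 * while struct dio_submit holds the submission-path cursors (head/tail
 * index a batch of pinned user pages, block_in_file tracks progress
 * through the request).
 */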
static inline unsigned dio_pages_present(struct dio_submit *sdio)
{
	return sdio->tail - sdio->head;
}
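/*
 * Top up the page queue by pinning more user pages. If the pin fails
 * during a WRITE while mapped blocks are still outstanding, the code
 * substitutes the zero page so those blocks can still be written out;
 * the fault itself is reported later via dio->page_errors.
 */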
static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio)
{
	int nr_pages = min(sdio->total_pages - sdio->curr_page, DIO_PAGES);
	int ret;

	ret = get_user_pages_fast(sdio->curr_user_address, nr_pages,
				  dio->rw == READ, &dio->pages[0]);

	if (ret < 0 && sdio->blocks_available && (dio->rw & WRITE)) {
		/* ... record dio->page_errors, queue ZERO_PAGE(0) instead,
		 * and return success so the mapped blocks get consumed ... */
	}
	/* ... on success, advance the cursors and set head = 0, tail = ret ... */
	return ret;
}
static inline struct page *dio_get_page(struct dio *dio,
					struct dio_submit *sdio)
{
	if (dio_pages_present(sdio) == 0) {
		int ret = dio_refill_pages(dio, sdio);

		if (ret)
			return ERR_PTR(ret);
		BUG_ON(dio_pages_present(sdio) == 0);
	}
	return dio->pages[sdio->head++];	/* consume the next pinned page */
}
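/*
 * The next statements are from dio_complete(), which runs once every
 * bio has finished: a READ that raced past end-of-file must not report
 * more bytes than the i_size sampled at submission time.
 */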
	transferred = dio->result;

	/* check for the short-read case */
	if ((dio->rw == READ) && ((offset + transferred) > dio->i_size))
		transferred = dio->i_size - offset;
static int dio_bio_complete(struct dio *dio, struct bio *bio);
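/*
 * Completion paths. dio->refcount counts the submitting task plus every
 * bio in flight. For AIO, the handler below runs in interrupt context,
 * and the bio that drops the count to zero finishes the whole request.
 */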
static void dio_bio_end_aio(struct bio *bio, int error)
{
	struct dio *dio = bio->bi_private;
	unsigned long remaining;
	unsigned long flags;

	dio_bio_complete(dio, bio);	/* clean up and unpin the bio's pages */

	spin_lock_irqsave(&dio->bio_lock, flags);
	remaining = --dio->refcount;
	if (remaining == 1 && dio->waiter)
		wake_up_process(dio->waiter);
	spin_unlock_irqrestore(&dio->bio_lock, flags);

	if (remaining == 0) {
		dio_complete(dio, dio->iocb->ki_pos, 0, true);
		/* ... aio_complete() and freeing of the dio elided ... */
	}
}
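/*
 * The synchronous counterpart: completed bios are only chained onto
 * dio->bio_list here; the submitting task reaps and completes them in
 * process context (see dio_await_one() below).
 */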
static void dio_bio_end_io(struct bio *bio, int error)
{
	struct dio *dio = bio->bi_private;
	unsigned long flags;

	spin_lock_irqsave(&dio->bio_lock, flags);
	bio->bi_private = dio->bio_list;	/* chain onto the reap list */
	dio->bio_list = bio;
	if (--dio->refcount == 1 && dio->waiter)
		wake_up_process(dio->waiter);
	spin_unlock_irqrestore(&dio->bio_lock, flags);
}
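/*
 * dio_end_io() — exported hook (its signature is elided in this
 * excerpt): drivers that finish bios themselves call it to route into
 * one of the two handlers above, depending on dio->is_async.
 */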
	struct dio *dio = bio->bi_private;

	if (dio->is_async)
		dio_bio_end_aio(bio, error);
	else
		dio_bio_end_io(bio, error);
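/*
 * Allocate and prime a bio for the next run of contiguous blocks. The
 * GFP_KERNEL allocation may sleep, which is fine because bios are only
 * built from process context.
 */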
static void dio_bio_alloc(struct dio *dio, struct dio_submit *sdio,
		struct block_device *bdev, sector_t first_sector, int nr_pages)
{
	struct bio *bio = bio_alloc(GFP_KERNEL, nr_pages);

	bio->bi_bdev = bdev;
	bio->bi_sector = first_sector;
	if (dio->is_async)
		bio->bi_end_io = dio_bio_end_aio;
	else
		bio->bi_end_io = dio_bio_end_io;
	sdio->bio = bio;
}
static inline void dio_bio_submit(struct dio *dio, struct dio_submit *sdio)
{
	struct bio *bio = sdio->bio;
	unsigned long flags;

	bio->bi_private = dio;
	spin_lock_irqsave(&dio->bio_lock, flags);
	dio->refcount++;		/* the in-flight bio holds a ref */
	spin_unlock_irqrestore(&dio->bio_lock, flags);
	submit_bio(dio->rw, bio);	/* or sdio->submit_io(), when set */
	sdio->bio = NULL;
}
static inline void dio_cleanup(struct dio *dio, struct dio_submit *sdio)
{
	/* release any pinned pages that were never submitted */
	while (dio_pages_present(sdio))
		page_cache_release(dio_get_page(dio, sdio));
}
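/*
 * Wait for one bio to complete. The submitting task parks itself in
 * dio->waiter and sleeps in io_schedule() until the interrupt handler
 * queues a completed bio or the refcount falls to its last holder.
 */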
static struct bio *dio_await_one(struct dio *dio)
{
	unsigned long flags;
	struct bio *bio = NULL;

	spin_lock_irqsave(&dio->bio_lock, flags);
	while (dio->refcount > 1 && dio->bio_list == NULL) {
		__set_current_state(TASK_UNINTERRUPTIBLE);
		dio->waiter = current;
		spin_unlock_irqrestore(&dio->bio_lock, flags);
		io_schedule();
		/* the wake-up leaves us TASK_RUNNING */
		spin_lock_irqsave(&dio->bio_lock, flags);
		dio->waiter = NULL;
	}
	if (dio->bio_list) {
		bio = dio->bio_list;
		dio->bio_list = bio->bi_private;
	}
	spin_unlock_irqrestore(&dio->bio_lock, flags);
	return bio;
}
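/*
 * Process one completed bio: flag any I/O error, then unpin the user
 * pages, dirtying them first if the device just DMA'd read data into
 * them.
 */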
static int dio_bio_complete(struct dio *dio, struct bio *bio)
{
	const int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
	struct bio_vec *bvec = bio->bi_io_vec;
	int page_no;

	if (!uptodate)
		dio->io_error = -EIO;

	if (dio->is_async && dio->rw == READ) {
		bio_check_pages_dirty(bio);	/* transfers bio ownership */
	} else {
		for (page_no = 0; page_no < bio->bi_vcnt; page_no++) {
			struct page *page = bvec[page_no].bv_page;

			if (dio->rw == READ && !PageCompound(page))
				set_page_dirty_lock(page);
			page_cache_release(page);
		}
		bio_put(bio);
	}
	return uptodate ? 0 : -EIO;
}
static void dio_await_completion(struct dio *dio)
{
	struct bio *bio;
	do {
		bio = dio_await_one(dio);
		if (bio)
			dio_bio_complete(dio, bio);
	} while (bio);
}
static inline int dio_bio_reap(struct dio *dio, struct dio_submit *sdio)
{
	int ret = 0;

	/* every 64 submissions, reap bios that have already completed */
	if (sdio->reap_counter++ >= 64) {
		while (dio->bio_list) {
			unsigned long flags;
			struct bio *bio;
			int ret2;

			spin_lock_irqsave(&dio->bio_lock, flags);
			bio = dio->bio_list;
			dio->bio_list = bio->bi_private;
			spin_unlock_irqrestore(&dio->bio_lock, flags);
			ret2 = dio_bio_complete(dio, bio);
			if (ret == 0)
				ret = ret2;
		}
		sdio->reap_counter = 0;
	}
	return ret;
}
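/*
 * Ask the filesystem, via its get_block callback, to map (and, for
 * writes, possibly allocate) a run of blocks at the current cursor.
 * The mapping comes back in map_bh: b_size says how much was mapped,
 * buffer_new() says the blocks were freshly allocated.
 */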
static int get_more_blocks(struct dio *dio, struct dio_submit *sdio,
			   struct buffer_head *map_bh)
{
	int ret;
	sector_t fs_startblk, fs_endblk;
	unsigned long fs_count;
	int create;
	unsigned int i_blkbits = sdio->blkbits + sdio->blkfactor;

	/* ... convert the dio-block cursors into fs-block units ... */
	fs_count = fs_endblk - fs_startblk + 1;

	map_bh->b_state = 0;
	map_bh->b_size = fs_count << i_blkbits;

	create = dio->rw & WRITE;
	if (dio->flags & DIO_SKIP_HOLES) {
		/* writes inside i_size may only overwrite, never allocate */
		if (sdio->block_in_file < (i_size_read(dio->inode) >>
							sdio->blkbits))
			create = 0;
	}

	ret = (*sdio->get_block)(dio->inode, fs_startblk, map_bh, create);
	/* store the fs-private cookie for the completion callback */
	dio->private = map_bh->b_private;
	return ret;
}
static inline int dio_new_bio(struct dio *dio, struct dio_submit *sdio,
		sector_t start_sector, struct buffer_head *map_bh)
{
	sector_t sector;
	int ret, nr_pages;

	ret = dio_bio_reap(dio, sdio);
	if (ret)
		goto out;
	sector = start_sector << (sdio->blkbits - 9);	/* fs block -> 512B sector */
	nr_pages = min(sdio->pages_in_io, bio_get_nr_vecs(map_bh->b_bdev));
	nr_pages = min(nr_pages, BIO_MAX_PAGES);
	BUG_ON(nr_pages <= 0);
	dio_bio_alloc(dio, sdio, map_bh->b_bdev, sector, nr_pages);
	sdio->boundary = 0;
out:
	return ret;
}
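/* Attach sdio->cur_page to the open bio; non-zero means the bio is full. */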
static inline int dio_bio_add_page(struct dio_submit *sdio)
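/*
 * Push the deferred cur_page into a bio, submitting the old bio and
 * starting a new one whenever the page is not contiguous with the
 * blocks already queued.
 */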
static inline int dio_send_cur_page(struct dio *dio, struct dio_submit *sdio,
		struct buffer_head *map_bh)
{
	int ret = 0;

	if (sdio->bio) {
		loff_t cur_offset = sdio->cur_page_fpos;
		loff_t bio_next_offset = sdio->logical_offset_in_bio +
			sdio->bio->bi_size;

		/* flush the open bio if cur_page is not contiguous with it */
		if (sdio->final_block_in_bio != sdio->cur_page_block ||
		    cur_offset != bio_next_offset)
			dio_bio_submit(dio, sdio);
	}

	if (sdio->bio == NULL)
		ret = dio_new_bio(dio, sdio, sdio->cur_page_block, map_bh);

	if (ret == 0 && dio_bio_add_page(sdio) != 0) {
		/* bio was full: submit it and retry with a fresh one */
		dio_bio_submit(dio, sdio);
		ret = dio_new_bio(dio, sdio, sdio->cur_page_block, map_bh);
		if (ret == 0)
			ret = dio_bio_add_page(sdio);
	}
	return ret;
}
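/*
 * The central batching step: each mapped chunk of a user page arrives
 * here, and consecutive chunks are merged into the cur_page state
 * before being handed to dio_send_cur_page().
 */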
static inline int
submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page,
		    unsigned offset, unsigned len, sector_t blocknr,
		    struct buffer_head *map_bh)
{
	int ret = 0;

	if (dio->rw & WRITE)
		task_io_account_write(len);	/* reads are accounted in submit_bio() */

	/* ... if @page simply extends cur_page, grow it in place and return ... */

	/* a deferred page is already queued: send it before replacing it */
	if (sdio->cur_page)
		ret = dio_send_cur_page(dio, sdio, map_bh);

	/* ... install @page as the new deferred cur_page; when map_bh hit a
	 * boundary block, dio_send_cur_page() is called again right away to
	 * push the I/O out and avoid a metadata seek ... */
	if (sdio->boundary)
		ret = dio_send_cur_page(dio, sdio, map_bh);
	return ret;
}
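/*
 * Newly allocated blocks may still be shadowed by dirty buffer_heads in
 * the block device's page cache; unmap those aliases so that later
 * writeback cannot scribble stale data over the direct-I/O result.
 */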
static void clean_blockdev_aliases(struct dio *dio, struct buffer_head *map_bh)
{
	unsigned i;
	unsigned nblocks;

	nblocks = map_bh->b_size >> dio->inode->i_blkbits;

	for (i = 0; i < nblocks; i++) {
		unmap_underlying_metadata(map_bh->b_bdev,
					  map_bh->b_blocknr + i);
	}
}
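/*
 * When the user's I/O is aligned to the device block size but not to
 * the filesystem block size (sdio->blkfactor != 0), a newly allocated
 * fs block may be only partially covered by the request; the uncovered
 * head (end == 0) or tail (end == 1) of that block must be zeroed on disk.
 */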
static inline void dio_zero_block(struct dio *dio, struct dio_submit *sdio,
		int end, struct buffer_head *map_bh)
{
	unsigned dio_blocks_per_fs_block;
	unsigned this_chunk_blocks;	/* in dio blocks */
	unsigned this_chunk_bytes;
	struct page *page;

	sdio->start_zero_done = 1;
	if (!sdio->blkfactor || !buffer_new(map_bh))
		return;

	dio_blocks_per_fs_block = 1 << sdio->blkfactor;
	this_chunk_blocks = sdio->block_in_file & (dio_blocks_per_fs_block - 1);

	if (!this_chunk_blocks)
		return;

	/*
	 * We need to zero out part of an fs block.  It is either at the
	 * beginning or the end of the fs block.
	 */
	if (end)
		this_chunk_blocks = dio_blocks_per_fs_block - this_chunk_blocks;

	this_chunk_bytes = this_chunk_blocks << sdio->blkbits;

	page = ZERO_PAGE(0);
	if (submit_page_section(dio, sdio, page, 0, this_chunk_bytes,
				sdio->next_block_for_io, map_bh))
		return;

	sdio->next_block_for_io += this_chunk_blocks;
}
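/*
 * The core loop: walk the pinned user pages block by block, asking the
 * filesystem for mappings as needed, zero-filling read holes, and
 * feeding each mapped chunk to submit_page_section().
 */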
static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
			struct buffer_head *map_bh)
{
	const unsigned blkbits = sdio->blkbits;
	const unsigned blocks_per_page = PAGE_SIZE >> blkbits;
	struct page *page;
	unsigned block_in_page = sdio->first_block_in_page;
	int ret = 0;

	while (sdio->block_in_file < sdio->final_block_in_request) {
		page = dio_get_page(dio, sdio);
		/* ... error handling (and the page releases taken on the
		 * error paths below) elided ... */

		while (block_in_page < blocks_per_page) {
			unsigned offset_in_page = block_in_page << blkbits;
			unsigned this_chunk_bytes;
			unsigned this_chunk_blocks;
			unsigned u;

			if (sdio->blocks_available == 0) {
				/* need to map some more disk */
				unsigned long blkmask;
				unsigned long dio_remainder;

				ret = get_more_blocks(dio, sdio, map_bh);
				if (ret)
					goto out;
				if (!buffer_mapped(map_bh))
					goto do_holes;

				sdio->blocks_available =
					map_bh->b_size >> sdio->blkbits;
				sdio->next_block_for_io =
					map_bh->b_blocknr << sdio->blkfactor;
				if (buffer_new(map_bh))
					clean_blockdev_aliases(dio, map_bh);

				/* resuming partway into an fs block: skip
				 * the dio blocks already handled */
				blkmask = (1 << sdio->blkfactor) - 1;
				dio_remainder = sdio->block_in_file & blkmask;
				if (!buffer_new(map_bh))
					sdio->next_block_for_io += dio_remainder;
				sdio->blocks_available -= dio_remainder;
			}
do_holes:
			if (!buffer_mapped(map_bh)) {
				loff_t i_size_aligned;

				/* a hole: writes bail out, reads zero-fill */
				if (dio->rw & WRITE)
					return -ENOTBLK;
				i_size_aligned = ALIGN(i_size_read(dio->inode),
						       1 << blkbits);
				if (sdio->block_in_file >=
						i_size_aligned >> blkbits)
					goto out;	/* hit EOF */
				zero_user(page, block_in_page << blkbits,
					  1 << blkbits);
				sdio->block_in_file++;
				block_in_page++;
				goto next_block;
			}

			/* zero the leading part of a partial fs block */
			if (unlikely(sdio->blkfactor && !sdio->start_zero_done))
				dio_zero_block(dio, sdio, 0, map_bh);

			/* clamp the chunk to this page and to the request */
			this_chunk_blocks = sdio->blocks_available;
			u = (PAGE_SIZE - offset_in_page) >> blkbits;
			if (this_chunk_blocks > u)
				this_chunk_blocks = u;
			u = sdio->final_block_in_request - sdio->block_in_file;
			if (this_chunk_blocks > u)
				this_chunk_blocks = u;
			this_chunk_bytes = this_chunk_blocks << blkbits;
			BUG_ON(this_chunk_bytes == 0);

			sdio->boundary = buffer_boundary(map_bh);
			ret = submit_page_section(dio, sdio, page,
					offset_in_page, this_chunk_bytes,
					sdio->next_block_for_io, map_bh);
			if (ret)
				goto out;
			sdio->next_block_for_io += this_chunk_blocks;
			sdio->block_in_file += this_chunk_blocks;
			block_in_page += this_chunk_blocks;
			sdio->blocks_available -= this_chunk_blocks;
next_block:
			if (sdio->block_in_file == sdio->final_block_in_request)
				break;
		}
		/* ... drop the page reference and reset block_in_page ... */
	}
out:
	return ret;
}
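/*
 * Drop the submitter's reference. A zero result means every bio has
 * already completed, so the caller must finish the request itself.
 */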
static inline int drop_refcount(struct dio *dio)
{
	int ret2;
	unsigned long flags;

	spin_lock_irqsave(&dio->bio_lock, flags);
	ret2 = --dio->refcount;
	spin_unlock_irqrestore(&dio->bio_lock, flags);
	return ret2;
}
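/*
 * The main worker. Alignment policy: the offset, each iovec base and
 * each iovec length must be aligned at least to the device's logical
 * block size; misalignment yields -EINVAL before any I/O is issued.
 */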
static inline ssize_t
do_blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
	struct block_device *bdev, const struct iovec *iov, loff_t offset,
	unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
	dio_submit_t submit_io, int flags)
{
	int seg;
	size_t size;
	unsigned long addr;
	unsigned i_blkbits = ACCESS_ONCE(inode->i_blkbits);
	unsigned blkbits = i_blkbits;
	unsigned blocksize_mask = (1 << blkbits) - 1;
	ssize_t retval = -EINVAL;
	loff_t end = offset;
	struct dio *dio;
	struct dio_submit sdio = { 0, };
	unsigned long user_addr;
	struct buffer_head map_bh = { 0, };

	/* if the request is not fs-block aligned, it may still be OK at
	 * the device's logical block size; re-check before bailing out */
	if (offset & blocksize_mask) {
		if (bdev)
			blkbits = blksize_bits(bdev_logical_block_size(bdev));
		blocksize_mask = (1 << blkbits) - 1;
		if (offset & blocksize_mask)
			goto out;
	}

	for (seg = 0; seg < nr_segs; seg++) {
		addr = (unsigned long)iov[seg].iov_base;
		size = iov[seg].iov_len;
		end += size;
		if (unlikely((addr & blocksize_mask) ||
			     (size & blocksize_mask))) {
			if (bdev)
				blkbits = blksize_bits(
					 bdev_logical_block_size(bdev));
			blocksize_mask = (1 << blkbits) - 1;
			if ((addr & blocksize_mask) || (size & blocksize_mask))
				goto out;
		}
	}

	/* watch out for a 0 len io from a tricksy fs */
	if (rw == READ && end == offset)
		return 0;

	/* ... allocate and zero the dio ... */
	dio->flags = flags;
	if (dio->flags & DIO_LOCKING) {
		/* ... for reads: take i_mutex and flush dirty page-cache
		 * pages over the request range before going direct ... */
	}

	/* extending AIO writes must still wait for completion, hence: */
	dio->is_async = !is_sync_kiocb(iocb) && !((rw & WRITE) &&
		(end > i_size_read(inode)));

	dio->i_size = i_size_read(inode);
	/* ... initialise sdio: blkbits, blkfactor, get_block, submit_io ... */

	/* first pass: count the user pages this request will pin */
	for (seg = 0; seg < nr_segs; seg++) {
		user_addr = (unsigned long)iov[seg].iov_base;
		sdio.pages_in_io +=
			((user_addr + iov[seg].iov_len + PAGE_SIZE - 1) /
				PAGE_SIZE - user_addr / PAGE_SIZE);
	}

	/* second pass: drive the I/O one iovec segment at a time */
	for (seg = 0; seg < nr_segs; seg++) {
		user_addr = (unsigned long)iov[seg].iov_base;
		/* ... load this segment's geometry into sdio ... */
		retval = do_direct_IO(dio, &sdio, &map_bh);
		if (retval) {
			dio_cleanup(dio, &sdio);
			break;
		}
	}

	if (retval == 0)	/* zero the tail of a partial fs block */
		dio_zero_block(dio, &sdio, 1, &map_bh);

	if (sdio.cur_page) {
		ssize_t ret2 = dio_send_cur_page(dio, &sdio, &map_bh);
		if (retval == 0)
			retval = ret2;
		/* ... release cur_page ... */
	}
	if (sdio.bio)
		dio_bio_submit(dio, &sdio);

	/* drop pages that were pinned but never consumed */
	dio_cleanup(dio, &sdio);

	/* reads under DIO_LOCKING drop i_mutex before waiting */
	if (rw == READ && (dio->flags & DIO_LOCKING))
		mutex_unlock(&inode->i_mutex);

	/* synchronous I/O parks here until every bio has come back;
	 * async completion happens in interrupt context instead */
	if (!dio->is_async) {
		dio_await_completion(dio);
		if (drop_refcount(dio) == 0)
			retval = dio_complete(dio, offset, retval, false);
	}
out:
	return retval;
}
ssize_t
__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
	struct block_device *bdev, const struct iovec *iov, loff_t offset,
	unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
	dio_submit_t submit_io, int flags)
{
	/* ... prefetch of the bdev queue/partition state elided ... */
	return do_blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset,
				     nr_segs, get_block, end_io,
				     submit_io, flags);
}
EXPORT_SYMBOL(__blockdev_direct_IO);
static __init int dio_init(void)
{
	dio_cache = KMEM_CACHE(dio, SLAB_PANIC);
	return 0;
}
module_init(dio_init)