Linux Kernel 3.7.1 — fs/mpage.c (generated API documentation)
#include <linux/kernel.h>
#include <linux/export.h>
#include <linux/mm.h>
#include <linux/kdev_t.h>
#include <linux/gfp.h>
#include <linux/bio.h>
#include <linux/fs.h>
#include <linux/buffer_head.h>
#include <linux/blkdev.h>
#include <linux/highmem.h>
#include <linux/prefetch.h>
#include <linux/mpage.h>
#include <linux/writeback.h>
#include <linux/backing-dev.h>
#include <linux/pagevec.h>
#include <linux/cleancache.h>
Go to the source code of this file.
Data Structures | |
struct | mpage_data |
Functions | |
int | mpage_readpages (struct address_space *mapping, struct list_head *pages, unsigned nr_pages, get_block_t get_block) |
EXPORT_SYMBOL (mpage_readpages) | |
int | mpage_readpage (struct page *page, get_block_t get_block) |
EXPORT_SYMBOL (mpage_readpage) | |
int | mpage_writepages (struct address_space *mapping, struct writeback_control *wbc, get_block_t get_block) |
EXPORT_SYMBOL (mpage_writepages) | |
int | mpage_writepage (struct page *page, get_block_t get_block, struct writeback_control *wbc) |
EXPORT_SYMBOL (mpage_writepage) | |
EXPORT_SYMBOL | ( | mpage_readpages | ) |
EXPORT_SYMBOL | ( | mpage_readpage | ) |
EXPORT_SYMBOL | ( | mpage_writepages | ) |
EXPORT_SYMBOL | ( | mpage_writepage | ) |
int mpage_readpage | ( | struct page * | page, |
get_block_t | get_block | ||
) |
int mpage_readpages | ( | struct address_space * | mapping, |
struct list_head * | pages, | ||
unsigned | nr_pages, | ||
get_block_t | get_block | ||
) |
mpage_readpages - populate an address space with some pages & start reads against them. @mapping: the address_space. @pages: The address of a list_head which contains the target pages. These pages have their ->index populated and are otherwise uninitialised. The page at ->prev has the lowest file offset, and reads should be issued in ->prev to ->next order. @nr_pages: The number of pages at *@pages. @get_block: The filesystem's block mapper function.
This function walks the pages and the blocks within each page, building and emitting large BIOs.
If anything unusual happens, such as:
- encountering a page which has buffers
- encountering a page which has a non-hole after a hole
- encountering a page with non-contiguous blocks
then this code just gives up and calls the buffer_head-based read function. It does handle a page which has holes at the end - that is a common case: the end-of-file on blocksize < PAGE_CACHE_SIZE setups.
BH_Boundary explanation:
There is a problem. The mpage read code assembles several pages, gets all their disk mappings, and then submits them all. That's fine, but obtaining the disk mappings may require I/O. Reads of indirect blocks, for example.
So an mpage read of the first 16 blocks of an ext2 file will cause I/O to be submitted in the following order: 12 0 1 2 3 4 5 6 7 8 9 10 11 13 14 15 16
because the indirect block has to be read to get the mappings of blocks 13,14,15,16. Obviously, this impacts performance.
So what we do is allow the filesystem's get_block() function to set BH_Boundary when it maps block 11. BH_Boundary says: mapping of the block after this one will require I/O against a block which is probably close to this one. So you should push what I/O you have currently accumulated.
This all causes the disk requests to be issued in the correct order.
int mpage_writepage | ( | struct page * | page, |
get_block_t | get_block, | ||
struct writeback_control * | wbc | ||
) |
int mpage_writepages | ( | struct address_space * | mapping, |
struct writeback_control * | wbc, | ||
get_block_t | get_block | ||
) |
mpage_writepages - walk the list of dirty pages of the given address space & writepage() all of them. @mapping: address space structure to write. @wbc: subtract the number of written pages from *@wbc->nr_to_write. @get_block: the filesystem's block mapper function. If this is NULL then use a_ops->writepage. Otherwise, go direct-to-BIO.
This is a library function, which implements the writepages() address_space_operation.
If a page is already under I/O, generic_writepages() skips it, even if it's dirty. This is desirable behaviour for memory-cleaning writeback, but it is INCORRECT for data-integrity system calls such as fsync(). fsync() and msync() need to guarantee that all the data which was dirty at the time the call was made get new I/O started against them. If wbc->sync_mode is WB_SYNC_ALL then we were called for data integrity and we must wait for existing IO to complete.