diff options
author | Ken Sumrall <ksumrall@android.com> | 2013-03-27 21:15:44 +0000 |
---|---|---|
committer | Android (Google) Code Review <android-gerrit@google.com> | 2013-03-27 21:15:44 +0000 |
commit | e1718470fb4abc74a8c054188834e280ffeb29f3 (patch) | |
tree | f2a2008ff5ec0c986f54c7433f394bfeac7b069c | |
parent | 4733a3a67eab8ac500a5a8f1f508d1c444fb713a (diff) | |
parent | c215b357f793930725e09f14312309c69885109b (diff) | |
download | kernel-headers-e1718470fb4abc74a8c054188834e280ffeb29f3.tar.gz |
Merge "Update kernel fs.h header file" into jb-mr2-dev
-rw-r--r-- | original/linux/blk_types.h | 195 | ||||
-rw-r--r-- | original/linux/fs.h | 1940 |
2 files changed, 1495 insertions, 640 deletions
diff --git a/original/linux/blk_types.h b/original/linux/blk_types.h new file mode 100644 index 0000000..6395692 --- /dev/null +++ b/original/linux/blk_types.h @@ -0,0 +1,195 @@ +/* + * Block data types and constants. Directly include this file only to + * break include dependency loop. + */ +#ifndef __LINUX_BLK_TYPES_H +#define __LINUX_BLK_TYPES_H + +#ifdef CONFIG_BLOCK + +#include <linux/types.h> + +struct bio_set; +struct bio; +struct bio_integrity_payload; +struct page; +struct block_device; +typedef void (bio_end_io_t) (struct bio *, int); +typedef void (bio_destructor_t) (struct bio *); + +/* + * was unsigned short, but we might as well be ready for > 64kB I/O pages + */ +struct bio_vec { + struct page *bv_page; + unsigned int bv_len; + unsigned int bv_offset; +}; + +/* + * main unit of I/O for the block layer and lower layers (ie drivers and + * stacking drivers) + */ +struct bio { + sector_t bi_sector; /* device address in 512 byte + sectors */ + struct bio *bi_next; /* request queue link */ + struct block_device *bi_bdev; + unsigned long bi_flags; /* status, command, etc */ + unsigned long bi_rw; /* bottom bits READ/WRITE, + * top bits priority + */ + + unsigned short bi_vcnt; /* how many bio_vec's */ + unsigned short bi_idx; /* current index into bvl_vec */ + + /* Number of segments in this BIO after + * physical address coalescing is performed. + */ + unsigned int bi_phys_segments; + + unsigned int bi_size; /* residual I/O count */ + + /* + * To keep track of the max segment size, we account for the + * sizes of the first and last mergeable segments in this bio. 
+ */ + unsigned int bi_seg_front_size; + unsigned int bi_seg_back_size; + + unsigned int bi_max_vecs; /* max bvl_vecs we can hold */ + + unsigned int bi_comp_cpu; /* completion CPU */ + + atomic_t bi_cnt; /* pin count */ + + struct bio_vec *bi_io_vec; /* the actual vec list */ + + bio_end_io_t *bi_end_io; + + void *bi_private; +#if defined(CONFIG_BLK_DEV_INTEGRITY) + struct bio_integrity_payload *bi_integrity; /* data integrity */ +#endif + + bio_destructor_t *bi_destructor; /* destructor */ + + /* + * We can inline a number of vecs at the end of the bio, to avoid + * double allocations for a small number of bio_vecs. This member + * MUST obviously be kept at the very end of the bio. + */ + struct bio_vec bi_inline_vecs[0]; +}; + +/* + * bio flags + */ +#define BIO_UPTODATE 0 /* ok after I/O completion */ +#define BIO_RW_BLOCK 1 /* RW_AHEAD set, and read/write would block */ +#define BIO_EOF 2 /* out-out-bounds error */ +#define BIO_SEG_VALID 3 /* bi_phys_segments valid */ +#define BIO_CLONED 4 /* doesn't own data */ +#define BIO_BOUNCED 5 /* bio is a bounce bio */ +#define BIO_USER_MAPPED 6 /* contains user pages */ +#define BIO_EOPNOTSUPP 7 /* not supported */ +#define BIO_CPU_AFFINE 8 /* complete bio on same CPU as submitted */ +#define BIO_NULL_MAPPED 9 /* contains invalid user pages */ +#define BIO_FS_INTEGRITY 10 /* fs owns integrity data, not block layer */ +#define BIO_QUIET 11 /* Make BIO Quiet */ +#define BIO_MAPPED_INTEGRITY 12/* integrity metadata has been remapped */ +#define bio_flagged(bio, flag) ((bio)->bi_flags & (1 << (flag))) + +/* + * top 4 bits of bio flags indicate the pool this bio came from + */ +#define BIO_POOL_BITS (4) +#define BIO_POOL_NONE ((1UL << BIO_POOL_BITS) - 1) +#define BIO_POOL_OFFSET (BITS_PER_LONG - BIO_POOL_BITS) +#define BIO_POOL_MASK (1UL << BIO_POOL_OFFSET) +#define BIO_POOL_IDX(bio) ((bio)->bi_flags >> BIO_POOL_OFFSET) + +#endif /* CONFIG_BLOCK */ + +/* + * Request flags. 
For use in the cmd_flags field of struct request, and in + * bi_rw of struct bio. Note that some flags are only valid in either one. + */ +enum rq_flag_bits { + /* common flags */ + __REQ_WRITE, /* not set, read. set, write */ + __REQ_FAILFAST_DEV, /* no driver retries of device errors */ + __REQ_FAILFAST_TRANSPORT, /* no driver retries of transport errors */ + __REQ_FAILFAST_DRIVER, /* no driver retries of driver errors */ + + __REQ_SYNC, /* request is sync (sync write or read) */ + __REQ_META, /* metadata io request */ + __REQ_DISCARD, /* request to discard sectors */ + __REQ_NOIDLE, /* don't anticipate more IO after this one */ + + /* bio only flags */ + __REQ_RAHEAD, /* read ahead, can fail anytime */ + __REQ_THROTTLED, /* This bio has already been subjected to + * throttling rules. Don't do it again. */ + + /* request only flags */ + __REQ_SORTED, /* elevator knows about this request */ + __REQ_SOFTBARRIER, /* may not be passed by ioscheduler */ + __REQ_FUA, /* forced unit access */ + __REQ_NOMERGE, /* don't touch this for merging */ + __REQ_STARTED, /* drive already may have started this one */ + __REQ_DONTPREP, /* don't call prep for this one */ + __REQ_QUEUED, /* uses queueing */ + __REQ_ELVPRIV, /* elevator private data attached */ + __REQ_FAILED, /* set if the request failed */ + __REQ_QUIET, /* don't worry about errors */ + __REQ_PREEMPT, /* set for "ide_preempt" requests */ + __REQ_ALLOCED, /* request came from our alloc pool */ + __REQ_COPY_USER, /* contains copies of user pages */ + __REQ_FLUSH, /* request for cache flush */ + __REQ_FLUSH_SEQ, /* request for flush sequence */ + __REQ_IO_STAT, /* account I/O stat */ + __REQ_MIXED_MERGE, /* merge of different types, fail separately */ + __REQ_SECURE, /* secure discard (used with __REQ_DISCARD) */ + __REQ_NR_BITS, /* stops here */ +}; + +#define REQ_WRITE (1 << __REQ_WRITE) +#define REQ_FAILFAST_DEV (1 << __REQ_FAILFAST_DEV) +#define REQ_FAILFAST_TRANSPORT (1 << __REQ_FAILFAST_TRANSPORT) +#define 
REQ_FAILFAST_DRIVER (1 << __REQ_FAILFAST_DRIVER) +#define REQ_SYNC (1 << __REQ_SYNC) +#define REQ_META (1 << __REQ_META) +#define REQ_DISCARD (1 << __REQ_DISCARD) +#define REQ_NOIDLE (1 << __REQ_NOIDLE) + +#define REQ_FAILFAST_MASK \ + (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER) +#define REQ_COMMON_MASK \ + (REQ_WRITE | REQ_FAILFAST_MASK | REQ_SYNC | REQ_META | REQ_DISCARD | \ + REQ_NOIDLE | REQ_FLUSH | REQ_FUA | REQ_SECURE) +#define REQ_CLONE_MASK REQ_COMMON_MASK + +#define REQ_RAHEAD (1 << __REQ_RAHEAD) +#define REQ_THROTTLED (1 << __REQ_THROTTLED) + +#define REQ_SORTED (1 << __REQ_SORTED) +#define REQ_SOFTBARRIER (1 << __REQ_SOFTBARRIER) +#define REQ_FUA (1 << __REQ_FUA) +#define REQ_NOMERGE (1 << __REQ_NOMERGE) +#define REQ_STARTED (1 << __REQ_STARTED) +#define REQ_DONTPREP (1 << __REQ_DONTPREP) +#define REQ_QUEUED (1 << __REQ_QUEUED) +#define REQ_ELVPRIV (1 << __REQ_ELVPRIV) +#define REQ_FAILED (1 << __REQ_FAILED) +#define REQ_QUIET (1 << __REQ_QUIET) +#define REQ_PREEMPT (1 << __REQ_PREEMPT) +#define REQ_ALLOCED (1 << __REQ_ALLOCED) +#define REQ_COPY_USER (1 << __REQ_COPY_USER) +#define REQ_FLUSH (1 << __REQ_FLUSH) +#define REQ_FLUSH_SEQ (1 << __REQ_FLUSH_SEQ) +#define REQ_IO_STAT (1 << __REQ_IO_STAT) +#define REQ_MIXED_MERGE (1 << __REQ_MIXED_MERGE) +#define REQ_SECURE (1 << __REQ_SECURE) + +#endif /* __LINUX_BLK_TYPES_H */ diff --git a/original/linux/fs.h b/original/linux/fs.h index 555bc19..d8ecb01 100644 --- a/original/linux/fs.h +++ b/original/linux/fs.h @@ -8,6 +8,8 @@ #include <linux/limits.h> #include <linux/ioctl.h> +#include <linux/blk_types.h> +#include <linux/types.h> /* * It's silly to have NR_OPEN bigger than NR_FILE, but you can change @@ -21,8 +23,8 @@ /* Fixed constants first: */ #undef NR_OPEN -#define NR_OPEN (1024*1024) /* Absolute upper limit on fd num */ -#define INR_OPEN 1024 /* Initial setting for nfile rlimits */ +#define INR_OPEN_CUR 1024 /* Initial setting for nfile rlimits */ +#define INR_OPEN_MAX 4096 /* Hard 
limit for nfile rlimits */ #define BLOCK_SIZE_BITS 10 #define BLOCK_SIZE (1<<BLOCK_SIZE_BITS) @@ -30,28 +32,27 @@ #define SEEK_SET 0 /* seek relative to beginning of file */ #define SEEK_CUR 1 /* seek relative to current file position */ #define SEEK_END 2 /* seek relative to end of file */ +#define SEEK_MAX SEEK_END + +struct fstrim_range { + __u64 start; + __u64 len; + __u64 minlen; +}; /* And dynamically-tunable limits and defaults: */ struct files_stat_struct { - int nr_files; /* read only */ - int nr_free_files; /* read only */ - int max_files; /* tunable */ + unsigned long nr_files; /* read only */ + unsigned long nr_free_files; /* read only */ + unsigned long max_files; /* tunable */ }; -extern struct files_stat_struct files_stat; -extern int get_max_files(void); struct inodes_stat_t { int nr_inodes; int nr_unused; - int dummy[5]; + int dummy[5]; /* padding for sysctl ABI compatibility */ }; -extern struct inodes_stat_t inodes_stat; -extern int leases_enable, lease_break_time; - -#ifdef CONFIG_DNOTIFY -extern int dir_notify_enable; -#endif #define NR_FILE 8192 /* this can well be larger on a larger system */ @@ -59,30 +60,112 @@ extern int dir_notify_enable; #define MAY_WRITE 2 #define MAY_READ 4 #define MAY_APPEND 8 +#define MAY_ACCESS 16 +#define MAY_OPEN 32 +#define MAY_CHDIR 64 + +/* + * flags in file.f_mode. 
Note that FMODE_READ and FMODE_WRITE must correspond + * to O_WRONLY and O_RDWR via the strange trick in __dentry_open() + */ + +/* file is open for reading */ +#define FMODE_READ ((__force fmode_t)0x1) +/* file is open for writing */ +#define FMODE_WRITE ((__force fmode_t)0x2) +/* file is seekable */ +#define FMODE_LSEEK ((__force fmode_t)0x4) +/* file can be accessed using pread */ +#define FMODE_PREAD ((__force fmode_t)0x8) +/* file can be accessed using pwrite */ +#define FMODE_PWRITE ((__force fmode_t)0x10) +/* File is opened for execution with sys_execve / sys_uselib */ +#define FMODE_EXEC ((__force fmode_t)0x20) +/* File is opened with O_NDELAY (only set for block devices) */ +#define FMODE_NDELAY ((__force fmode_t)0x40) +/* File is opened with O_EXCL (only set for block devices) */ +#define FMODE_EXCL ((__force fmode_t)0x80) +/* File is opened using open(.., 3, ..) and is writeable only for ioctls + (specialy hack for floppy.c) */ +#define FMODE_WRITE_IOCTL ((__force fmode_t)0x100) + +/* + * Don't update ctime and mtime. + * + * Currently a special hack for the XFS open_by_handle ioctl, but we'll + * hopefully graduate it to a proper O_CMTIME flag supported by open(2) soon. + */ +#define FMODE_NOCMTIME ((__force fmode_t)0x800) + +/* Expect random access pattern */ +#define FMODE_RANDOM ((__force fmode_t)0x1000) + +/* File is huge (eg. /dev/kmem): treat loff_t as unsigned */ +#define FMODE_UNSIGNED_OFFSET ((__force fmode_t)0x2000) -#define FMODE_READ 1 -#define FMODE_WRITE 2 - -/* Internal kernel extensions */ -#define FMODE_LSEEK 4 -#define FMODE_PREAD 8 -#define FMODE_PWRITE FMODE_PREAD /* These go hand in hand */ - -/* File is being opened for execution. 
Primary users of this flag are - distributed filesystems that can use it to achieve correct ETXTBUSY - behavior for cross-node execution/opening_for_writing of files */ -#define FMODE_EXEC 16 - -#define RW_MASK 1 -#define RWA_MASK 2 -#define READ 0 -#define WRITE 1 -#define READA 2 /* read-ahead - don't block if no resources */ -#define SWRITE 3 /* for ll_rw_block() - wait for buffer lock */ -#define SPECIAL 4 /* For non-blockdevice requests in request queue */ -#define READ_SYNC (READ | (1 << BIO_RW_SYNC)) -#define WRITE_SYNC (WRITE | (1 << BIO_RW_SYNC)) -#define WRITE_BARRIER ((1 << BIO_RW) | (1 << BIO_RW_BARRIER)) +/* File is opened with O_PATH; almost nothing can be done with it */ +#define FMODE_PATH ((__force fmode_t)0x4000) + +/* File was opened by fanotify and shouldn't generate fanotify events */ +#define FMODE_NONOTIFY ((__force fmode_t)0x1000000) + +/* + * The below are the various read and write types that we support. Some of + * them include behavioral modifiers that send information down to the + * block layer and IO scheduler. Terminology: + * + * The block layer uses device plugging to defer IO a little bit, in + * the hope that we will see more IO very shortly. This increases + * coalescing of adjacent IO and thus reduces the number of IOs we + * have to send to the device. It also allows for better queuing, + * if the IO isn't mergeable. If the caller is going to be waiting + * for the IO, then he must ensure that the device is unplugged so + * that the IO is dispatched to the driver. + * + * All IO is handled async in Linux. This is fine for background + * writes, but for reads or writes that someone waits for completion + * on, we want to notify the block layer and IO scheduler so that they + * know about it. That allows them to make better scheduling + * decisions. So when the below references 'sync' and 'async', it + * is referencing this priority hint. + * + * With that in mind, the available types are: + * + * READ A normal read operation. 
Device will be plugged. + * READ_SYNC A synchronous read. Device is not plugged, caller can + * immediately wait on this read without caring about + * unplugging. + * READA Used for read-ahead operations. Lower priority, and the + * block layer could (in theory) choose to ignore this + * request if it runs into resource problems. + * WRITE A normal async write. Device will be plugged. + * WRITE_SYNC Synchronous write. Identical to WRITE, but passes down + * the hint that someone will be waiting on this IO + * shortly. The write equivalent of READ_SYNC. + * WRITE_ODIRECT Special case write for O_DIRECT only. + * WRITE_FLUSH Like WRITE_SYNC but with preceding cache flush. + * WRITE_FUA Like WRITE_SYNC but data is guaranteed to be on + * non-volatile media on completion. + * WRITE_FLUSH_FUA Combination of WRITE_FLUSH and FUA. The IO is preceded + * by a cache flush and data is guaranteed to be on + * non-volatile media on completion. + * + */ +#define RW_MASK REQ_WRITE +#define RWA_MASK REQ_RAHEAD + +#define READ 0 +#define WRITE RW_MASK +#define READA RWA_MASK + +#define READ_SYNC (READ | REQ_SYNC) +#define READ_META (READ | REQ_META) +#define WRITE_SYNC (WRITE | REQ_SYNC | REQ_NOIDLE) +#define WRITE_ODIRECT (WRITE | REQ_SYNC) +#define WRITE_META (WRITE | REQ_META) +#define WRITE_FLUSH (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FLUSH) +#define WRITE_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FUA) +#define WRITE_FLUSH_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FLUSH | REQ_FUA) #define SEL_IN 1 #define SEL_OUT 2 @@ -91,10 +174,12 @@ extern int dir_notify_enable; /* public flags for file_system_type */ #define FS_REQUIRES_DEV 1 #define FS_BINARY_MOUNTDATA 2 +#define FS_HAS_SUBTYPE 4 #define FS_REVAL_DOT 16384 /* Check the paths ".", ".." for staleness */ -#define FS_ODD_RENAME 32768 /* Temporary stuff; will go away as soon - * as nfs_rename() will be cleaned up - */ +#define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move() + * during rename() internally. 
+ */ + /* * These are the fs-independent mount-flags: up to 32 flags are supported */ @@ -119,13 +204,19 @@ extern int dir_notify_enable; #define MS_PRIVATE (1<<18) /* change to private */ #define MS_SLAVE (1<<19) /* change to slave */ #define MS_SHARED (1<<20) /* change to shared */ +#define MS_RELATIME (1<<21) /* Update atime relative to mtime/ctime. */ +#define MS_KERNMOUNT (1<<22) /* this is a kern_mount call */ +#define MS_I_VERSION (1<<23) /* Update inode I_version field */ +#define MS_STRICTATIME (1<<24) /* Always perform atime updates */ +#define MS_NOSEC (1<<28) +#define MS_BORN (1<<29) #define MS_ACTIVE (1<<30) #define MS_NOUSER (1<<31) /* * Superblock flags that can be altered by MS_REMOUNT */ -#define MS_RMT_MASK (MS_RDONLY|MS_SYNCHRONOUS|MS_MANDLOCK) +#define MS_RMT_MASK (MS_RDONLY|MS_SYNCHRONOUS|MS_MANDLOCK|MS_I_VERSION) /* * Old magic mount flag and mask @@ -145,6 +236,9 @@ extern int dir_notify_enable; #define S_NOCMTIME 128 /* Do not update file c/mtime */ #define S_SWAPFILE 256 /* Do not truncate: swapon got its bmaps */ #define S_PRIVATE 512 /* Inode is fs-internal */ +#define S_IMA 1024 /* Inode has an associated IMA struct */ +#define S_AUTOMOUNT 2048 /* Automount/referral quasi-directory */ +#define S_NOSEC 4096 /* no suid or xattr security attributes */ /* * Note that nosuid etc flags are inode-specific: setting some file-system @@ -167,6 +261,8 @@ extern int dir_notify_enable; #define IS_DIRSYNC(inode) (__IS_FLG(inode, MS_SYNCHRONOUS|MS_DIRSYNC) || \ ((inode)->i_flags & (S_SYNC|S_DIRSYNC))) #define IS_MANDLOCK(inode) __IS_FLG(inode, MS_MANDLOCK) +#define IS_NOATIME(inode) __IS_FLG(inode, MS_RDONLY|MS_NOATIME) +#define IS_I_VERSION(inode) __IS_FLG(inode, MS_I_VERSION) #define IS_NOQUOTA(inode) ((inode)->i_flags & S_NOQUOTA) #define IS_APPEND(inode) ((inode)->i_flags & S_APPEND) @@ -177,6 +273,9 @@ extern int dir_notify_enable; #define IS_NOCMTIME(inode) ((inode)->i_flags & S_NOCMTIME) #define IS_SWAPFILE(inode) ((inode)->i_flags & S_SWAPFILE) 
#define IS_PRIVATE(inode) ((inode)->i_flags & S_PRIVATE) +#define IS_IMA(inode) ((inode)->i_flags & S_IMA) +#define IS_AUTOMOUNT(inode) ((inode)->i_flags & S_AUTOMOUNT) +#define IS_NOSEC(inode) ((inode)->i_flags & S_NOSEC) /* the read-only stuff doesn't really belong here, but any other place is probably as bad and I don't want to create yet another include file. */ @@ -211,10 +310,63 @@ extern int dir_notify_enable; #define BLKTRACESTART _IO(0x12,116) #define BLKTRACESTOP _IO(0x12,117) #define BLKTRACETEARDOWN _IO(0x12,118) +#define BLKDISCARD _IO(0x12,119) +#define BLKIOMIN _IO(0x12,120) +#define BLKIOOPT _IO(0x12,121) +#define BLKALIGNOFF _IO(0x12,122) +#define BLKPBSZGET _IO(0x12,123) +#define BLKDISCARDZEROES _IO(0x12,124) +#define BLKSECDISCARD _IO(0x12,125) #define BMAP_IOCTL 1 /* obsolete - kept for compatibility */ #define FIBMAP _IO(0x00,1) /* bmap access */ #define FIGETBSZ _IO(0x00,2) /* get the block size used for bmap */ +#define FIFREEZE _IOWR('X', 119, int) /* Freeze */ +#define FITHAW _IOWR('X', 120, int) /* Thaw */ +#define FITRIM _IOWR('X', 121, struct fstrim_range) /* Trim */ + +#define FS_IOC_GETFLAGS _IOR('f', 1, long) +#define FS_IOC_SETFLAGS _IOW('f', 2, long) +#define FS_IOC_GETVERSION _IOR('v', 1, long) +#define FS_IOC_SETVERSION _IOW('v', 2, long) +#define FS_IOC_FIEMAP _IOWR('f', 11, struct fiemap) +#define FS_IOC32_GETFLAGS _IOR('f', 1, int) +#define FS_IOC32_SETFLAGS _IOW('f', 2, int) +#define FS_IOC32_GETVERSION _IOR('v', 1, int) +#define FS_IOC32_SETVERSION _IOW('v', 2, int) + +/* + * Inode flags (FS_IOC_GETFLAGS / FS_IOC_SETFLAGS) + */ +#define FS_SECRM_FL 0x00000001 /* Secure deletion */ +#define FS_UNRM_FL 0x00000002 /* Undelete */ +#define FS_COMPR_FL 0x00000004 /* Compress file */ +#define FS_SYNC_FL 0x00000008 /* Synchronous updates */ +#define FS_IMMUTABLE_FL 0x00000010 /* Immutable file */ +#define FS_APPEND_FL 0x00000020 /* writes to file may only append */ +#define FS_NODUMP_FL 0x00000040 /* do not dump file */ +#define 
FS_NOATIME_FL 0x00000080 /* do not update atime */ +/* Reserved for compression usage... */ +#define FS_DIRTY_FL 0x00000100 +#define FS_COMPRBLK_FL 0x00000200 /* One or more compressed clusters */ +#define FS_NOCOMP_FL 0x00000400 /* Don't compress */ +#define FS_ECOMPR_FL 0x00000800 /* Compression error */ +/* End compression flags --- maybe not all used */ +#define FS_BTREE_FL 0x00001000 /* btree format dir */ +#define FS_INDEX_FL 0x00001000 /* hash-indexed directory */ +#define FS_IMAGIC_FL 0x00002000 /* AFS directory */ +#define FS_JOURNAL_DATA_FL 0x00004000 /* Reserved for ext3 */ +#define FS_NOTAIL_FL 0x00008000 /* file tail should not be merged */ +#define FS_DIRSYNC_FL 0x00010000 /* dirsync behaviour (directories only) */ +#define FS_TOPDIR_FL 0x00020000 /* Top of directory hierarchies*/ +#define FS_EXTENT_FL 0x00080000 /* Extents */ +#define FS_DIRECTIO_FL 0x00100000 /* Use direct i/o */ +#define FS_NOCOW_FL 0x00800000 /* Do not cow file */ +#define FS_RESERVED_FL 0x80000000 /* reserved for ext2 lib */ + +#define FS_FL_USER_VISIBLE 0x0003DFFF /* User visible flags */ +#define FS_FL_USER_MODIFIABLE 0x000380FF /* User modifiable flags */ + #define SYNC_FILE_RANGE_WAIT_BEFORE 1 #define SYNC_FILE_RANGE_WRITE 2 @@ -227,59 +379,74 @@ extern int dir_notify_enable; #include <linux/types.h> #include <linux/kdev_t.h> #include <linux/dcache.h> +#include <linux/path.h> #include <linux/stat.h> #include <linux/cache.h> -#include <linux/kobject.h> #include <linux/list.h> #include <linux/radix-tree.h> #include <linux/prio_tree.h> #include <linux/init.h> -#include <linux/sched.h> +#include <linux/pid.h> #include <linux/mutex.h> +#include <linux/capability.h> +#include <linux/semaphore.h> +#include <linux/fiemap.h> +#include <linux/rculist_bl.h> #include <asm/atomic.h> -#include <asm/semaphore.h> #include <asm/byteorder.h> +struct export_operations; struct hd_geometry; struct iovec; struct nameidata; struct kiocb; +struct kobject; struct pipe_inode_info; struct 
poll_table_struct; struct kstatfs; struct vm_area_struct; struct vfsmount; +struct cred; -extern void __init inode_init(unsigned long); +extern void __init inode_init(void); extern void __init inode_init_early(void); -extern void __init mnt_init(unsigned long); extern void __init files_init(unsigned long); +extern struct files_stat_struct files_stat; +extern unsigned long get_max_files(void); +extern int sysctl_nr_open; +extern struct inodes_stat_t inodes_stat; +extern int leases_enable, lease_break_time; + struct buffer_head; typedef int (get_block_t)(struct inode *inode, sector_t iblock, struct buffer_head *bh_result, int create); typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset, - ssize_t bytes, void *private); + ssize_t bytes, void *private, int ret, + bool is_async); /* * Attribute flags. These should be or-ed together to figure out what * has been changed! */ -#define ATTR_MODE 1 -#define ATTR_UID 2 -#define ATTR_GID 4 -#define ATTR_SIZE 8 -#define ATTR_ATIME 16 -#define ATTR_MTIME 32 -#define ATTR_CTIME 64 -#define ATTR_ATIME_SET 128 -#define ATTR_MTIME_SET 256 -#define ATTR_FORCE 512 /* Not a change, but a change it */ -#define ATTR_ATTR_FLAG 1024 -#define ATTR_KILL_SUID 2048 -#define ATTR_KILL_SGID 4096 -#define ATTR_FILE 8192 +#define ATTR_MODE (1 << 0) +#define ATTR_UID (1 << 1) +#define ATTR_GID (1 << 2) +#define ATTR_SIZE (1 << 3) +#define ATTR_ATIME (1 << 4) +#define ATTR_MTIME (1 << 5) +#define ATTR_CTIME (1 << 6) +#define ATTR_ATIME_SET (1 << 7) +#define ATTR_MTIME_SET (1 << 8) +#define ATTR_FORCE (1 << 9) /* Not a change, but a change it */ +#define ATTR_ATTR_FLAG (1 << 10) +#define ATTR_KILL_SUID (1 << 11) +#define ATTR_KILL_SGID (1 << 12) +#define ATTR_FILE (1 << 13) +#define ATTR_KILL_PRIV (1 << 14) +#define ATTR_OPEN (1 << 15) /* Truncating from open(O_TRUNC) */ +#define ATTR_TIMES_SET (1 << 16) /* * This is the Inode Attributes structure, used for notify_change(). 
It @@ -301,7 +468,7 @@ struct iattr { struct timespec ia_ctime; /* - * Not an attribute, but an auxilary info for filesystems wanting to + * Not an attribute, but an auxiliary info for filesystems wanting to * implement an ftruncate() like method. NOTE: filesystem should * check for (ia_valid & ATTR_FILE), and not for (ia_file != NULL). */ @@ -331,7 +498,7 @@ struct iattr { * trying again. The aop will be taking reasonable * precautions not to livelock. If the caller held a page * reference, it should drop it before retrying. Returned - * by readpage(), prepare_write(), and commit_write(). + * by readpage(). * * address_space_operation functions return these large constants to indicate * special semantics to the caller. These are much larger than the bytes in a @@ -344,6 +511,12 @@ enum positive_aop_returns { AOP_TRUNCATED_PAGE = 0x80001, }; +#define AOP_FLAG_UNINTERRUPTIBLE 0x0001 /* will not do a short write */ +#define AOP_FLAG_CONT_EXPAND 0x0002 /* called from cont_expand */ +#define AOP_FLAG_NOFS 0x0004 /* used by filesystem to direct + * helper code (eg buffer layer) + * to clear GFP_FS from alloc */ + /* * oh the beauties of C type declarations. 
*/ @@ -351,10 +524,63 @@ struct page; struct address_space; struct writeback_control; +struct iov_iter { + const struct iovec *iov; + unsigned long nr_segs; + size_t iov_offset; + size_t count; +}; + +size_t iov_iter_copy_from_user_atomic(struct page *page, + struct iov_iter *i, unsigned long offset, size_t bytes); +size_t iov_iter_copy_from_user(struct page *page, + struct iov_iter *i, unsigned long offset, size_t bytes); +void iov_iter_advance(struct iov_iter *i, size_t bytes); +int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes); +size_t iov_iter_single_seg_count(struct iov_iter *i); + +static inline void iov_iter_init(struct iov_iter *i, + const struct iovec *iov, unsigned long nr_segs, + size_t count, size_t written) +{ + i->iov = iov; + i->nr_segs = nr_segs; + i->iov_offset = 0; + i->count = count + written; + + iov_iter_advance(i, written); +} + +static inline size_t iov_iter_count(struct iov_iter *i) +{ + return i->count; +} + +/* + * "descriptor" for what we're up to with a read. + * This allows us to use the same read code yet + * have multiple different users of the data that + * we read from a file. + * + * The simplest case just copies the data to user + * mode. + */ +typedef struct { + size_t written; + size_t count; + union { + char __user *buf; + void *data; + } arg; + int error; +} read_descriptor_t; + +typedef int (*read_actor_t)(read_descriptor_t *, struct page *, + unsigned long, unsigned long); + struct address_space_operations { int (*writepage)(struct page *page, struct writeback_control *wbc); int (*readpage)(struct file *, struct page *); - void (*sync_page)(struct page *); /* Write back some dirty pages from this mapping. 
*/ int (*writepages)(struct address_space *, struct writeback_control *); @@ -365,35 +591,55 @@ struct address_space_operations { int (*readpages)(struct file *filp, struct address_space *mapping, struct list_head *pages, unsigned nr_pages); - /* - * ext3 requires that a successful prepare_write() call be followed - * by a commit_write() call - they must be balanced - */ - int (*prepare_write)(struct file *, struct page *, unsigned, unsigned); - int (*commit_write)(struct file *, struct page *, unsigned, unsigned); + int (*write_begin)(struct file *, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata); + int (*write_end)(struct file *, struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata); + /* Unfortunately this kludge is needed for FIBMAP. Don't use it */ sector_t (*bmap)(struct address_space *, sector_t); void (*invalidatepage) (struct page *, unsigned long); int (*releasepage) (struct page *, gfp_t); + void (*freepage)(struct page *); ssize_t (*direct_IO)(int, struct kiocb *, const struct iovec *iov, loff_t offset, unsigned long nr_segs); - struct page* (*get_xip_page)(struct address_space *, sector_t, - int); + int (*get_xip_mem)(struct address_space *, pgoff_t, int, + void **, unsigned long *); /* migrate the contents of a page to the specified target */ int (*migratepage) (struct address_space *, struct page *, struct page *); + int (*launder_page) (struct page *); + int (*is_partially_uptodate) (struct page *, read_descriptor_t *, + unsigned long); + int (*error_remove_page)(struct address_space *, struct page *); }; +extern const struct address_space_operations empty_aops; + +/* + * pagecache_write_begin/pagecache_write_end must be used by general code + * to write into the pagecache. 
+ */ +int pagecache_write_begin(struct file *, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata); + +int pagecache_write_end(struct file *, struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata); + struct backing_dev_info; struct address_space { struct inode *host; /* owner: inode, block_device */ struct radix_tree_root page_tree; /* radix tree of all pages */ - rwlock_t tree_lock; /* and rwlock protecting it */ + spinlock_t tree_lock; /* and lock protecting it */ unsigned int i_mmap_writable;/* count VM_SHARED mappings */ struct prio_tree_root i_mmap; /* tree of private and shared mappings */ struct list_head i_mmap_nonlinear;/*list VM_NONLINEAR mappings */ - spinlock_t i_mmap_lock; /* protect tree, count, list */ - unsigned int truncate_count; /* Cover race condition with truncate */ + struct mutex i_mmap_mutex; /* protect tree, count, list */ + /* Protected by tree_lock together with the radix tree */ unsigned long nrpages; /* number of total pages */ pgoff_t writeback_index;/* writeback starts here */ const struct address_space_operations *a_ops; /* methods */ @@ -405,21 +651,23 @@ struct address_space { } __attribute__((aligned(sizeof(long)))); /* * On most architectures that alignment is already the case; but - * must be enforced here for CRIS, to let the least signficant bit + * must be enforced here for CRIS, to let the least significant bit * of struct page's "mapping" pointer be used for PAGE_MAPPING_ANON. 
*/ struct block_device { dev_t bd_dev; /* not a kdev_t - it's a search key */ - struct inode * bd_inode; /* will die */ int bd_openers; + struct inode * bd_inode; /* will die */ + struct super_block * bd_super; struct mutex bd_mutex; /* open/close mutex */ - struct mutex bd_mount_mutex; /* mount mutex */ struct list_head bd_inodes; + void * bd_claiming; void * bd_holder; int bd_holders; + bool bd_write_holder; #ifdef CONFIG_SYSFS - struct list_head bd_holder_list; + struct list_head bd_holder_disks; #endif struct block_device * bd_contains; unsigned bd_block_size; @@ -429,7 +677,6 @@ struct block_device { int bd_invalidated; struct gendisk * bd_disk; struct list_head bd_list; - struct backing_dev_info *bd_inode_backing_dev_info; /* * Private data. You must have bd_claim'ed the block_device * to use this. NOTE: bd_claim allows an owner to claim @@ -437,29 +684,20 @@ struct block_device { * care to not mess up bd_private for that case. */ unsigned long bd_private; -}; -/* - * bdev->bd_mutex nesting subclasses for the lock validator: - * - * 0: normal - * 1: 'whole' - * 2: 'partition' - */ -enum bdev_bd_mutex_lock_class -{ - BD_MUTEX_NORMAL, - BD_MUTEX_WHOLE, - BD_MUTEX_PARTITION + /* The counter of freeze processes */ + int bd_fsfreeze_count; + /* Mutex for freeze */ + struct mutex bd_fsfreeze_mutex; }; - /* * Radix-tree tags, for tagging dirty and writeback pages within the pagecache * radix trees */ #define PAGECACHE_TAG_DIRTY 0 #define PAGECACHE_TAG_WRITEBACK 1 +#define PAGECACHE_TAG_TOWRITE 2 int mapping_tagged(struct address_space *mapping, int tag); @@ -494,73 +732,89 @@ static inline int mapping_writably_mapped(struct address_space *mapping) #define i_size_ordered_init(inode) do { } while (0) #endif +struct posix_acl; +#define ACL_NOT_CACHED ((void *)(-1)) + struct inode { + /* RCU path lookup touches following: */ + umode_t i_mode; + uid_t i_uid; + gid_t i_gid; + const struct inode_operations *i_op; + struct super_block *i_sb; + + spinlock_t i_lock; /* 
i_blocks, i_bytes, maybe i_size */ + unsigned int i_flags; + unsigned long i_state; +#ifdef CONFIG_SECURITY + void *i_security; +#endif + struct mutex i_mutex; + + + unsigned long dirtied_when; /* jiffies of first dirtying */ + struct hlist_node i_hash; - struct list_head i_list; + struct list_head i_wb_list; /* backing dev IO list */ + struct list_head i_lru; /* inode LRU list */ struct list_head i_sb_list; - struct list_head i_dentry; + union { + struct list_head i_dentry; + struct rcu_head i_rcu; + }; unsigned long i_ino; atomic_t i_count; - umode_t i_mode; unsigned int i_nlink; - uid_t i_uid; - gid_t i_gid; dev_t i_rdev; + unsigned int i_blkbits; + u64 i_version; loff_t i_size; +#ifdef __NEED_I_SIZE_ORDERED + seqcount_t i_size_seqcount; +#endif struct timespec i_atime; struct timespec i_mtime; struct timespec i_ctime; - unsigned int i_blkbits; - unsigned long i_blksize; - unsigned long i_version; blkcnt_t i_blocks; unsigned short i_bytes; - spinlock_t i_lock; /* i_blocks, i_bytes, maybe i_size */ - struct mutex i_mutex; struct rw_semaphore i_alloc_sem; - struct inode_operations *i_op; const struct file_operations *i_fop; /* former ->i_op->default_file_ops */ - struct super_block *i_sb; struct file_lock *i_flock; struct address_space *i_mapping; struct address_space i_data; #ifdef CONFIG_QUOTA struct dquot *i_dquot[MAXQUOTAS]; #endif - /* These three should probably be a union */ struct list_head i_devices; - struct pipe_inode_info *i_pipe; - struct block_device *i_bdev; - struct cdev *i_cdev; - int i_cindex; + union { + struct pipe_inode_info *i_pipe; + struct block_device *i_bdev; + struct cdev *i_cdev; + }; __u32 i_generation; -#ifdef CONFIG_DNOTIFY - unsigned long i_dnotify_mask; /* Directory notify events */ - struct dnotify_struct *i_dnotify; /* for directory notifications */ +#ifdef CONFIG_FSNOTIFY + __u32 i_fsnotify_mask; /* all events this inode cares about */ + struct hlist_head i_fsnotify_marks; #endif -#ifdef CONFIG_INOTIFY - struct list_head 
inotify_watches; /* watches on this inode */ - struct mutex inotify_mutex; /* protects the watches list */ +#ifdef CONFIG_IMA + atomic_t i_readcount; /* struct files open RO */ #endif - - unsigned long i_state; - unsigned long dirtied_when; /* jiffies of first dirtying */ - - unsigned int i_flags; - atomic_t i_writecount; - void *i_security; - union { - void *generic_ip; - } u; -#ifdef __NEED_I_SIZE_ORDERED - seqcount_t i_size_seqcount; +#ifdef CONFIG_FS_POSIX_ACL + struct posix_acl *i_acl; + struct posix_acl *i_default_acl; #endif + void *i_private; /* fs or device private pointer */ }; +static inline int inode_unhashed(struct inode *inode) +{ + return hlist_unhashed(&inode->i_hash); +} + /* * inode->i_mutex nesting subclasses for the lock validator: * @@ -591,7 +845,7 @@ enum inode_i_mutex_lock_class * cmpxchg8b without the need of the lock prefix). For SMP compiles * and 64bit archs it makes no difference if preempt is enabled or not. */ -static inline loff_t i_size_read(struct inode *inode) +static inline loff_t i_size_read(const struct inode *inode) { #if BITS_PER_LONG==32 && defined(CONFIG_SMP) loff_t i_size; @@ -614,7 +868,11 @@ static inline loff_t i_size_read(struct inode *inode) #endif } - +/* + * NOTE: unlike i_size_read(), i_size_write() does need locking around it + * (normally i_mutex), otherwise on 32bit/SMP an update of i_size_seqcount + * can be lost, resulting in subsequent i_size_read() calls spinning forever. 
+ */ static inline void i_size_write(struct inode *inode, loff_t i_size) { #if BITS_PER_LONG==32 && defined(CONFIG_SMP) @@ -630,12 +888,12 @@ static inline void i_size_write(struct inode *inode, loff_t i_size) #endif } -static inline unsigned iminor(struct inode *inode) +static inline unsigned iminor(const struct inode *inode) { return MINOR(inode->i_rdev); } -static inline unsigned imajor(struct inode *inode) +static inline unsigned imajor(const struct inode *inode) { return MAJOR(inode->i_rdev); } @@ -644,9 +902,9 @@ extern struct block_device *I_BDEV(struct inode *inode); struct fown_struct { rwlock_t lock; /* protects pid, uid, euid fields */ - int pid; /* pid or -pgrp where SIGIO should be sent */ + struct pid *pid; /* pid or -pgrp where SIGIO should be sent */ + enum pid_type pid_type; /* Kind of process group SIGIO should be sent to */ uid_t uid, euid; /* uid/euid of process setting the owner */ - void *security; int signum; /* posix.1b rt signal to be delivered on IO */ }; @@ -654,19 +912,27 @@ struct fown_struct { * Track a single file's readahead state */ struct file_ra_state { - unsigned long start; /* Current window */ - unsigned long size; - unsigned long flags; /* ra flags RA_FLAG_xxx*/ - unsigned long cache_hit; /* cache hit count*/ - unsigned long prev_page; /* Cache last read() position */ - unsigned long ahead_start; /* Ahead window */ - unsigned long ahead_size; - unsigned long ra_pages; /* Maximum readahead window */ - unsigned long mmap_hit; /* Cache hit stat for mmap accesses */ - unsigned long mmap_miss; /* Cache miss stat for mmap accesses */ + pgoff_t start; /* where readahead started */ + unsigned int size; /* # of readahead pages */ + unsigned int async_size; /* do asynchronous readahead when + there are only # of pages ahead */ + + unsigned int ra_pages; /* Maximum readahead window */ + unsigned int mmap_miss; /* Cache miss stat for mmap accesses */ + loff_t prev_pos; /* Cache last read() position */ }; -#define RA_FLAG_MISS 0x01 /* a 
cache miss occured against this file */ -#define RA_FLAG_INCACHE 0x02 /* file is already in cache */ + +/* + * Check if @index falls in the readahead windows. + */ +static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index) +{ + return (index >= ra->start && + index < ra->start + ra->size); +} + +#define FILE_MNT_WRITE_TAKEN 1 +#define FILE_MNT_WRITE_RELEASED 2 struct file { /* @@ -677,36 +943,93 @@ struct file { struct list_head fu_list; struct rcu_head fu_rcuhead; } f_u; - struct dentry *f_dentry; - struct vfsmount *f_vfsmnt; + struct path f_path; +#define f_dentry f_path.dentry +#define f_vfsmnt f_path.mnt const struct file_operations *f_op; - atomic_t f_count; + spinlock_t f_lock; /* f_ep_links, f_flags, no IRQ */ +#ifdef CONFIG_SMP + int f_sb_list_cpu; +#endif + atomic_long_t f_count; unsigned int f_flags; - mode_t f_mode; + fmode_t f_mode; loff_t f_pos; struct fown_struct f_owner; - unsigned int f_uid, f_gid; + const struct cred *f_cred; struct file_ra_state f_ra; - unsigned long f_version; + u64 f_version; +#ifdef CONFIG_SECURITY void *f_security; - +#endif /* needed for tty driver, and maybe others */ void *private_data; #ifdef CONFIG_EPOLL /* Used by fs/eventpoll.c to link all the hooks to this file */ struct list_head f_ep_links; - spinlock_t f_ep_lock; + struct list_head f_tfile_llink; #endif /* #ifdef CONFIG_EPOLL */ struct address_space *f_mapping; +#ifdef CONFIG_DEBUG_WRITECOUNT + unsigned long f_mnt_write_state; +#endif +}; + +struct file_handle { + __u32 handle_bytes; + int handle_type; + /* file identifier */ + unsigned char f_handle[0]; }; -extern spinlock_t files_lock; -#define file_list_lock() spin_lock(&files_lock); -#define file_list_unlock() spin_unlock(&files_lock); -#define get_file(x) atomic_inc(&(x)->f_count) -#define file_count(x) atomic_read(&(x)->f_count) +#define get_file(x) atomic_long_inc(&(x)->f_count) +#define fput_atomic(x) atomic_long_add_unless(&(x)->f_count, -1, 1) +#define file_count(x) 
atomic_long_read(&(x)->f_count) + +#ifdef CONFIG_DEBUG_WRITECOUNT +static inline void file_take_write(struct file *f) +{ + WARN_ON(f->f_mnt_write_state != 0); + f->f_mnt_write_state = FILE_MNT_WRITE_TAKEN; +} +static inline void file_release_write(struct file *f) +{ + f->f_mnt_write_state |= FILE_MNT_WRITE_RELEASED; +} +static inline void file_reset_write(struct file *f) +{ + f->f_mnt_write_state = 0; +} +static inline void file_check_state(struct file *f) +{ + /* + * At this point, either both or neither of these bits + * should be set. + */ + WARN_ON(f->f_mnt_write_state == FILE_MNT_WRITE_TAKEN); + WARN_ON(f->f_mnt_write_state == FILE_MNT_WRITE_RELEASED); +} +static inline int file_check_writeable(struct file *f) +{ + if (f->f_mnt_write_state == FILE_MNT_WRITE_TAKEN) + return 0; + printk(KERN_WARNING "writeable file with no " + "mnt_want_write()\n"); + WARN_ON(1); + return -EINVAL; +} +#else /* !CONFIG_DEBUG_WRITECOUNT */ +static inline void file_take_write(struct file *filp) {} +static inline void file_release_write(struct file *filp) {} +static inline void file_reset_write(struct file *filp) {} +static inline void file_check_state(struct file *filp) {} +static inline int file_check_writeable(struct file *filp) +{ + return 0; +} +#endif /* CONFIG_DEBUG_WRITECOUNT */ #define MAX_NON_LFS ((1UL<<31) - 1) @@ -727,6 +1050,12 @@ extern spinlock_t files_lock; #define FL_SLEEP 128 /* A blocking lock */ /* + * Special return value from posix_lock_file() and vfs_lock_file() for + * asynchronous locking. + */ +#define FILE_LOCK_DEFERRED 1 + +/* * The POSIX file lock owner is determined by * the "struct files_struct" in the thread group * (or NULL for no owner - BSD locks). 
@@ -736,8 +1065,6 @@ extern spinlock_t files_lock; typedef struct files_struct *fl_owner_t; struct file_lock_operations { - void (*fl_insert)(struct file_lock *); /* lock insertion callback */ - void (*fl_remove)(struct file_lock *); /* lock removal callback */ void (*fl_copy_lock)(struct file_lock *, struct file_lock *); void (*fl_release_private)(struct file_lock *); }; @@ -745,13 +1072,20 @@ struct file_lock_operations { struct lock_manager_operations { int (*fl_compare_owner)(struct file_lock *, struct file_lock *); void (*fl_notify)(struct file_lock *); /* unblock callback */ - void (*fl_copy_lock)(struct file_lock *, struct file_lock *); + int (*fl_grant)(struct file_lock *, struct file_lock *, int); void (*fl_release_private)(struct file_lock *); void (*fl_break)(struct file_lock *); - int (*fl_mylease)(struct file_lock *, struct file_lock *); int (*fl_change)(struct file_lock **, int); }; +struct lock_manager { + struct list_head list; +}; + +void locks_start_grace(struct lock_manager *); +void locks_end_grace(struct lock_manager *); +int locks_in_grace(void); + /* that will die - we need it for nfs_lock_info */ #include <linux/nfs_fs_i.h> @@ -760,22 +1094,27 @@ struct file_lock { struct list_head fl_link; /* doubly linked list of all locks */ struct list_head fl_block; /* circular list of blocked processes */ fl_owner_t fl_owner; + unsigned char fl_flags; + unsigned char fl_type; unsigned int fl_pid; + struct pid *fl_nspid; wait_queue_head_t fl_wait; struct file *fl_file; - unsigned char fl_flags; - unsigned char fl_type; loff_t fl_start; loff_t fl_end; struct fasync_struct * fl_fasync; /* for lease break notifications */ unsigned long fl_break_time; /* for nonblocking lease breaks */ - struct file_lock_operations *fl_ops; /* Callbacks for filesystems */ - struct lock_manager_operations *fl_lmops; /* Callbacks for lockmanagers */ + const struct file_lock_operations *fl_ops; /* Callbacks for filesystems */ + const struct lock_manager_operations *fl_lmops; 
/* Callbacks for lockmanagers */ union { struct nfs_lock_info nfs_fl; struct nfs4_lock_info nfs4_fl; + struct { + struct list_head link; /* link in AFS vnode's pending_locks list */ + int state; /* state of grant or error if -ve */ + } afs; } fl_u; }; @@ -788,6 +1127,9 @@ struct file_lock { #include <linux/fcntl.h> +extern void send_sigio(struct fown_struct *fown, int fd, int band); + +#ifdef CONFIG_FILE_LOCKING extern int fcntl_getlk(struct file *, struct flock __user *); extern int fcntl_setlk(unsigned int, struct file *, unsigned int, struct flock __user *); @@ -798,50 +1140,213 @@ extern int fcntl_setlk64(unsigned int, struct file *, unsigned int, struct flock64 __user *); #endif -extern void send_sigio(struct fown_struct *fown, int fd, int band); extern int fcntl_setlease(unsigned int fd, struct file *filp, long arg); extern int fcntl_getlease(struct file *filp); -/* fs/sync.c */ -extern int do_sync_file_range(struct file *file, loff_t offset, loff_t endbyte, - unsigned int flags); - /* fs/locks.c */ +void locks_free_lock(struct file_lock *fl); extern void locks_init_lock(struct file_lock *); +extern struct file_lock * locks_alloc_lock(void); extern void locks_copy_lock(struct file_lock *, struct file_lock *); +extern void __locks_copy_lock(struct file_lock *, const struct file_lock *); extern void locks_remove_posix(struct file *, fl_owner_t); extern void locks_remove_flock(struct file *); -extern int posix_test_lock(struct file *, struct file_lock *, struct file_lock *); -extern int posix_lock_file_conf(struct file *, struct file_lock *, struct file_lock *); -extern int posix_lock_file(struct file *, struct file_lock *); +extern void locks_release_private(struct file_lock *); +extern void posix_test_lock(struct file *, struct file_lock *); +extern int posix_lock_file(struct file *, struct file_lock *, struct file_lock *); extern int posix_lock_file_wait(struct file *, struct file_lock *); extern int posix_unblock_lock(struct file *, struct file_lock *); 
+extern int vfs_test_lock(struct file *, struct file_lock *); +extern int vfs_lock_file(struct file *, unsigned int, struct file_lock *, struct file_lock *); +extern int vfs_cancel_lock(struct file *filp, struct file_lock *fl); extern int flock_lock_file_wait(struct file *filp, struct file_lock *fl); extern int __break_lease(struct inode *inode, unsigned int flags); extern void lease_get_mtime(struct inode *, struct timespec *time); -extern int setlease(struct file *, long, struct file_lock **); +extern int generic_setlease(struct file *, long, struct file_lock **); +extern int vfs_setlease(struct file *, long, struct file_lock **); extern int lease_modify(struct file_lock **, int); extern int lock_may_read(struct inode *, loff_t start, unsigned long count); extern int lock_may_write(struct inode *, loff_t start, unsigned long count); +extern void lock_flocks(void); +extern void unlock_flocks(void); +#else /* !CONFIG_FILE_LOCKING */ +static inline int fcntl_getlk(struct file *file, struct flock __user *user) +{ + return -EINVAL; +} + +static inline int fcntl_setlk(unsigned int fd, struct file *file, + unsigned int cmd, struct flock __user *user) +{ + return -EACCES; +} + +#if BITS_PER_LONG == 32 +static inline int fcntl_getlk64(struct file *file, struct flock64 __user *user) +{ + return -EINVAL; +} + +static inline int fcntl_setlk64(unsigned int fd, struct file *file, + unsigned int cmd, struct flock64 __user *user) +{ + return -EACCES; +} +#endif +static inline int fcntl_setlease(unsigned int fd, struct file *filp, long arg) +{ + return 0; +} + +static inline int fcntl_getlease(struct file *filp) +{ + return 0; +} + +static inline void locks_init_lock(struct file_lock *fl) +{ + return; +} + +static inline void __locks_copy_lock(struct file_lock *new, struct file_lock *fl) +{ + return; +} + +static inline void locks_copy_lock(struct file_lock *new, struct file_lock *fl) +{ + return; +} + +static inline void locks_remove_posix(struct file *filp, fl_owner_t owner) +{ 
+ return; +} + +static inline void locks_remove_flock(struct file *filp) +{ + return; +} + +static inline void posix_test_lock(struct file *filp, struct file_lock *fl) +{ + return; +} + +static inline int posix_lock_file(struct file *filp, struct file_lock *fl, + struct file_lock *conflock) +{ + return -ENOLCK; +} + +static inline int posix_lock_file_wait(struct file *filp, struct file_lock *fl) +{ + return -ENOLCK; +} + +static inline int posix_unblock_lock(struct file *filp, + struct file_lock *waiter) +{ + return -ENOENT; +} + +static inline int vfs_test_lock(struct file *filp, struct file_lock *fl) +{ + return 0; +} + +static inline int vfs_lock_file(struct file *filp, unsigned int cmd, + struct file_lock *fl, struct file_lock *conf) +{ + return -ENOLCK; +} + +static inline int vfs_cancel_lock(struct file *filp, struct file_lock *fl) +{ + return 0; +} + +static inline int flock_lock_file_wait(struct file *filp, + struct file_lock *request) +{ + return -ENOLCK; +} + +static inline int __break_lease(struct inode *inode, unsigned int mode) +{ + return 0; +} + +static inline void lease_get_mtime(struct inode *inode, struct timespec *time) +{ + return; +} + +static inline int generic_setlease(struct file *filp, long arg, + struct file_lock **flp) +{ + return -EINVAL; +} + +static inline int vfs_setlease(struct file *filp, long arg, + struct file_lock **lease) +{ + return -EINVAL; +} + +static inline int lease_modify(struct file_lock **before, int arg) +{ + return -EINVAL; +} + +static inline int lock_may_read(struct inode *inode, loff_t start, + unsigned long len) +{ + return 1; +} + +static inline int lock_may_write(struct inode *inode, loff_t start, + unsigned long len) +{ + return 1; +} + +static inline void lock_flocks(void) +{ +} + +static inline void unlock_flocks(void) +{ +} + +#endif /* !CONFIG_FILE_LOCKING */ + struct fasync_struct { - int magic; - int fa_fd; - struct fasync_struct *fa_next; /* singly linked list */ - struct file *fa_file; + spinlock_t 
fa_lock; + int magic; + int fa_fd; + struct fasync_struct *fa_next; /* singly linked list */ + struct file *fa_file; + struct rcu_head fa_rcu; }; #define FASYNC_MAGIC 0x4601 /* SMP safe fasync helpers: */ extern int fasync_helper(int, struct file *, int, struct fasync_struct **); +extern struct fasync_struct *fasync_insert_entry(int, struct file *, struct fasync_struct **, struct fasync_struct *); +extern int fasync_remove_entry(struct file *, struct fasync_struct **); +extern struct fasync_struct *fasync_alloc(void); +extern void fasync_free(struct fasync_struct *); + /* can be called from interrupts */ extern void kill_fasync(struct fasync_struct **, int, int); -/* only for net: no internal synchronization */ -extern void __kill_fasync(struct fasync_struct *, int, int); +extern int __f_setown(struct file *filp, struct pid *, enum pid_type, int force); extern int f_setown(struct file *filp, unsigned long arg, int force); extern void f_delown(struct file *filp); +extern pid_t f_getown(struct file *filp); extern int send_sigurg(struct fown_struct *fown); /* @@ -851,43 +1356,50 @@ extern int send_sigurg(struct fown_struct *fown); #define MNT_FORCE 0x00000001 /* Attempt to forcibily umount */ #define MNT_DETACH 0x00000002 /* Just detach from the tree */ #define MNT_EXPIRE 0x00000004 /* Mark for expiry */ +#define UMOUNT_NOFOLLOW 0x00000008 /* Don't follow symlink on umount */ +#define UMOUNT_UNUSED 0x80000000 /* Flag guaranteed to be unused */ extern struct list_head super_blocks; extern spinlock_t sb_lock; -#define sb_entry(list) list_entry((list), struct super_block, s_list) -#define S_BIAS (1<<30) struct super_block { struct list_head s_list; /* Keep this first */ dev_t s_dev; /* search index; _not_ kdev_t */ - unsigned long s_blocksize; - unsigned char s_blocksize_bits; unsigned char s_dirt; - unsigned long long s_maxbytes; /* Max file size */ + unsigned char s_blocksize_bits; + unsigned long s_blocksize; + loff_t s_maxbytes; /* Max file size */ struct 
file_system_type *s_type; - struct super_operations *s_op; - struct dquot_operations *dq_op; - struct quotactl_ops *s_qcop; - struct export_operations *s_export_op; + const struct super_operations *s_op; + const struct dquot_operations *dq_op; + const struct quotactl_ops *s_qcop; + const struct export_operations *s_export_op; unsigned long s_flags; unsigned long s_magic; struct dentry *s_root; struct rw_semaphore s_umount; struct mutex s_lock; int s_count; - int s_syncing; - int s_need_sync_fs; atomic_t s_active; +#ifdef CONFIG_SECURITY void *s_security; - struct xattr_handler **s_xattr; +#endif + const struct xattr_handler **s_xattr; struct list_head s_inodes; /* all inodes */ - struct list_head s_dirty; /* dirty inodes */ - struct list_head s_io; /* parked for writeback */ - struct hlist_head s_anon; /* anonymous dentries for (nfs) exporting */ + struct hlist_bl_head s_anon; /* anonymous dentries for (nfs) exporting */ +#ifdef CONFIG_SMP + struct list_head __percpu *s_files; +#else struct list_head s_files; +#endif + /* s_dentry_lru, s_nr_dentry_unused protected by dcache.c lru locks */ + struct list_head s_dentry_lru; /* unused dentry lru */ + int s_nr_dentry_unused; /* # of dentry on lru */ struct block_device *s_bdev; + struct backing_dev_info *s_bdi; + struct mtd_info *s_mtd; struct list_head s_instances; struct quota_info s_dquot; /* Diskquota specific options */ @@ -895,8 +1407,14 @@ struct super_block { wait_queue_head_t s_wait_unfrozen; char s_id[32]; /* Informational name */ + u8 s_uuid[16]; /* UUID */ void *s_fs_info; /* Filesystem private info */ + fmode_t s_mode; + + /* Granularity of c/m/atime in ns. + Cannot be worse than a second */ + u32 s_time_gran; /* * The next field is for VFS *only*. No filesystems have any business @@ -904,9 +1422,23 @@ struct super_block { */ struct mutex s_vfs_rename_mutex; /* Kludge */ - /* Granularity of c/m/atime in ns. - Cannot be worse than a second */ - u32 s_time_gran; + /* + * Filesystem subtype. 
If non-empty the filesystem type field + * in /proc/mounts will be "type.subtype" + */ + char *s_subtype; + + /* + * Saved mount options for lazy filesystems using + * generic_show_options() + */ + char __rcu *s_options; + const struct dentry_operations *s_d_op; /* default d_op for dentries */ + + /* + * Saved pool identifier for cleancache (-1 means none) + */ + int cleancache_poolid; }; extern struct timespec current_fs_time(struct super_block *sb); @@ -923,45 +1455,29 @@ enum { #define vfs_check_frozen(sb, level) \ wait_event((sb)->s_wait_unfrozen, ((sb)->s_frozen < (level))) -static inline void get_fs_excl(void) -{ - atomic_inc(&current->fs_excl); -} - -static inline void put_fs_excl(void) -{ - atomic_dec(&current->fs_excl); -} - -static inline int has_fs_excl(void) -{ - return atomic_read(&current->fs_excl); -} - +#define get_fs_excl() atomic_inc(&current->fs_excl) +#define put_fs_excl() atomic_dec(&current->fs_excl) +#define has_fs_excl() atomic_read(&current->fs_excl) /* - * Superblock locking. + * until VFS tracks user namespaces for inodes, just make all files + * belong to init_user_ns */ -static inline void lock_super(struct super_block * sb) -{ - get_fs_excl(); - mutex_lock(&sb->s_lock); -} +extern struct user_namespace init_user_ns; +#define inode_userns(inode) (&init_user_ns) +extern bool inode_owner_or_capable(const struct inode *inode); -static inline void unlock_super(struct super_block * sb) -{ - put_fs_excl(); - mutex_unlock(&sb->s_lock); -} +/* not quite ready to be deprecated, but... */ +extern void lock_super(struct super_block *); +extern void unlock_super(struct super_block *); /* * VFS helper functions.. 
*/ -extern int vfs_permission(struct nameidata *, int); extern int vfs_create(struct inode *, struct dentry *, int, struct nameidata *); extern int vfs_mkdir(struct inode *, struct dentry *, int); extern int vfs_mknod(struct inode *, struct dentry *, int, dev_t); -extern int vfs_symlink(struct inode *, struct dentry *, const char *, int); +extern int vfs_symlink(struct inode *, struct dentry *, const char *); extern int vfs_link(struct dentry *, struct inode *, struct dentry *); extern int vfs_rmdir(struct inode *, struct dentry *); extern int vfs_unlink(struct inode *, struct dentry *); @@ -976,6 +1492,21 @@ extern void dentry_unhash(struct dentry *dentry); * VFS file helper functions. */ extern int file_permission(struct file *, int); +extern void inode_init_owner(struct inode *inode, const struct inode *dir, + mode_t mode); +/* + * VFS FS_IOC_FIEMAP helper definitions. + */ +struct fiemap_extent_info { + unsigned int fi_flags; /* Flags as passed from user */ + unsigned int fi_extents_mapped; /* Number of mapped extents */ + unsigned int fi_extents_max; /* Size of fiemap_extent array */ + struct fiemap_extent __user *fi_extents_start; /* Start of + fiemap_extent array */ +}; +int fiemap_fill_next_extent(struct fiemap_extent_info *info, u64 logical, + u64 phys, u64 len, u32 flags); +int fiemap_check_flags(struct fiemap_extent_info *fieinfo, u32 fs_flags); /* * File types @@ -993,52 +1524,14 @@ extern int file_permission(struct file *, int); #define DT_SOCK 12 #define DT_WHT 14 -#define OSYNC_METADATA (1<<0) -#define OSYNC_DATA (1<<1) -#define OSYNC_INODE (1<<2) -int generic_osync_inode(struct inode *, struct address_space *, int); - /* * This is the "filldir" function type, used by readdir() to let * the kernel specify what kind of dirent layout it wants to have. * This allows the kernel to read directories into kernel space or * to have different dirent layouts depending on the binary type. 
*/ -typedef int (*filldir_t)(void *, const char *, int, loff_t, ino_t, unsigned); - -struct block_device_operations { - int (*open) (struct inode *, struct file *); - int (*release) (struct inode *, struct file *); - int (*ioctl) (struct inode *, struct file *, unsigned, unsigned long); - long (*unlocked_ioctl) (struct file *, unsigned, unsigned long); - long (*compat_ioctl) (struct file *, unsigned, unsigned long); - int (*direct_access) (struct block_device *, sector_t, unsigned long *); - int (*media_changed) (struct gendisk *); - int (*revalidate_disk) (struct gendisk *); - int (*getgeo)(struct block_device *, struct hd_geometry *); - struct module *owner; -}; - -/* - * "descriptor" for what we're up to with a read for sendfile(). - * This allows us to use the same read code yet - * have multiple different users of the data that - * we read from a file. - * - * The simplest case just copies the data to user - * mode. - */ -typedef struct { - size_t written; - size_t count; - union { - char __user * buf; - void *data; - } arg; - int error; -} read_descriptor_t; - -typedef int (*read_actor_t)(read_descriptor_t *, struct page *, unsigned long, unsigned long); +typedef int (*filldir_t)(void *, const char *, int, loff_t, u64, unsigned); +struct block_device_operations; /* These macros are for out of kernel modules to test that * the kernel supports the unlocked_ioctl and compat_ioctl @@ -1048,44 +1541,51 @@ typedef int (*read_actor_t)(read_descriptor_t *, struct page *, unsigned long, u /* * NOTE: - * read, write, poll, fsync, readv, writev, unlocked_ioctl and compat_ioctl - * can be called without the big kernel lock held in all filesystems. + * all file operations except setlease can be called without + * the big kernel lock held in all filesystems. 
*/ struct file_operations { struct module *owner; loff_t (*llseek) (struct file *, loff_t, int); ssize_t (*read) (struct file *, char __user *, size_t, loff_t *); - ssize_t (*aio_read) (struct kiocb *, char __user *, size_t, loff_t); ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *); - ssize_t (*aio_write) (struct kiocb *, const char __user *, size_t, loff_t); + ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t); + ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t); int (*readdir) (struct file *, void *, filldir_t); unsigned int (*poll) (struct file *, struct poll_table_struct *); - int (*ioctl) (struct inode *, struct file *, unsigned int, unsigned long); long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); long (*compat_ioctl) (struct file *, unsigned int, unsigned long); int (*mmap) (struct file *, struct vm_area_struct *); int (*open) (struct inode *, struct file *); int (*flush) (struct file *, fl_owner_t id); int (*release) (struct inode *, struct file *); - int (*fsync) (struct file *, struct dentry *, int datasync); + int (*fsync) (struct file *, int datasync); int (*aio_fsync) (struct kiocb *, int datasync); int (*fasync) (int, struct file *, int); int (*lock) (struct file *, int, struct file_lock *); - ssize_t (*readv) (struct file *, const struct iovec *, unsigned long, loff_t *); - ssize_t (*writev) (struct file *, const struct iovec *, unsigned long, loff_t *); - ssize_t (*sendfile) (struct file *, loff_t *, size_t, read_actor_t, void *); ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int); unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); int (*check_flags)(int); - int (*dir_notify)(struct file *filp, unsigned long arg); int (*flock) (struct file *, int, struct file_lock *); ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned 
int); ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int); + int (*setlease)(struct file *, long, struct file_lock **); + long (*fallocate)(struct file *file, int mode, loff_t offset, + loff_t len); }; +#define IPERM_FLAG_RCU 0x0001 + struct inode_operations { - int (*create) (struct inode *,struct dentry *,int, struct nameidata *); struct dentry * (*lookup) (struct inode *,struct dentry *, struct nameidata *); + void * (*follow_link) (struct dentry *, struct nameidata *); + int (*permission) (struct inode *, int, unsigned int); + int (*check_acl)(struct inode *, int, unsigned int); + + int (*readlink) (struct dentry *, char __user *,int); + void (*put_link) (struct dentry *, struct nameidata *, void *); + + int (*create) (struct inode *,struct dentry *,int, struct nameidata *); int (*link) (struct dentry *,struct inode *,struct dentry *); int (*unlink) (struct inode *,struct dentry *); int (*symlink) (struct inode *,struct dentry *,const char *); @@ -1094,11 +1594,7 @@ struct inode_operations { int (*mknod) (struct inode *,struct dentry *,int,dev_t); int (*rename) (struct inode *, struct dentry *, struct inode *, struct dentry *); - int (*readlink) (struct dentry *, char __user *,int); - void * (*follow_link) (struct dentry *, struct nameidata *); - void (*put_link) (struct dentry *, struct nameidata *, void *); void (*truncate) (struct inode *); - int (*permission) (struct inode *, int, struct nameidata *); int (*setattr) (struct dentry *, struct iattr *); int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *); int (*setxattr) (struct dentry *, const char *,const void *,size_t,int); @@ -1106,10 +1602,17 @@ struct inode_operations { ssize_t (*listxattr) (struct dentry *, char *, size_t); int (*removexattr) (struct dentry *, const char *); void (*truncate_range)(struct inode *, loff_t, loff_t); -}; + int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, + u64 len); +} ____cacheline_aligned; 
struct seq_file; +ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, + unsigned long nr_segs, unsigned long fast_segs, + struct iovec *fast_pointer, + struct iovec **ret_pointer); + extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *); extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *); extern ssize_t vfs_readv(struct file *, const struct iovec __user *, @@ -1117,48 +1620,93 @@ extern ssize_t vfs_readv(struct file *, const struct iovec __user *, extern ssize_t vfs_writev(struct file *, const struct iovec __user *, unsigned long, loff_t *); -/* - * NOTE: write_inode, delete_inode, clear_inode, put_inode can be called - * without the big kernel lock held in all filesystems. - */ struct super_operations { struct inode *(*alloc_inode)(struct super_block *sb); void (*destroy_inode)(struct inode *); - void (*read_inode) (struct inode *); - - void (*dirty_inode) (struct inode *); - int (*write_inode) (struct inode *, int); - void (*put_inode) (struct inode *); - void (*drop_inode) (struct inode *); - void (*delete_inode) (struct inode *); + void (*dirty_inode) (struct inode *, int flags); + int (*write_inode) (struct inode *, struct writeback_control *wbc); + int (*drop_inode) (struct inode *); + void (*evict_inode) (struct inode *); void (*put_super) (struct super_block *); void (*write_super) (struct super_block *); int (*sync_fs)(struct super_block *sb, int wait); - void (*write_super_lockfs) (struct super_block *); - void (*unlockfs) (struct super_block *); + int (*freeze_fs) (struct super_block *); + int (*unfreeze_fs) (struct super_block *); int (*statfs) (struct dentry *, struct kstatfs *); int (*remount_fs) (struct super_block *, int *, char *); - void (*clear_inode) (struct inode *); - void (*umount_begin) (struct vfsmount *, int); + void (*umount_begin) (struct super_block *); int (*show_options)(struct seq_file *, struct vfsmount *); + int (*show_devname)(struct seq_file *, struct vfsmount 
*); + int (*show_path)(struct seq_file *, struct vfsmount *); int (*show_stats)(struct seq_file *, struct vfsmount *); - +#ifdef CONFIG_QUOTA ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t); ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t); +#endif + int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t); }; -/* Inode state bits. Protected by inode_lock. */ -#define I_DIRTY_SYNC 1 /* Not dirty enough for O_DATASYNC */ -#define I_DIRTY_DATASYNC 2 /* Data-related inode changes pending */ -#define I_DIRTY_PAGES 4 /* Data-related inode changes pending */ -#define __I_LOCK 3 -#define I_LOCK (1 << __I_LOCK) -#define I_FREEING 16 -#define I_CLEAR 32 -#define I_NEW 64 -#define I_WILL_FREE 128 +/* + * Inode state bits. Protected by inode->i_lock + * + * Three bits determine the dirty state of the inode, I_DIRTY_SYNC, + * I_DIRTY_DATASYNC and I_DIRTY_PAGES. + * + * Four bits define the lifetime of an inode. Initially, inodes are I_NEW, + * until that flag is cleared. I_WILL_FREE, I_FREEING and I_CLEAR are set at + * various stages of removing an inode. + * + * Two bits are used for locking and completion notification, I_NEW and I_SYNC. + * + * I_DIRTY_SYNC Inode is dirty, but doesn't have to be written on + * fdatasync(). i_atime is the usual cause. + * I_DIRTY_DATASYNC Data-related inode changes pending. We keep track of + * these changes separately from I_DIRTY_SYNC so that we + * don't have to write inode on fdatasync() when only + * mtime has changed in it. + * I_DIRTY_PAGES Inode has dirty pages. Inode itself may be clean. + * I_NEW Serves as both a mutex and completion notification. + * New inodes set I_NEW. If two processes both create + * the same inode, one of them will release its inode and + * wait for I_NEW to be released before returning. + * Inodes in I_WILL_FREE, I_FREEING or I_CLEAR state can + * also cause waiting on I_NEW, without I_NEW actually + * being set. 
find_inode() uses this to prevent returning + * nearly-dead inodes. + * I_WILL_FREE Must be set when calling write_inode_now() if i_count + * is zero. I_FREEING must be set when I_WILL_FREE is + * cleared. + * I_FREEING Set when inode is about to be freed but still has dirty + * pages or buffers attached or the inode itself is still + * dirty. + * I_CLEAR Added by end_writeback(). In this state the inode is clean + * and can be destroyed. Inode keeps I_FREEING. + * + * Inodes that are I_WILL_FREE, I_FREEING or I_CLEAR are + * prohibited for many purposes. iget() must wait for + * the inode to be completely released, then create it + * anew. Other functions will just ignore such inodes, + * if appropriate. I_NEW is used for waiting. + * + * I_SYNC Synchronized write of dirty inode data. The bit is + * set during data writeback, and cleared with a wakeup + * on the bit address once it is done. + * + * Q: What is the difference between I_WILL_FREE and I_FREEING? + */ +#define I_DIRTY_SYNC (1 << 0) +#define I_DIRTY_DATASYNC (1 << 1) +#define I_DIRTY_PAGES (1 << 2) +#define __I_NEW 3 +#define I_NEW (1 << __I_NEW) +#define I_WILL_FREE (1 << 4) +#define I_FREEING (1 << 5) +#define I_CLEAR (1 << 6) +#define __I_SYNC 7 +#define I_SYNC (1 << __I_SYNC) +#define I_REFERENCED (1 << 8) #define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES) @@ -1173,182 +1721,144 @@ static inline void mark_inode_dirty_sync(struct inode *inode) __mark_inode_dirty(inode, I_DIRTY_SYNC); } -static inline void inode_inc_link_count(struct inode *inode) +/** + * inc_nlink - directly increment an inode's link count + * @inode: inode + * + * This is a low-level filesystem helper to replace any + * direct filesystem manipulation of i_nlink. Currently, + * it is only here for parity with dec_nlink(). 
+ */ +static inline void inc_nlink(struct inode *inode) { inode->i_nlink++; - mark_inode_dirty(inode); } -static inline void inode_dec_link_count(struct inode *inode) +static inline void inode_inc_link_count(struct inode *inode) { - inode->i_nlink--; + inc_nlink(inode); mark_inode_dirty(inode); } -extern void touch_atime(struct vfsmount *mnt, struct dentry *dentry); -static inline void file_accessed(struct file *file) +/** + * drop_nlink - directly drop an inode's link count + * @inode: inode + * + * This is a low-level filesystem helper to replace any + * direct filesystem manipulation of i_nlink. In cases + * where we are attempting to track writes to the + * filesystem, a decrement to zero means an imminent + * write when the file is truncated and actually unlinked + * on the filesystem. + */ +static inline void drop_nlink(struct inode *inode) { - if (!(file->f_flags & O_NOATIME)) - touch_atime(file->f_vfsmnt, file->f_dentry); + inode->i_nlink--; } -int sync_inode(struct inode *inode, struct writeback_control *wbc); - /** - * struct export_operations - for nfsd to communicate with file systems - * @decode_fh: decode a file handle fragment and return a &struct dentry - * @encode_fh: encode a file handle fragment from a dentry - * @get_name: find the name for a given inode in a given directory - * @get_parent: find the parent of a given directory - * @get_dentry: find a dentry for the inode given a file handle sub-fragment - * @find_exported_dentry: - * set by the exporting module to a standard helper function. - * - * Description: - * The export_operations structure provides a means for nfsd to communicate - * with a particular exported file system - particularly enabling nfsd and - * the filesystem to co-operate when dealing with file handles. 
- * - * export_operations contains two basic operation for dealing with file - * handles, decode_fh() and encode_fh(), and allows for some other - * operations to be defined which standard helper routines use to get - * specific information from the filesystem. - * - * nfsd encodes information use to determine which filesystem a filehandle - * applies to in the initial part of the file handle. The remainder, termed - * a file handle fragment, is controlled completely by the filesystem. The - * standard helper routines assume that this fragment will contain one or - * two sub-fragments, one which identifies the file, and one which may be - * used to identify the (a) directory containing the file. - * - * In some situations, nfsd needs to get a dentry which is connected into a - * specific part of the file tree. To allow for this, it passes the - * function acceptable() together with a @context which can be used to see - * if the dentry is acceptable. As there can be multiple dentrys for a - * given file, the filesystem should check each one for acceptability before - * looking for the next. As soon as an acceptable one is found, it should - * be returned. - * - * decode_fh: - * @decode_fh is given a &struct super_block (@sb), a file handle fragment - * (@fh, @fh_len) and an acceptability testing function (@acceptable, - * @context). It should return a &struct dentry which refers to the same - * file that the file handle fragment refers to, and which passes the - * acceptability test. If it cannot, it should return a %NULL pointer if - * the file was found but no acceptable &dentries were available, or a - * %ERR_PTR error code indicating why it couldn't be found (e.g. %ENOENT or - * %ENOMEM). 
+ * clear_nlink - directly zero an inode's link count + * @inode: inode * - * encode_fh: - * @encode_fh should store in the file handle fragment @fh (using at most - * @max_len bytes) information that can be used by @decode_fh to recover the - * file refered to by the &struct dentry @de. If the @connectable flag is - * set, the encode_fh() should store sufficient information so that a good - * attempt can be made to find not only the file but also it's place in the - * filesystem. This typically means storing a reference to de->d_parent in - * the filehandle fragment. encode_fh() should return the number of bytes - * stored or a negative error code such as %-ENOSPC - * - * get_name: - * @get_name should find a name for the given @child in the given @parent - * directory. The name should be stored in the @name (with the - * understanding that it is already pointing to a a %NAME_MAX+1 sized - * buffer. get_name() should return %0 on success, a negative error code - * or error. @get_name will be called without @parent->i_mutex held. - * - * get_parent: - * @get_parent should find the parent directory for the given @child which - * is also a directory. In the event that it cannot be found, or storage - * space cannot be allocated, a %ERR_PTR should be returned. - * - * get_dentry: - * Given a &super_block (@sb) and a pointer to a file-system specific inode - * identifier, possibly an inode number, (@inump) get_dentry() should find - * the identified inode and return a dentry for that inode. Any suitable - * dentry can be returned including, if necessary, a new dentry created with - * d_alloc_root. The caller can then find any other extant dentrys by - * following the d_alias links. If a new dentry was created using - * d_alloc_root, DCACHE_NFSD_DISCONNECTED should be set, and the dentry - * should be d_rehash()ed. - * - * If the inode cannot be found, either a %NULL pointer or an %ERR_PTR code - * can be returned. 
The @inump will be whatever was passed to - * nfsd_find_fh_dentry() in either the @obj or @parent parameters. - * - * Locking rules: - * get_parent is called with child->d_inode->i_mutex down - * get_name is not (which is possibly inconsistent) + * This is a low-level filesystem helper to replace any + * direct filesystem manipulation of i_nlink. See + * drop_nlink() for why we care about i_nlink hitting zero. */ +static inline void clear_nlink(struct inode *inode) +{ + inode->i_nlink = 0; +} -struct export_operations { - struct dentry *(*decode_fh)(struct super_block *sb, __u32 *fh, int fh_len, int fh_type, - int (*acceptable)(void *context, struct dentry *de), - void *context); - int (*encode_fh)(struct dentry *de, __u32 *fh, int *max_len, - int connectable); - - /* the following are only called from the filesystem itself */ - int (*get_name)(struct dentry *parent, char *name, - struct dentry *child); - struct dentry * (*get_parent)(struct dentry *child); - struct dentry * (*get_dentry)(struct super_block *sb, void *inump); +static inline void inode_dec_link_count(struct inode *inode) +{ + drop_nlink(inode); + mark_inode_dirty(inode); +} - /* This is set by the exporting module to a standard helper */ - struct dentry * (*find_exported_dentry)( - struct super_block *sb, void *obj, void *parent, - int (*acceptable)(void *context, struct dentry *de), - void *context); +/** + * inode_inc_iversion - increments i_version + * @inode: inode that need to be updated + * + * Every time the inode is modified, the i_version field will be incremented. 
+ * The filesystem has to be mounted with i_version flag + */ +static inline void inode_inc_iversion(struct inode *inode) +{ + spin_lock(&inode->i_lock); + inode->i_version++; + spin_unlock(&inode->i_lock); +} -}; +extern void touch_atime(struct vfsmount *mnt, struct dentry *dentry); +static inline void file_accessed(struct file *file) +{ + if (!(file->f_flags & O_NOATIME)) + touch_atime(file->f_path.mnt, file->f_path.dentry); +} -extern struct dentry * -find_exported_dentry(struct super_block *sb, void *obj, void *parent, - int (*acceptable)(void *context, struct dentry *de), - void *context); +int sync_inode(struct inode *inode, struct writeback_control *wbc); +int sync_inode_metadata(struct inode *inode, int wait); struct file_system_type { const char *name; int fs_flags; - int (*get_sb) (struct file_system_type *, int, - const char *, void *, struct vfsmount *); + struct dentry *(*mount) (struct file_system_type *, int, + const char *, void *); void (*kill_sb) (struct super_block *); struct module *owner; struct file_system_type * next; struct list_head fs_supers; + struct lock_class_key s_lock_key; struct lock_class_key s_umount_key; + struct lock_class_key s_vfs_rename_key; + + struct lock_class_key i_lock_key; + struct lock_class_key i_mutex_key; + struct lock_class_key i_mutex_dir_key; + struct lock_class_key i_alloc_sem_key; }; -extern int get_sb_bdev(struct file_system_type *fs_type, +extern struct dentry *mount_ns(struct file_system_type *fs_type, int flags, + void *data, int (*fill_super)(struct super_block *, void *, int)); +extern struct dentry *mount_bdev(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, - int (*fill_super)(struct super_block *, void *, int), - struct vfsmount *mnt); -extern int get_sb_single(struct file_system_type *fs_type, + int (*fill_super)(struct super_block *, void *, int)); +extern struct dentry *mount_single(struct file_system_type *fs_type, int flags, void *data, - int (*fill_super)(struct 
super_block *, void *, int), - struct vfsmount *mnt); -extern int get_sb_nodev(struct file_system_type *fs_type, + int (*fill_super)(struct super_block *, void *, int)); +extern struct dentry *mount_nodev(struct file_system_type *fs_type, int flags, void *data, - int (*fill_super)(struct super_block *, void *, int), - struct vfsmount *mnt); + int (*fill_super)(struct super_block *, void *, int)); void generic_shutdown_super(struct super_block *sb); void kill_block_super(struct super_block *sb); void kill_anon_super(struct super_block *sb); void kill_litter_super(struct super_block *sb); void deactivate_super(struct super_block *sb); +void deactivate_locked_super(struct super_block *sb); int set_anon_super(struct super_block *s, void *data); struct super_block *sget(struct file_system_type *type, int (*test)(struct super_block *,void *), int (*set)(struct super_block *,void *), void *data); -extern int get_sb_pseudo(struct file_system_type *, char *, - struct super_operations *ops, unsigned long, - struct vfsmount *mnt); -extern int simple_set_mnt(struct vfsmount *mnt, struct super_block *sb); -int __put_super(struct super_block *sb); -int __put_super_and_need_restart(struct super_block *sb); -void unnamed_dev_init(void); +extern struct dentry *mount_pseudo(struct file_system_type *, char *, + const struct super_operations *ops, + const struct dentry_operations *dops, + unsigned long); + +static inline void sb_mark_dirty(struct super_block *sb) +{ + sb->s_dirt = 1; +} +static inline void sb_mark_clean(struct super_block *sb) +{ + sb->s_dirt = 0; +} +static inline int sb_is_dirty(struct super_block *sb) +{ + return sb->s_dirt; +} /* Alas, no aliases. 
Too much hassle with bringing module.h everywhere */ #define fops_get(fops) \ @@ -1358,24 +1868,35 @@ void unnamed_dev_init(void); extern int register_filesystem(struct file_system_type *); extern int unregister_filesystem(struct file_system_type *); -extern struct vfsmount *kern_mount(struct file_system_type *); +extern struct vfsmount *kern_mount_data(struct file_system_type *, void *data); +#define kern_mount(type) kern_mount_data(type, NULL) extern int may_umount_tree(struct vfsmount *); extern int may_umount(struct vfsmount *); -extern void umount_tree(struct vfsmount *, int, struct list_head *); -extern void release_mounts(struct list_head *); extern long do_mount(char *, char *, char *, unsigned long, void *); -extern struct vfsmount *copy_tree(struct vfsmount *, struct dentry *, int); -extern void mnt_set_mountpoint(struct vfsmount *, struct dentry *, - struct vfsmount *); - -extern int vfs_statfs(struct dentry *, struct kstatfs *); +extern struct vfsmount *collect_mounts(struct path *); +extern void drop_collected_mounts(struct vfsmount *); +extern int iterate_mounts(int (*)(struct vfsmount *, void *), void *, + struct vfsmount *); +extern int vfs_statfs(struct path *, struct kstatfs *); +extern int user_statfs(const char __user *, struct kstatfs *); +extern int fd_statfs(int, struct kstatfs *); +extern int statfs_by_dentry(struct dentry *, struct kstatfs *); +extern int freeze_super(struct super_block *super); +extern int thaw_super(struct super_block *super); +extern bool our_mnt(struct vfsmount *mnt); + +extern int current_umask(void); /* /sys/fs */ -extern struct subsystem fs_subsys; +extern struct kobject *fs_kobj; + +#define MAX_RW_COUNT (INT_MAX & PAGE_CACHE_MASK) +extern int rw_verify_area(int, struct file *, loff_t *, size_t); #define FLOCK_VERIFY_READ 1 #define FLOCK_VERIFY_WRITE 2 +#ifdef CONFIG_FILE_LOCKING extern int locks_mandatory_locked(struct inode *); extern int locks_mandatory_area(int, struct inode *, struct file *, loff_t, size_t); @@ 
-1383,23 +1904,34 @@ extern int locks_mandatory_area(int, struct inode *, struct file *, loff_t, size * Candidates for mandatory locking have the setgid bit set * but no group execute bit - an otherwise meaningless combination. */ -#define MANDATORY_LOCK(inode) \ - (IS_MANDLOCK(inode) && ((inode)->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID) + +static inline int __mandatory_lock(struct inode *ino) +{ + return (ino->i_mode & (S_ISGID | S_IXGRP)) == S_ISGID; +} + +/* + * ... and these candidates should be on MS_MANDLOCK mounted fs, + * otherwise these will be advisory locks + */ + +static inline int mandatory_lock(struct inode *ino) +{ + return IS_MANDLOCK(ino) && __mandatory_lock(ino); +} static inline int locks_verify_locked(struct inode *inode) { - if (MANDATORY_LOCK(inode)) + if (mandatory_lock(inode)) return locks_mandatory_locked(inode); return 0; } -extern int rw_verify_area(int, struct file *, loff_t *, size_t); - static inline int locks_verify_truncate(struct inode *inode, struct file *filp, loff_t size) { - if (inode->i_flock && MANDATORY_LOCK(inode)) + if (inode->i_flock && mandatory_lock(inode)) return locks_mandatory_area( FLOCK_VERIFY_WRITE, inode, filp, size < inode->i_size ? 
size : inode->i_size, @@ -1415,92 +1947,194 @@ static inline int break_lease(struct inode *inode, unsigned int mode) return __break_lease(inode, mode); return 0; } +#else /* !CONFIG_FILE_LOCKING */ +static inline int locks_mandatory_locked(struct inode *inode) +{ + return 0; +} + +static inline int locks_mandatory_area(int rw, struct inode *inode, + struct file *filp, loff_t offset, + size_t count) +{ + return 0; +} + +static inline int __mandatory_lock(struct inode *inode) +{ + return 0; +} + +static inline int mandatory_lock(struct inode *inode) +{ + return 0; +} + +static inline int locks_verify_locked(struct inode *inode) +{ + return 0; +} + +static inline int locks_verify_truncate(struct inode *inode, struct file *filp, + size_t size) +{ + return 0; +} + +static inline int break_lease(struct inode *inode, unsigned int mode) +{ + return 0; +} + +#endif /* CONFIG_FILE_LOCKING */ /* fs/open.c */ extern int do_truncate(struct dentry *, loff_t start, unsigned int time_attrs, struct file *filp); -extern long do_sys_open(int fdf, const char __user *filename, int flags, +extern int do_fallocate(struct file *file, int mode, loff_t offset, + loff_t len); +extern long do_sys_open(int dfd, const char __user *filename, int flags, int mode); extern struct file *filp_open(const char *, int, int); -extern struct file * dentry_open(struct dentry *, struct vfsmount *, int); +extern struct file *file_open_root(struct dentry *, struct vfsmount *, + const char *, int); +extern struct file * dentry_open(struct dentry *, struct vfsmount *, int, + const struct cred *); extern int filp_close(struct file *, fl_owner_t id); extern char * getname(const char __user *); +/* fs/ioctl.c */ + +extern int ioctl_preallocate(struct file *filp, void __user *argp); + /* fs/dcache.c */ extern void __init vfs_caches_init_early(void); extern void __init vfs_caches_init(unsigned long); -#define __getname() kmem_cache_alloc(names_cachep, SLAB_KERNEL) -#define __putname(name) 
kmem_cache_free(names_cachep, (void *)(name)) +extern struct kmem_cache *names_cachep; + +#define __getname_gfp(gfp) kmem_cache_alloc(names_cachep, (gfp)) +#define __getname() __getname_gfp(GFP_KERNEL) +#define __putname(name) kmem_cache_free(names_cachep, (void *)(name)) #ifndef CONFIG_AUDITSYSCALL #define putname(name) __putname(name) #else extern void putname(const char *name); #endif +#ifdef CONFIG_BLOCK extern int register_blkdev(unsigned int, const char *); -extern int unregister_blkdev(unsigned int, const char *); +extern void unregister_blkdev(unsigned int, const char *); extern struct block_device *bdget(dev_t); +extern struct block_device *bdgrab(struct block_device *bdev); extern void bd_set_size(struct block_device *, loff_t size); extern void bd_forget(struct inode *inode); extern void bdput(struct block_device *); -extern struct block_device *open_by_devnum(dev_t, unsigned); -extern struct block_device *open_partition_by_devnum(dev_t, unsigned); +extern void invalidate_bdev(struct block_device *); +extern int sync_blockdev(struct block_device *bdev); +extern struct super_block *freeze_bdev(struct block_device *); +extern void emergency_thaw_all(void); +extern int thaw_bdev(struct block_device *bdev, struct super_block *sb); +extern int fsync_bdev(struct block_device *); +#else +static inline void bd_forget(struct inode *inode) {} +static inline int sync_blockdev(struct block_device *bdev) { return 0; } +static inline void invalidate_bdev(struct block_device *bdev) {} + +static inline struct super_block *freeze_bdev(struct block_device *sb) +{ + return NULL; +} + +static inline int thaw_bdev(struct block_device *bdev, struct super_block *sb) +{ + return 0; +} +#endif +extern int sync_filesystem(struct super_block *); extern const struct file_operations def_blk_fops; -extern const struct address_space_operations def_blk_aops; extern const struct file_operations def_chr_fops; extern const struct file_operations bad_sock_fops; extern const struct 
file_operations def_fifo_fops; +#ifdef CONFIG_BLOCK extern int ioctl_by_bdev(struct block_device *, unsigned, unsigned long); -extern int blkdev_ioctl(struct inode *, struct file *, unsigned, unsigned long); +extern int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long); extern long compat_blkdev_ioctl(struct file *, unsigned, unsigned long); -extern int blkdev_get(struct block_device *, mode_t, unsigned); -extern int blkdev_put(struct block_device *); -extern int blkdev_put_partition(struct block_device *); -extern int bd_claim(struct block_device *, void *); -extern void bd_release(struct block_device *); +extern int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder); +extern struct block_device *blkdev_get_by_path(const char *path, fmode_t mode, + void *holder); +extern struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, + void *holder); +extern int blkdev_put(struct block_device *bdev, fmode_t mode); #ifdef CONFIG_SYSFS -extern int bd_claim_by_disk(struct block_device *, void *, struct gendisk *); -extern void bd_release_from_disk(struct block_device *, struct gendisk *); +extern int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk); +extern void bd_unlink_disk_holder(struct block_device *bdev, + struct gendisk *disk); #else -#define bd_claim_by_disk(bdev, holder, disk) bd_claim(bdev, holder) -#define bd_release_from_disk(bdev, disk) bd_release(bdev) +static inline int bd_link_disk_holder(struct block_device *bdev, + struct gendisk *disk) +{ + return 0; +} +static inline void bd_unlink_disk_holder(struct block_device *bdev, + struct gendisk *disk) +{ +} +#endif #endif /* fs/char_dev.c */ #define CHRDEV_MAJOR_HASH_SIZE 255 extern int alloc_chrdev_region(dev_t *, unsigned, unsigned, const char *); extern int register_chrdev_region(dev_t, unsigned, const char *); -extern int register_chrdev(unsigned int, const char *, - const struct file_operations *); -extern int unregister_chrdev(unsigned int, 
const char *); +extern int __register_chrdev(unsigned int major, unsigned int baseminor, + unsigned int count, const char *name, + const struct file_operations *fops); +extern void __unregister_chrdev(unsigned int major, unsigned int baseminor, + unsigned int count, const char *name); extern void unregister_chrdev_region(dev_t, unsigned); -extern int chrdev_open(struct inode *, struct file *); extern void chrdev_show(struct seq_file *,off_t); +static inline int register_chrdev(unsigned int major, const char *name, + const struct file_operations *fops) +{ + return __register_chrdev(major, 0, 256, name, fops); +} + +static inline void unregister_chrdev(unsigned int major, const char *name) +{ + __unregister_chrdev(major, 0, 256, name); +} + /* fs/block_dev.c */ -#define BLKDEV_MAJOR_HASH_SIZE 255 #define BDEVNAME_SIZE 32 /* Largest string for a blockdev identifier */ +#define BDEVT_SIZE 10 /* Largest string for MAJ:MIN for blkdev */ + +#ifdef CONFIG_BLOCK +#define BLKDEV_MAJOR_HASH_SIZE 255 extern const char *__bdevname(dev_t, char *buffer); extern const char *bdevname(struct block_device *bdev, char *buffer); extern struct block_device *lookup_bdev(const char *); -extern struct block_device *open_bdev_excl(const char *, int, void *); -extern void close_bdev_excl(struct block_device *); extern void blkdev_show(struct seq_file *,off_t); +#else +#define BLKDEV_MAJOR_HASH_SIZE 0 +#endif + extern void init_special_inode(struct inode *, umode_t, dev_t); /* Invalid inode operations -- fs/bad_inode.c */ extern void make_bad_inode(struct inode *); extern int is_bad_inode(struct inode *); -extern const struct file_operations read_fifo_fops; -extern const struct file_operations write_fifo_fops; -extern const struct file_operations rdwr_fifo_fops; +extern const struct file_operations read_pipefifo_fops; +extern const struct file_operations write_pipefifo_fops; +extern const struct file_operations rdwr_pipefifo_fops; extern int fs_may_remount_ro(struct super_block *); +#ifdef 
CONFIG_BLOCK /* * return READ, READA, or WRITE */ @@ -1511,18 +2145,21 @@ extern int fs_may_remount_ro(struct super_block *); */ #define bio_data_dir(bio) ((bio)->bi_rw & 1) +extern void check_disk_size_change(struct gendisk *disk, + struct block_device *bdev); +extern int revalidate_disk(struct gendisk *); extern int check_disk_change(struct block_device *); -extern int invalidate_inodes(struct super_block *); -extern int __invalidate_device(struct block_device *); +extern int __invalidate_device(struct block_device *, bool); extern int invalidate_partition(struct gendisk *, int); +#endif unsigned long invalidate_mapping_pages(struct address_space *mapping, pgoff_t start, pgoff_t end); -unsigned long invalidate_inode_pages(struct address_space *mapping); + static inline void invalidate_remote_inode(struct inode *inode) { if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) - invalidate_inode_pages(inode->i_mapping); + invalidate_mapping_pages(inode->i_mapping, 0, -1); } extern int invalidate_inode_pages2(struct address_space *mapping); extern int invalidate_inode_pages2_range(struct address_space *mapping, @@ -1531,26 +2168,35 @@ extern int write_inode_now(struct inode *, int); extern int filemap_fdatawrite(struct address_space *); extern int filemap_flush(struct address_space *); extern int filemap_fdatawait(struct address_space *); +extern int filemap_fdatawait_range(struct address_space *, loff_t lstart, + loff_t lend); extern int filemap_write_and_wait(struct address_space *mapping); extern int filemap_write_and_wait_range(struct address_space *mapping, loff_t lstart, loff_t lend); -extern int wait_on_page_writeback_range(struct address_space *mapping, - pgoff_t start, pgoff_t end); extern int __filemap_fdatawrite_range(struct address_space *mapping, loff_t start, loff_t end, int sync_mode); +extern int filemap_fdatawrite_range(struct address_space *mapping, + loff_t start, loff_t end); -extern long do_fsync(struct file *file, int 
datasync); +extern int vfs_fsync_range(struct file *file, loff_t start, loff_t end, + int datasync); +extern int vfs_fsync(struct file *file, int datasync); +extern int generic_write_sync(struct file *file, loff_t pos, loff_t count); extern void sync_supers(void); -extern void sync_filesystems(int wait); extern void emergency_sync(void); extern void emergency_remount(void); -extern int do_remount_sb(struct super_block *sb, int flags, - void *data, int force); +#ifdef CONFIG_BLOCK extern sector_t bmap(struct inode *, sector_t); +#endif extern int notify_change(struct dentry *, struct iattr *); -extern int permission(struct inode *, int, struct nameidata *); -extern int generic_permission(struct inode *, int, - int (*check_acl)(struct inode *, int)); +extern int inode_permission(struct inode *, int); +extern int generic_permission(struct inode *, int, unsigned int, + int (*check_acl)(struct inode *, int, unsigned int)); + +static inline bool execute_ok(struct inode *inode) +{ + return (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode); +} extern int get_write_access(struct inode *); extern int deny_write_access(struct file *); @@ -1561,18 +2207,39 @@ static inline void put_write_access(struct inode * inode) static inline void allow_write_access(struct file *file) { if (file) - atomic_inc(&file->f_dentry->d_inode->i_writecount); + atomic_inc(&file->f_path.dentry->d_inode->i_writecount); } -extern int do_pipe(int *); - -extern int open_namei(int dfd, const char *, int, int, struct nameidata *); -extern int may_open(struct nameidata *, int, int); +#ifdef CONFIG_IMA +static inline void i_readcount_dec(struct inode *inode) +{ + BUG_ON(!atomic_read(&inode->i_readcount)); + atomic_dec(&inode->i_readcount); +} +static inline void i_readcount_inc(struct inode *inode) +{ + atomic_inc(&inode->i_readcount); +} +#else +static inline void i_readcount_dec(struct inode *inode) +{ + return; +} +static inline void i_readcount_inc(struct inode *inode) +{ + return; +} +#endif +extern 
int do_pipe_flags(int *, int); +extern struct file *create_read_pipe(struct file *f, int flags); +extern struct file *create_write_pipe(int flags); +extern void free_write_pipe(struct file *); -extern int kernel_read(struct file *, unsigned long, char *, unsigned long); +extern int kernel_read(struct file *, loff_t, char *, unsigned long); extern struct file * open_exec(const char *); /* fs/dcache.c -- generic fs support functions */ extern int is_subdir(struct dentry *, struct dentry *); +extern int path_is_under(struct path *, struct path *); extern ino_t find_inode_number(struct dentry *, struct qstr *); #include <linux/err.h> @@ -1582,13 +2249,16 @@ extern loff_t default_llseek(struct file *file, loff_t offset, int origin); extern loff_t vfs_llseek(struct file *file, loff_t offset, int origin); +extern int inode_init_always(struct super_block *, struct inode *); extern void inode_init_once(struct inode *); +extern void address_space_init_once(struct address_space *mapping); +extern void ihold(struct inode * inode); extern void iput(struct inode *); extern struct inode * igrab(struct inode *); extern ino_t iunique(struct super_block *, ino_t); extern int inode_needs_sync(struct inode *inode); -extern void generic_delete_inode(struct inode *inode); -extern void generic_drop_inode(struct inode *inode); +extern int generic_delete_inode(struct inode *inode); +extern int generic_drop_inode(struct inode *inode); extern struct inode *ilookup5_nowait(struct super_block *sb, unsigned long hashval, int (*test)(struct inode *, void *), @@ -1599,39 +2269,32 @@ extern struct inode *ilookup(struct super_block *sb, unsigned long ino); extern struct inode * iget5_locked(struct super_block *, unsigned long, int (*test)(struct inode *, void *), int (*set)(struct inode *, void *), void *); extern struct inode * iget_locked(struct super_block *, unsigned long); +extern int insert_inode_locked4(struct inode *, unsigned long, int (*test)(struct inode *, void *), void *); +extern int 
insert_inode_locked(struct inode *); extern void unlock_new_inode(struct inode *); - -static inline struct inode *iget(struct super_block *sb, unsigned long ino) -{ - struct inode *inode = iget_locked(sb, ino); - - if (inode && (inode->i_state & I_NEW)) { - sb->s_op->read_inode(inode); - unlock_new_inode(inode); - } - - return inode; -} +extern unsigned int get_next_ino(void); extern void __iget(struct inode * inode); -extern void clear_inode(struct inode *); -extern void destroy_inode(struct inode *); +extern void iget_failed(struct inode *); +extern void end_writeback(struct inode *); +extern void __destroy_inode(struct inode *); extern struct inode *new_inode(struct super_block *); -extern int remove_suid(struct dentry *); -extern void remove_dquot_ref(struct super_block *, int, struct list_head *); +extern void free_inode_nonrcu(struct inode *inode); +extern int should_remove_suid(struct dentry *); +extern int file_remove_suid(struct file *); extern void __insert_inode_hash(struct inode *, unsigned long hashval); extern void remove_inode_hash(struct inode *); -static inline void insert_inode_hash(struct inode *inode) { +static inline void insert_inode_hash(struct inode *inode) +{ __insert_inode_hash(inode, inode->i_ino); } +extern void inode_sb_list_add(struct inode *inode); -extern struct file * get_empty_filp(void); -extern void file_move(struct file *f, struct list_head *list); -extern void file_kill(struct file *f); -struct bio; +#ifdef CONFIG_BLOCK extern void submit_bio(int, struct bio *); extern int bdev_read_only(struct block_device *); +#endif extern int set_blocksize(struct block_device *, int); extern int sb_set_blocksize(struct super_block *, int); extern int sb_min_blocksize(struct super_block *, int); @@ -1639,31 +2302,30 @@ extern int sb_min_blocksize(struct super_block *, int); extern int generic_file_mmap(struct file *, struct vm_area_struct *); extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *); extern int 
file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size); -extern int file_send_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size); -extern ssize_t generic_file_read(struct file *, char __user *, size_t, loff_t *); int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk); -extern ssize_t generic_file_write(struct file *, const char __user *, size_t, loff_t *); -extern ssize_t generic_file_aio_read(struct kiocb *, char __user *, size_t, loff_t); -extern ssize_t __generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t *); -extern ssize_t generic_file_aio_write(struct kiocb *, const char __user *, size_t, loff_t); -extern ssize_t generic_file_aio_write_nolock(struct kiocb *, const struct iovec *, - unsigned long, loff_t *); +extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t); +extern ssize_t __generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, + loff_t *); +extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t); extern ssize_t generic_file_direct_write(struct kiocb *, const struct iovec *, unsigned long *, loff_t, loff_t *, size_t, size_t); extern ssize_t generic_file_buffered_write(struct kiocb *, const struct iovec *, unsigned long, loff_t, loff_t *, size_t, ssize_t); extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos); extern ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos); -ssize_t generic_file_write_nolock(struct file *file, const struct iovec *iov, - unsigned long nr_segs, loff_t *ppos); -extern ssize_t generic_file_sendfile(struct file *, loff_t *, size_t, read_actor_t, void *); -extern void do_generic_mapping_read(struct address_space *mapping, - struct file_ra_state *, struct file *, - loff_t *, read_descriptor_t *, 
read_actor_t); +extern int generic_segment_checks(const struct iovec *iov, + unsigned long *nr_segs, size_t *count, int access_flags); + +/* fs/block_dev.c */ +extern ssize_t blkdev_aio_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos); +extern int blkdev_fsync(struct file *filp, int datasync); /* fs/splice.c */ extern ssize_t generic_file_splice_read(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int); +extern ssize_t default_file_splice_read(struct file *, loff_t *, + struct pipe_inode_info *, size_t, unsigned int); extern ssize_t generic_file_splice_write(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int); extern ssize_t generic_splice_sendpage(struct pipe_inode_info *pipe, @@ -1673,22 +2335,17 @@ extern long do_splice_direct(struct file *in, loff_t *ppos, struct file *out, extern void file_ra_state_init(struct file_ra_state *ra, struct address_space *mapping); -extern ssize_t generic_file_readv(struct file *filp, const struct iovec *iov, - unsigned long nr_segs, loff_t *ppos); -ssize_t generic_file_writev(struct file *filp, const struct iovec *iov, - unsigned long nr_segs, loff_t *ppos); +extern loff_t noop_llseek(struct file *file, loff_t offset, int origin); extern loff_t no_llseek(struct file *file, loff_t offset, int origin); extern loff_t generic_file_llseek(struct file *file, loff_t offset, int origin); -extern loff_t remote_llseek(struct file *file, loff_t offset, int origin); +extern loff_t generic_file_llseek_unlocked(struct file *file, loff_t offset, + int origin); extern int generic_file_open(struct inode * inode, struct file * filp); extern int nonseekable_open(struct inode * inode, struct file * filp); #ifdef CONFIG_FS_XIP extern ssize_t xip_file_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos); -extern ssize_t xip_file_sendfile(struct file *in_file, loff_t *ppos, - size_t count, read_actor_t actor, - void *target); extern int xip_file_mmap(struct 
file * file, struct vm_area_struct * vma); extern ssize_t xip_file_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos); @@ -1700,55 +2357,35 @@ static inline int xip_truncate_page(struct address_space *mapping, loff_t from) } #endif -static inline void do_generic_file_read(struct file * filp, loff_t *ppos, - read_descriptor_t * desc, - read_actor_t actor) -{ - do_generic_mapping_read(filp->f_mapping, - &filp->f_ra, - filp, - ppos, - desc, - actor); -} - -ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, - struct block_device *bdev, const struct iovec *iov, loff_t offset, - unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, - int lock_type); +#ifdef CONFIG_BLOCK +typedef void (dio_submit_t)(int rw, struct bio *bio, struct inode *inode, + loff_t file_offset); enum { - DIO_LOCKING = 1, /* need locking between buffered and direct access */ - DIO_NO_LOCKING, /* bdev; no locking at all between buffered/direct */ - DIO_OWN_LOCKING, /* filesystem locks buffered and direct internally */ + /* need locking between buffered and direct access */ + DIO_LOCKING = 0x01, + + /* filesystem does not support filling holes */ + DIO_SKIP_HOLES = 0x02, }; -static inline ssize_t blockdev_direct_IO(int rw, struct kiocb *iocb, - struct inode *inode, struct block_device *bdev, const struct iovec *iov, - loff_t offset, unsigned long nr_segs, get_block_t get_block, - dio_iodone_t end_io) -{ - return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, - nr_segs, get_block, end_io, DIO_LOCKING); -} +void dio_end_io(struct bio *bio, int error); -static inline ssize_t blockdev_direct_IO_no_locking(int rw, struct kiocb *iocb, - struct inode *inode, struct block_device *bdev, const struct iovec *iov, - loff_t offset, unsigned long nr_segs, get_block_t get_block, - dio_iodone_t end_io) -{ - return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, - nr_segs, get_block, end_io, DIO_NO_LOCKING); -} +ssize_t 
__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, + struct block_device *bdev, const struct iovec *iov, loff_t offset, + unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, + dio_submit_t submit_io, int flags); -static inline ssize_t blockdev_direct_IO_own_locking(int rw, struct kiocb *iocb, +static inline ssize_t blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, struct block_device *bdev, const struct iovec *iov, loff_t offset, unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io) { return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, - nr_segs, get_block, end_io, DIO_OWN_LOCKING); + nr_segs, get_block, end_io, NULL, + DIO_LOCKING | DIO_SKIP_HOLES); } +#endif extern const struct file_operations generic_ro_fops; @@ -1760,12 +2397,13 @@ extern int page_readlink(struct dentry *, char __user *, int); extern void *page_follow_link_light(struct dentry *, struct nameidata *); extern void page_put_link(struct dentry *, struct nameidata *, void *); extern int __page_symlink(struct inode *inode, const char *symname, int len, - gfp_t gfp_mask); + int nofs); extern int page_symlink(struct inode *inode, const char *symname, int len); -extern struct inode_operations page_symlink_inode_operations; +extern const struct inode_operations page_symlink_inode_operations; extern int generic_readlink(struct dentry *, char __user *, int); extern void generic_fillattr(struct inode *, struct kstat *); extern int vfs_getattr(struct vfsmount *, struct dentry *, struct kstat *); +void __inode_add_bytes(struct inode *inode, loff_t bytes); void inode_add_bytes(struct inode *inode, loff_t bytes); void inode_sub_bytes(struct inode *inode, loff_t bytes); loff_t inode_get_bytes(struct inode *inode); @@ -1773,48 +2411,69 @@ void inode_set_bytes(struct inode *inode, loff_t bytes); extern int vfs_readdir(struct file *, filldir_t, void *); -extern int vfs_stat(char __user *, struct kstat *); -extern int vfs_lstat(char __user 
*, struct kstat *); -extern int vfs_stat_fd(int dfd, char __user *, struct kstat *); -extern int vfs_lstat_fd(int dfd, char __user *, struct kstat *); +extern int vfs_stat(const char __user *, struct kstat *); +extern int vfs_lstat(const char __user *, struct kstat *); extern int vfs_fstat(unsigned int, struct kstat *); - -extern int vfs_ioctl(struct file *, unsigned int, unsigned int, unsigned long); - +extern int vfs_fstatat(int , const char __user *, struct kstat *, int); + +extern int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd, + unsigned long arg); +extern int __generic_block_fiemap(struct inode *inode, + struct fiemap_extent_info *fieinfo, + loff_t start, loff_t len, + get_block_t *get_block); +extern int generic_block_fiemap(struct inode *inode, + struct fiemap_extent_info *fieinfo, u64 start, + u64 len, get_block_t *get_block); + +extern void get_filesystem(struct file_system_type *fs); +extern void put_filesystem(struct file_system_type *fs); extern struct file_system_type *get_fs_type(const char *name); extern struct super_block *get_super(struct block_device *); +extern struct super_block *get_active_super(struct block_device *bdev); extern struct super_block *user_get_super(dev_t); extern void drop_super(struct super_block *sb); +extern void iterate_supers(void (*)(struct super_block *, void *), void *); extern int dcache_dir_open(struct inode *, struct file *); extern int dcache_dir_close(struct inode *, struct file *); extern loff_t dcache_dir_lseek(struct file *, loff_t, int); extern int dcache_readdir(struct file *, void *, filldir_t); +extern int simple_setattr(struct dentry *, struct iattr *); extern int simple_getattr(struct vfsmount *, struct dentry *, struct kstat *); extern int simple_statfs(struct dentry *, struct kstatfs *); extern int simple_link(struct dentry *, struct inode *, struct dentry *); extern int simple_unlink(struct inode *, struct dentry *); extern int simple_rmdir(struct inode *, struct dentry *); extern 
int simple_rename(struct inode *, struct dentry *, struct inode *, struct dentry *); -extern int simple_sync_file(struct file *, struct dentry *, int); +extern int noop_fsync(struct file *, int); extern int simple_empty(struct dentry *); extern int simple_readpage(struct file *file, struct page *page); -extern int simple_prepare_write(struct file *file, struct page *page, - unsigned offset, unsigned to); -extern int simple_commit_write(struct file *file, struct page *page, - unsigned offset, unsigned to); +extern int simple_write_begin(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned flags, + struct page **pagep, void **fsdata); +extern int simple_write_end(struct file *file, struct address_space *mapping, + loff_t pos, unsigned len, unsigned copied, + struct page *page, void *fsdata); extern struct dentry *simple_lookup(struct inode *, struct dentry *, struct nameidata *); extern ssize_t generic_read_dir(struct file *, char __user *, size_t, loff_t *); extern const struct file_operations simple_dir_operations; -extern struct inode_operations simple_dir_inode_operations; +extern const struct inode_operations simple_dir_inode_operations; struct tree_descr { char *name; const struct file_operations *ops; int mode; }; struct dentry *d_alloc_name(struct dentry *, const char *); -extern int simple_fill_super(struct super_block *, int, struct tree_descr *); +extern int simple_fill_super(struct super_block *, unsigned long, struct tree_descr *); extern int simple_pin_fs(struct file_system_type *, struct vfsmount **mount, int *count); extern void simple_release_fs(struct vfsmount **mount, int *count); -extern ssize_t simple_read_from_buffer(void __user *, size_t, loff_t *, const void *, size_t); +extern ssize_t simple_read_from_buffer(void __user *to, size_t count, + loff_t *ppos, const void *from, size_t available); +extern ssize_t simple_write_to_buffer(void *to, size_t available, loff_t *ppos, + const void __user *from, size_t 
count); + +extern int generic_file_fsync(struct file *, int); + +extern int generic_check_addressable(unsigned, u64); #ifdef CONFIG_MIGRATION extern int buffer_migrate_page(struct address_space *, @@ -1823,24 +2482,30 @@ extern int buffer_migrate_page(struct address_space *, #define buffer_migrate_page NULL #endif -extern int inode_change_ok(struct inode *, struct iattr *); -extern int __must_check inode_setattr(struct inode *, struct iattr *); +extern int inode_change_ok(const struct inode *, struct iattr *); +extern int inode_newsize_ok(const struct inode *, loff_t offset); +extern void setattr_copy(struct inode *inode, const struct iattr *attr); extern void file_update_time(struct file *file); +extern int generic_show_options(struct seq_file *m, struct vfsmount *mnt); +extern void save_mount_options(struct super_block *sb, char *options); +extern void replace_mount_options(struct super_block *sb, char *options); + static inline ino_t parent_ino(struct dentry *dentry) { ino_t res; + /* + * Don't strictly need d_lock here? If the parent ino could change + * then surely we'd have a deeper race in the caller? + */ spin_lock(&dentry->d_lock); res = dentry->d_parent->d_inode->i_ino; spin_unlock(&dentry->d_lock); return res; } -/* kernel/fork.c */ -extern int unshare_files(void); - /* Transaction based IO helpers */ /* @@ -1860,19 +2525,7 @@ ssize_t simple_transaction_read(struct file *file, char __user *buf, size_t size, loff_t *pos); int simple_transaction_release(struct inode *inode, struct file *file); -static inline void simple_transaction_set(struct file *file, size_t n) -{ - struct simple_transaction_argresp *ar = file->private_data; - - BUG_ON(n > SIMPLE_TRANSACTION_LIMIT); - - /* - * The barrier ensures that ar->size will really remain zero until - * ar->data is ready for reading. 
- */ - smp_mb(); - ar->size = n; -} +void simple_transaction_set(struct file *file, size_t n); /* * simple attribute files @@ -1896,12 +2549,13 @@ static int __fops ## _open(struct inode *inode, struct file *file) \ __simple_attr_check_format(__fmt, 0ull); \ return simple_attr_open(inode, file, __get, __set, __fmt); \ } \ -static struct file_operations __fops = { \ +static const struct file_operations __fops = { \ .owner = THIS_MODULE, \ .open = __fops ## _open, \ - .release = simple_attr_close, \ + .release = simple_attr_release, \ .read = simple_attr_read, \ .write = simple_attr_write, \ + .llseek = generic_file_llseek, \ }; static inline void __attribute__((format(printf, 1, 2))) @@ -1911,34 +2565,40 @@ __simple_attr_check_format(const char *fmt, ...) } int simple_attr_open(struct inode *inode, struct file *file, - u64 (*get)(void *), void (*set)(void *, u64), + int (*get)(void *, u64 *), int (*set)(void *, u64), const char *fmt); -int simple_attr_close(struct inode *inode, struct file *file); +int simple_attr_release(struct inode *inode, struct file *file); ssize_t simple_attr_read(struct file *file, char __user *buf, size_t len, loff_t *ppos); ssize_t simple_attr_write(struct file *file, const char __user *buf, size_t len, loff_t *ppos); +struct ctl_table; +int proc_nr_files(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos); +int proc_nr_dentry(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos); +int proc_nr_inodes(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos); +int __init get_filesystem_list(char *buf); -#ifdef CONFIG_SECURITY -static inline char *alloc_secdata(void) -{ - return (char *)get_zeroed_page(GFP_KERNEL); -} +#define __FMODE_EXEC ((__force int) FMODE_EXEC) +#define __FMODE_NONOTIFY ((__force int) FMODE_NONOTIFY) -static inline void free_secdata(void *secdata) +#define ACC_MODE(x) ("\004\002\006\006"[(x)&O_ACCMODE]) +#define 
OPEN_FMODE(flag) ((__force fmode_t)(((flag + 1) & O_ACCMODE) | \ + (flag & __FMODE_NONOTIFY))) + +static inline int is_sxid(mode_t mode) { - free_page((unsigned long)secdata); + return (mode & S_ISUID) || ((mode & S_ISGID) && (mode & S_IXGRP)); } -#else -static inline char *alloc_secdata(void) + +static inline void inode_has_no_xattr(struct inode *inode) { - return (char *)1; + if (!is_sxid(inode->i_mode) && (inode->i_sb->s_flags & MS_NOSEC)) + inode->i_flags |= S_NOSEC; } -static inline void free_secdata(void *secdata) -{ } -#endif /* CONFIG_SECURITY */ - #endif /* __KERNEL__ */ #endif /* _LINUX_FS_H */ |