4 * vfs operations that deal with files
6 * Copyright (C) International Business Machines Corp., 2002,2010
7 * Author(s): Steve French (sfrench@us.ibm.com)
8 * Jeremy Allison (jra@samba.org)
10 * This library is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU Lesser General Public License as published
12 * by the Free Software Foundation; either version 2.1 of the License, or
13 * (at your option) any later version.
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
18 * the GNU Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public License
21 * along with this library; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25 #include <linux/backing-dev.h>
26 #include <linux/stat.h>
27 #include <linux/fcntl.h>
28 #include <linux/pagemap.h>
29 #include <linux/pagevec.h>
30 #include <linux/writeback.h>
31 #include <linux/task_io_accounting_ops.h>
32 #include <linux/delay.h>
33 #include <linux/mount.h>
34 #include <linux/slab.h>
35 #include <linux/swap.h>
36 #include <asm/div64.h>
40 #include "cifsproto.h"
41 #include "cifs_unicode.h"
42 #include "cifs_debug.h"
43 #include "cifs_fs_sb.h"
/*
 * Translate the access mode of the POSIX open flags into the access mask
 * requested from the server.  O_RDWR asks for GENERIC_READ | GENERIC_WRITE
 * rather than GENERIC_ALL (see the comment in the body).  The last return
 * builds a mask out of individual rights (READ_CONTROL,
 * FILE_WRITE_ATTRIBUTES, FILE_WRITE_DATA, ...).
 * NOTE(review): the return statements of the O_RDONLY and O_WRONLY
 * branches and the tail of the final mask are not visible in this
 * listing; confirm them against the complete source.
 */
46 static inline int cifs_convert_flags(unsigned int flags)
48 if ((flags & O_ACCMODE) == O_RDONLY)
50 else if ((flags & O_ACCMODE) == O_WRONLY)
52 else if ((flags & O_ACCMODE) == O_RDWR) {
53 /* GENERIC_ALL is too much permission to request
54 can cause unnecessary access denied on create */
55 /* return GENERIC_ALL; */
56 return (GENERIC_READ | GENERIC_WRITE);
59 return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
60 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
/*
 * Build the SMB_O_* flag word used by the POSIX open call from the POSIX
 * open flags.  The access mode selects SMB_O_RDONLY, SMB_O_WRONLY or
 * SMB_O_RDWR; SMB_O_DIRECTORY and SMB_O_NOFOLLOW are added when the
 * matching O_* bit is set.
 * NOTE(review): the conditions guarding SMB_O_CREAT, SMB_O_EXCL,
 * SMB_O_TRUNC, SMB_O_SYNC and SMB_O_DIRECT, as well as the declaration
 * and return of posix_flags, are not visible in this listing -
 * presumably each is guarded by the corresponding O_* flag; confirm.
 */
64 static u32 cifs_posix_convert_flags(unsigned int flags)
68 if ((flags & O_ACCMODE) == O_RDONLY)
69 posix_flags = SMB_O_RDONLY;
70 else if ((flags & O_ACCMODE) == O_WRONLY)
71 posix_flags = SMB_O_WRONLY;
72 else if ((flags & O_ACCMODE) == O_RDWR)
73 posix_flags = SMB_O_RDWR;
76 posix_flags |= SMB_O_CREAT;
78 posix_flags |= SMB_O_EXCL;
80 posix_flags |= SMB_O_TRUNC;
81 /* be safe and imply O_SYNC for O_DSYNC */
83 posix_flags |= SMB_O_SYNC;
84 if (flags & O_DIRECTORY)
85 posix_flags |= SMB_O_DIRECTORY;
86 if (flags & O_NOFOLLOW)
87 posix_flags |= SMB_O_NOFOLLOW;
89 posix_flags |= SMB_O_DIRECT;
/*
 * Choose the create disposition for an open from the O_CREAT, O_EXCL and
 * O_TRUNC bits.  The combinations are tested from most to least specific:
 * O_CREAT | O_EXCL, then O_CREAT | O_TRUNC (FILE_OVERWRITE_IF), then
 * O_CREAT alone, then O_TRUNC alone (FILE_OVERWRITE).
 * NOTE(review): the return values for O_CREAT | O_EXCL, for plain O_CREAT
 * and for the default case are not visible in this listing; the mapping
 * table in cifs_nt_open lists FILE_CREATE, FILE_OPEN_IF and FILE_OPEN for
 * them - confirm against the complete source.
 */
94 static inline int cifs_get_disposition(unsigned int flags)
96 if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
98 else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
99 return FILE_OVERWRITE_IF;
100 else if ((flags & O_CREAT) == O_CREAT)
102 else if ((flags & O_TRUNC) == O_TRUNC)
103 return FILE_OVERWRITE;
/*
 * Open (or create) full_path with the POSIX create call, CIFSPOSIXCreate.
 *
 * full_path: path handed to the server
 * pinode:    in/out inode pointer; when *pinode is NULL a new inode is
 *            looked up with cifs_iget() from the returned attributes
 * sb:        superblock, used to reach the cifs_sb_info and a tcon link
 * mode:      creation mode; the current umask is cleared from it
 * f_flags:   POSIX open flags, converted by cifs_posix_convert_flags()
 * poplock, pnetfid, xid: passed through to CIFSPOSIXCreate
 *
 * A response Type of -1 means the open succeeded but no attributes were
 * returned, so the caller has to query the path info itself.
 * NOTE(review): the error checks after kzalloc, cifs_sb_tlink and
 * CIFSPOSIXCreate, the condition in front of the second
 * "goto posix_open_ret", the posix_open_ret label with its cleanup and
 * the return are not visible in this listing.  cifs_fattr_to_inode() is
 * presumably the else branch used when *pinode is already set; confirm.
 */
108 int cifs_posix_open(char *full_path, struct inode **pinode,
109 struct super_block *sb, int mode, unsigned int f_flags,
110 __u32 *poplock, __u16 *pnetfid, int xid)
113 FILE_UNIX_BASIC_INFO *presp_data;
114 __u32 posix_flags = 0;
115 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
116 struct cifs_fattr fattr;
117 struct tcon_link *tlink;
118 struct cifs_tcon *tcon;
120 cFYI(1, "posix open %s", full_path);
122 presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
123 if (presp_data == NULL)
126 tlink = cifs_sb_tlink(cifs_sb);
132 tcon = tlink_tcon(tlink);
133 mode &= ~current_umask();
135 posix_flags = cifs_posix_convert_flags(f_flags);
136 rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
137 poplock, full_path, cifs_sb->local_nls,
138 cifs_sb->mnt_cifs_flags &
139 CIFS_MOUNT_MAP_SPECIAL_CHR);
140 cifs_put_tlink(tlink);
145 if (presp_data->Type == cpu_to_le32(-1))
146 goto posix_open_ret; /* open ok, caller does qpathinfo */
149 goto posix_open_ret; /* caller does not need info */
151 cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);
153 /* get new inode and set it up */
154 if (*pinode == NULL) {
155 cifs_fill_uniqueid(sb, &fattr);
156 *pinode = cifs_iget(sb, &fattr);
162 cifs_fattr_to_inode(*pinode, &fattr);
/*
 * Open full_path with the NT-style (non-POSIX) open calls.
 *
 * The desired access comes from cifs_convert_flags() and the create
 * disposition from cifs_get_disposition().  CIFSSMBOpen is used when the
 * session advertises CAP_NT_SMBS; SMBLegacyOpen is the other call visible
 * here (presumably the else branch).  A FILE_ALL_INFO buffer is allocated
 * for the open response and handed to cifs_get_inode_info(); a call to
 * cifs_get_inode_info_unix() is visible as well, but the condition that
 * selects between the two is not part of this listing.
 * The netfid and oplock are returned through pnetfid and poplock by the
 * open calls.
 * NOTE(review): CREATE_OPEN_BACKUP_INTENT is only added to the options
 * passed to CIFSSMBOpen; the SMBLegacyOpen call passes plain
 * CREATE_NOT_DIR - confirm that this is intentional.
 * NOTE(review): the return type, several declarations, the allocation
 * failure check, the release of buf and the return are not visible here.
 */
171 cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
172 struct cifs_tcon *tcon, unsigned int f_flags, __u32 *poplock,
173 __u16 *pnetfid, int xid)
178 int create_options = CREATE_NOT_DIR;
181 desiredAccess = cifs_convert_flags(f_flags);
183 /*********************************************************************
184 * open flag mapping table:
186 * POSIX Flag CIFS Disposition
187 * ---------- ----------------
188 * O_CREAT FILE_OPEN_IF
189 * O_CREAT | O_EXCL FILE_CREATE
190 * O_CREAT | O_TRUNC FILE_OVERWRITE_IF
191 * O_TRUNC FILE_OVERWRITE
192 * none of the above FILE_OPEN
194 * Note that there is not a direct match between disposition
195 * FILE_SUPERSEDE (ie create whether or not file exists although
196 * O_CREAT | O_TRUNC is similar but truncates the existing
197 * file rather than creating a new file as FILE_SUPERSEDE does
198 * (which uses the attributes / metadata passed in on open call)
200 *? O_SYNC is a reasonable match to CIFS writethrough flag
201 *? and the read write flags match reasonably. O_LARGEFILE
202 *? is irrelevant because largefile support is always used
203 *? by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
204 * O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
205 *********************************************************************/
207 disposition = cifs_get_disposition(f_flags);
209 /* BB pass O_SYNC flag through on file attributes .. BB */
211 buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
215 if (backup_cred(cifs_sb))
216 create_options |= CREATE_OPEN_BACKUP_INTENT;
218 if (tcon->ses->capabilities & CAP_NT_SMBS)
219 rc = CIFSSMBOpen(xid, tcon, full_path, disposition,
220 desiredAccess, create_options, pnetfid, poplock, buf,
221 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
222 & CIFS_MOUNT_MAP_SPECIAL_CHR);
224 rc = SMBLegacyOpen(xid, tcon, full_path, disposition,
225 desiredAccess, CREATE_NOT_DIR, pnetfid, poplock, buf,
226 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
227 & CIFS_MOUNT_MAP_SPECIAL_CHR);
233 rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
236 rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
/*
 * Allocate and initialise the cifsFileInfo for a freshly opened handle
 * and attach it to file->private_data.
 *
 * fileHandle: netfid returned by the server for the open
 * file:       struct file being opened; supplies dentry, f_flags, f_mode
 * tlink:      tcon link; the result of cifs_get_tlink(tlink) is stored
 * oplock:     oplock value applied to the inode via cifs_set_oplock_level
 *
 * The new structure starts with count 1, records the tgid and fsuid of
 * the current task, stores dget(dentry), and is linked into the tcon's
 * and the inode's open file lists under cifs_file_list_lock.  Readable
 * instances are put at the front of the inode list; the list_add_tail()
 * call is presumably the else branch for the remaining instances.
 * NOTE(review): the return statements (allocation failure and success)
 * are not visible in this listing.
 */
244 struct cifsFileInfo *
245 cifs_new_fileinfo(__u16 fileHandle, struct file *file,
246 struct tcon_link *tlink, __u32 oplock)
248 struct dentry *dentry = file->f_path.dentry;
249 struct inode *inode = dentry->d_inode;
250 struct cifsInodeInfo *pCifsInode = CIFS_I(inode);
251 struct cifsFileInfo *pCifsFile;
253 pCifsFile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
254 if (pCifsFile == NULL)
257 pCifsFile->count = 1;
258 pCifsFile->netfid = fileHandle;
259 pCifsFile->pid = current->tgid;
260 pCifsFile->uid = current_fsuid();
261 pCifsFile->dentry = dget(dentry);
262 pCifsFile->f_flags = file->f_flags;
263 pCifsFile->invalidHandle = false;
264 pCifsFile->tlink = cifs_get_tlink(tlink);
265 mutex_init(&pCifsFile->fh_mutex);
266 INIT_WORK(&pCifsFile->oplock_break, cifs_oplock_break);
268 spin_lock(&cifs_file_list_lock);
269 list_add(&pCifsFile->tlist, &(tlink_tcon(tlink)->openFileList));
270 /* if readable file instance put first in list*/
271 if (file->f_mode & FMODE_READ)
272 list_add(&pCifsFile->flist, &pCifsInode->openFileList);
274 list_add_tail(&pCifsFile->flist, &pCifsInode->openFileList);
275 spin_unlock(&cifs_file_list_lock);
277 cifs_set_oplock_level(pCifsInode, oplock);
279 file->private_data = pCifsFile;
/*
 * Summary of the visible steps of cifsFileInfo_put():
 *  - the count is decremented under cifs_file_list_lock; while it stays
 *    positive the lock is dropped again (presumably followed by a return);
 *  - otherwise the entry is unlinked from both open file lists; if that
 *    left the inode's list empty, the inode oplock level is reset to 0
 *    and, on CIFS_MOUNT_STRICT_IO mounts, invalid_mapping is set;
 *  - after the spinlock is released, pending oplock break work is
 *    cancelled and CIFSSMBClose is sent when the tcon does not need a
 *    reconnect and the handle is still valid;
 *  - lock records stored for this netfid are removed from cifsi->llist
 *    under cifsi->lock_mutex;
 *  - the tlink and dentry references are dropped.
 * NOTE(review): the xid handling around CIFSSMBClose, the freeing of the
 * lock records and of cifs_file itself are not visible in this listing.
 */
284 * Release a reference on the file private data. This may involve closing
285 * the filehandle out on the server. Must be called without holding
286 * cifs_file_list_lock.
288 void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
290 struct inode *inode = cifs_file->dentry->d_inode;
291 struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
292 struct cifsInodeInfo *cifsi = CIFS_I(inode);
293 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
294 struct cifsLockInfo *li, *tmp;
296 spin_lock(&cifs_file_list_lock);
297 if (--cifs_file->count > 0) {
298 spin_unlock(&cifs_file_list_lock);
302 /* remove it from the lists */
303 list_del(&cifs_file->flist);
304 list_del(&cifs_file->tlist);
306 if (list_empty(&cifsi->openFileList)) {
307 cFYI(1, "closing last open instance for inode %p",
308 cifs_file->dentry->d_inode);
310 /* in strict cache mode we need invalidate mapping on the last
311 close because it may cause a error when we open this file
312 again and get at least level II oplock */
313 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
314 CIFS_I(inode)->invalid_mapping = true;
316 cifs_set_oplock_level(cifsi, 0);
318 spin_unlock(&cifs_file_list_lock);
320 cancel_work_sync(&cifs_file->oplock_break);
322 if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
326 rc = CIFSSMBClose(xid, tcon, cifs_file->netfid);
330 /* Delete any outstanding lock records. We'll lose them when the file
333 mutex_lock(&cifsi->lock_mutex);
334 list_for_each_entry_safe(li, tmp, &cifsi->llist, llist) {
335 if (li->netfid != cifs_file->netfid)
337 list_del(&li->llist);
340 mutex_unlock(&cifsi->lock_mutex);
342 cifs_put_tlink(cifs_file->tlink);
343 dput(cifs_file->dentry);
/*
 * Open the file described by inode and file on the server and attach a
 * cifsFileInfo to the struct file.
 *
 * When the tcon supports POSIX path operations (unix_ext, CAP_UNIX and
 * CIFS_UNIX_POSIX_PATH_OPS_CAP) and posix open has not been marked
 * broken, cifs_posix_open() is tried first.  -EINVAL or -EOPNOTSUPP from
 * it sets tcon->broken_posix_open, which disables the attempt for later
 * opens on this tcon.  If the posix open did not succeed, cifs_nt_open()
 * is used.  If cifs_new_fileinfo() then fails, the server handle is
 * closed again with CIFSSMBClose.  When the oplock value carries
 * CIFS_CREATE_ACTION after a non-posix open on a unix_ext tcon, the mode
 * is set afterwards through CIFSSMBUnixSetFileInfo.  The tlink reference
 * taken at the start is dropped before leaving.
 * NOTE(review): the (rc != -EOPNOTSUPP) test in the last else-if is
 * redundant, because the preceding branch already handles that value.
 * NOTE(review): xid setup and teardown, the error exits, the freeing of
 * full_path and the return are not visible in this listing.
 */
347 int cifs_open(struct inode *inode, struct file *file)
352 struct cifs_sb_info *cifs_sb;
353 struct cifs_tcon *tcon;
354 struct tcon_link *tlink;
355 struct cifsFileInfo *pCifsFile = NULL;
356 char *full_path = NULL;
357 bool posix_open_ok = false;
362 cifs_sb = CIFS_SB(inode->i_sb);
363 tlink = cifs_sb_tlink(cifs_sb);
366 return PTR_ERR(tlink);
368 tcon = tlink_tcon(tlink);
370 full_path = build_path_from_dentry(file->f_path.dentry);
371 if (full_path == NULL) {
376 cFYI(1, "inode = 0x%p file flags are 0x%x for %s",
377 inode, file->f_flags, full_path);
384 if (!tcon->broken_posix_open && tcon->unix_ext &&
385 (tcon->ses->capabilities & CAP_UNIX) &&
386 (CIFS_UNIX_POSIX_PATH_OPS_CAP &
387 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
388 /* can not refresh inode info since size could be stale */
389 rc = cifs_posix_open(full_path, &inode, inode->i_sb,
390 cifs_sb->mnt_file_mode /* ignored */,
391 file->f_flags, &oplock, &netfid, xid);
393 cFYI(1, "posix open succeeded");
394 posix_open_ok = true;
395 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
396 if (tcon->ses->serverNOS)
397 cERROR(1, "server %s of type %s returned"
398 " unexpected error on SMB posix open"
399 ", disabling posix open support."
400 " Check if server update available.",
401 tcon->ses->serverName,
402 tcon->ses->serverNOS);
403 tcon->broken_posix_open = true;
404 } else if ((rc != -EIO) && (rc != -EREMOTE) &&
405 (rc != -EOPNOTSUPP)) /* path not found or net err */
407 /* else fallthrough to retry open the old way on network i/o
411 if (!posix_open_ok) {
412 rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
413 file->f_flags, &oplock, &netfid, xid);
418 pCifsFile = cifs_new_fileinfo(netfid, file, tlink, oplock);
419 if (pCifsFile == NULL) {
420 CIFSSMBClose(xid, tcon, netfid);
425 cifs_fscache_set_inode_cookie(inode, file);
427 if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
428 /* time to set mode which we can not set earlier due to
429 problems creating new read-only files */
430 struct cifs_unix_set_info_args args = {
431 .mode = inode->i_mode,
434 .ctime = NO_CHANGE_64,
435 .atime = NO_CHANGE_64,
436 .mtime = NO_CHANGE_64,
439 CIFSSMBUnixSetFileInfo(xid, tcon, &args, netfid,
446 cifs_put_tlink(tlink);
450 /* Try to reacquire byte range locks that were released when session */
451 /* to server was lost */
/*
 * NOTE(review): only a placeholder (BB) comment is visible in the body;
 * no relocking logic appears in this listing.  cifs_reopen_file() calls
 * this function after a successful reopen and does not use its result.
 */
452 static int cifs_relock_file(struct cifsFileInfo *cifsFile)
456 /* BB list all locks open on this file and relock */
/*
 * Re-establish the server handle of an open file whose handle was marked
 * invalid.
 *
 * pCifsFile: open file instance to reopen
 * can_flush: callers pass false when flushing dirty pages from here could
 *            deadlock (see the comment at the call in cifs_write)
 *
 * Runs under pCifsFile->fh_mutex.  If invalidHandle is not set the mutex
 * is simply released again (presumably followed by an early return).
 * Otherwise the path is rebuilt from the dentry and, on tcons with POSIX
 * path operations, cifs_posix_open() is tried with O_CREAT, O_EXCL and
 * O_TRUNC masked off.  The fallback is CIFSSMBOpen with disposition
 * FILE_OPEN and no FILE_ALL_INFO buffer.  After a successful open the new
 * netfid is stored, invalidHandle is cleared and the mutex is dropped;
 * dirty pages are written back with filemap_write_and_wait() and the
 * inode info is refreshed (the can_flush test guarding this is not
 * visible here), the oplock level is set and cifs_relock_file() is
 * called.
 * NOTE(review): several declarations, the checks of rc, the
 * reopen_error_exit label, the freeing of full_path and the return are
 * not visible in this listing.
 */
461 static int cifs_reopen_file(struct cifsFileInfo *pCifsFile, bool can_flush)
466 struct cifs_sb_info *cifs_sb;
467 struct cifs_tcon *tcon;
468 struct cifsInodeInfo *pCifsInode;
470 char *full_path = NULL;
472 int disposition = FILE_OPEN;
473 int create_options = CREATE_NOT_DIR;
477 mutex_lock(&pCifsFile->fh_mutex);
478 if (!pCifsFile->invalidHandle) {
479 mutex_unlock(&pCifsFile->fh_mutex);
485 inode = pCifsFile->dentry->d_inode;
486 cifs_sb = CIFS_SB(inode->i_sb);
487 tcon = tlink_tcon(pCifsFile->tlink);
489 /* can not grab rename sem here because various ops, including
490 those that already have the rename sem can end up causing writepage
491 to get called and if the server was down that means we end up here,
492 and we can never tell if the caller already has the rename_sem */
493 full_path = build_path_from_dentry(pCifsFile->dentry);
494 if (full_path == NULL) {
496 mutex_unlock(&pCifsFile->fh_mutex);
501 cFYI(1, "inode = 0x%p file flags 0x%x for %s",
502 inode, pCifsFile->f_flags, full_path);
509 if (tcon->unix_ext && (tcon->ses->capabilities & CAP_UNIX) &&
510 (CIFS_UNIX_POSIX_PATH_OPS_CAP &
511 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
514 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
515 * original open. Must mask them off for a reopen.
517 unsigned int oflags = pCifsFile->f_flags &
518 ~(O_CREAT | O_EXCL | O_TRUNC);
520 rc = cifs_posix_open(full_path, NULL, inode->i_sb,
521 cifs_sb->mnt_file_mode /* ignored */,
522 oflags, &oplock, &netfid, xid);
524 cFYI(1, "posix reopen succeeded");
527 /* fallthrough to retry open the old way on errors, especially
528 in the reconnect path it is important to retry hard */
531 desiredAccess = cifs_convert_flags(pCifsFile->f_flags);
533 if (backup_cred(cifs_sb))
534 create_options |= CREATE_OPEN_BACKUP_INTENT;
536 /* Can not refresh inode by passing in file_info buf to be returned
537 by SMBOpen and then calling get_inode_info with returned buf
538 since file might have write behind data that needs to be flushed
539 and server version of file size can be stale. If we knew for sure
540 that inode was not dirty locally we could do this */
542 rc = CIFSSMBOpen(xid, tcon, full_path, disposition, desiredAccess,
543 create_options, &netfid, &oplock, NULL,
544 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
545 CIFS_MOUNT_MAP_SPECIAL_CHR);
547 mutex_unlock(&pCifsFile->fh_mutex);
548 cFYI(1, "cifs_open returned 0x%x", rc);
549 cFYI(1, "oplock: %d", oplock);
550 goto reopen_error_exit;
554 pCifsFile->netfid = netfid;
555 pCifsFile->invalidHandle = false;
556 mutex_unlock(&pCifsFile->fh_mutex);
557 pCifsInode = CIFS_I(inode);
560 rc = filemap_write_and_wait(inode->i_mapping);
561 mapping_set_error(inode->i_mapping, rc);
564 rc = cifs_get_inode_info_unix(&inode,
565 full_path, inode->i_sb, xid);
567 rc = cifs_get_inode_info(&inode,
568 full_path, NULL, inode->i_sb,
570 } /* else we are writing out data to server already
571 and could deadlock if we tried to flush data, and
572 since we do not know if we have data that would
573 invalidate the current end of file on the server
574 we can not go to the server to get the new inod
577 cifs_set_oplock_level(pCifsInode, oplock);
579 cifs_relock_file(pCifsFile);
/*
 * Drop the reference that the struct file holds on its cifsFileInfo (via
 * cifsFileInfo_put) and clear file->private_data.  Nothing is done when
 * private_data is already NULL.
 * NOTE(review): the return statement is not visible in this listing.
 */
587 int cifs_close(struct inode *inode, struct file *file)
589 if (file->private_data != NULL) {
590 cifsFileInfo_put(file->private_data);
591 file->private_data = NULL;
594 /* return code from the ->release op is always ignored */
/*
 * Release the private data attached to an open directory.
 *
 * If the search was neither finished (srch_inf.endOfSearch) nor already
 * invalidated, the handle is marked invalid under cifs_file_list_lock
 * and CIFSFindClose is sent to the server; its result is only logged.
 * A network buffer still referenced by srch_inf.ntwrk_buf_start is
 * released with cifs_small_buf_release() or cifs_buf_release() depending
 * on srch_inf.smallBuf, the tlink reference is dropped and the private
 * data is freed.
 * NOTE(review): the NULL checks on the private data and on ptmp, the xid
 * handling and the return are not visible in this listing.
 */
598 int cifs_closedir(struct inode *inode, struct file *file)
602 struct cifsFileInfo *pCFileStruct = file->private_data;
605 cFYI(1, "Closedir inode = 0x%p", inode);
610 struct cifs_tcon *pTcon = tlink_tcon(pCFileStruct->tlink);
612 cFYI(1, "Freeing private data in close dir");
613 spin_lock(&cifs_file_list_lock);
614 if (!pCFileStruct->srch_inf.endOfSearch &&
615 !pCFileStruct->invalidHandle) {
616 pCFileStruct->invalidHandle = true;
617 spin_unlock(&cifs_file_list_lock);
618 rc = CIFSFindClose(xid, pTcon, pCFileStruct->netfid);
619 cFYI(1, "Closing uncompleted readdir with rc %d",
621 /* not much we can do if it fails anyway, ignore rc */
624 spin_unlock(&cifs_file_list_lock);
625 ptmp = pCFileStruct->srch_inf.ntwrk_buf_start;
627 cFYI(1, "closedir free smb buf in srch struct");
628 pCFileStruct->srch_inf.ntwrk_buf_start = NULL;
629 if (pCFileStruct->srch_inf.smallBuf)
630 cifs_small_buf_release(ptmp);
632 cifs_buf_release(ptmp);
634 cifs_put_tlink(pCFileStruct->tlink);
635 kfree(file->private_data);
636 file->private_data = NULL;
638 /* BB can we lock the filestruct while this is going on? */
/*
 * Allocate a cifsLockInfo record and append it to cinode->llist while
 * holding cinode->lock_mutex.  The record is tagged with the tgid of the
 * current task.
 * NOTE(review): the allocation failure check, the assignments of len,
 * offset, type and netfid to the record and the return are not visible
 * in this listing.
 */
643 static int store_file_lock(struct cifsInodeInfo *cinode, __u64 len,
644 __u64 offset, __u8 type, __u16 netfid)
646 struct cifsLockInfo *li =
647 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
654 li->pid = current->tgid;
655 mutex_lock(&cinode->lock_mutex);
656 list_add_tail(&li->llist, &cinode->llist);
657 mutex_unlock(&cinode->lock_mutex);
/*
 * Decode a struct file_lock into the values used by the locking code.
 *
 * Debug messages are logged for blocking requests (FL_SLEEP), for
 * FL_ACCESS and FL_LEASE (both reported as not implemented) and for any
 * flag bit outside the known set.  *type starts as
 * LOCKING_ANDX_LARGE_FILES and gets LOCKING_ANDX_SHARED_LOCK added for
 * F_RDLCK and F_SHLCK requests.
 * NOTE(review): the last parameter of the prototype, the debug messages
 * for FL_POSIX and FL_FLOCK and the assignments to *lock, *unlock and the
 * wait flag are not visible in this listing.
 */
662 cifs_read_flock(struct file_lock *flock, __u8 *type, int *lock, int *unlock,
665 if (flock->fl_flags & FL_POSIX)
667 if (flock->fl_flags & FL_FLOCK)
669 if (flock->fl_flags & FL_SLEEP) {
670 cFYI(1, "Blocking lock");
673 if (flock->fl_flags & FL_ACCESS)
674 cFYI(1, "Process suspended by mandatory locking - "
675 "not implemented yet");
676 if (flock->fl_flags & FL_LEASE)
677 cFYI(1, "Lease on file - not implemented yet");
678 if (flock->fl_flags &
679 (~(FL_POSIX | FL_FLOCK | FL_SLEEP | FL_ACCESS | FL_LEASE)))
680 cFYI(1, "Unknown lock flags 0x%x", flock->fl_flags);
682 *type = LOCKING_ANDX_LARGE_FILES;
683 if (flock->fl_type == F_WRLCK) {
686 } else if (flock->fl_type == F_UNLCK) {
689 /* Check if unlock includes more than one lock range */
690 } else if (flock->fl_type == F_RDLCK) {
692 *type |= LOCKING_ANDX_SHARED_LOCK;
694 } else if (flock->fl_type == F_EXLCK) {
697 } else if (flock->fl_type == F_SHLCK) {
699 *type |= LOCKING_ANDX_SHARED_LOCK;
702 cFYI(1, "Unknown type of lock");
/*
 * Test whether the range described by flock could be locked and report
 * the answer in flock->fl_type.
 *
 * The length of the range is 1 + fl_end - fl_start.  One path sends the
 * query with CIFSSMBPosixLock in "get" mode (presumably when posix_lck is
 * set; the condition is not visible here).  The other path probes by
 * taking the lock with CIFSSMBLock: if the probe is granted the range is
 * unlocked again and F_UNLCK is reported.  For a shared request that was
 * not granted F_WRLCK is reported.  Otherwise the probe is repeated with
 * LOCKING_ANDX_SHARED_LOCK added: if that is granted the range is
 * unlocked and F_RDLCK is reported, else F_WRLCK.
 * A failure of one of the unlock calls is logged with cERROR.
 * NOTE(review): the checks of rc between the calls and the returns are
 * not visible in this listing; the description of the branch structure
 * follows the order of the visible statements and should be confirmed.
 */
706 cifs_getlk(struct cifsFileInfo *cfile, struct file_lock *flock, __u8 type,
707 bool wait_flag, bool posix_lck, int xid)
710 __u64 length = 1 + flock->fl_end - flock->fl_start;
711 __u16 netfid = cfile->netfid;
712 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
716 if (type & LOCKING_ANDX_SHARED_LOCK)
717 posix_lock_type = CIFS_RDLCK;
719 posix_lock_type = CIFS_WRLCK;
720 rc = CIFSSMBPosixLock(xid, tcon, netfid, 1 /* get */,
721 length, flock, posix_lock_type,
726 /* BB we could chain these into one lock request BB */
727 rc = CIFSSMBLock(xid, tcon, netfid, current->tgid, length,
728 flock->fl_start, 0, 1, type, 0, 0);
730 rc = CIFSSMBLock(xid, tcon, netfid, current->tgid,
731 length, flock->fl_start, 1, 0,
733 flock->fl_type = F_UNLCK;
735 cERROR(1, "Error unlocking previously locked "
736 "range %d during test of lock", rc);
741 if (type & LOCKING_ANDX_SHARED_LOCK) {
742 flock->fl_type = F_WRLCK;
747 rc = CIFSSMBLock(xid, tcon, netfid, current->tgid, length,
748 flock->fl_start, 0, 1,
749 type | LOCKING_ANDX_SHARED_LOCK, 0, 0);
751 rc = CIFSSMBLock(xid, tcon, netfid, current->tgid,
752 length, flock->fl_start, 1, 0,
753 type | LOCKING_ANDX_SHARED_LOCK,
755 flock->fl_type = F_RDLCK;
757 cERROR(1, "Error unlocking previously locked "
758 "range %d during test of lock", rc);
760 flock->fl_type = F_WRLCK;
/*
 * Set or release a byte range lock on the server for the range described
 * by flock (length 1 + fl_end - fl_start).
 *
 * One path sends the request with CIFSSMBPosixLock in "set" mode, using
 * CIFS_RDLCK, CIFS_WRLCK or CIFS_UNLCK as the lock type (presumably when
 * posix_lck is set; the condition is not visible here).  On the other
 * path a lock request goes out through CIFSSMBLock and is then recorded
 * with store_file_lock(), because these locks have to be remembered by
 * the client.  For an unlock, the stored locks on cinode->llist are
 * walked under cinode->lock_mutex: entries that the unlock range does not
 * cover completely, or that belong to another tgid or another netfid,
 * fail the visible tests (presumably they are skipped); the remaining
 * ones are unlocked on the server one by one and removed from the list.
 * For FL_POSIX requests posix_lock_file_wait() is called at the end.
 * NOTE(review): the conditions selecting the paths, the handling of
 * stored_rc, the freeing of removed records and the return are not
 * visible in this listing.
 */
767 cifs_setlk(struct file *file, struct file_lock *flock, __u8 type,
768 bool wait_flag, bool posix_lck, int lock, int unlock, int xid)
771 __u64 length = 1 + flock->fl_end - flock->fl_start;
772 struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
773 struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
774 struct cifsInodeInfo *cinode = CIFS_I(file->f_path.dentry->d_inode);
775 __u16 netfid = cfile->netfid;
779 if (type & LOCKING_ANDX_SHARED_LOCK)
780 posix_lock_type = CIFS_RDLCK;
782 posix_lock_type = CIFS_WRLCK;
785 posix_lock_type = CIFS_UNLCK;
787 rc = CIFSSMBPosixLock(xid, tcon, netfid, 0 /* set */, length,
788 flock, posix_lock_type, wait_flag);
793 rc = CIFSSMBLock(xid, tcon, netfid, current->tgid, length,
794 flock->fl_start, 0, lock, type, wait_flag, 0);
796 /* For Windows locks we must store them. */
797 rc = store_file_lock(cinode, length, flock->fl_start,
802 * For each stored lock that this unlock overlaps completely,
806 struct cifsLockInfo *li, *tmp;
808 mutex_lock(&cinode->lock_mutex);
809 list_for_each_entry_safe(li, tmp, &cinode->llist, llist) {
810 if (flock->fl_start > li->offset ||
811 (flock->fl_start + length) <
812 (li->offset + li->length))
814 if (current->tgid != li->pid)
816 if (cfile->netfid != li->netfid)
819 stored_rc = CIFSSMBLock(xid, tcon, netfid,
820 current->tgid, li->length,
821 li->offset, 1, 0, li->type,
826 list_del(&li->llist);
830 mutex_unlock(&cinode->lock_mutex);
833 if (flock->fl_flags & FL_POSIX)
834 posix_lock_file_wait(file, flock);
/*
 * Entry point for byte range lock requests on a file.
 *
 * The request is decoded with cifs_read_flock().  The test on CAP_UNIX,
 * CIFS_UNIX_FCNTL_CAP and the absence of CIFS_MOUNT_NOPOSIXBRL decides
 * about the POSIX lock path (presumably by setting posix_lck; the
 * assignment is not visible here).  The request is then handed to
 * cifs_getlk() or cifs_setlk(); a request that is neither a lock nor an
 * unlock is treated as nothing to do.
 * NOTE(review): the test of cmd that selects cifs_getlk(), the xid
 * handling and the returns are not visible in this listing.
 */
838 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
841 int lock = 0, unlock = 0;
842 bool wait_flag = false;
843 bool posix_lck = false;
844 struct cifs_sb_info *cifs_sb;
845 struct cifs_tcon *tcon;
846 struct cifsInodeInfo *cinode;
847 struct cifsFileInfo *cfile;
854 cFYI(1, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld "
855 "end: %lld", cmd, flock->fl_flags, flock->fl_type,
856 flock->fl_start, flock->fl_end);
858 cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag);
860 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
861 cfile = (struct cifsFileInfo *)file->private_data;
862 tcon = tlink_tcon(cfile->tlink);
863 netfid = cfile->netfid;
864 cinode = CIFS_I(file->f_path.dentry->d_inode);
866 if ((tcon->ses->capabilities & CAP_UNIX) &&
867 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
868 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
871 * BB add code here to normalize offset and length to account for
872 * negative length which we can not accept over the wire.
875 rc = cifs_getlk(cfile, flock, type, wait_flag, posix_lck, xid);
880 if (!lock && !unlock) {
882 * if no lock or unlock then nothing to do since we do not
889 rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
895 /* update the file size (if needed) after a write */
/*
 * Only cifsi->server_eof is touched here: it is raised to
 * offset + bytes_written when the write ended beyond the recorded value,
 * and is never lowered.
 */
897 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
898 unsigned int bytes_written)
900 loff_t end_of_write = offset + bytes_written;
902 if (end_of_write > cifsi->server_eof)
903 cifsi->server_eof = end_of_write;
/*
 * Write write_size bytes from write_data to the server through the given
 * open file, starting at *poffset.
 *
 * The data is sent in chunks of at most cifs_sb->wsize bytes with
 * CIFSSMBWrite2.  Each chunk is retried while the result is -EAGAIN; an
 * invalid handle is reopened first with cifs_reopen_file(open_file,
 * false) so that the reopen does not try to flush data.  After a chunk
 * has been written the cached server end of file is updated through
 * cifs_update_eof() and *poffset is advanced.  When the loop is done the
 * per-tcon byte counter is updated, i_size is raised under i_lock if
 * *poffset went past it, the inode is marked dirty and total_written is
 * returned.
 * NOTE(review): the last parameter of the prototype (poffset), the
 * handling of a failed or zero-length write inside the loop and the xid
 * handling are not visible in this listing.
 */
906 static ssize_t cifs_write(struct cifsFileInfo *open_file, __u32 pid,
907 const char *write_data, size_t write_size,
911 unsigned int bytes_written = 0;
912 unsigned int total_written;
913 struct cifs_sb_info *cifs_sb;
914 struct cifs_tcon *pTcon;
916 struct dentry *dentry = open_file->dentry;
917 struct cifsInodeInfo *cifsi = CIFS_I(dentry->d_inode);
918 struct cifs_io_parms io_parms;
920 cifs_sb = CIFS_SB(dentry->d_sb);
922 cFYI(1, "write %zd bytes to offset %lld of %s", write_size,
923 *poffset, dentry->d_name.name);
925 pTcon = tlink_tcon(open_file->tlink);
929 for (total_written = 0; write_size > total_written;
930 total_written += bytes_written) {
932 while (rc == -EAGAIN) {
936 if (open_file->invalidHandle) {
937 /* we could deadlock if we called
938 filemap_fdatawait from here so tell
939 reopen_file not to flush data to
941 rc = cifs_reopen_file(open_file, false);
946 len = min((size_t)cifs_sb->wsize,
947 write_size - total_written);
948 /* iov[0] is reserved for smb header */
949 iov[1].iov_base = (char *)write_data + total_written;
950 iov[1].iov_len = len;
951 io_parms.netfid = open_file->netfid;
953 io_parms.tcon = pTcon;
954 io_parms.offset = *poffset;
955 io_parms.length = len;
956 rc = CIFSSMBWrite2(xid, &io_parms, &bytes_written, iov,
959 if (rc || (bytes_written == 0)) {
967 cifs_update_eof(cifsi, *poffset, bytes_written);
968 *poffset += bytes_written;
972 cifs_stats_bytes_written(pTcon, total_written);
974 if (total_written > 0) {
975 spin_lock(&dentry->d_inode->i_lock);
976 if (*poffset > dentry->d_inode->i_size)
977 i_size_write(dentry->d_inode, *poffset);
978 spin_unlock(&dentry->d_inode->i_lock);
980 mark_inode_dirty_sync(dentry->d_inode);
982 return total_written;
/*
 * Look for an open instance of the inode that can be used for reading.
 *
 * The inode's open file list is walked under cifs_file_list_lock.
 * Entries are tested against the current fsuid when fsuid_only is in
 * effect; the filter is only meant for CIFS_MOUNT_MULTIUSER mounts.
 * The first readable entry with a valid handle gets an extra reference
 * through cifsFileInfo_get() before the lock is dropped.  The walk stops
 * at the first write-only entry because those are kept at the end of the
 * list.
 * NOTE(review): the second parameter of the prototype, the statement that
 * adjusts the fsuid filter on non-multiuser mounts, the continue after
 * the fsuid test and the return statements are not visible in this
 * listing.
 */
985 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
988 struct cifsFileInfo *open_file = NULL;
989 struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
991 /* only filter by fsuid on multiuser mounts */
992 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
995 spin_lock(&cifs_file_list_lock);
996 /* we could simply get the first_list_entry since write-only entries
997 are always at the end of the list but since the first entry might
998 have a close pending, we go through the whole list */
999 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1000 if (fsuid_only && open_file->uid != current_fsuid())
1002 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1003 if (!open_file->invalidHandle) {
1004 /* found a good file */
1005 /* lock it so it will not be closed on us */
1006 cifsFileInfo_get(open_file);
1007 spin_unlock(&cifs_file_list_lock);
1009 } /* else might as well continue, and look for
1010 another, or simply have the caller reopen it
1011 again rather than trying to fix this handle */
1012 } else /* write only file */
1013 break; /* write only files are last so must be done */
1015 spin_unlock(&cifs_file_list_lock);
/*
 * Look for an open instance of the inode that can be used for writing.
 *
 * A NULL cifs_inode is reported with cERROR.  The inode's open file list
 * is walked under cifs_file_list_lock.  On the first pass entries are
 * also tested against the tgid of the current task; when that pass finds
 * nothing, any_available is set and the search is restarted at
 * refind_writable without that restriction.  Entries are tested against
 * the current fsuid when fsuid_only is in effect.  A writable entry gets
 * an extra reference; if its handle is valid the search is over.  If the
 * handle is invalid the lock is dropped, because cifs_reopen_file() can
 * block, and a reopen is attempted; a failed reopen releases the
 * reference, retakes the lock and moves on to the next entry instead of
 * restarting the walk.
 * NOTE(review): the second parameter of the prototype, the
 * refind_writable label, the continue statements, the check of rc after
 * the reopen and the return statements are not visible in this listing.
 */
1019 struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
1022 struct cifsFileInfo *open_file;
1023 struct cifs_sb_info *cifs_sb;
1024 bool any_available = false;
1027 /* Having a null inode here (because mapping->host was set to zero by
1028 the VFS or MM) should not happen but we had reports of on oops (due to
1029 it being zero) during stress testcases so we need to check for it */
1031 if (cifs_inode == NULL) {
1032 cERROR(1, "Null inode passed to cifs_writeable_file");
1037 cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1039 /* only filter by fsuid on multiuser mounts */
1040 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1043 spin_lock(&cifs_file_list_lock);
1045 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1046 if (!any_available && open_file->pid != current->tgid)
1048 if (fsuid_only && open_file->uid != current_fsuid())
1050 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
1051 cifsFileInfo_get(open_file);
1053 if (!open_file->invalidHandle) {
1054 /* found a good writable file */
1055 spin_unlock(&cifs_file_list_lock);
1059 spin_unlock(&cifs_file_list_lock);
1061 /* Had to unlock since following call can block */
1062 rc = cifs_reopen_file(open_file, false);
1066 /* if it fails, try another handle if possible */
1067 cFYI(1, "wp failed on reopen file");
1068 cifsFileInfo_put(open_file);
1070 spin_lock(&cifs_file_list_lock);
1072 /* else we simply continue to the next entry. Thus
1073 we do not loop on reopen errors. If we
1074 can not reopen the file, for example if we
1075 reconnected to a server with another client
1076 racing to delete or lock the file we would not
1077 make progress if we restarted before the beginning
1078 of the loop here. */
1081 /* couldn't find useable FH with same pid, try any available */
1082 if (!any_available) {
1083 any_available = true;
1084 goto refind_writable;
1086 spin_unlock(&cifs_file_list_lock);
/*
 * Write the bytes [from, to) of a page cache page to the server.
 *
 * The file offset is page->index << PAGE_CACHE_SHIFT plus from.  The page
 * is mapped with kmap().  The range is validated (to must not exceed
 * PAGE_CACHE_SIZE and from must not exceed to).  A page that starts
 * beyond i_size is treated as a race with truncate and reported as
 * success, and "to" is clamped so that the write does not extend the
 * file.  The data is written with cifs_write() through a handle obtained
 * from find_writable_file(), whose reference is dropped afterwards, and
 * the inode's atime and mtime are set to the current time.  If no
 * writable handle is available this is only logged.
 * NOTE(review): the error returns, the kunmap() calls, the test of
 * open_file and the final return are not visible in this listing.
 */
1090 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1092 struct address_space *mapping = page->mapping;
1093 loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
1096 int bytes_written = 0;
1097 struct inode *inode;
1098 struct cifsFileInfo *open_file;
1100 if (!mapping || !mapping->host)
1103 inode = page->mapping->host;
1105 offset += (loff_t)from;
1106 write_data = kmap(page);
1109 if ((to > PAGE_CACHE_SIZE) || (from > to)) {
1114 /* racing with truncate? */
1115 if (offset > mapping->host->i_size) {
1117 return 0; /* don't care */
1120 /* check to make sure that we are not extending the file */
1121 if (mapping->host->i_size - offset < (loff_t)to)
1122 to = (unsigned)(mapping->host->i_size - offset);
1124 open_file = find_writable_file(CIFS_I(mapping->host), false);
1126 bytes_written = cifs_write(open_file, open_file->pid,
1127 write_data, to - from, &offset);
1128 cifsFileInfo_put(open_file);
1129 /* Does mm or vfs already set times? */
1130 inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb);
1131 if ((bytes_written > 0) && (offset))
1133 else if (bytes_written < 0)
1136 cFYI(1, "No writeable filehandles for inode");
/*
 * Write back dirty pages of a mapping, batching consecutive pages into
 * one asynchronous write request.
 *
 * When wsize is smaller than a page, the work is delegated to
 * generic_writepages().  Otherwise the range to scan is taken from wbc
 * (cyclic from mapping->writeback_index, or range_start..range_end).
 * Each iteration of the main loop
 *  - allocates a cifs_writedata sized from wsize / PAGE_CACHE_SIZE,
 *  - collects dirty pages with repeated find_get_pages_tag() calls,
 *  - locks and checks each page (still in this mapping, inside the
 *    range, consecutive with the previous one, not under writeback,
 *    still dirty, below i_size) and marks it for writeback,
 *  - releases the pages that were found but will not be used,
 *  - fills in sync_mode, nr_pages and offset, looks up a writable handle
 *    with find_writable_file() and submits with cifs_async_writev(),
 *    repeating the submission while it returns -EAGAIN in WB_SYNC_ALL
 *    mode,
 *  - unlocks the pages, and on a send failure ends writeback on them,
 *    releases them and records the error on the mapping,
 *  - drops its reference on the writedata and decrements
 *    wbc->nr_to_write by the number of pages used.
 * If the scan did not start at the beginning and is not done, it wraps
 * around to the start of the file once.  mapping->writeback_index is
 * updated for cyclic or whole-range writeback.
 * NOTE(review): many statements (declarations, break and continue
 * statements, error assignments, the retry label and the return) are not
 * visible in this listing; confirm details against the complete source.
 */
1144 static int cifs_writepages(struct address_space *mapping,
1145 struct writeback_control *wbc)
1147 struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
1148 bool done = false, scanned = false, range_whole = false;
1150 struct cifs_writedata *wdata;
1155 * If wsize is smaller than the page cache size, default to writing
1156 * one page at a time via cifs_writepage
1158 if (cifs_sb->wsize < PAGE_CACHE_SIZE)
1159 return generic_writepages(mapping, wbc);
1161 if (wbc->range_cyclic) {
1162 index = mapping->writeback_index; /* Start from prev offset */
1165 index = wbc->range_start >> PAGE_CACHE_SHIFT;
1166 end = wbc->range_end >> PAGE_CACHE_SHIFT;
1167 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
1172 while (!done && index <= end) {
1173 unsigned int i, nr_pages, found_pages;
1174 pgoff_t next = 0, tofind;
1175 struct page **pages;
1177 tofind = min((cifs_sb->wsize / PAGE_CACHE_SIZE) - 1,
1180 wdata = cifs_writedata_alloc((unsigned int)tofind);
1187 * find_get_pages_tag seems to return a max of 256 on each
1188 * iteration, so we must call it several times in order to
1189 * fill the array or the wsize is effectively limited to
1190 * 256 * PAGE_CACHE_SIZE.
1193 pages = wdata->pages;
1195 nr_pages = find_get_pages_tag(mapping, &index,
1196 PAGECACHE_TAG_DIRTY,
1198 found_pages += nr_pages;
1201 } while (nr_pages && tofind && index <= end);
1203 if (found_pages == 0) {
1204 kref_put(&wdata->refcount, cifs_writedata_release);
1209 for (i = 0; i < found_pages; i++) {
1210 page = wdata->pages[i];
1212 * At this point we hold neither mapping->tree_lock nor
1213 * lock on the page itself: the page may be truncated or
1214 * invalidated (changing page->mapping to NULL), or even
1215 * swizzled back from swapper_space to tmpfs file
1221 else if (!trylock_page(page))
1224 if (unlikely(page->mapping != mapping)) {
1229 if (!wbc->range_cyclic && page->index > end) {
1235 if (next && (page->index != next)) {
1236 /* Not next consecutive page */
1241 if (wbc->sync_mode != WB_SYNC_NONE)
1242 wait_on_page_writeback(page);
1244 if (PageWriteback(page) ||
1245 !clear_page_dirty_for_io(page)) {
1251 * This actually clears the dirty bit in the radix tree.
1252 * See cifs_writepage() for more commentary.
1254 set_page_writeback(page);
1256 if (page_offset(page) >= mapping->host->i_size) {
1259 end_page_writeback(page);
1263 wdata->pages[i] = page;
1264 next = page->index + 1;
1268 /* reset index to refind any pages skipped */
1270 index = wdata->pages[0]->index + 1;
1272 /* put any pages we aren't going to use */
1273 for (i = nr_pages; i < found_pages; i++) {
1274 page_cache_release(wdata->pages[i]);
1275 wdata->pages[i] = NULL;
1278 /* nothing to write? */
1279 if (nr_pages == 0) {
1280 kref_put(&wdata->refcount, cifs_writedata_release);
1284 wdata->sync_mode = wbc->sync_mode;
1285 wdata->nr_pages = nr_pages;
1286 wdata->offset = page_offset(wdata->pages[0]);
1289 if (wdata->cfile != NULL)
1290 cifsFileInfo_put(wdata->cfile);
1291 wdata->cfile = find_writable_file(CIFS_I(mapping->host),
1293 if (!wdata->cfile) {
1294 cERROR(1, "No writable handles for inode");
1298 rc = cifs_async_writev(wdata);
1299 } while (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN);
1301 for (i = 0; i < nr_pages; ++i)
1302 unlock_page(wdata->pages[i]);
1304 /* send failure -- clean up the mess */
1306 for (i = 0; i < nr_pages; ++i) {
1308 redirty_page_for_writepage(wbc,
1311 SetPageError(wdata->pages[i]);
1312 end_page_writeback(wdata->pages[i]);
1313 page_cache_release(wdata->pages[i]);
1316 mapping_set_error(mapping, rc);
1318 kref_put(&wdata->refcount, cifs_writedata_release);
1320 wbc->nr_to_write -= nr_pages;
1321 if (wbc->nr_to_write <= 0)
1327 if (!scanned && !done) {
1329 * We hit the last page and there is more work to be done: wrap
1330 * back to the start of the file
1337 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
1338 mapping->writeback_index = index;
/*
 * Write one whole page to the server through cifs_partialpagewrite().
 *
 * An extra reference on the page is held for the duration of the call.
 * The page is put under writeback before the write is issued.  A result
 * of -EAGAIN is handled in two ways: in WB_SYNC_ALL mode a dedicated
 * branch is taken (its statement is not visible here, presumably a
 * retry), otherwise the page is redirtied with
 * redirty_page_for_writepage().  Afterwards SetPageUptodate() is called
 * (under a condition that is not visible here), writeback is ended and
 * the extra reference is dropped.
 * NOTE(review): the declarations, the retry label, the remaining result
 * handling and the return are not visible in this listing.
 */
1344 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
1350 /* BB add check for wbc flags */
1351 page_cache_get(page);
1352 if (!PageUptodate(page))
1353 cFYI(1, "ppw - page not up to date");
1356 * Set the "writeback" flag, and clear "dirty" in the radix tree.
1358 * A writepage() implementation always needs to do either this,
1359 * or re-dirty the page with "redirty_page_for_writepage()" in
1360 * the case of a failure.
1362 * Just unlocking the page will cause the radix tree tag-bits
1363 * to fail to update with the state of the page correctly.
1365 set_page_writeback(page);
1367 rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE);
1368 if (rc == -EAGAIN && wbc->sync_mode == WB_SYNC_ALL)
1370 else if (rc == -EAGAIN)
1371 redirty_page_for_writepage(wbc, page);
1375 SetPageUptodate(page);
1376 end_page_writeback(page);
1377 page_cache_release(page);
/*
 * cifs_writepage - .writepage handler installed in cifs_addr_ops.
 *
 * Delegates the actual write to cifs_writepage_locked() and keeps its
 * result in rc.
 */
1382 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
1384 int rc = cifs_writepage_locked(page, wbc);
/*
 * cifs_write_end - .write_end handler: finish a write into a page cache page.
 * @file:    file being written; private_data holds the cifsFileInfo
 * @mapping: address space owning @page
 * @pos:     file position the copy started at
 * @len:     length that was requested (only logged in the lines shown)
 * @copied:  number of bytes actually copied into @page
 * @page:    the page that received the data
 * @fsdata:  unused in the lines shown
 *
 * If the page ends up uptodate it is simply marked dirty; otherwise the
 * copied bytes are written to the server synchronously via cifs_write().
 * The cached inode size is extended when the write went past it.
 */
1389 static int cifs_write_end(struct file *file, struct address_space *mapping,
1390 loff_t pos, unsigned len, unsigned copied,
1391 struct page *page, void *fsdata)
1394 struct inode *inode = mapping->host;
1395 struct cifsFileInfo *cfile = file->private_data;
1396 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
/* choose the pid sent with the write; depends on the RWPIDFORWARD mount flag */
1399 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
1402 pid = current->tgid;
1404 cFYI(1, "write_end for page %p from pos %lld with %d bytes",
/* PageChecked is set by cifs_write_begin() when the read was skipped */
1407 if (PageChecked(page)) {
1409 SetPageUptodate(page);
1410 ClearPageChecked(page);
1411 } else if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
1412 SetPageUptodate(page);
/* page content is still incomplete: push just the copied bytes to the server */
1414 if (!PageUptodate(page)) {
1416 unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
1420 /* this is probably better than directly calling
1421 partialpage_write since in this function the file handle is
1422 known which we might as well leverage */
1423 /* BB check if anything else missing out of ppw
1424 such as updating last write time */
1425 page_data = kmap(page);
1426 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
1427 /* if (rc < 0) should we set writebehind rc? */
1434 set_page_dirty(page);
/* grow the cached file size under i_lock if the write extended the file */
1438 spin_lock(&inode->i_lock);
1439 if (pos > inode->i_size)
1440 i_size_write(inode, pos);
1441 spin_unlock(&inode->i_lock);
1445 page_cache_release(page);
/*
 * cifs_strict_fsync - fsync variant that also invalidates the page cache.
 * @file:     file to sync
 * @start:    first byte of the range to write back
 * @end:      last byte of the range to write back
 * @datasync: only logged in the lines shown
 *
 * Writes back and waits on the dirty range, then, holding i_mutex, drops
 * the cached mapping when clientCanCacheRead is not set and asks the server
 * to flush the handle unless the mount has CIFS_MOUNT_NOSSYNC.
 */
1450 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
1455 struct cifs_tcon *tcon;
1456 struct cifsFileInfo *smbfile = file->private_data;
1457 struct inode *inode = file->f_path.dentry->d_inode;
1458 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
1460 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
1463 mutex_lock(&inode->i_mutex);
1467 cFYI(1, "Sync file - name: %s datasync: 0x%x",
1468 file->f_path.dentry->d_name.name, datasync);
/* cached pages cannot be trusted without read caching: invalidate them */
1470 if (!CIFS_I(inode)->clientCanCacheRead) {
1471 rc = cifs_invalidate_mapping(inode);
1473 cFYI(1, "rc: %d during invalidate phase", rc);
1474 rc = 0; /* don't care about it in fsync */
1478 tcon = tlink_tcon(smbfile->tlink);
/* server-side flush is skipped when the mount asked for no server sync */
1479 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC))
1480 rc = CIFSSMBFlush(xid, tcon, smbfile->netfid);
1483 mutex_unlock(&inode->i_mutex);
/*
 * cifs_fsync - write back a byte range and flush the handle on the server.
 * @file:     file to sync
 * @start:    first byte of the range to write back
 * @end:      last byte of the range to write back
 * @datasync: only logged in the lines shown
 *
 * Same sequence as cifs_strict_fsync() but without invalidating the page
 * cache: write back and wait, then, under i_mutex, send CIFSSMBFlush unless
 * the mount has CIFS_MOUNT_NOSSYNC.
 */
1487 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
1491 struct cifs_tcon *tcon;
1492 struct cifsFileInfo *smbfile = file->private_data;
1493 struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1494 struct inode *inode = file->f_mapping->host;
1496 rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
1499 mutex_lock(&inode->i_mutex);
1503 cFYI(1, "Sync file - name: %s datasync: 0x%x",
1504 file->f_path.dentry->d_name.name, datasync);
1506 tcon = tlink_tcon(smbfile->tlink);
1507 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC))
1508 rc = CIFSSMBFlush(xid, tcon, smbfile->netfid);
1511 mutex_unlock(&inode->i_mutex);
/*
 * cifs_flush - write back the inode's dirty pages for a writable file.
 * @file: file being flushed
 * @id:   owner id; not used in the lines shown
 *
 * Only files opened with FMODE_WRITE trigger filemap_write_and_wait(); the
 * result is logged together with the inode and file pointers.
 */
1516 * As file closes, flush all cached write data for this inode checking
1517 * for write behind errors.
1519 int cifs_flush(struct file *file, fl_owner_t id)
1521 struct inode *inode = file->f_path.dentry->d_inode;
1524 if (file->f_mode & FMODE_WRITE)
1525 rc = filemap_write_and_wait(inode->i_mapping);
1527 cFYI(1, "Flush inode %p file %p rc %d", inode, file, rc);
/*
 * cifs_write_allocate_pages - populate @pages with freshly allocated pages.
 * @pages:     array with room for at least @num_pages page pointers
 * @num_pages: number of pages to allocate
 *
 * Allocates the pages one by one.  On an allocation failure the number of
 * pages obtained so far is remembered and the error path walks the array
 * again for exactly that many entries before ENOMEM is reported.
 */
1533 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
1538 for (i = 0; i < num_pages; i++) {
/*
 * __GFP_HIGHMEM on its own is only a zone modifier: it grants no permission
 * to sleep or reclaim, so the allocation fails far too easily.  Combine it
 * with GFP_KERNEL so the allocator may wait for memory.
 */
1539 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
1542 * save number of pages we have already allocated and
1543 * return with ENOMEM error
/* error path: revisit only the entries that were successfully allocated */
1554 for (i = 0; i < num_pages; i++)
/*
 * get_numpages - size the next write chunk in pages.
 * @wsize:   upper bound for a single chunk, in bytes
 * @len:     bytes still to be written
 * @cur_len: out parameter for the chunk length
 *           (NOTE(review): its assignment is not visible here)
 *
 * The chunk length is the smaller of @len and @wsize; the page count is
 * that length divided by PAGE_CACHE_SIZE, with a trailing partial page
 * detected through the remainder.
 */
1560 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
1565 clen = min_t(const size_t, len, wsize);
1566 num_pages = clen / PAGE_CACHE_SIZE;
1567 if (clen % PAGE_CACHE_SIZE)
/*
 * cifs_iovec_write - copy user iovec data into bounce pages and write it
 * to the server in chunks of at most wsize bytes.
 * @file:    file being written; private_data holds the cifsFileInfo
 * @iov:     user iovec array describing the data
 * @nr_segs: number of entries in @iov
 * @poffset: in/out file position, advanced by the bytes written
 *
 * Returns total_written, the number of bytes the server accepted, on the
 * path shown at the end of the function.  When anything was written the
 * cached inode size is extended under i_lock if *poffset went past it.
 */
1577 cifs_iovec_write(struct file *file, const struct iovec *iov,
1578 unsigned long nr_segs, loff_t *poffset)
1580 unsigned int written;
1581 unsigned long num_pages, npages, i;
1582 size_t copied, len, cur_len;
1583 ssize_t total_written = 0;
1584 struct kvec *to_send;
1585 struct page **pages;
1587 struct inode *inode;
1588 struct cifsFileInfo *open_file;
1589 struct cifs_tcon *pTcon;
1590 struct cifs_sb_info *cifs_sb;
1591 struct cifs_io_parms io_parms;
1595 len = iov_length(iov, nr_segs);
1599 rc = generic_write_checks(file, poffset, &len, 0);
1603 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
/* num_pages is the size of the largest chunk and bounds both arrays below */
1604 num_pages = get_numpages(cifs_sb->wsize, len, &cur_len);
/* array of page pointers: the element type is struct page *, not "pages" */
1606 pages = kmalloc(sizeof(struct page *)*num_pages, GFP_KERNEL);
/* one kvec per page plus slot 0, which the copy loop below never fills */
1610 to_send = kmalloc(sizeof(struct kvec)*(num_pages + 1), GFP_KERNEL);
1616 rc = cifs_write_allocate_pages(pages, num_pages);
1624 open_file = file->private_data;
/* pid sent with the write depends on the RWPIDFORWARD mount flag */
1626 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
1627 pid = open_file->pid;
1629 pid = current->tgid;
1631 pTcon = tlink_tcon(open_file->tlink);
1632 inode = file->f_path.dentry->d_inode;
1634 iov_iter_init(&it, iov, nr_segs, len, 0);
1638 size_t save_len = cur_len;
/* fill the bounce pages from user memory and describe each one in a kvec */
1639 for (i = 0; i < npages; i++) {
1640 copied = min_t(const size_t, cur_len, PAGE_CACHE_SIZE);
1641 copied = iov_iter_copy_from_user(pages[i], &it, 0,
1644 iov_iter_advance(&it, copied);
1645 to_send[i+1].iov_base = kmap(pages[i]);
1646 to_send[i+1].iov_len = copied;
1649 cur_len = save_len - cur_len;
/* send the chunk; a stale handle is reopened and -EAGAIN is retried */
1652 if (open_file->invalidHandle) {
1653 rc = cifs_reopen_file(open_file, false);
1657 io_parms.netfid = open_file->netfid;
1659 io_parms.tcon = pTcon;
1660 io_parms.offset = *poffset;
1661 io_parms.length = cur_len;
1662 rc = CIFSSMBWrite2(xid, &io_parms, &written, to_send,
1664 } while (rc == -EAGAIN);
1666 for (i = 0; i < npages; i++)
1671 total_written += written;
1672 cifs_update_eof(CIFS_I(inode), *poffset, written);
1673 *poffset += written;
1674 } else if (rc < 0) {
1680 /* get length and number of kvecs of the next write */
1681 npages = get_numpages(cifs_sb->wsize, len, &cur_len);
/* publish a possibly larger file size once at least one chunk went out */
1684 if (total_written > 0) {
1685 spin_lock(&inode->i_lock);
1686 if (*poffset > inode->i_size)
1687 i_size_write(inode, *poffset);
1688 spin_unlock(&inode->i_lock);
1691 cifs_stats_bytes_written(pTcon, total_written);
1692 mark_inode_dirty_sync(inode);
1694 for (i = 0; i < num_pages; i++)
1699 return total_written;
/*
 * cifs_user_writev - write user data straight to the server.
 * @iocb:    kiocb whose ki_filp is the target file
 * @iov:     user iovec array
 * @nr_segs: number of entries in @iov
 * @pos:     file position to write at
 *
 * Forwards the request to cifs_iovec_write() and flags the inode's mapping
 * as invalid, since the page cache was bypassed.
 * NOTE(review): the condition guarding the invalid_mapping store is not
 * visible here.
 */
1702 ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
1703 unsigned long nr_segs, loff_t pos)
1706 struct inode *inode;
1708 inode = iocb->ki_filp->f_path.dentry->d_inode;
1711 * BB - optimize the way when signing is disabled. We can drop this
1712 * extra memory-to-memory copying and use iovec buffers for constructing
1716 written = cifs_iovec_write(iocb->ki_filp, iov, nr_segs, &pos);
1718 CIFS_I(inode)->invalid_mapping = true;
/*
 * cifs_strict_writev - write entry point for strict cache mode.
 * @iocb:    kiocb whose ki_filp is the target file
 * @iov:     user iovec array
 * @nr_segs: number of entries in @iov
 * @pos:     file position to write at
 *
 * When clientCanCacheAll is set the generic page cache write path is used;
 * otherwise the data goes directly to the server via cifs_user_writev().
 */
1725 ssize_t cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
1726 unsigned long nr_segs, loff_t pos)
1728 struct inode *inode;
1730 inode = iocb->ki_filp->f_path.dentry->d_inode;
1732 if (CIFS_I(inode)->clientCanCacheAll)
1733 return generic_file_aio_write(iocb, iov, nr_segs, pos);
1736 * In strict cache mode we need to write the data to the server exactly
1737 * from the pos to pos+len-1 rather than flush all affected pages
1738 * because it may cause a error with mandatory locks on these pages but
1739 * not on the region from pos to ppos+len-1.
1742 return cifs_user_writev(iocb, iov, nr_segs, pos);
/*
 * cifs_iovec_read - read from the server directly into a user iovec.
 * @file:    file being read; private_data holds the cifsFileInfo
 * @iov:     destination iovec array
 * @nr_segs: number of entries in @iov
 * @poffset: in/out file position, advanced by the bytes read
 *
 * Issues CIFSSMBRead requests of at most rsize bytes each (rsize itself is
 * capped at CIFSMaxBufSize), copies every response payload into @iov and
 * releases the response buffer according to its type.
 */
1746 cifs_iovec_read(struct file *file, const struct iovec *iov,
1747 unsigned long nr_segs, loff_t *poffset)
1752 unsigned int bytes_read = 0;
1753 size_t len, cur_len;
1755 struct cifs_sb_info *cifs_sb;
1756 struct cifs_tcon *pTcon;
1757 struct cifsFileInfo *open_file;
1758 struct smb_com_read_rsp *pSMBr;
1759 struct cifs_io_parms io_parms;
1767 len = iov_length(iov, nr_segs);
1772 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1774 /* FIXME: set up handlers for larger reads and/or convert to async */
1775 rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
1777 open_file = file->private_data;
1778 pTcon = tlink_tcon(open_file->tlink);
/* pid sent with the read depends on the RWPIDFORWARD mount flag */
1780 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
1781 pid = open_file->pid;
1783 pid = current->tgid;
1785 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
1786 cFYI(1, "attempting read on write only file instance");
/* one iteration per request; total_read grows by what the server returned */
1788 for (total_read = 0; total_read < len; total_read += bytes_read) {
1789 cur_len = min_t(const size_t, len - total_read, rsize);
/* retry while -EAGAIN, reopening the handle first if it went stale */
1793 while (rc == -EAGAIN) {
1794 int buf_type = CIFS_NO_BUFFER;
1795 if (open_file->invalidHandle) {
1796 rc = cifs_reopen_file(open_file, true);
1800 io_parms.netfid = open_file->netfid;
1802 io_parms.tcon = pTcon;
1803 io_parms.offset = *poffset;
1804 io_parms.length = cur_len;
1805 rc = CIFSSMBRead(xid, &io_parms, &bytes_read,
1806 &read_data, &buf_type);
1807 pSMBr = (struct smb_com_read_rsp *)read_data;
/* presumably the 4 skips a length prefix ahead of the SMB header - confirm */
1809 char *data_offset = read_data + 4 +
1810 le16_to_cpu(pSMBr->DataOffset);
1811 if (memcpy_toiovecend(iov, data_offset,
1812 iov_offset, bytes_read))
/* hand the response buffer back to the pool it was taken from */
1814 if (buf_type == CIFS_SMALL_BUFFER)
1815 cifs_small_buf_release(read_data);
1816 else if (buf_type == CIFS_LARGE_BUFFER)
1817 cifs_buf_release(read_data);
1819 iov_offset += bytes_read;
1823 if (rc || (bytes_read == 0)) {
1831 cifs_stats_bytes_read(pTcon, bytes_read);
1832 *poffset += bytes_read;
/*
 * cifs_user_readv - read user data straight from the server.
 * @iocb:    kiocb whose ki_filp is the source file
 * @iov:     destination iovec array
 * @nr_segs: number of entries in @iov
 * @pos:     file position to read from
 *
 * Thin wrapper that passes the request on to cifs_iovec_read().
 */
1840 ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
1841 unsigned long nr_segs, loff_t pos)
1845 read = cifs_iovec_read(iocb->ki_filp, iov, nr_segs, &pos);
/*
 * cifs_strict_readv - read entry point for strict cache mode.
 * @iocb:    kiocb whose ki_filp is the source file
 * @iov:     destination iovec array
 * @nr_segs: number of entries in @iov
 * @pos:     file position to read from
 *
 * When clientCanCacheRead is set the generic page cache read path is used;
 * otherwise the data is fetched from the server via cifs_user_readv().
 */
1852 ssize_t cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov,
1853 unsigned long nr_segs, loff_t pos)
1855 struct inode *inode;
1857 inode = iocb->ki_filp->f_path.dentry->d_inode;
1859 if (CIFS_I(inode)->clientCanCacheRead)
1860 return generic_file_aio_read(iocb, iov, nr_segs, pos);
1863 * In strict cache mode we need to read from the server all the time
1864 * if we don't have level II oplock because the server can delay mtime
1865 * change - so we can't make a decision about inode invalidating.
1866 * And we can also fail with pagereading if there are mandatory locks
1867 * on pages affected by this read but not on the region from pos to
1871 return cifs_user_readv(iocb, iov, nr_segs, pos);
/*
 * cifs_read - read file data from the server into a kernel buffer.
 * @file:      file being read; private_data holds the cifsFileInfo
 * @read_data: destination buffer
 * @read_size: number of bytes wanted
 * (a further parameter, used below as poffset, is the in/out file position)
 *
 * Issues CIFSSMBRead requests of at most rsize bytes each (rsize itself is
 * capped at CIFSMaxBufSize), handing the server call the address of the
 * current position in @read_data, and advances *poffset by what was read.
 */
1874 static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
1878 unsigned int bytes_read = 0;
1879 unsigned int total_read;
1880 unsigned int current_read_size;
1882 struct cifs_sb_info *cifs_sb;
1883 struct cifs_tcon *pTcon;
1885 char *current_offset;
1886 struct cifsFileInfo *open_file;
1887 struct cifs_io_parms io_parms;
1888 int buf_type = CIFS_NO_BUFFER;
1892 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1894 /* FIXME: set up handlers for larger reads and/or convert to async */
1895 rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
1897 if (file->private_data == NULL) {
1902 open_file = file->private_data;
1903 pTcon = tlink_tcon(open_file->tlink);
/* pid sent with the read depends on the RWPIDFORWARD mount flag */
1905 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
1906 pid = open_file->pid;
1908 pid = current->tgid;
1910 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
1911 cFYI(1, "attempting read on write only file instance");
/* one iteration per request; the buffer cursor moves with total_read */
1913 for (total_read = 0, current_offset = read_data;
1914 read_size > total_read;
1915 total_read += bytes_read, current_offset += bytes_read) {
1916 current_read_size = min_t(uint, read_size - total_read, rsize);
1918 /* For windows me and 9x we do not want to request more
1919 than it negotiated since it will refuse the read then */
1921 !(pTcon->ses->capabilities & CAP_LARGE_FILES)) {
1922 current_read_size = min_t(uint, current_read_size,
/* retry while -EAGAIN, reopening the handle first if it went stale */
1926 while (rc == -EAGAIN) {
1927 if (open_file->invalidHandle) {
1928 rc = cifs_reopen_file(open_file, true);
1932 io_parms.netfid = open_file->netfid;
1934 io_parms.tcon = pTcon;
1935 io_parms.offset = *poffset;
1936 io_parms.length = current_read_size;
1937 rc = CIFSSMBRead(xid, &io_parms, &bytes_read,
/* pass the address of the cursor so the data lands in the caller's buffer */
1938 &current_offset, &buf_type);
1940 if (rc || (bytes_read == 0)) {
/*
 * NOTE(review): the statistic is fed total_read, which has not yet been
 * advanced by this request; cifs_iovec_read() passes bytes_read at the
 * equivalent point - confirm which is intended.
 */
1948 cifs_stats_bytes_read(pTcon, total_read);
1949 *poffset += bytes_read;
/*
 * cifs_page_mkwrite - .page_mkwrite handler installed in cifs_file_vm_ops.
 * @vma: mapping the fault happened in; not used in the lines shown
 * @vmf: fault description; vmf->page is the page about to become writable
 *
 * Reports VM_FAULT_LOCKED to the caller.
 * NOTE(review): the statement that actually locks the page is not visible
 * here.
 */
1957 * If the page is mmap'ed into a process' page tables, then we need to make
1958 * sure that it doesn't change while being written back.
1961 cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
1963 struct page *page = vmf->page;
1966 return VM_FAULT_LOCKED;
/*
 * VM operations installed on CIFS mappings by cifs_file_mmap() and
 * cifs_file_strict_mmap(): faults use the generic filemap handler, and
 * write faults additionally go through cifs_page_mkwrite().
 */
1969 static struct vm_operations_struct cifs_file_vm_ops = {
1970 .fault = filemap_fault,
1971 .page_mkwrite = cifs_page_mkwrite,
/*
 * cifs_file_strict_mmap - mmap entry point for strict cache mode.
 * @file: file being mapped
 * @vma:  the new mapping
 *
 * Invalidates the inode's cached mapping first when clientCanCacheRead is
 * not set, then performs the generic mmap and installs cifs_file_vm_ops.
 */
1974 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
1977 struct inode *inode = file->f_path.dentry->d_inode;
1981 if (!CIFS_I(inode)->clientCanCacheRead) {
1982 rc = cifs_invalidate_mapping(inode);
1987 rc = generic_file_mmap(file, vma);
1989 vma->vm_ops = &cifs_file_vm_ops;
/*
 * cifs_file_mmap - default mmap entry point.
 * @file: file being mapped
 * @vma:  the new mapping
 *
 * Revalidates the file (a failure is logged), then performs the generic
 * mmap and installs cifs_file_vm_ops on the mapping.
 */
1994 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
1999 rc = cifs_revalidate_file(file);
2001 cFYI(1, "Validation prior to mmap failed, error=%d", rc);
2005 rc = generic_file_mmap(file, vma);
2007 vma->vm_ops = &cifs_file_vm_ops;
/*
 * cifs_readpages - .readpages handler: issue asynchronous reads for a list
 * of pages.
 * @file:      file being read; private_data holds the cifsFileInfo
 * @mapping:   address space the pages are added to
 * @page_list: pages to fill, kept in order of declining index
 * @num_pages: number of pages on @page_list
 *
 * Pages that fscache can supply are read from there first.  The remaining
 * pages are grouped into runs of consecutive indexes no larger than rsize;
 * each run is inserted into the page cache, attached to a cifs_readdata and
 * submitted with cifs_async_readv().
 */
2012 static int cifs_readpages(struct file *file, struct address_space *mapping,
2013 struct list_head *page_list, unsigned num_pages)
2016 struct list_head tmplist;
2017 struct cifsFileInfo *open_file = file->private_data;
2018 struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2019 unsigned int rsize = cifs_sb->rsize;
2023 * Give up immediately if rsize is too small to read an entire page.
2024 * The VFS will fall back to readpage. We should never reach this
2025 * point however since we set ra_pages to 0 when the rsize is smaller
2026 * than a cache page.
2028 if (unlikely(rsize < PAGE_CACHE_SIZE))
2032 * Reads as many pages as possible from fscache. Returns -ENOBUFS
2033 * immediately if the cookie is negative
2035 rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
/* pid sent with the reads depends on the RWPIDFORWARD mount flag */
2040 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2041 pid = open_file->pid;
2043 pid = current->tgid;
2046 INIT_LIST_HEAD(&tmplist);
2048 cFYI(1, "%s: file=%p mapping=%p num_pages=%u", __func__, file,
2049 mapping, num_pages);
2052 * Start with the page at end of list and move it to private
2053 * list. Do the same with any following pages until we hit
2054 * the rsize limit, hit an index discontinuity, or run out of
2055 * pages. Issue the async read and then start the loop again
2056 * until the list is empty.
2058 * Note that list order is important. The page_list is in
2059 * the order of declining indexes. When we put the pages in
2060 * the rdata->pages, then we want them in increasing order.
2062 while (!list_empty(page_list)) {
2063 unsigned int bytes = PAGE_CACHE_SIZE;
/*
 * NOTE(review): expected_index is an unsigned int but is compared with
 * page->index below; if page->index is wider, very large indexes would
 * never match - consider pgoff_t, confirm against struct page.
 */
2064 unsigned int expected_index;
2065 unsigned int nr_pages = 1;
2067 struct page *page, *tpage;
2068 struct cifs_readdata *rdata;
/* the tail of page_list holds the lowest index */
2070 page = list_entry(page_list->prev, struct page, lru);
2073 * Lock the page and put it in the cache. Since no one else
2074 * should have access to this page, we're safe to simply set
2075 * PG_locked without checking it first.
2077 __set_page_locked(page);
2078 rc = add_to_page_cache_locked(page, mapping,
2079 page->index, GFP_KERNEL);
2081 /* give up if we can't stick it in the cache */
2083 __clear_page_locked(page);
2087 /* move first page to the tmplist */
2088 offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
2089 list_move_tail(&page->lru, &tmplist);
2091 /* now try and add more pages onto the request */
2092 expected_index = page->index + 1;
2093 list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
2094 /* discontinuity ? */
2095 if (page->index != expected_index)
2098 /* would this page push the read over the rsize? */
2099 if (bytes + PAGE_CACHE_SIZE > rsize)
2102 __set_page_locked(page);
2103 if (add_to_page_cache_locked(page, mapping,
2104 page->index, GFP_KERNEL)) {
2105 __clear_page_locked(page);
2108 list_move_tail(&page->lru, &tmplist);
2109 bytes += PAGE_CACHE_SIZE;
2114 rdata = cifs_readdata_alloc(nr_pages);
2116 /* best to give up if we're out of mem */
2117 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
2118 list_del(&page->lru);
2119 lru_cache_add_file(page);
2121 page_cache_release(page);
/* the request holds its own reference on the open file */
2127 spin_lock(&cifs_file_list_lock);
2128 cifsFileInfo_get(open_file);
2129 spin_unlock(&cifs_file_list_lock);
2130 rdata->cfile = open_file;
2131 rdata->mapping = mapping;
2132 rdata->offset = offset;
2133 rdata->bytes = bytes;
2135 list_splice_init(&tmplist, &rdata->pages);
/* submit; a stale handle is reopened and -EAGAIN is retried */
2138 if (open_file->invalidHandle) {
2139 rc = cifs_reopen_file(open_file, true);
2143 rc = cifs_async_readv(rdata);
2144 } while (rc == -EAGAIN);
/* cleanup: hand the pages to the LRU, drop them and free the request */
2147 list_for_each_entry_safe(page, tpage, &rdata->pages,
2149 list_del(&page->lru);
2150 lru_cache_add_file(page);
2152 page_cache_release(page);
2154 cifs_readdata_free(rdata);
/*
 * cifs_readpage_worker - fill one page, from fscache or from the server.
 * @file: file being read
 * @page: page to fill
 * (a further parameter, used below as poffset, is the file position)
 *
 * Tries fscache first.  Otherwise the page is mapped and filled with
 * cifs_read(); a short read is padded with zeroes, the page is marked
 * uptodate and offered to fscache, and the inode's atime is refreshed.
 */
2162 static int cifs_readpage_worker(struct file *file, struct page *page,
2168 /* Is the page cached? */
2169 rc = cifs_readpage_from_fscache(file->f_path.dentry->d_inode, page);
/* hold a reference on the page while it is mapped and filled */
2173 page_cache_get(page);
2174 read_data = kmap(page);
2175 /* for reads over a certain size could initiate async read ahead */
2177 rc = cifs_read(file, read_data, PAGE_CACHE_SIZE, poffset);
2182 cFYI(1, "Bytes read %d", rc);
2184 file->f_path.dentry->d_inode->i_atime =
2185 current_fs_time(file->f_path.dentry->d_inode->i_sb);
/* rc is the byte count here: zero the part of the page that was not read */
2187 if (PAGE_CACHE_SIZE > rc)
2188 memset(read_data + rc, 0, PAGE_CACHE_SIZE - rc);
2190 flush_dcache_page(page);
2191 SetPageUptodate(page);
2193 /* send this page to the cache */
2194 cifs_readpage_to_fscache(file->f_path.dentry->d_inode, page);
2200 page_cache_release(page);
/*
 * cifs_readpage - .readpage handler: read a single page.
 * @file: file being read; must carry private_data
 * @page: page to fill
 *
 * Converts the page index into a byte offset and lets
 * cifs_readpage_worker() do the read.
 */
2206 static int cifs_readpage(struct file *file, struct page *page)
2208 loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
2214 if (file->private_data == NULL) {
/* NOTE(review): the casts to int truncate offsets above 2GB in this message */
2220 cFYI(1, "readpage %p at offset %d 0x%x\n",
2221 page, (int)offset, (int)offset);
2223 rc = cifs_readpage_worker(file, page, &offset);
/*
 * is_inode_writable - check whether any open handle on the inode is
 * writable.
 * @cifs_inode: inode whose openFileList is scanned
 *
 * Walks the list under cifs_file_list_lock, looking for an entry whose open
 * flags translate to FMODE_WRITE; the lock is dropped on both the match and
 * the no-match path.
 */
2231 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
2233 struct cifsFileInfo *open_file;
2235 spin_lock(&cifs_file_list_lock);
2236 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2237 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
2238 spin_unlock(&cifs_file_list_lock);
2242 spin_unlock(&cifs_file_list_lock);
/*
 * is_size_safe_to_change - decide whether a server-reported size may
 * replace the cached i_size.
 * @cifsInode:   inode in question
 * @end_of_file: size reported by the server
 *
 * For an inode with a writable handle the code distinguishes direct I/O
 * mounts (no page cache involved) and the case where the cached size is
 * smaller than @end_of_file.
 */
2246 /* We do not want to update the file size from server for inodes
2247 open for write - to avoid races with writepage extending
2248 the file - in the future we could consider allowing
2249 refreshing the inode only on increases in the file size
2250 but this is tricky to do without racing with writebehind
2251 page caching in the current Linux kernel design */
2252 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
2257 if (is_inode_writable(cifsInode)) {
2258 /* This inode is open for write at least once */
2259 struct cifs_sb_info *cifs_sb;
2261 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
2262 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
2263 /* since no page cache to corrupt on directio
2264 we can change size safely */
2268 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
/*
 * cifs_write_begin - .write_begin handler: prepare a page for a write.
 * @file:    file being written
 * @mapping: address space of the file
 * @pos:     file position of the write
 * @len:     number of bytes about to be written
 * @flags:   passed through to grab_cache_page_write_begin()
 * @pagep:   out parameter for the prepared page
 * @fsdata:  unused in the lines shown
 *
 * Grabs the page covering @pos.  Nothing more is needed when it is already
 * uptodate or the write covers the whole page.  With read caching allowed
 * and no existing data worth keeping, the untouched parts are zeroed and
 * the page is flagged PageChecked.  Otherwise, unless the file is
 * write-only, the page is read in first.
 */
2276 static int cifs_write_begin(struct file *file, struct address_space *mapping,
2277 loff_t pos, unsigned len, unsigned flags,
2278 struct page **pagep, void **fsdata)
2280 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
2281 loff_t offset = pos & (PAGE_CACHE_SIZE - 1);
2282 loff_t page_start = pos & PAGE_MASK;
2287 cFYI(1, "write_begin from %lld len %d", (long long)pos, len);
2289 page = grab_cache_page_write_begin(mapping, index, flags);
2295 if (PageUptodate(page))
2299 * If we write a full page it will be up to date, no need to read from
2300 * the server. If the write is short, we'll end up doing a sync write
2303 if (len == PAGE_CACHE_SIZE)
2307 * optimize away the read when we have an oplock, and we're not
2308 * expecting to use any of the data we'd be reading in. That
2309 * is, when the page lies beyond the EOF, or straddles the EOF
2310 * and the write will cover all of the existing data.
2312 if (CIFS_I(mapping->host)->clientCanCacheRead) {
2313 i_size = i_size_read(mapping->host);
2314 if (page_start >= i_size ||
2315 (offset == 0 && (pos + len) >= i_size)) {
2316 zero_user_segments(page, 0, offset,
2320 * PageChecked means that the parts of the page
2321 * to which we're not writing are considered up
2322 * to date. Once the data is copied to the
2323 * page, it can be set uptodate.
2325 SetPageChecked(page);
/* a write-only open skips the read below */
2330 if ((file->f_flags & O_ACCMODE) != O_WRONLY) {
2332 * might as well read a page, it is fast enough. If we get
2333 * an error, we don't need to return it. cifs_write_end will
2334 * do a sync write instead since PG_uptodate isn't set.
2336 cifs_readpage_worker(file, page, &page_start);
2338 /* we could try using another file handle if there is one -
2339 but how would we lock it to prevent close of that handle
2340 racing with this read? In any case
2341 this will be written out by write_end so is fine */
/*
 * cifs_release_page - .releasepage handler.
 * @page: page the VM wants to release
 * @gfp:  allocation context, forwarded to fscache
 *
 * Pages with PagePrivate set take a separate path; all others are decided
 * by cifs_fscache_release_page().
 */
2348 static int cifs_release_page(struct page *page, gfp_t gfp)
2350 if (PagePrivate(page))
2353 return cifs_fscache_release_page(page, gfp);
/*
 * cifs_invalidate_page - .invalidatepage handler.
 * @page:   page being invalidated
 * @offset: offset within the page where invalidation starts
 *
 * Passes the page and its owning inode to cifs_fscache_invalidate_page().
 */
2356 static void cifs_invalidate_page(struct page *page, unsigned long offset)
2358 struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
2361 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
/*
 * cifs_launder_page - .launder_page handler: write a dirty page out
 * synchronously.
 * @page: page to clean
 *
 * Builds a WB_SYNC_ALL writeback_control covering exactly this page, writes
 * the page with cifs_writepage_locked() if it was dirty, and then
 * invalidates it in fscache.
 */
2364 static int cifs_launder_page(struct page *page)
2367 loff_t range_start = page_offset(page);
2368 loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
2369 struct writeback_control wbc = {
2370 .sync_mode = WB_SYNC_ALL,
2372 .range_start = range_start,
2373 .range_end = range_end,
2376 cFYI(1, "Launder page: %p", page);
/* only write when the dirty bit was actually set and has now been cleared */
2378 if (clear_page_dirty_for_io(page))
2379 rc = cifs_writepage_locked(page, &wbc);
2381 cifs_fscache_invalidate_page(page, page->mapping->host);
/*
 * cifs_oplock_break - work item that handles an oplock break for a file.
 * @work: work_struct embedded in the cifsFileInfo being processed
 *
 * For regular files the local lease is broken and dirty data is written
 * back; when read caching is no longer allowed the function also waits for
 * the writeback and invalidates the cached inode data.  Unless the break
 * was cancelled, an oplock release is then sent to the server.
 */
2385 void cifs_oplock_break(struct work_struct *work)
2387 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
2389 struct inode *inode = cfile->dentry->d_inode;
2390 struct cifsInodeInfo *cinode = CIFS_I(inode);
2393 if (inode && S_ISREG(inode->i_mode)) {
/* break the lease for reading or writing according to clientCanCacheRead */
2394 if (cinode->clientCanCacheRead)
2395 break_lease(inode, O_RDONLY);
2397 break_lease(inode, O_WRONLY);
2398 rc = filemap_fdatawrite(inode->i_mapping);
/* read caching lost: wait for the writeback and drop the cached pages */
2399 if (cinode->clientCanCacheRead == 0) {
2400 rc = filemap_fdatawait(inode->i_mapping);
2401 mapping_set_error(inode->i_mapping, rc);
2402 invalidate_remote_inode(inode);
2404 cFYI(1, "Oplock flush inode %p rc %d", inode, rc);
2408 * releasing stale oplock after recent reconnect of smb session using
2409 * a now incorrect file handle is not a data integrity issue but do
2410 * not bother sending an oplock release if session to server still is
2411 * disconnected since oplock already released by the server
2413 if (!cfile->oplock_break_cancelled) {
2414 rc = CIFSSMBLock(0, tlink_tcon(cfile->tlink), cfile->netfid,
2415 current->tgid, 0, 0, 0, 0,
2416 LOCKING_ANDX_OPLOCK_RELEASE, false,
2417 cinode->clientCanCacheRead ? 1 : 0);
2418 cFYI(1, "Oplock release rc = %d", rc);
/*
 * Address space operations for CIFS inodes, including the multi-page
 * .readpages handler.  Dirty tracking uses the generic
 * __set_page_dirty_nobuffers helper.
 */
2422 const struct address_space_operations cifs_addr_ops = {
2423 .readpage = cifs_readpage,
2424 .readpages = cifs_readpages,
2425 .writepage = cifs_writepage,
2426 .writepages = cifs_writepages,
2427 .write_begin = cifs_write_begin,
2428 .write_end = cifs_write_end,
2429 .set_page_dirty = __set_page_dirty_nobuffers,
2430 .releasepage = cifs_release_page,
2431 .invalidatepage = cifs_invalidate_page,
2432 .launder_page = cifs_launder_page,
/*
 * Variant of cifs_addr_ops without a .readpages entry; every other handler
 * is the same as in cifs_addr_ops.
 */
2436 * cifs_readpages requires the server to support a buffer large enough to
2437 * contain the header plus one complete page of data. Otherwise, we need
2438 * to leave cifs_readpages out of the address space operations.
2440 const struct address_space_operations cifs_addr_ops_smallbuf = {
2441 .readpage = cifs_readpage,
2442 .writepage = cifs_writepage,
2443 .writepages = cifs_writepages,
2444 .write_begin = cifs_write_begin,
2445 .write_end = cifs_write_end,
2446 .set_page_dirty = __set_page_dirty_nobuffers,
2447 .releasepage = cifs_release_page,
2448 .invalidatepage = cifs_invalidate_page,
2449 .launder_page = cifs_launder_page,