4 * vfs operations that deal with files
6 * Copyright (C) International Business Machines Corp., 2002,2010
7 * Author(s): Steve French (sfrench@us.ibm.com)
8 * Jeremy Allison (jra@samba.org)
10 * This library is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU Lesser General Public License as published
12 * by the Free Software Foundation; either version 2.1 of the License, or
13 * (at your option) any later version.
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
18 * the GNU Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public License
21 * along with this library; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25 #include <linux/backing-dev.h>
26 #include <linux/stat.h>
27 #include <linux/fcntl.h>
28 #include <linux/pagemap.h>
29 #include <linux/pagevec.h>
30 #include <linux/writeback.h>
31 #include <linux/task_io_accounting_ops.h>
32 #include <linux/delay.h>
33 #include <linux/mount.h>
34 #include <linux/slab.h>
35 #include <asm/div64.h>
39 #include "cifsproto.h"
40 #include "cifs_unicode.h"
41 #include "cifs_debug.h"
42 #include "cifs_fs_sb.h"
/*
 * cifs_convert_flags - derive an SMB desired-access mask from open flags
 * @flags: open flags; only the O_ACCMODE bits are tested in the visible code
 *
 * O_RDWR maps to GENERIC_READ | GENERIC_WRITE rather than GENERIC_ALL (the
 * comment in the body explains why). The final return builds the mask from
 * individual access rights instead of a GENERIC_* value.
 *
 * NOTE(review): this listing carries the original line numbers as a text
 * prefix and omits some lines; the values returned for O_RDONLY and O_WRONLY
 * are not visible here - presumably GENERIC_READ and GENERIC_WRITE, confirm
 * against the full file.
 */
45 static inline int cifs_convert_flags(unsigned int flags)
47 if ((flags & O_ACCMODE) == O_RDONLY)
49 else if ((flags & O_ACCMODE) == O_WRONLY)
51 else if ((flags & O_ACCMODE) == O_RDWR) {
52 /* GENERIC_ALL is too much permission to request
53 can cause unnecessary access denied on create */
54 /* return GENERIC_ALL; */
55 return (GENERIC_READ | GENERIC_WRITE);
58 return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
59 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
/*
 * cifs_posix_convert_flags - translate open flags into SMB_O_* flags
 * @flags: open flags as passed by the caller
 *
 * The access mode (O_RDONLY / O_WRONLY / O_RDWR) selects the base value and
 * further SMB_O_* bits are OR-ed in afterwards. cifs_posix_open() passes the
 * result to CIFSPOSIXCreate.
 *
 * NOTE(review): the conditions guarding SMB_O_CREAT, SMB_O_EXCL, SMB_O_TRUNC,
 * SMB_O_SYNC and SMB_O_DIRECT, the declaration of posix_flags and the return
 * are not visible in this listing - presumably each bit is set when the
 * matching O_* flag is present; confirm against the full file.
 */
63 static u32 cifs_posix_convert_flags(unsigned int flags)
67 if ((flags & O_ACCMODE) == O_RDONLY)
68 posix_flags = SMB_O_RDONLY;
69 else if ((flags & O_ACCMODE) == O_WRONLY)
70 posix_flags = SMB_O_WRONLY;
71 else if ((flags & O_ACCMODE) == O_RDWR)
72 posix_flags = SMB_O_RDWR;
75 posix_flags |= SMB_O_CREAT;
77 posix_flags |= SMB_O_EXCL;
79 posix_flags |= SMB_O_TRUNC;
80 /* be safe and imply O_SYNC for O_DSYNC */
82 posix_flags |= SMB_O_SYNC;
83 if (flags & O_DIRECTORY)
84 posix_flags |= SMB_O_DIRECTORY;
85 if (flags & O_NOFOLLOW)
86 posix_flags |= SMB_O_NOFOLLOW;
88 posix_flags |= SMB_O_DIRECT;
/*
 * cifs_get_disposition - pick the SMB create disposition for open flags
 * @flags: open flags; only O_CREAT, O_EXCL and O_TRUNC are examined
 *
 * The tests run from the most specific combination to the least specific, so
 * O_CREAT | O_EXCL wins over O_CREAT | O_TRUNC, which wins over plain O_CREAT
 * and plain O_TRUNC.
 *
 * NOTE(review): only the FILE_OVERWRITE_IF and FILE_OVERWRITE returns are
 * visible in this listing. The mapping table in cifs_nt_open() gives
 * FILE_CREATE for O_CREAT | O_EXCL, FILE_OPEN_IF for O_CREAT and FILE_OPEN
 * when none apply - confirm the missing returns match it.
 */
93 static inline int cifs_get_disposition(unsigned int flags)
95 if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
97 else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
98 return FILE_OVERWRITE_IF;
99 else if ((flags & O_CREAT) == O_CREAT)
101 else if ((flags & O_TRUNC) == O_TRUNC)
102 return FILE_OVERWRITE;
/*
 * cifs_posix_open - open or create a file through the POSIX create call
 * @full_path: path handed to CIFSPOSIXCreate (also logged via cFYI)
 * @pinode:    in/out inode pointer; when *pinode is NULL a new inode is
 *             obtained with cifs_iget(), otherwise cifs_fattr_to_inode() is
 *             applied to the existing inode
 * @sb:        superblock, used to reach the cifs_sb_info and for cifs_iget()
 * @mode:      requested mode; masked with ~current_umask() before the call
 * @f_flags:   open flags, translated by cifs_posix_convert_flags()
 * @poplock:   passed through to CIFSPOSIXCreate
 * @pnetfid:   passed through to CIFSPOSIXCreate
 * @xid:       passed through to CIFSPOSIXCreate
 *
 * A zeroed FILE_UNIX_BASIC_INFO buffer receives the server response. If the
 * response Type is -1 the function jumps to posix_open_ret and leaves the
 * attribute lookup to the caller. The tlink obtained here is released right
 * after the create call.
 *
 * NOTE(review): error checks, the condition in front of the second goto
 * (cifs_reopen_file() passes a NULL pinode), the posix_open_ret label and the
 * return are not visible in this listing; presumably the response buffer is
 * freed and rc returned there - confirm against the full file.
 */
107 int cifs_posix_open(char *full_path, struct inode **pinode,
108 struct super_block *sb, int mode, unsigned int f_flags,
109 __u32 *poplock, __u16 *pnetfid, int xid)
112 FILE_UNIX_BASIC_INFO *presp_data;
113 __u32 posix_flags = 0;
114 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
115 struct cifs_fattr fattr;
116 struct tcon_link *tlink;
117 struct cifsTconInfo *tcon;
119 cFYI(1, "posix open %s", full_path);
121 presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
122 if (presp_data == NULL)
125 tlink = cifs_sb_tlink(cifs_sb);
131 tcon = tlink_tcon(tlink);
132 mode &= ~current_umask();
134 posix_flags = cifs_posix_convert_flags(f_flags);
135 rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
136 poplock, full_path, cifs_sb->local_nls,
137 cifs_sb->mnt_cifs_flags &
138 CIFS_MOUNT_MAP_SPECIAL_CHR);
139 cifs_put_tlink(tlink);
144 if (presp_data->Type == cpu_to_le32(-1))
145 goto posix_open_ret; /* open ok, caller does qpathinfo */
148 goto posix_open_ret; /* caller does not need info */
150 cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);
152 /* get new inode and set it up */
153 if (*pinode == NULL) {
154 cifs_fill_uniqueid(sb, &fattr);
155 *pinode = cifs_iget(sb, &fattr);
161 cifs_fattr_to_inode(*pinode, &fattr);
/*
 * cifs_nt_open - open a file with the non-POSIX SMB open calls
 * @full_path: path to open
 * @inode:     inode passed on to cifs_get_inode_info*() after the open
 * @cifs_sb:   supplies local_nls and mnt_cifs_flags for the open request
 * @tcon:      tree connection; its session capabilities pick the open call
 * @f_flags:   open flags, converted by cifs_convert_flags() and
 *             cifs_get_disposition()
 * @poplock:   passed through to the open call
 * @pnetfid:   passed through to the open call
 * @xid:       passed through to the open call
 *
 * CIFSSMBOpen is used when the session advertises CAP_NT_SMBS; the
 * SMBLegacyOpen call below it is presumably the else branch (the line between
 * them is not visible in this listing). The FILE_ALL_INFO buffer handed to
 * the open call is afterwards passed to cifs_get_inode_info().
 *
 * NOTE(review): the return type line, the local declarations, the condition
 * selecting cifs_get_inode_info_unix() over cifs_get_inode_info(), the error
 * handling and the release of buf are not visible here - confirm against the
 * full file.
 */
170 cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
171 struct cifsTconInfo *tcon, unsigned int f_flags, __u32 *poplock,
172 __u16 *pnetfid, int xid)
179 desiredAccess = cifs_convert_flags(f_flags);
181 /*********************************************************************
182 * open flag mapping table:
184 * POSIX Flag CIFS Disposition
185 * ---------- ----------------
186 * O_CREAT FILE_OPEN_IF
187 * O_CREAT | O_EXCL FILE_CREATE
188 * O_CREAT | O_TRUNC FILE_OVERWRITE_IF
189 * O_TRUNC FILE_OVERWRITE
190 * none of the above FILE_OPEN
192 * Note that there is not a direct match between disposition
193 * FILE_SUPERSEDE (ie create whether or not file exists although
194 * O_CREAT | O_TRUNC is similar but truncates the existing
195 * file rather than creating a new file as FILE_SUPERSEDE does
196 * (which uses the attributes / metadata passed in on open call)
198 *? O_SYNC is a reasonable match to CIFS writethrough flag
199 *? and the read write flags match reasonably. O_LARGEFILE
200 *? is irrelevant because largefile support is always used
201 *? by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
202 * O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
203 *********************************************************************/
205 disposition = cifs_get_disposition(f_flags);
207 /* BB pass O_SYNC flag through on file attributes .. BB */
209 buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
213 if (tcon->ses->capabilities & CAP_NT_SMBS)
214 rc = CIFSSMBOpen(xid, tcon, full_path, disposition,
215 desiredAccess, CREATE_NOT_DIR, pnetfid, poplock, buf,
216 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
217 & CIFS_MOUNT_MAP_SPECIAL_CHR);
219 rc = SMBLegacyOpen(xid, tcon, full_path, disposition,
220 desiredAccess, CREATE_NOT_DIR, pnetfid, poplock, buf,
221 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
222 & CIFS_MOUNT_MAP_SPECIAL_CHR);
228 rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
231 rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
/*
 * cifs_new_fileinfo - allocate and register the private data for an open file
 * @fileHandle: server file handle, stored in ->netfid
 * @file:       VFS file; its dentry and f_flags are recorded and its
 *              private_data is pointed at the new structure
 * @tlink:      tcon link; a reference is taken with cifs_get_tlink()
 * @oplock:     oplock value passed to cifs_set_oplock_level()
 *
 * The new cifsFileInfo holds a dentry reference (dget) and records the
 * current tgid and fsuid. Under cifs_file_list_lock it is added to the tcon's
 * openFileList and to the inode's openFileList: at the head when the file is
 * open for reading, otherwise presumably at the tail (the else line itself is
 * not visible in this listing).
 *
 * NOTE(review): the return statements are not visible in this listing;
 * cifs_open() treats a NULL result as failure.
 */
239 struct cifsFileInfo *
240 cifs_new_fileinfo(__u16 fileHandle, struct file *file,
241 struct tcon_link *tlink, __u32 oplock)
243 struct dentry *dentry = file->f_path.dentry;
244 struct inode *inode = dentry->d_inode;
245 struct cifsInodeInfo *pCifsInode = CIFS_I(inode);
246 struct cifsFileInfo *pCifsFile;
248 pCifsFile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
249 if (pCifsFile == NULL)
/* the structure starts out with a single reference */
252 pCifsFile->count = 1;
253 pCifsFile->netfid = fileHandle;
254 pCifsFile->pid = current->tgid;
255 pCifsFile->uid = current_fsuid();
256 pCifsFile->dentry = dget(dentry);
257 pCifsFile->f_flags = file->f_flags;
258 pCifsFile->invalidHandle = false;
259 pCifsFile->tlink = cifs_get_tlink(tlink);
260 mutex_init(&pCifsFile->fh_mutex);
261 mutex_init(&pCifsFile->lock_mutex);
262 INIT_LIST_HEAD(&pCifsFile->llist);
263 INIT_WORK(&pCifsFile->oplock_break, cifs_oplock_break);
265 spin_lock(&cifs_file_list_lock);
266 list_add(&pCifsFile->tlist, &(tlink_tcon(tlink)->openFileList));
267 /* if readable file instance put first in list*/
268 if (file->f_mode & FMODE_READ)
269 list_add(&pCifsFile->flist, &pCifsInode->openFileList);
271 list_add_tail(&pCifsFile->flist, &pCifsInode->openFileList);
272 spin_unlock(&cifs_file_list_lock);
274 cifs_set_oplock_level(pCifsInode, oplock);
276 file->private_data = pCifsFile;
/*
 * cifsFileInfo_put - drop one reference to a cifsFileInfo
 * @cifs_file: structure whose count is decremented
 *
 * The count is decremented under cifs_file_list_lock. If it is still above
 * zero the lock is released and, in the visible lines, nothing else is done
 * on that path. Otherwise the entry is removed from both lists, and if the
 * inode has no other open instances the oplock level is reset to 0 (mounts
 * with CIFS_MOUNT_STRICT_IO also set invalid_mapping). The handle is then
 * closed on the server unless the tcon needs reconnect or the handle is
 * already invalid, stored lock records are unlinked under lock_mutex, and
 * the tlink and dentry references are dropped.
 *
 * NOTE(review): the early return, the freeing of the lock records and of
 * cifs_file itself, and the xid handling are not visible in this listing -
 * confirm against the full file.
 */
281 * Release a reference on the file private data. This may involve closing
282 * the filehandle out on the server. Must be called without holding
283 * cifs_file_list_lock.
285 void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
287 struct inode *inode = cifs_file->dentry->d_inode;
288 struct cifsTconInfo *tcon = tlink_tcon(cifs_file->tlink);
289 struct cifsInodeInfo *cifsi = CIFS_I(inode);
290 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
291 struct cifsLockInfo *li, *tmp;
293 spin_lock(&cifs_file_list_lock);
294 if (--cifs_file->count > 0) {
295 spin_unlock(&cifs_file_list_lock);
299 /* remove it from the lists */
300 list_del(&cifs_file->flist);
301 list_del(&cifs_file->tlist);
303 if (list_empty(&cifsi->openFileList)) {
304 cFYI(1, "closing last open instance for inode %p",
305 cifs_file->dentry->d_inode);
307 /* in strict cache mode we need to invalidate the mapping on the last
308 close because it may cause an error when we open this file
309 again and get at least level II oplock */
310 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
311 CIFS_I(inode)->invalid_mapping = true;
313 cifs_set_oplock_level(cifsi, 0);
315 spin_unlock(&cifs_file_list_lock);
317 if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
321 rc = CIFSSMBClose(xid, tcon, cifs_file->netfid);
325 /* Delete any outstanding lock records. We'll lose them when the file
328 mutex_lock(&cifs_file->lock_mutex);
329 list_for_each_entry_safe(li, tmp, &cifs_file->llist, llist) {
330 list_del(&li->llist);
333 mutex_unlock(&cifs_file->lock_mutex);
335 cifs_put_tlink(cifs_file->tlink);
336 dput(cifs_file->dentry);
/*
 * cifs_open - open handler for regular files
 * @inode: inode being opened
 * @file:  VFS file; its f_flags drive the open and its dentry gives the path
 *
 * Flow visible here: obtain the tlink and tcon, build the full path, and try
 * cifs_posix_open() when the tcon has unix extensions with the POSIX path
 * operations capability and has not been marked broken_posix_open. If the
 * POSIX open was not used or did not succeed, cifs_nt_open() is called. A
 * cifsFileInfo is then attached with cifs_new_fileinfo(); if that returns
 * NULL the server handle is closed again. When the open reports
 * CIFS_CREATE_ACTION on a unix_ext mount and the POSIX open was not used,
 * the mode is sent afterwards with CIFSSMBUnixSetFileInfo().
 *
 * A POSIX open that fails with -EINVAL or -EOPNOTSUPP sets
 * tcon->broken_posix_open, so later opens on this tcon skip that path.
 *
 * NOTE(review): several error-path lines, the cleanup label, the xid
 * handling and the return are not visible in this listing; the tlink is
 * released with cifs_put_tlink() near the end.
 */
340 int cifs_open(struct inode *inode, struct file *file)
345 struct cifs_sb_info *cifs_sb;
346 struct cifsTconInfo *tcon;
347 struct tcon_link *tlink;
348 struct cifsFileInfo *pCifsFile = NULL;
349 char *full_path = NULL;
350 bool posix_open_ok = false;
355 cifs_sb = CIFS_SB(inode->i_sb);
356 tlink = cifs_sb_tlink(cifs_sb);
359 return PTR_ERR(tlink);
361 tcon = tlink_tcon(tlink);
363 full_path = build_path_from_dentry(file->f_path.dentry);
364 if (full_path == NULL) {
369 cFYI(1, "inode = 0x%p file flags are 0x%x for %s",
370 inode, file->f_flags, full_path);
377 if (!tcon->broken_posix_open && tcon->unix_ext &&
378 (tcon->ses->capabilities & CAP_UNIX) &&
379 (CIFS_UNIX_POSIX_PATH_OPS_CAP &
380 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
381 /* can not refresh inode info since size could be stale */
382 rc = cifs_posix_open(full_path, &inode, inode->i_sb,
383 cifs_sb->mnt_file_mode /* ignored */,
384 file->f_flags, &oplock, &netfid, xid);
386 cFYI(1, "posix open succeeded");
387 posix_open_ok = true;
388 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
389 if (tcon->ses->serverNOS)
390 cERROR(1, "server %s of type %s returned"
391 " unexpected error on SMB posix open"
392 ", disabling posix open support."
393 " Check if server update available.",
394 tcon->ses->serverName,
395 tcon->ses->serverNOS);
396 tcon->broken_posix_open = true;
397 } else if ((rc != -EIO) && (rc != -EREMOTE) &&
398 (rc != -EOPNOTSUPP)) /* path not found or net err */
400 /* else fallthrough to retry open the old way on network i/o
404 if (!posix_open_ok) {
405 rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
406 file->f_flags, &oplock, &netfid, xid);
411 pCifsFile = cifs_new_fileinfo(netfid, file, tlink, oplock);
412 if (pCifsFile == NULL) {
413 CIFSSMBClose(xid, tcon, netfid);
418 cifs_fscache_set_inode_cookie(inode, file);
420 if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
421 /* time to set mode which we can not set earlier due to
422 problems creating new read-only files */
423 struct cifs_unix_set_info_args args = {
424 .mode = inode->i_mode,
427 .ctime = NO_CHANGE_64,
428 .atime = NO_CHANGE_64,
429 .mtime = NO_CHANGE_64,
432 CIFSSMBUnixSetFileInfo(xid, tcon, &args, netfid,
439 cifs_put_tlink(tlink);
443 /* Try to reacquire byte range locks that were released when session */
444 /* to server was lost */
/*
 * @cifsFile: open file whose locks would be re-established
 *
 * NOTE(review): only a placeholder comment is visible in the body, so
 * whether any relocking is actually performed cannot be told from this
 * listing. cifs_reopen_file() calls this once a new handle has been stored.
 */
445 static int cifs_relock_file(struct cifsFileInfo *cifsFile)
449 /* BB list all locks open on this file and relock */
/*
 * cifs_reopen_file - obtain a fresh server handle for an invalidated file
 * @pCifsFile: open file whose handle is to be replaced
 * @can_flush: presumably gates the write-back and inode-info step near the
 *             end (the test itself is not visible in this listing); the
 *             write paths in this file pass false to avoid a deadlock
 *
 * Serialised by pCifsFile->fh_mutex. If invalidHandle is already clear the
 * mutex is dropped right away. Otherwise the path is rebuilt from the dentry
 * and the file is reopened: via cifs_posix_open() with O_CREAT, O_EXCL and
 * O_TRUNC masked off when the tcon supports POSIX path operations, and via
 * CIFSSMBOpen with disposition FILE_OPEN as the fallback. The new netfid is
 * stored, invalidHandle is cleared, the oplock level is updated and
 * cifs_relock_file() is called.
 *
 * NOTE(review): return statements, the reopen_error_exit label, the xid
 * handling and several conditions are not visible in this listing - confirm
 * against the full file.
 */
454 static int cifs_reopen_file(struct cifsFileInfo *pCifsFile, bool can_flush)
459 struct cifs_sb_info *cifs_sb;
460 struct cifsTconInfo *tcon;
461 struct cifsInodeInfo *pCifsInode;
463 char *full_path = NULL;
465 int disposition = FILE_OPEN;
469 mutex_lock(&pCifsFile->fh_mutex);
470 if (!pCifsFile->invalidHandle) {
471 mutex_unlock(&pCifsFile->fh_mutex);
477 inode = pCifsFile->dentry->d_inode;
478 cifs_sb = CIFS_SB(inode->i_sb);
479 tcon = tlink_tcon(pCifsFile->tlink);
481 /* can not grab rename sem here because various ops, including
482 those that already have the rename sem can end up causing writepage
483 to get called and if the server was down that means we end up here,
484 and we can never tell if the caller already has the rename_sem */
485 full_path = build_path_from_dentry(pCifsFile->dentry);
486 if (full_path == NULL) {
488 mutex_unlock(&pCifsFile->fh_mutex);
493 cFYI(1, "inode = 0x%p file flags 0x%x for %s",
494 inode, pCifsFile->f_flags, full_path);
501 if (tcon->unix_ext && (tcon->ses->capabilities & CAP_UNIX) &&
502 (CIFS_UNIX_POSIX_PATH_OPS_CAP &
503 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
506 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
507 * original open. Must mask them off for a reopen.
509 unsigned int oflags = pCifsFile->f_flags &
510 ~(O_CREAT | O_EXCL | O_TRUNC);
512 rc = cifs_posix_open(full_path, NULL, inode->i_sb,
513 cifs_sb->mnt_file_mode /* ignored */,
514 oflags, &oplock, &netfid, xid);
516 cFYI(1, "posix reopen succeeded");
519 /* fallthrough to retry open the old way on errors, especially
520 in the reconnect path it is important to retry hard */
523 desiredAccess = cifs_convert_flags(pCifsFile->f_flags);
525 /* Can not refresh inode by passing in file_info buf to be returned
526 by SMBOpen and then calling get_inode_info with returned buf
527 since file might have write behind data that needs to be flushed
528 and server version of file size can be stale. If we knew for sure
529 that inode was not dirty locally we could do this */
531 rc = CIFSSMBOpen(xid, tcon, full_path, disposition, desiredAccess,
532 CREATE_NOT_DIR, &netfid, &oplock, NULL,
533 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
534 CIFS_MOUNT_MAP_SPECIAL_CHR);
536 mutex_unlock(&pCifsFile->fh_mutex);
537 cFYI(1, "cifs_open returned 0x%x", rc);
538 cFYI(1, "oplock: %d", oplock);
539 goto reopen_error_exit;
543 pCifsFile->netfid = netfid;
544 pCifsFile->invalidHandle = false;
545 mutex_unlock(&pCifsFile->fh_mutex);
546 pCifsInode = CIFS_I(inode);
549 rc = filemap_write_and_wait(inode->i_mapping);
550 mapping_set_error(inode->i_mapping, rc);
553 rc = cifs_get_inode_info_unix(&inode,
554 full_path, inode->i_sb, xid);
556 rc = cifs_get_inode_info(&inode,
557 full_path, NULL, inode->i_sb,
559 } /* else we are writing out data to server already
560 and could deadlock if we tried to flush data, and
561 since we do not know if we have data that would
562 invalidate the current end of file on the server
563 we can not go to the server to get the new inod
566 cifs_set_oplock_level(pCifsInode, oplock);
568 cifs_relock_file(pCifsFile);
/*
 * cifs_close - release handler for regular files
 * @inode: not referenced in the visible lines
 * @file:  VFS file whose private_data holds the cifsFileInfo
 *
 * Drops the reference held through file->private_data with
 * cifsFileInfo_put() and clears the pointer.
 *
 * NOTE(review): the return statement is not visible in this listing; per the
 * comment in the body its value is ignored by the caller anyway.
 */
576 int cifs_close(struct inode *inode, struct file *file)
578 cifsFileInfo_put(file->private_data);
579 file->private_data = NULL;
581 /* return code from the ->release op is always ignored */
/*
 * cifs_closedir - release handler for directories
 * @inode: only used for the debug message in the visible lines
 * @file:  VFS file whose private_data holds the search state
 *
 * If the search has not reached its end and the handle is still valid, the
 * handle is marked invalid under cifs_file_list_lock and closed on the server
 * with CIFSFindClose(); the result is only logged. A network buffer still
 * attached to the search state is detached and released with
 * cifs_small_buf_release() when srch_inf.smallBuf is set, otherwise
 * presumably with cifs_buf_release(). Finally the tlink reference is dropped
 * and the private data freed.
 *
 * NOTE(review): the guard around the private-data handling, the else lines,
 * the xid handling and the return are not visible in this listing.
 */
585 int cifs_closedir(struct inode *inode, struct file *file)
589 struct cifsFileInfo *pCFileStruct = file->private_data;
592 cFYI(1, "Closedir inode = 0x%p", inode);
597 struct cifsTconInfo *pTcon = tlink_tcon(pCFileStruct->tlink);
599 cFYI(1, "Freeing private data in close dir");
600 spin_lock(&cifs_file_list_lock);
601 if (!pCFileStruct->srch_inf.endOfSearch &&
602 !pCFileStruct->invalidHandle) {
603 pCFileStruct->invalidHandle = true;
604 spin_unlock(&cifs_file_list_lock);
605 rc = CIFSFindClose(xid, pTcon, pCFileStruct->netfid);
606 cFYI(1, "Closing uncompleted readdir with rc %d",
608 /* not much we can do if it fails anyway, ignore rc */
611 spin_unlock(&cifs_file_list_lock);
612 ptmp = pCFileStruct->srch_inf.ntwrk_buf_start;
614 cFYI(1, "closedir free smb buf in srch struct");
615 pCFileStruct->srch_inf.ntwrk_buf_start = NULL;
616 if (pCFileStruct->srch_inf.smallBuf)
617 cifs_small_buf_release(ptmp);
619 cifs_buf_release(ptmp);
621 cifs_put_tlink(pCFileStruct->tlink);
622 kfree(file->private_data);
623 file->private_data = NULL;
625 /* BB can we lock the filestruct while this is going on? */
/*
 * store_file_lock - remember a byte-range lock on an open file
 * @fid:      open file whose llist receives the record
 * @len:      lock length
 * @offset:   lock start offset
 * @lockType: lock type flags
 *
 * Allocates a cifsLockInfo with kmalloc and links it onto fid->llist under
 * fid->lock_mutex.
 *
 * NOTE(review): the NULL check, the field assignments and the return value
 * are not visible in this listing - presumably len, offset and lockType are
 * copied into the record (cifs_lock() later reads li->offset and
 * li->length); confirm against the full file.
 */
630 static int store_file_lock(struct cifsFileInfo *fid, __u64 len,
631 __u64 offset, __u8 lockType)
633 struct cifsLockInfo *li =
634 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
640 mutex_lock(&fid->lock_mutex);
641 list_add(&li->llist, &fid->llist);
642 mutex_unlock(&fid->lock_mutex);
/*
 * cifs_lock - byte-range lock handler
 * @file:   file being locked; private_data supplies the tlink and netfid
 * @cmd:    lock command, only logged in the visible lines
 * @pfLock: lock description (flags, type, start, end); fl_type is rewritten
 *          on the lock-test path
 *
 * The length is computed as 1 + fl_end - fl_start. fl_type selects the SMB
 * lock type, with F_RDLCK and F_SHLCK adding LOCKING_ANDX_SHARED_LOCK.
 * The POSIX byte-range call (CIFSSMBPosixLock) is a candidate when the
 * session has CAP_UNIX, the share advertises CIFS_UNIX_FCNTL_CAP and the
 * mount does not set CIFS_MOUNT_NOPOSIXBRL; CIFSSMBLock is the other path.
 *
 * On the CIFSSMBLock set path, locks are recorded with store_file_lock(),
 * and an unlock walks the stored records under lock_mutex and releases each
 * one that the requested range covers completely. For FL_POSIX requests
 * posix_lock_file_wait() is called at the end.
 *
 * NOTE(review): many lines of this function (assignments to numLock,
 * numUnlock, posix_locking and wait_flag, the branch on the lock-test
 * command, error checks and the return) are not visible in this listing, so
 * the exact branch structure must be confirmed against the full file.
 */
646 int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
652 bool wait_flag = false;
653 struct cifs_sb_info *cifs_sb;
654 struct cifsTconInfo *tcon;
656 __u8 lockType = LOCKING_ANDX_LARGE_FILES;
657 bool posix_locking = 0;
659 length = 1 + pfLock->fl_end - pfLock->fl_start;
663 cFYI(1, "Lock parm: 0x%x flockflags: "
664 "0x%x flocktype: 0x%x start: %lld end: %lld",
665 cmd, pfLock->fl_flags, pfLock->fl_type, pfLock->fl_start,
668 if (pfLock->fl_flags & FL_POSIX)
670 if (pfLock->fl_flags & FL_FLOCK)
672 if (pfLock->fl_flags & FL_SLEEP) {
673 cFYI(1, "Blocking lock");
676 if (pfLock->fl_flags & FL_ACCESS)
677 cFYI(1, "Process suspended by mandatory locking - "
678 "not implemented yet");
679 if (pfLock->fl_flags & FL_LEASE)
680 cFYI(1, "Lease on file - not implemented yet");
681 if (pfLock->fl_flags &
682 (~(FL_POSIX | FL_FLOCK | FL_SLEEP | FL_ACCESS | FL_LEASE)))
683 cFYI(1, "Unknown lock flags 0x%x", pfLock->fl_flags);
685 if (pfLock->fl_type == F_WRLCK) {
688 } else if (pfLock->fl_type == F_UNLCK) {
691 /* Check if unlock includes more than
693 } else if (pfLock->fl_type == F_RDLCK) {
695 lockType |= LOCKING_ANDX_SHARED_LOCK;
697 } else if (pfLock->fl_type == F_EXLCK) {
700 } else if (pfLock->fl_type == F_SHLCK) {
702 lockType |= LOCKING_ANDX_SHARED_LOCK;
705 cFYI(1, "Unknown type of lock");
707 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
708 tcon = tlink_tcon(((struct cifsFileInfo *)file->private_data)->tlink);
709 netfid = ((struct cifsFileInfo *)file->private_data)->netfid;
711 if ((tcon->ses->capabilities & CAP_UNIX) &&
712 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
713 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
715 /* BB add code here to normalize offset and length to
716 account for negative length which we can not accept over the
721 if (lockType & LOCKING_ANDX_SHARED_LOCK)
722 posix_lock_type = CIFS_RDLCK;
724 posix_lock_type = CIFS_WRLCK;
725 rc = CIFSSMBPosixLock(xid, tcon, netfid, 1 /* get */,
727 posix_lock_type, wait_flag);
732 /* BB we could chain these into one lock request BB */
733 rc = CIFSSMBLock(xid, tcon, netfid, length, pfLock->fl_start,
734 0, 1, lockType, 0 /* wait flag */, 0);
736 rc = CIFSSMBLock(xid, tcon, netfid, length,
737 pfLock->fl_start, 1 /* numUnlock */ ,
738 0 /* numLock */ , lockType,
739 0 /* wait flag */, 0);
740 pfLock->fl_type = F_UNLCK;
742 cERROR(1, "Error unlocking previously locked "
743 "range %d during test of lock", rc);
747 /* if rc == ERR_SHARING_VIOLATION ? */
750 if (lockType & LOCKING_ANDX_SHARED_LOCK) {
751 pfLock->fl_type = F_WRLCK;
753 rc = CIFSSMBLock(xid, tcon, netfid, length,
754 pfLock->fl_start, 0, 1,
755 lockType | LOCKING_ANDX_SHARED_LOCK,
756 0 /* wait flag */, 0);
758 rc = CIFSSMBLock(xid, tcon, netfid,
759 length, pfLock->fl_start, 1, 0,
761 LOCKING_ANDX_SHARED_LOCK,
762 0 /* wait flag */, 0);
763 pfLock->fl_type = F_RDLCK;
765 cERROR(1, "Error unlocking "
766 "previously locked range %d "
767 "during test of lock", rc);
770 pfLock->fl_type = F_WRLCK;
780 if (!numLock && !numUnlock) {
781 /* if no lock or unlock then nothing
782 to do since we do not know what it is */
789 if (lockType & LOCKING_ANDX_SHARED_LOCK)
790 posix_lock_type = CIFS_RDLCK;
792 posix_lock_type = CIFS_WRLCK;
795 posix_lock_type = CIFS_UNLCK;
797 rc = CIFSSMBPosixLock(xid, tcon, netfid, 0 /* set */,
799 posix_lock_type, wait_flag);
801 struct cifsFileInfo *fid = file->private_data;
804 rc = CIFSSMBLock(xid, tcon, netfid, length,
805 pfLock->fl_start, 0, numLock, lockType,
809 /* For Windows locks we must store them. */
810 rc = store_file_lock(fid, length,
811 pfLock->fl_start, lockType);
813 } else if (numUnlock) {
814 /* For each stored lock that this unlock overlaps
815 completely, unlock it. */
817 struct cifsLockInfo *li, *tmp;
820 mutex_lock(&fid->lock_mutex);
821 list_for_each_entry_safe(li, tmp, &fid->llist, llist) {
822 if (pfLock->fl_start <= li->offset &&
823 (pfLock->fl_start + length) >=
824 (li->offset + li->length)) {
825 stored_rc = CIFSSMBLock(xid, tcon,
832 list_del(&li->llist);
837 mutex_unlock(&fid->lock_mutex);
841 if (pfLock->fl_flags & FL_POSIX)
842 posix_lock_file_wait(file, pfLock);
847 /* update the file size (if needed) after a write */
/*
 * @cifsi:         inode info whose server_eof may be advanced
 * @offset:        file offset at which the write started
 * @bytes_written: number of bytes the write covered
 *
 * server_eof is replaced only when offset + bytes_written lies beyond its
 * current value; in the visible lines it is never lowered.
 *
 * NOTE(review): the return type line is not visible in this listing.
 */
849 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
850 unsigned int bytes_written)
852 loff_t end_of_write = offset + bytes_written;
854 if (end_of_write > cifsi->server_eof)
855 cifsi->server_eof = end_of_write;
/*
 * cifs_user_write - write a user-space buffer to the server
 * @file:       open file; private_data must hold the cifsFileInfo
 * @write_data: user buffer to send
 * @write_size: number of bytes requested (passed by address to
 *              generic_write_checks(), which may adjust it)
 * @poffset:    file position, advanced by the bytes written
 *
 * Data goes out in chunks of at most cifs_sb->wsize via CIFSSMBWrite. Inside
 * the -EAGAIN retry loop an invalid handle is reopened with
 * cifs_reopen_file(open_file, false) - false because flushing from here
 * could deadlock. After each chunk the cached server EOF and *poffset are
 * advanced. When anything was written, i_size is raised under i_lock if the
 * new offset is beyond it, and mark_inode_dirty_sync() is called afterwards.
 *
 * The visible return paths return the total number of bytes written.
 *
 * NOTE(review): error returns, xid handling and the loop's exit logic are
 * not visible in this listing - confirm against the full file.
 */
858 ssize_t cifs_user_write(struct file *file, const char __user *write_data,
859 size_t write_size, loff_t *poffset)
861 struct inode *inode = file->f_path.dentry->d_inode;
863 unsigned int bytes_written = 0;
864 unsigned int total_written;
865 struct cifs_sb_info *cifs_sb;
866 struct cifsTconInfo *pTcon;
868 struct cifsFileInfo *open_file;
869 struct cifsInodeInfo *cifsi = CIFS_I(inode);
871 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
873 /* cFYI(1, " write %d bytes to offset %lld of %s", write_size,
874 *poffset, file->f_path.dentry->d_name.name); */
876 if (file->private_data == NULL)
879 open_file = file->private_data;
880 pTcon = tlink_tcon(open_file->tlink);
882 rc = generic_write_checks(file, poffset, &write_size, 0);
888 for (total_written = 0; write_size > total_written;
889 total_written += bytes_written) {
891 while (rc == -EAGAIN) {
892 if (file->private_data == NULL) {
893 /* file has been closed on us */
895 /* if we have gotten here we have written some data
896 and blocked, and the file has been freed on us while
897 we blocked so return what we managed to write */
898 return total_written;
900 if (open_file->invalidHandle) {
901 /* we could deadlock if we called
902 filemap_fdatawait from here so tell
903 reopen_file not to flush data to server
905 rc = cifs_reopen_file(open_file, false);
910 rc = CIFSSMBWrite(xid, pTcon,
912 min_t(const int, cifs_sb->wsize,
913 write_size - total_written),
914 *poffset, &bytes_written,
915 NULL, write_data + total_written, 0);
917 if (rc || (bytes_written == 0)) {
925 cifs_update_eof(cifsi, *poffset, bytes_written);
926 *poffset += bytes_written;
930 cifs_stats_bytes_written(pTcon, total_written);
932 /* Do not update local mtime - server will set its actual value on write
933 * inode->i_ctime = inode->i_mtime =
934 * current_fs_time(inode->i_sb);*/
935 if (total_written > 0) {
936 spin_lock(&inode->i_lock);
937 if (*poffset > inode->i_size)
938 i_size_write(inode, *poffset);
939 spin_unlock(&inode->i_lock);
941 mark_inode_dirty_sync(inode);
944 return total_written;
/*
 * cifs_write - write a kernel buffer through an already-open handle
 * @open_file:  handle to write through; also supplies the dentry and tlink
 * @write_data: kernel buffer to send
 * @write_size: number of bytes to write
 * (the last parameter line is not visible in this listing; the body
 *  dereferences poffset as the file position and advances it)
 *
 * Data goes out in chunks of at most cifs_sb->wsize. Inside the -EAGAIN
 * retry loop an invalid handle is reopened with
 * cifs_reopen_file(open_file, false). Two send calls are visible:
 * CIFSSMBWrite2 with a kvec whose slot 0 is reserved for the SMB header, and
 * CIFSSMBWrite; the choice depends on experimEnabled and the server's
 * signing bits (the end of that condition is not visible here). After each
 * chunk the cached server EOF and *poffset are advanced; when anything was
 * written, i_size is raised under i_lock if needed and
 * mark_inode_dirty_sync() is called afterwards.
 *
 * The visible return path returns the total number of bytes written.
 *
 * NOTE(review): error returns, xid handling and the loop's exit logic are
 * not visible in this listing - confirm against the full file.
 */
947 static ssize_t cifs_write(struct cifsFileInfo *open_file,
948 const char *write_data, size_t write_size,
952 unsigned int bytes_written = 0;
953 unsigned int total_written;
954 struct cifs_sb_info *cifs_sb;
955 struct cifsTconInfo *pTcon;
957 struct dentry *dentry = open_file->dentry;
958 struct cifsInodeInfo *cifsi = CIFS_I(dentry->d_inode);
960 cifs_sb = CIFS_SB(dentry->d_sb);
962 cFYI(1, "write %zd bytes to offset %lld of %s", write_size,
963 *poffset, dentry->d_name.name);
965 pTcon = tlink_tcon(open_file->tlink);
969 for (total_written = 0; write_size > total_written;
970 total_written += bytes_written) {
972 while (rc == -EAGAIN) {
973 if (open_file->invalidHandle) {
974 /* we could deadlock if we called
975 filemap_fdatawait from here so tell
976 reopen_file not to flush data to
978 rc = cifs_reopen_file(open_file, false);
982 if (experimEnabled || (pTcon->ses->server &&
983 ((pTcon->ses->server->secMode &
984 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
989 len = min((size_t)cifs_sb->wsize,
990 write_size - total_written);
991 /* iov[0] is reserved for smb header */
992 iov[1].iov_base = (char *)write_data +
994 iov[1].iov_len = len;
995 rc = CIFSSMBWrite2(xid, pTcon,
996 open_file->netfid, len,
997 *poffset, &bytes_written,
1000 rc = CIFSSMBWrite(xid, pTcon,
1002 min_t(const int, cifs_sb->wsize,
1003 write_size - total_written),
1004 *poffset, &bytes_written,
1005 write_data + total_written,
1008 if (rc || (bytes_written == 0)) {
1016 cifs_update_eof(cifsi, *poffset, bytes_written);
1017 *poffset += bytes_written;
1021 cifs_stats_bytes_written(pTcon, total_written);
1023 if (total_written > 0) {
1024 spin_lock(&dentry->d_inode->i_lock);
1025 if (*poffset > dentry->d_inode->i_size)
1026 i_size_write(dentry->d_inode, *poffset);
1027 spin_unlock(&dentry->d_inode->i_lock);
1029 mark_inode_dirty_sync(dentry->d_inode);
1031 return total_written;
/*
 * find_readable_file - find an open handle on the inode usable for reading
 * @cifs_inode: inode whose openFileList is searched
 * (the second parameter line is not visible in this listing; the body tests
 *  fsuid_only)
 *
 * Walks the list under cifs_file_list_lock. Entries owned by another fsuid
 * are passed over when fsuid_only is in effect; per the comment in the body
 * that filter applies only on multiuser mounts. The first readable entry
 * with a valid handle gets a reference via cifsFileInfo_get() and the lock
 * is dropped. The walk stops at the first write-only entry because those are
 * kept at the end of the list.
 *
 * NOTE(review): the return statements are not visible; presumably the
 * referenced entry is returned on success and NULL otherwise, with the
 * caller releasing the reference through cifsFileInfo_put() - confirm.
 */
1034 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1037 struct cifsFileInfo *open_file = NULL;
1038 struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1040 /* only filter by fsuid on multiuser mounts */
1041 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1044 spin_lock(&cifs_file_list_lock);
1045 /* we could simply get the first_list_entry since write-only entries
1046 are always at the end of the list but since the first entry might
1047 have a close pending, we go through the whole list */
1048 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1049 if (fsuid_only && open_file->uid != current_fsuid())
1051 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1052 if (!open_file->invalidHandle) {
1053 /* found a good file */
1054 /* lock it so it will not be closed on us */
1055 cifsFileInfo_get(open_file);
1056 spin_unlock(&cifs_file_list_lock);
1058 } /* else might as well continue, and look for
1059 another, or simply have the caller reopen it
1060 again rather than trying to fix this handle */
1061 } else /* write only file */
1062 break; /* write only files are last so must be done */
1064 spin_unlock(&cifs_file_list_lock);
/*
 * find_writable_file - find an open handle on the inode usable for writing
 * @cifs_inode: inode whose openFileList is searched; a NULL value is logged
 *              as an error
 * (the second parameter line is not visible in this listing; the body tests
 *  fsuid_only)
 *
 * The list is walked under cifs_file_list_lock in up to two passes: the
 * first considers only handles opened by the current tgid, the second
 * (any_available) accepts any process. Entries owned by another fsuid are
 * passed over when fsuid_only is in effect. A writable entry gets a
 * reference via cifsFileInfo_get(); if its handle is invalid the spinlock is
 * dropped, because the reopen can block, and cifs_reopen_file() is tried.
 * When the reopen fails the reference is put, the lock is retaken and the
 * walk moves on rather than restarting, so reopen errors cannot loop.
 *
 * NOTE(review): the return statements and the refind_writable label are not
 * visible in this listing; presumably the referenced entry is returned on
 * success and NULL otherwise - confirm against the full file.
 */
1068 struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
1071 struct cifsFileInfo *open_file;
1072 struct cifs_sb_info *cifs_sb;
1073 bool any_available = false;
1076 /* Having a null inode here (because mapping->host was set to zero by
1077 the VFS or MM) should not happen but we had reports of an oops (due to
1078 it being zero) during stress testcases so we need to check for it */
1080 if (cifs_inode == NULL) {
1081 cERROR(1, "Null inode passed to cifs_writeable_file");
1086 cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1088 /* only filter by fsuid on multiuser mounts */
1089 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1092 spin_lock(&cifs_file_list_lock);
1094 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1095 if (!any_available && open_file->pid != current->tgid)
1097 if (fsuid_only && open_file->uid != current_fsuid())
1099 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
1100 cifsFileInfo_get(open_file);
1102 if (!open_file->invalidHandle) {
1103 /* found a good writable file */
1104 spin_unlock(&cifs_file_list_lock);
1108 spin_unlock(&cifs_file_list_lock);
1110 /* Had to unlock since following call can block */
1111 rc = cifs_reopen_file(open_file, false);
1115 /* if it fails, try another handle if possible */
1116 cFYI(1, "wp failed on reopen file");
1117 cifsFileInfo_put(open_file);
1119 spin_lock(&cifs_file_list_lock);
1121 /* else we simply continue to the next entry. Thus
1122 we do not loop on reopen errors. If we
1123 can not reopen the file, for example if we
1124 reconnected to a server with another client
1125 racing to delete or lock the file we would not
1126 make progress if we restarted before the beginning
1127 of the loop here. */
1130 /* couldn't find useable FH with same pid, try any available */
1131 if (!any_available) {
1132 any_available = true;
1133 goto refind_writable;
1135 spin_unlock(&cifs_file_list_lock);
/*
 * cifs_partialpagewrite - write part of a page cache page to the server
 * @page: page to write from; its index gives the base file offset
 * @from: start offset within the page
 * @to:   end offset within the page
 *
 * The page is mapped with kmap(). Requests with to > PAGE_CACHE_SIZE or
 * from > to take a separate branch whose body is not visible here
 * (presumably an error return). A start offset beyond i_size is treated as a
 * race with truncate and returns 0. The end is clamped against i_size so the
 * write does not extend the file. The data is sent with cifs_write() through
 * a handle from find_writable_file(), whose reference is dropped afterwards,
 * and the inode's atime and mtime are set to the current time.
 *
 * NOTE(review): the kunmap calls, the rc assignments and the final return
 * are not visible in this listing - confirm against the full file.
 */
1139 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1141 struct address_space *mapping = page->mapping;
1142 loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
1145 int bytes_written = 0;
1146 struct inode *inode;
1147 struct cifsFileInfo *open_file;
1149 if (!mapping || !mapping->host)
1152 inode = page->mapping->host;
1154 offset += (loff_t)from;
1155 write_data = kmap(page);
1158 if ((to > PAGE_CACHE_SIZE) || (from > to)) {
1163 /* racing with truncate? */
1164 if (offset > mapping->host->i_size) {
1166 return 0; /* don't care */
1169 /* check to make sure that we are not extending the file */
1170 if (mapping->host->i_size - offset < (loff_t)to)
1171 to = (unsigned)(mapping->host->i_size - offset);
1173 open_file = find_writable_file(CIFS_I(mapping->host), false);
1175 bytes_written = cifs_write(open_file, write_data,
1176 to - from, &offset);
1177 cifsFileInfo_put(open_file);
1178 /* Does mm or vfs already set times? */
1179 inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb);
1180 if ((bytes_written > 0) && (offset))
1182 else if (bytes_written < 0)
1185 cFYI(1, "No writeable filehandles for inode");
/*
 * cifs_writepages - write back dirty pages, batching consecutive ones
 * @mapping: address space to write back
 * @wbc:     writeback control (range, sync mode, nr_to_write)
 *
 * Falls back to generic_writepages() when wsize is smaller than a page, and
 * on further paths that appear to cover a failed kvec allocation, a missing
 * writable handle, and signing being in use while experimEnabled is off.
 * Otherwise dirty pages are looked up by tag, locked, checked and put under
 * writeback; runs of consecutive pages are gathered into the kvec array
 * (slot 0 is reserved for the SMB header) until adding another page would
 * exceed wsize, then sent with one CIFSSMBWrite2 call through a handle from
 * find_writable_file(). Per the comment in the body a short write is treated
 * as if nothing was written and a zero-length write as ENOSPC; -EAGAIN is
 * retried for WB_SYNC_ALL, and pages are redirtied on a retryable error.
 *
 * NOTE(review): many lines (labels, the error-code assignments, unlock and
 * kunmap calls, the kvec release, the return) are not visible in this
 * listing - confirm the details against the full file.
 */
1193 static int cifs_writepages(struct address_space *mapping,
1194 struct writeback_control *wbc)
1196 unsigned int bytes_to_write;
1197 unsigned int bytes_written;
1198 struct cifs_sb_info *cifs_sb;
1202 int range_whole = 0;
1209 struct cifsFileInfo *open_file;
1210 struct cifsTconInfo *tcon;
1211 struct cifsInodeInfo *cifsi = CIFS_I(mapping->host);
1213 struct pagevec pvec;
1218 cifs_sb = CIFS_SB(mapping->host->i_sb);
1221 * If wsize is smaller that the page cache size, default to writing
1222 * one page at a time via cifs_writepage
1224 if (cifs_sb->wsize < PAGE_CACHE_SIZE)
1225 return generic_writepages(mapping, wbc);
1227 iov = kmalloc(32 * sizeof(struct kvec), GFP_KERNEL);
1229 return generic_writepages(mapping, wbc);
1232 * if there's no open file, then this is likely to fail too,
1233 * but it'll at least handle the return. Maybe it should be
1236 open_file = find_writable_file(CIFS_I(mapping->host), false);
1239 return generic_writepages(mapping, wbc);
1242 tcon = tlink_tcon(open_file->tlink);
1243 if (!experimEnabled && tcon->ses->server->secMode &
1244 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) {
1245 cifsFileInfo_put(open_file);
1247 return generic_writepages(mapping, wbc);
1249 cifsFileInfo_put(open_file);
1253 pagevec_init(&pvec, 0);
1254 if (wbc->range_cyclic) {
1255 index = mapping->writeback_index; /* Start from prev offset */
1258 index = wbc->range_start >> PAGE_CACHE_SHIFT;
1259 end = wbc->range_end >> PAGE_CACHE_SHIFT;
1260 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
1265 while (!done && (index <= end) &&
1266 (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
1267 PAGECACHE_TAG_DIRTY,
1268 min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1))) {
1277 for (i = 0; i < nr_pages; i++) {
1278 page = pvec.pages[i];
1280 * At this point we hold neither mapping->tree_lock nor
1281 * lock on the page itself: the page may be truncated or
1282 * invalidated (changing page->mapping to NULL), or even
1283 * swizzled back from swapper_space to tmpfs file
1289 else if (!trylock_page(page))
1292 if (unlikely(page->mapping != mapping)) {
1297 if (!wbc->range_cyclic && page->index > end) {
1303 if (next && (page->index != next)) {
1304 /* Not next consecutive page */
1309 if (wbc->sync_mode != WB_SYNC_NONE)
1310 wait_on_page_writeback(page);
1312 if (PageWriteback(page) ||
1313 !clear_page_dirty_for_io(page)) {
1319 * This actually clears the dirty bit in the radix tree.
1320 * See cifs_writepage() for more commentary.
1322 set_page_writeback(page);
1324 if (page_offset(page) >= mapping->host->i_size) {
1327 end_page_writeback(page);
1332 * BB can we get rid of this? pages are held by pvec
1334 page_cache_get(page);
1336 len = min(mapping->host->i_size - page_offset(page),
1337 (loff_t)PAGE_CACHE_SIZE);
1339 /* reserve iov[0] for the smb header */
1341 iov[n_iov].iov_base = kmap(page);
1342 iov[n_iov].iov_len = len;
1343 bytes_to_write += len;
1347 offset = page_offset(page);
1349 next = page->index + 1;
1350 if (bytes_to_write + PAGE_CACHE_SIZE > cifs_sb->wsize)
1355 open_file = find_writable_file(CIFS_I(mapping->host),
1358 cERROR(1, "No writable handles for inode");
1361 rc = CIFSSMBWrite2(xid, tcon, open_file->netfid,
1362 bytes_to_write, offset,
1363 &bytes_written, iov, n_iov,
1365 cifsFileInfo_put(open_file);
1368 cFYI(1, "Write2 rc=%d, wrote=%u", rc, bytes_written);
1371 * For now, treat a short write as if nothing got
1372 * written. A zero length write however indicates
1373 * ENOSPC or EFBIG. We have no way to know which
1374 * though, so call it ENOSPC for now. EFBIG would
1375 * get translated to AS_EIO anyway.
1377 * FIXME: make it take into account the data that did
1381 if (bytes_written == 0)
1383 else if (bytes_written < bytes_to_write)
1387 /* retry on data-integrity flush */
1388 if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN)
1391 /* fix the stats and EOF */
1392 if (bytes_written > 0) {
1393 cifs_stats_bytes_written(tcon, bytes_written);
1394 cifs_update_eof(cifsi, offset, bytes_written);
1397 for (i = 0; i < n_iov; i++) {
1398 page = pvec.pages[first + i];
1399 /* on retryable write error, redirty page */
1401 redirty_page_for_writepage(wbc, page);
1406 end_page_writeback(page);
1407 page_cache_release(page);
1411 mapping_set_error(mapping, rc);
1415 if ((wbc->nr_to_write -= n_iov) <= 0)
1419 /* Need to re-find the pages we skipped */
1420 index = pvec.pages[0]->index + 1;
1422 pagevec_release(&pvec);
1424 if (!scanned && !done) {
1426 * We hit the last page and there is more work to be done: wrap
1427 * back to the start of the file
1433 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
1434 mapping->writeback_index = index;
/*
 * cifs_writepage - .writepage hook: write a single page back to the server.
 * @page: page cache page to write out
 * @wbc:  writeback control; the visible code does not consult it (see the
 *        BB note in the body)
 *
 * Takes an extra page reference, logs when the page is not up to date,
 * flags the page as under writeback and calls
 * cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE).  The page is then marked
 * up to date regardless of rc, writeback is ended and the extra reference
 * is dropped.
 *
 * NOTE(review): the return statement is not shown here; presumably the rc
 * from cifs_partialpagewrite() is what the caller receives - confirm.
 */
1441 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
1447 /* BB add check for wbc flags */
1448 page_cache_get(page);
1449 if (!PageUptodate(page))
1450 cFYI(1, "ppw - page not up to date");
1453 * Set the "writeback" flag, and clear "dirty" in the radix tree.
1455 * A writepage() implementation always needs to do either this,
1456 * or re-dirty the page with "redirty_page_for_writepage()" in
1457 * the case of a failure.
1459 * Just unlocking the page will cause the radix tree tag-bits
1460 * to fail to update with the state of the page correctly.
1462 set_page_writeback(page);
1463 rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE);
1464 SetPageUptodate(page); /* BB add check for error and Clearuptodate? */
1466 end_page_writeback(page);
1467 page_cache_release(page);
/*
 * cifs_write_end - .write_end hook: complete a write into a page cache page.
 * @file:    open file; file->private_data is handed to cifs_write()
 * @mapping: address space being written; mapping->host is the inode
 * @pos:     file position of the write
 * @len:     length supplied by the caller
 * @copied:  byte count compared against PAGE_CACHE_SIZE below
 * @page:    target page; one reference is released before returning
 * @fsdata:  not used by the visible code
 *
 * Page state handling visible here:
 *  - a PageChecked page has that flag cleared and may be marked up to date
 *    (the guarding condition is not shown - confirm);
 *  - a page that is neither PageChecked nor up to date becomes up to date
 *    when copied == PAGE_CACHE_SIZE;
 *  - a page that is still not up to date is mapped with kmap() and written
 *    synchronously through cifs_write(), starting at the in-page offset
 *    of pos;
 *  - set_page_dirty() is called on a path whose condition is not shown,
 *    presumably the up-to-date case - confirm.
 * Under inode->i_lock, i_size is raised to pos when pos is beyond it.
 *
 * NOTE(review): the return value is not shown here - confirm against the
 * full function.
 */
1472 static int cifs_write_end(struct file *file, struct address_space *mapping,
1473 loff_t pos, unsigned len, unsigned copied,
1474 struct page *page, void *fsdata)
1477 struct inode *inode = mapping->host;
1479 cFYI(1, "write_end for page %p from pos %lld with %d bytes",
1482 if (PageChecked(page)) {
1484 SetPageUptodate(page);
1485 ClearPageChecked(page);
1486 } else if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
1487 SetPageUptodate(page);
1489 if (!PageUptodate(page)) {
1491 unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
1495 /* this is probably better than directly calling
1496 partialpage_write since in this function the file handle is
1497 known which we might as well leverage */
1498 /* BB check if anything else missing out of ppw
1499 such as updating last write time */
1500 page_data = kmap(page);
1501 rc = cifs_write(file->private_data, page_data + offset,
1503 /* if (rc < 0) should we set writebehind rc? */
1510 set_page_dirty(page);
1514 spin_lock(&inode->i_lock);
1515 if (pos > inode->i_size)
1516 i_size_write(inode, pos);
1517 spin_unlock(&inode->i_lock);
1521 page_cache_release(page);
/*
 * cifs_strict_fsync - fsync variant that also drops cached pages when the
 * client may not cache reads.
 * @file:     file to sync; file->private_data is its cifsFileInfo
 * @datasync: only logged by the visible code
 *
 * When CIFS_I(inode)->clientCanCacheRead is false the inode mapping is
 * invalidated with cifs_invalidate_mapping().  The server is then asked to
 * flush the handle via CIFSSMBFlush(), unless the mount carries
 * CIFS_MOUNT_NOSSYNC.
 *
 * NOTE(review): declaration of rc/xid and the return path are not shown
 * here; presumably rc is returned - confirm.
 */
1526 int cifs_strict_fsync(struct file *file, int datasync)
1530 struct cifsTconInfo *tcon;
1531 struct cifsFileInfo *smbfile = file->private_data;
1532 struct inode *inode = file->f_path.dentry->d_inode;
1533 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
1537 cFYI(1, "Sync file - name: %s datasync: 0x%x",
1538 file->f_path.dentry->d_name.name, datasync);
1540 if (!CIFS_I(inode)->clientCanCacheRead)
1541 cifs_invalidate_mapping(inode);
1543 tcon = tlink_tcon(smbfile->tlink);
1544 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC))
1545 rc = CIFSSMBFlush(xid, tcon, smbfile->netfid);
/*
 * cifs_fsync - ask the server to flush an open file handle.
 * @file:     file to sync; file->private_data is its cifsFileInfo
 * @datasync: only logged by the visible code
 *
 * Issues CIFSSMBFlush() for smbfile->netfid on the tcon of the handle,
 * unless the mount carries CIFS_MOUNT_NOSSYNC.  Unlike cifs_strict_fsync()
 * no mapping invalidation is visible here.
 *
 * NOTE(review): declaration of rc/xid and the return path are not shown
 * here; presumably rc is returned - confirm.
 */
1551 int cifs_fsync(struct file *file, int datasync)
1555 struct cifsTconInfo *tcon;
1556 struct cifsFileInfo *smbfile = file->private_data;
1557 struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1561 cFYI(1, "Sync file - name: %s datasync: 0x%x",
1562 file->f_path.dentry->d_name.name, datasync);
1564 tcon = tlink_tcon(smbfile->tlink);
1565 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC))
1566 rc = CIFSSMBFlush(xid, tcon, smbfile->netfid);
1572 /* static void cifs_sync_page(struct page *page)
1574 struct address_space *mapping;
1575 struct inode *inode;
1576 unsigned long index = page->index;
1577 unsigned int rpages = 0;
1580 cFYI(1, "sync page %p", page);
1581 mapping = page->mapping;
1584 inode = mapping->host;
1588 /* fill in rpages then
1589 result = cifs_pagein_inode(inode, index, rpages); */ /* BB finish */
1591 /* cFYI(1, "rpages is %d for sync page of Index %ld", rpages, index);
1601 * As file closes, flush all cached write data for this inode checking
1602 * for write behind errors.
/*
 * cifs_flush - write out and wait for dirty pages of a file opened for write.
 * @file: file being flushed
 * @id:   owner cookie; not used by the visible code
 *
 * Only files with FMODE_WRITE set trigger filemap_write_and_wait() on the
 * inode mapping; the outcome is logged.
 *
 * NOTE(review): the initial value of rc and the return statement are not
 * shown here; presumably rc is returned - confirm.
 */
1604 int cifs_flush(struct file *file, fl_owner_t id)
1606 struct inode *inode = file->f_path.dentry->d_inode;
1609 if (file->f_mode & FMODE_WRITE)
1610 rc = filemap_write_and_wait(inode->i_mapping);
1612 cFYI(1, "Flush inode %p file %p rc %d", inode, file, rc);
/*
 * cifs_write_allocate_pages - fill pages[] with freshly allocated pages.
 * @pages:     array with room for num_pages page pointers
 * @num_pages: number of pages to allocate
 *
 * Each slot is filled with alloc_page(__GFP_HIGHMEM).  According to the
 * comment in the body, an allocation failure records how many pages were
 * obtained and is reported as ENOMEM.
 *
 * NOTE(review): the failure check, the body of the second loop and the
 * return type/value are not shown here; the second loop presumably frees
 * the pages already allocated - confirm.
 */
1618 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
1623 for (i = 0; i < num_pages; i++) {
1624 pages[i] = alloc_page(__GFP_HIGHMEM);
1627 * save number of pages we have already allocated and
1628 * return with ENOMEM error
1639 for (i = 0; i < num_pages; i++)
/*
 * get_numpages - work out how many pages the next write chunk needs.
 * @wsize:   upper bound for one chunk
 * @len:     bytes still to be written
 * @cur_len: out parameter; its assignment is not shown here, presumably it
 *           receives the chunk length min(len, wsize) - confirm
 *
 * The chunk length is the smaller of len and wsize; the page count starts
 * as chunk length / PAGE_CACHE_SIZE and a non-zero remainder is tested
 * (presumably to round the count up by one - confirm).
 */
1645 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
1650 clen = min_t(const size_t, len, wsize);
1651 num_pages = clen / PAGE_CACHE_SIZE;
1652 if (clen % PAGE_CACHE_SIZE)
/*
 * cifs_iovec_write - write user iovec data to the server through bounce pages.
 * @file:    open file; file->private_data is its cifsFileInfo
 * @iov:     user iovec array
 * @nr_segs: number of entries in iov
 * @poffset: in/out file position, advanced by the bytes written
 *
 * Visible flow:
 *  1. len = iov_length() is validated by generic_write_checks().
 *  2. get_numpages() sizes the first chunk against cifs_sb->wsize; a page
 *     pointer array, a kvec array of num_pages + 1 entries and the pages
 *     themselves are allocated.
 *  3. Per chunk, user data is copied page by page with
 *     iov_iter_copy_from_user(); each page is kmapped into to_send[i + 1]
 *     (slot 0 is left untouched here, presumably for the SMB header -
 *     confirm).
 *  4. CIFSSMBWrite2() is issued, reopening an invalid handle first, and is
 *     repeated while rc == -EAGAIN.
 *  5. On progress, total_written, the cached EOF (cifs_update_eof) and
 *     *poffset are advanced, and get_numpages() sizes the next chunk.
 *  6. Afterwards i_size is raised under inode->i_lock if *poffset passed
 *     it, write statistics are updated and the inode is marked dirty.
 *
 * Returns total_written on the path shown.
 *
 * NOTE(review): the pages array is sized with sizeof(struct pages *) while
 * the variable is declared as struct page **; the tag differs only by a
 * trailing s.  Every object pointer has the same size here so the byte
 * count is right, but the spelling looks unintended - consider
 * sizeof(struct page *).
 * NOTE(review): error paths, the loop header and the bodies of the kunmap /
 * free loops are not shown here - confirm against the full function.
 */
1662 cifs_iovec_write(struct file *file, const struct iovec *iov,
1663 unsigned long nr_segs, loff_t *poffset)
1665 size_t total_written = 0;
1666 unsigned int written = 0;
1667 unsigned long num_pages, npages;
1668 size_t copied, len, cur_len, i;
1669 struct kvec *to_send;
1670 struct page **pages;
1672 struct inode *inode;
1673 struct cifsFileInfo *open_file;
1674 struct cifsTconInfo *pTcon;
1675 struct cifs_sb_info *cifs_sb;
1678 len = iov_length(iov, nr_segs);
1682 rc = generic_write_checks(file, poffset, &len, 0);
1686 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1687 num_pages = get_numpages(cifs_sb->wsize, len, &cur_len);
1689 pages = kmalloc(sizeof(struct pages *)*num_pages, GFP_KERNEL);
1693 to_send = kmalloc(sizeof(struct kvec)*(num_pages + 1), GFP_KERNEL);
1699 rc = cifs_write_allocate_pages(pages, num_pages);
1707 open_file = file->private_data;
1708 pTcon = tlink_tcon(open_file->tlink);
1709 inode = file->f_path.dentry->d_inode;
1711 iov_iter_init(&it, iov, nr_segs, len, 0);
1715 size_t save_len = cur_len;
1716 for (i = 0; i < npages; i++) {
1717 copied = min_t(const size_t, cur_len, PAGE_CACHE_SIZE);
1718 copied = iov_iter_copy_from_user(pages[i], &it, 0,
1721 iov_iter_advance(&it, copied);
1722 to_send[i+1].iov_base = kmap(pages[i]);
1723 to_send[i+1].iov_len = copied;
1726 cur_len = save_len - cur_len;
1729 if (open_file->invalidHandle) {
1730 rc = cifs_reopen_file(open_file, false);
1734 rc = CIFSSMBWrite2(xid, pTcon, open_file->netfid,
1735 cur_len, *poffset, &written,
1736 to_send, npages, 0);
1737 } while (rc == -EAGAIN);
1739 for (i = 0; i < npages; i++)
1744 total_written += written;
1745 cifs_update_eof(CIFS_I(inode), *poffset, written);
1746 *poffset += written;
1747 } else if (rc < 0) {
1753 /* get length and number of kvecs of the next write */
1754 npages = get_numpages(cifs_sb->wsize, len, &cur_len);
1757 if (total_written > 0) {
1758 spin_lock(&inode->i_lock);
1759 if (*poffset > inode->i_size)
1760 i_size_write(inode, *poffset);
1761 spin_unlock(&inode->i_lock);
1764 cifs_stats_bytes_written(pTcon, total_written);
1765 mark_inode_dirty_sync(inode);
1767 for (i = 0; i < num_pages; i++)
1772 return total_written;
/*
 * cifs_user_writev - uncached vectored write entry point.
 * @iocb:    kiocb whose ki_filp is the file being written
 * @iov:     user iovec array
 * @nr_segs: number of entries in iov
 * @pos:     starting file position (a local copy is advanced)
 *
 * Delegates to cifs_iovec_write() and sets invalid_mapping on the inode so
 * cached pages are treated as stale.
 *
 * NOTE(review): the condition guarding the invalid_mapping store and the
 * return statement are not shown here; presumably the flag is set only
 * when something was written and written is returned - confirm.
 */
1775 static ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
1776 unsigned long nr_segs, loff_t pos)
1779 struct inode *inode;
1781 inode = iocb->ki_filp->f_path.dentry->d_inode;
1784 * BB - optimize the way when signing is disabled. We can drop this
1785 * extra memory-to-memory copying and use iovec buffers for constructing
1789 written = cifs_iovec_write(iocb->ki_filp, iov, nr_segs, &pos);
1791 CIFS_I(inode)->invalid_mapping = true;
/*
 * cifs_strict_writev - vectored write that picks cached or direct writing.
 * @iocb:    kiocb whose ki_filp is the file being written
 * @iov:     user iovec array
 * @nr_segs: number of entries in iov
 * @pos:     starting file position
 *
 * When CIFS_I(inode)->clientCanCacheAll is set the write goes through the
 * page cache via generic_file_aio_write(); otherwise the data is sent to
 * the server directly through cifs_user_writev(), for the reason given in
 * the comment inside the body.
 *
 * Returns whatever the chosen helper returns.
 */
1798 ssize_t cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
1799 unsigned long nr_segs, loff_t pos)
1801 struct inode *inode;
1803 inode = iocb->ki_filp->f_path.dentry->d_inode;
1805 if (CIFS_I(inode)->clientCanCacheAll)
1806 return generic_file_aio_write(iocb, iov, nr_segs, pos);
1809 * In strict cache mode we need to write the data to the server exactly
1810 * from the pos to pos+len-1 rather than flush all affected pages
1811 * because it may cause a error with mandatory locks on these pages but
1812 * not on the region from pos to ppos+len-1.
1815 return cifs_user_writev(iocb, iov, nr_segs, pos);
/*
 * cifs_iovec_read - read from the server straight into a user iovec.
 * @file:    open file; file->private_data is its cifsFileInfo
 * @iov:     destination iovec array
 * @nr_segs: number of entries in iov
 * @poffset: in/out file position, advanced by the bytes read
 *
 * The request of iov_length() bytes is split into chunks of at most
 * cifs_sb->rsize.  For each chunk CIFSSMBRead() is repeated while it yields
 * -EAGAIN, reopening an invalid handle first.  Payload is located at
 * read_data + 4 + DataOffset of the response and copied out with
 * memcpy_toiovecend() at the running iov_offset; the response buffer is
 * then released according to buf_type.  An error or a zero-length read is
 * tested after each chunk; otherwise read statistics and *poffset are
 * advanced.
 *
 * A read on an O_WRONLY file instance is only logged, not rejected, by the
 * visible code.
 *
 * NOTE(review): early exits, the handling of a failed copy and the return
 * value are not shown here - confirm against the full function.
 */
1819 cifs_iovec_read(struct file *file, const struct iovec *iov,
1820 unsigned long nr_segs, loff_t *poffset)
1824 unsigned int total_read, bytes_read = 0;
1825 size_t len, cur_len;
1827 struct cifs_sb_info *cifs_sb;
1828 struct cifsTconInfo *pTcon;
1829 struct cifsFileInfo *open_file;
1830 struct smb_com_read_rsp *pSMBr;
1836 len = iov_length(iov, nr_segs);
1841 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1843 open_file = file->private_data;
1844 pTcon = tlink_tcon(open_file->tlink);
1846 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
1847 cFYI(1, "attempting read on write only file instance");
1849 for (total_read = 0; total_read < len; total_read += bytes_read) {
1850 cur_len = min_t(const size_t, len - total_read, cifs_sb->rsize);
1854 while (rc == -EAGAIN) {
1855 int buf_type = CIFS_NO_BUFFER;
1856 if (open_file->invalidHandle) {
1857 rc = cifs_reopen_file(open_file, true);
1861 rc = CIFSSMBRead(xid, pTcon, open_file->netfid,
1862 cur_len, *poffset, &bytes_read,
1863 &read_data, &buf_type);
1864 pSMBr = (struct smb_com_read_rsp *)read_data;
1866 char *data_offset = read_data + 4 +
1867 le16_to_cpu(pSMBr->DataOffset);
1868 if (memcpy_toiovecend(iov, data_offset,
1869 iov_offset, bytes_read))
1871 if (buf_type == CIFS_SMALL_BUFFER)
1872 cifs_small_buf_release(read_data);
1873 else if (buf_type == CIFS_LARGE_BUFFER)
1874 cifs_buf_release(read_data);
1876 iov_offset += bytes_read;
1880 if (rc || (bytes_read == 0)) {
1888 cifs_stats_bytes_read(pTcon, bytes_read);
1889 *poffset += bytes_read;
/*
 * cifs_user_read - read into a single user buffer.
 * @file:      open file to read from
 * @read_data: user destination buffer
 * @read_size: number of bytes requested
 * @poffset:   in/out file position
 *
 * Wraps the buffer in a one-element iovec and returns the result of
 * cifs_iovec_read().
 */
1897 ssize_t cifs_user_read(struct file *file, char __user *read_data,
1898 size_t read_size, loff_t *poffset)
1901 iov.iov_base = read_data;
1902 iov.iov_len = read_size;
1904 return cifs_iovec_read(file, &iov, 1, poffset);
/*
 * cifs_user_readv - uncached vectored read entry point.
 * @iocb:    kiocb whose ki_filp is the file being read
 * @iov:     destination iovec array
 * @nr_segs: number of entries in iov
 * @pos:     starting file position (a local copy is advanced)
 *
 * Delegates to cifs_iovec_read().
 *
 * NOTE(review): what happens to read afterwards (for example updating
 * iocb->ki_pos) and the return statement are not shown here - confirm.
 */
1907 static ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
1908 unsigned long nr_segs, loff_t pos)
1912 read = cifs_iovec_read(iocb->ki_filp, iov, nr_segs, &pos);
/*
 * cifs_strict_readv - vectored read that picks cached or direct reading.
 * @iocb:    kiocb whose ki_filp is the file being read
 * @iov:     destination iovec array
 * @nr_segs: number of entries in iov
 * @pos:     starting file position
 *
 * When CIFS_I(inode)->clientCanCacheRead is set the read is served through
 * the page cache via generic_file_aio_read(); otherwise the data is fetched
 * from the server through cifs_user_readv(), for the reasons given in the
 * comment inside the body.
 *
 * Returns whatever the chosen helper returns.
 */
1919 ssize_t cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov,
1920 unsigned long nr_segs, loff_t pos)
1922 struct inode *inode;
1924 inode = iocb->ki_filp->f_path.dentry->d_inode;
1926 if (CIFS_I(inode)->clientCanCacheRead)
1927 return generic_file_aio_read(iocb, iov, nr_segs, pos);
1930 * In strict cache mode we need to read from the server all the time
1931 * if we don't have level II oplock because the server can delay mtime
1932 * change - so we can't make a decision about inode invalidating.
1933 * And we can also fail with pagereading if there are mandatory locks
1934 * on pages affected by this read but not on the region from pos to
1938 return cifs_user_readv(iocb, iov, nr_segs, pos);
/*
 * cifs_read - read from the server into a kernel buffer.
 * @file:      open file; file->private_data is its cifsFileInfo
 * @read_data: destination buffer
 * @read_size: number of bytes requested
 *
 * The request is split into chunks of at most cifs_sb->rsize.  For sessions
 * without CAP_LARGE_FILES a chunk is additionally capped at
 * server->maxBuf - 128 (see the Windows ME/9x note in the body).  Each
 * chunk is read with CIFSSMBRead(), which is repeated while it yields
 * -EAGAIN and reopens an invalid handle first; the address of
 * current_offset is passed so the data lands at the running position
 * inside read_data.  The file position *poffset advances by bytes_read
 * after every successful chunk.
 *
 * A NULL file->private_data is tested up front, and a read on an O_WRONLY
 * file instance is only logged, not rejected, by the visible code.
 *
 * NOTE(review): the statistics call passes total_read, the running total
 * before the current chunk, whereas cifs_iovec_read() and cifs_readpages()
 * pass bytes_read.  This looks like it miscounts - confirm and align.
 * NOTE(review): the fourth parameter, the early-exit bodies and the return
 * statements are not shown here - confirm against the full function.
 */
1941 static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
1945 unsigned int bytes_read = 0;
1946 unsigned int total_read;
1947 unsigned int current_read_size;
1948 struct cifs_sb_info *cifs_sb;
1949 struct cifsTconInfo *pTcon;
1951 char *current_offset;
1952 struct cifsFileInfo *open_file;
1953 int buf_type = CIFS_NO_BUFFER;
1956 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1958 if (file->private_data == NULL) {
1963 open_file = file->private_data;
1964 pTcon = tlink_tcon(open_file->tlink);
1966 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
1967 cFYI(1, "attempting read on write only file instance");
1969 for (total_read = 0, current_offset = read_data;
1970 read_size > total_read;
1971 total_read += bytes_read, current_offset += bytes_read) {
1972 current_read_size = min_t(const int, read_size - total_read,
1974 /* For windows me and 9x we do not want to request more
1975 than it negotiated since it will refuse the read then */
1977 !(pTcon->ses->capabilities & CAP_LARGE_FILES)) {
1978 current_read_size = min_t(const int, current_read_size,
1979 pTcon->ses->server->maxBuf - 128);
1982 while (rc == -EAGAIN) {
1983 if (open_file->invalidHandle) {
1984 rc = cifs_reopen_file(open_file, true);
1988 rc = CIFSSMBRead(xid, pTcon,
1990 current_read_size, *poffset,
1991 &bytes_read, &current_offset,
1994 if (rc || (bytes_read == 0)) {
2002 cifs_stats_bytes_read(pTcon, total_read);
2003 *poffset += bytes_read;
/*
 * cifs_file_strict_mmap - mmap variant that drops stale cached pages first.
 * @file: file being mapped
 * @vma:  target virtual memory area
 *
 * When CIFS_I(inode)->clientCanCacheRead is false the inode mapping is
 * invalidated with cifs_invalidate_mapping() before the mapping is set up
 * by generic_file_mmap().
 *
 * NOTE(review): the return statement is not shown here; presumably the rc
 * from generic_file_mmap() is returned - confirm.
 */
2010 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
2013 struct inode *inode = file->f_path.dentry->d_inode;
2017 if (!CIFS_I(inode)->clientCanCacheRead)
2018 cifs_invalidate_mapping(inode);
2020 rc = generic_file_mmap(file, vma);
/*
 * cifs_file_mmap - revalidate the file, then set up the mapping.
 * @file: file being mapped
 * @vma:  target virtual memory area
 *
 * Calls cifs_revalidate_file() and logs a failed validation, then calls
 * generic_file_mmap().
 *
 * NOTE(review): whether a validation failure returns early, and the final
 * return statement, are not shown here - confirm.
 */
2025 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
2030 rc = cifs_revalidate_file(file);
2032 cFYI(1, "Validation prior to mmap failed, error=%d", rc);
2036 rc = generic_file_mmap(file, vma);
/*
 * cifs_copy_cache_pages - move read data into page cache pages.
 * @mapping:    address space the pages are inserted into
 * @pages:      list of pages to fill; consumed from the tail (pages->prev)
 * @bytes_read: number of valid bytes at data
 * @data:       start of the payload inside the response buffer
 *
 * While bytes remain, the last page of the list is unlinked and added to
 * the page cache with add_to_page_cache_lru().  If that fails the page is
 * released and one page worth of data is skipped.  Otherwise a page
 * reference is dropped, the page is mapped with kmap_atomic() and filled:
 * a final partial page gets bytes_read bytes followed by zeroes, a full
 * page gets PAGE_CACHE_SIZE bytes.  The page is then flushed from the
 * data cache, marked up to date and offered to FS-Cache.
 *
 * NOTE(review): loop exits, the update of bytes_read on the partial-page
 * path and any page unlocking are not shown here - confirm.
 */
2042 static void cifs_copy_cache_pages(struct address_space *mapping,
2043 struct list_head *pages, int bytes_read, char *data)
2048 while (bytes_read > 0) {
2049 if (list_empty(pages))
2052 page = list_entry(pages->prev, struct page, lru);
2053 list_del(&page->lru);
2055 if (add_to_page_cache_lru(page, mapping, page->index,
2057 page_cache_release(page);
2058 cFYI(1, "Add page cache failed");
2059 data += PAGE_CACHE_SIZE;
2060 bytes_read -= PAGE_CACHE_SIZE;
2063 page_cache_release(page);
2065 target = kmap_atomic(page, KM_USER0);
2067 if (PAGE_CACHE_SIZE > bytes_read) {
2068 memcpy(target, data, bytes_read);
2069 /* zero the tail end of this partial page */
2070 memset(target + bytes_read, 0,
2071 PAGE_CACHE_SIZE - bytes_read);
2074 memcpy(target, data, PAGE_CACHE_SIZE);
2075 bytes_read -= PAGE_CACHE_SIZE;
2077 kunmap_atomic(target, KM_USER0);
2079 flush_dcache_page(page);
2080 SetPageUptodate(page);
2082 data += PAGE_CACHE_SIZE;
2084 /* add page to FS-Cache */
2085 cifs_readpage_to_fscache(mapping->host, page);
/*
 * cifs_readpages - .readpages hook: fill a list of readahead pages.
 * @file:      open file; file->private_data is its cifsFileInfo
 * @mapping:   address space the pages belong to
 * @page_list: pages to fill, consumed from the tail (page_list->prev)
 * @num_pages: number of pages on the list
 *
 * Visible flow:
 *  1. A NULL file->private_data is tested up front.
 *  2. cifs_readpages_from_fscache() is tried first.
 *  3. For the remaining pages, each pass takes the page at the list tail,
 *     derives the file offset from its index and counts how many following
 *     list entries have consecutive indexes, clamped to the pages left.
 *  4. The read size is that run in bytes, capped at cifs_sb->rsize rounded
 *     down to a whole number of pages.
 *  5. CIFSSMBRead() is repeated while it yields -EAGAIN, reopening an
 *     invalid handle first and releasing the response buffer between
 *     attempts.
 *  6. A failed read is logged.  A read that returned data is accounted,
 *     copied into the pages by cifs_copy_cache_pages() (payload starts at
 *     4 + DataOffset in the response), and i advances by the number of
 *     pages covered, counting a trailing partial page as one.  A read of
 *     zero bytes is logged.
 *  7. The response buffer is released according to buf_type after each
 *     pass and once more before leaving.
 *
 * NOTE(review): the early-exit bodies, loop break conditions and the
 * return value are not shown here - confirm against the full function.
 */
2090 static int cifs_readpages(struct file *file, struct address_space *mapping,
2091 struct list_head *page_list, unsigned num_pages)
2097 struct cifs_sb_info *cifs_sb;
2098 struct cifsTconInfo *pTcon;
2099 unsigned int bytes_read = 0;
2100 unsigned int read_size, i;
2101 char *smb_read_data = NULL;
2102 struct smb_com_read_rsp *pSMBr;
2103 struct cifsFileInfo *open_file;
2104 int buf_type = CIFS_NO_BUFFER;
2107 if (file->private_data == NULL) {
2112 open_file = file->private_data;
2113 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2114 pTcon = tlink_tcon(open_file->tlink);
2117 * Reads as many pages as possible from fscache. Returns -ENOBUFS
2118 * immediately if the cookie is negative
2120 rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
2125 cFYI(DBG2, "rpages: num pages %d", num_pages);
2126 for (i = 0; i < num_pages; ) {
2127 unsigned contig_pages;
2128 struct page *tmp_page;
2129 unsigned long expected_index;
2131 if (list_empty(page_list))
2134 page = list_entry(page_list->prev, struct page, lru);
2135 offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
2137 /* count adjacent pages that we will read into */
2140 list_entry(page_list->prev, struct page, lru)->index;
2141 list_for_each_entry_reverse(tmp_page, page_list, lru) {
2142 if (tmp_page->index == expected_index) {
2148 if (contig_pages + i > num_pages)
2149 contig_pages = num_pages - i;
2151 /* for reads over a certain size could initiate async
2154 read_size = contig_pages * PAGE_CACHE_SIZE;
2155 /* Read size needs to be in multiples of one page */
2156 read_size = min_t(const unsigned int, read_size,
2157 cifs_sb->rsize & PAGE_CACHE_MASK);
2158 cFYI(DBG2, "rpages: read size 0x%x contiguous pages %d",
2159 read_size, contig_pages);
2161 while (rc == -EAGAIN) {
2162 if (open_file->invalidHandle) {
2163 rc = cifs_reopen_file(open_file, true);
2168 rc = CIFSSMBRead(xid, pTcon,
2171 &bytes_read, &smb_read_data,
2173 /* BB more RC checks ? */
2174 if (rc == -EAGAIN) {
2175 if (smb_read_data) {
2176 if (buf_type == CIFS_SMALL_BUFFER)
2177 cifs_small_buf_release(smb_read_data);
2178 else if (buf_type == CIFS_LARGE_BUFFER)
2179 cifs_buf_release(smb_read_data);
2180 smb_read_data = NULL;
2184 if ((rc < 0) || (smb_read_data == NULL)) {
2185 cFYI(1, "Read error in readpages: %d", rc);
2187 } else if (bytes_read > 0) {
2188 task_io_account_read(bytes_read);
2189 pSMBr = (struct smb_com_read_rsp *)smb_read_data;
2190 cifs_copy_cache_pages(mapping, page_list, bytes_read,
2191 smb_read_data + 4 /* RFC1001 hdr */ +
2192 le16_to_cpu(pSMBr->DataOffset));
2194 i += bytes_read >> PAGE_CACHE_SHIFT;
2195 cifs_stats_bytes_read(pTcon, bytes_read);
2196 if ((bytes_read & PAGE_CACHE_MASK) != bytes_read) {
2197 i++; /* account for partial page */
2199 /* server copy of file can have smaller size
2201 /* BB do we need to verify this common case ?
2202 this case is ok - if we are at server EOF
2203 we will hit it on next read */
2208 cFYI(1, "No bytes read (%d) at offset %lld . "
2209 "Cleaning remaining pages from readahead list",
2210 bytes_read, offset);
2211 /* BB turn off caching and do new lookup on
2212 file size at server? */
2215 if (smb_read_data) {
2216 if (buf_type == CIFS_SMALL_BUFFER)
2217 cifs_small_buf_release(smb_read_data);
2218 else if (buf_type == CIFS_LARGE_BUFFER)
2219 cifs_buf_release(smb_read_data);
2220 smb_read_data = NULL;
2225 /* need to free smb_read_data buf before exit */
2226 if (smb_read_data) {
2227 if (buf_type == CIFS_SMALL_BUFFER)
2228 cifs_small_buf_release(smb_read_data);
2229 else if (buf_type == CIFS_LARGE_BUFFER)
2230 cifs_buf_release(smb_read_data);
2231 smb_read_data = NULL;
/*
 * cifs_readpage_worker - fill one page from FS-Cache or from the server.
 * @file: open file to read from
 * @page: page to fill
 *
 * FS-Cache is consulted first through cifs_readpage_from_fscache().
 * Otherwise an extra page reference is taken, the page is mapped with
 * kmap() and cifs_read() is asked for PAGE_CACHE_SIZE bytes at *poffset.
 * On the path shown, the inode atime is refreshed, the part of the page
 * beyond the rc bytes read is zeroed, and the page is flushed from the data
 * cache, marked up to date and handed to FS-Cache.  The extra reference is
 * dropped before returning.
 *
 * NOTE(review): the third parameter (used as poffset), the branches taken
 * on a cache hit or a read error, and the return value are not shown here
 * - confirm against the full function.
 */
2239 static int cifs_readpage_worker(struct file *file, struct page *page,
2245 /* Is the page cached? */
2246 rc = cifs_readpage_from_fscache(file->f_path.dentry->d_inode, page);
2250 page_cache_get(page);
2251 read_data = kmap(page);
2252 /* for reads over a certain size could initiate async read ahead */
2254 rc = cifs_read(file, read_data, PAGE_CACHE_SIZE, poffset);
2259 cFYI(1, "Bytes read %d", rc);
2261 file->f_path.dentry->d_inode->i_atime =
2262 current_fs_time(file->f_path.dentry->d_inode->i_sb);
2264 if (PAGE_CACHE_SIZE > rc)
2265 memset(read_data + rc, 0, PAGE_CACHE_SIZE - rc);
2267 flush_dcache_page(page);
2268 SetPageUptodate(page);
2270 /* send this page to the cache */
2271 cifs_readpage_to_fscache(file->f_path.dentry->d_inode, page);
2277 page_cache_release(page);
/*
 * cifs_readpage - .readpage hook: read a single page.
 * @file: open file to read from
 * @page: page to fill
 *
 * Converts the page index into a byte offset, tests for a NULL
 * file->private_data, logs the request and lets cifs_readpage_worker() do
 * the read.
 *
 * NOTE(review): the debug message prints the loff_t offset through an int
 * cast, so large offsets are truncated in the log only.
 * NOTE(review): the body of the NULL check and the return path are not
 * shown here - confirm.
 */
2283 static int cifs_readpage(struct file *file, struct page *page)
2285 loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
2291 if (file->private_data == NULL) {
2297 cFYI(1, "readpage %p at offset %d 0x%x\n",
2298 page, (int)offset, (int)offset);
2300 rc = cifs_readpage_worker(file, page, &offset);
/*
 * is_inode_writable - look for an open handle with write access.
 * @cifs_inode: inode whose openFileList is scanned
 *
 * Walks cifs_inode->openFileList under cifs_file_list_lock and tests each
 * handle for FMODE_WRITE via OPEN_FMODE(f_flags).  The lock is released on
 * both the match path and the fall-through path.
 *
 * NOTE(review): the return statements are not shown here; presumably
 * non-zero on a match and zero otherwise - confirm.
 */
2308 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
2310 struct cifsFileInfo *open_file;
2312 spin_lock(&cifs_file_list_lock);
2313 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2314 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
2315 spin_unlock(&cifs_file_list_lock);
2319 spin_unlock(&cifs_file_list_lock);
2323 /* We do not want to update the file size from server for inodes
2324 open for write - to avoid races with writepage extending
2325 the file - in the future we could consider allowing
2326 refreshing the inode only on increases in the file size
2327 but this is tricky to do without racing with writebehind
2328 page caching in the current Linux kernel design */
/*
 * is_size_safe_to_change - decide whether a server-reported size may
 * replace the cached i_size.
 * @cifsInode:   inode under consideration
 * @end_of_file: size reported by the server
 *
 * The visible checks: when the inode has a writable open handle
 * (is_inode_writable()), a CIFS_MOUNT_DIRECT_IO mount is treated as safe
 * because there is no page cache to corrupt, and otherwise the cached size
 * is compared with end_of_file.
 *
 * NOTE(review): none of the return statements are shown here; presumably
 * true when the inode is not open for write, on direct I/O mounts, and
 * when the cached size is smaller than end_of_file - confirm.
 */
2329 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
2334 if (is_inode_writable(cifsInode)) {
2335 /* This inode is open for write at least once */
2336 struct cifs_sb_info *cifs_sb;
2338 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
2339 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
2340 /* since no page cache to corrupt on directio
2341 we can change size safely */
2345 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
/*
 * cifs_write_begin - .write_begin hook: get a page ready for a write.
 * @file:   open file being written
 * @mapping: address space being written
 * @pos:    file position of the write
 * @len:    number of bytes to be written
 * @flags:  passed through to grab_cache_page_write_begin()
 * @pagep:  out parameter for the page (its assignment is not shown here)
 * @fsdata: not used by the visible code
 *
 * The page covering pos is obtained with grab_cache_page_write_begin().
 * The checks that follow, in order:
 *  - the page is already up to date;
 *  - the write covers a whole page (len == PAGE_CACHE_SIZE);
 *  - the client may cache reads and either the page starts at or beyond
 *    i_size, or the write starts at the page start and reaches i_size:
 *    the parts of the page not being written are zeroed and the page is
 *    flagged PageChecked so write_end can mark it up to date;
 *  - the file is not O_WRONLY: the page is read in through
 *    cifs_readpage_worker(), ignoring any error because cifs_write_end
 *    falls back to a sync write when the page is not up to date.
 *
 * NOTE(review): the jumps out of each check, the failure handling for the
 * page grab and the return value are not shown here - confirm.
 */
2353 static int cifs_write_begin(struct file *file, struct address_space *mapping,
2354 loff_t pos, unsigned len, unsigned flags,
2355 struct page **pagep, void **fsdata)
2357 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
2358 loff_t offset = pos & (PAGE_CACHE_SIZE - 1);
2359 loff_t page_start = pos & PAGE_MASK;
2364 cFYI(1, "write_begin from %lld len %d", (long long)pos, len);
2366 page = grab_cache_page_write_begin(mapping, index, flags);
2372 if (PageUptodate(page))
2376 * If we write a full page it will be up to date, no need to read from
2377 * the server. If the write is short, we'll end up doing a sync write
2380 if (len == PAGE_CACHE_SIZE)
2384 * optimize away the read when we have an oplock, and we're not
2385 * expecting to use any of the data we'd be reading in. That
2386 * is, when the page lies beyond the EOF, or straddles the EOF
2387 * and the write will cover all of the existing data.
2389 if (CIFS_I(mapping->host)->clientCanCacheRead) {
2390 i_size = i_size_read(mapping->host);
2391 if (page_start >= i_size ||
2392 (offset == 0 && (pos + len) >= i_size)) {
2393 zero_user_segments(page, 0, offset,
2397 * PageChecked means that the parts of the page
2398 * to which we're not writing are considered up
2399 * to date. Once the data is copied to the
2400 * page, it can be set uptodate.
2402 SetPageChecked(page);
2407 if ((file->f_flags & O_ACCMODE) != O_WRONLY) {
2409 * might as well read a page, it is fast enough. If we get
2410 * an error, we don't need to return it. cifs_write_end will
2411 * do a sync write instead since PG_uptodate isn't set.
2413 cifs_readpage_worker(file, page, &page_start);
2415 /* we could try using another file handle if there is one -
2416 but how would we lock it to prevent close of that handle
2417 racing with this read? In any case
2418 this will be written out by write_end so is fine */
/*
 * cifs_release_page - .releasepage hook.
 * @page: page the VM wants to release
 * @gfp:  allocation context, forwarded to FS-Cache
 *
 * Tests PagePrivate(page) first and otherwise defers the decision to
 * cifs_fscache_release_page().
 *
 * NOTE(review): the statement executed for a PagePrivate page is not shown
 * here; presumably the release is refused - confirm.
 */
2425 static int cifs_release_page(struct page *page, gfp_t gfp)
2427 if (PagePrivate(page))
2430 return cifs_fscache_release_page(page, gfp);
/*
 * cifs_invalidate_page - .invalidatepage hook.
 * @page:   page being invalidated
 * @offset: offset within the page where invalidation starts
 *
 * Notifies FS-Cache about the page through cifs_fscache_invalidate_page().
 *
 * NOTE(review): any condition on offset guarding that call is not shown
 * here - confirm.
 */
2433 static void cifs_invalidate_page(struct page *page, unsigned long offset)
2435 struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
2438 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
/*
 * cifs_oplock_break - work item that services an oplock break for a file.
 * @work: work_struct embedded in the cifsFileInfo being serviced
 *
 * For a regular file the visible code:
 *  - breaks local leases (break_lease() is called with O_RDONLY when reads
 *    may still be cached; a second call with O_WRONLY is on a path whose
 *    condition is not shown, presumably the else branch - confirm);
 *  - starts writeback of dirty pages with filemap_fdatawrite();
 *  - when reads may no longer be cached, waits for writeback, records any
 *    error on the mapping and invalidates the cached pages.
 * Unless the break was cancelled, the oplock is then released on the
 * server with a CIFSSMBLock(LOCKING_ANDX_OPLOCK_RELEASE) request.
 * Finally cifs_file_list_lock is taken and dropped to make sure the
 * reference grab by is_valid_oplock_break() has finished, and that
 * reference is released through cifs_oplock_break_put().
 */
2441 void cifs_oplock_break(struct work_struct *work)
2443 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
2445 struct inode *inode = cfile->dentry->d_inode;
2446 struct cifsInodeInfo *cinode = CIFS_I(inode);
2449 if (inode && S_ISREG(inode->i_mode)) {
2450 if (cinode->clientCanCacheRead)
2451 break_lease(inode, O_RDONLY);
2453 break_lease(inode, O_WRONLY);
2454 rc = filemap_fdatawrite(inode->i_mapping);
2455 if (cinode->clientCanCacheRead == 0) {
2456 rc = filemap_fdatawait(inode->i_mapping);
2457 mapping_set_error(inode->i_mapping, rc);
2458 invalidate_remote_inode(inode);
2460 cFYI(1, "Oplock flush inode %p rc %d", inode, rc);
2464 * releasing stale oplock after recent reconnect of smb session using
2465 * a now incorrect file handle is not a data integrity issue but do
2466 * not bother sending an oplock release if session to server still is
2467 * disconnected since oplock already released by the server
2469 if (!cfile->oplock_break_cancelled) {
2470 rc = CIFSSMBLock(0, tlink_tcon(cfile->tlink), cfile->netfid, 0,
2471 0, 0, 0, LOCKING_ANDX_OPLOCK_RELEASE, false,
2472 cinode->clientCanCacheRead ? 1 : 0);
2473 cFYI(1, "Oplock release rc = %d", rc);
2477 * We might have kicked in before is_valid_oplock_break()
2478 * finished grabbing reference for us. Make sure it's done by
2479 * waiting for cifs_file_list_lock.
2481 spin_lock(&cifs_file_list_lock);
2482 spin_unlock(&cifs_file_list_lock);
2484 cifs_oplock_break_put(cfile);
/*
 * cifs_oplock_break_get - pin what an oplock break work item needs.
 * @cfile: file handle about to be serviced
 *
 * Marks the superblock active via cifs_sb_active() and takes a reference
 * on cfile with cifsFileInfo_get().  cifs_oplock_break_put() is the
 * counterpart.
 */
2487 /* must be called while holding cifs_file_list_lock */
2488 void cifs_oplock_break_get(struct cifsFileInfo *cfile)
2490 cifs_sb_active(cfile->dentry->d_sb);
2491 cifsFileInfo_get(cfile);
/*
 * cifs_oplock_break_put - undo cifs_oplock_break_get().
 * @cfile: file handle whose oplock break has been serviced
 *
 * The superblock pointer is read before the file reference is dropped,
 * presumably because cifsFileInfo_put() may free cfile - confirm.  The
 * superblock is then deactivated with cifs_sb_deactive().
 */
2494 void cifs_oplock_break_put(struct cifsFileInfo *cfile)
2496 struct super_block *sb = cfile->dentry->d_sb;
2498 cifsFileInfo_put(cfile);
2499 cifs_sb_deactive(sb);
/*
 * cifs_addr_ops - address space operations table that includes the
 * multi-page read hook (.readpages = cifs_readpages).  Dirty page tracking
 * uses the generic __set_page_dirty_nobuffers; the .sync_page hook is
 * commented out.
 */
2502 const struct address_space_operations cifs_addr_ops = {
2503 .readpage = cifs_readpage,
2504 .readpages = cifs_readpages,
2505 .writepage = cifs_writepage,
2506 .writepages = cifs_writepages,
2507 .write_begin = cifs_write_begin,
2508 .write_end = cifs_write_end,
2509 .set_page_dirty = __set_page_dirty_nobuffers,
2510 .releasepage = cifs_release_page,
2511 .invalidatepage = cifs_invalidate_page,
2512 /* .sync_page = cifs_sync_page, */
2517 * cifs_readpages requires the server to support a buffer large enough to
2518 * contain the header plus one complete page of data. Otherwise, we need
2519 * to leave cifs_readpages out of the address space operations.
2521 const struct address_space_operations cifs_addr_ops_smallbuf = {
2522 .readpage = cifs_readpage,
2523 .writepage = cifs_writepage,
2524 .writepages = cifs_writepages,
2525 .write_begin = cifs_write_begin,
2526 .write_end = cifs_write_end,
2527 .set_page_dirty = __set_page_dirty_nobuffers,
2528 .releasepage = cifs_release_page,
2529 .invalidatepage = cifs_invalidate_page,
2530 /* .sync_page = cifs_sync_page, */