4 * vfs operations that deal with files
6 * Copyright (C) International Business Machines Corp., 2002,2010
7 * Author(s): Steve French (sfrench@us.ibm.com)
8 * Jeremy Allison (jra@samba.org)
10 * This library is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU Lesser General Public License as published
12 * by the Free Software Foundation; either version 2.1 of the License, or
13 * (at your option) any later version.
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
18 * the GNU Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public License
21 * along with this library; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25 #include <linux/backing-dev.h>
26 #include <linux/stat.h>
27 #include <linux/fcntl.h>
28 #include <linux/pagemap.h>
29 #include <linux/pagevec.h>
30 #include <linux/writeback.h>
31 #include <linux/task_io_accounting_ops.h>
32 #include <linux/delay.h>
33 #include <linux/mount.h>
34 #include <linux/slab.h>
35 #include <asm/div64.h>
39 #include "cifsproto.h"
40 #include "cifs_unicode.h"
41 #include "cifs_debug.h"
42 #include "cifs_fs_sb.h"
45 static inline int cifs_convert_flags(unsigned int flags)
47 if ((flags & O_ACCMODE) == O_RDONLY)
49 else if ((flags & O_ACCMODE) == O_WRONLY)
51 else if ((flags & O_ACCMODE) == O_RDWR) {
52 /* GENERIC_ALL is too much permission to request
53 can cause unnecessary access denied on create */
54 /* return GENERIC_ALL; */
55 return (GENERIC_READ | GENERIC_WRITE);
58 return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
59 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
63 static u32 cifs_posix_convert_flags(unsigned int flags)
67 if ((flags & O_ACCMODE) == O_RDONLY)
68 posix_flags = SMB_O_RDONLY;
69 else if ((flags & O_ACCMODE) == O_WRONLY)
70 posix_flags = SMB_O_WRONLY;
71 else if ((flags & O_ACCMODE) == O_RDWR)
72 posix_flags = SMB_O_RDWR;
75 posix_flags |= SMB_O_CREAT;
77 posix_flags |= SMB_O_EXCL;
79 posix_flags |= SMB_O_TRUNC;
80 /* be safe and imply O_SYNC for O_DSYNC */
82 posix_flags |= SMB_O_SYNC;
83 if (flags & O_DIRECTORY)
84 posix_flags |= SMB_O_DIRECTORY;
85 if (flags & O_NOFOLLOW)
86 posix_flags |= SMB_O_NOFOLLOW;
88 posix_flags |= SMB_O_DIRECT;
93 static inline int cifs_get_disposition(unsigned int flags)
95 if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
97 else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
98 return FILE_OVERWRITE_IF;
99 else if ((flags & O_CREAT) == O_CREAT)
101 else if ((flags & O_TRUNC) == O_TRUNC)
102 return FILE_OVERWRITE;
107 static inline int cifs_open_inode_helper(struct inode *inode,
108 struct cifsTconInfo *pTcon, __u32 oplock, FILE_ALL_INFO *buf,
109 char *full_path, int xid)
111 struct cifsInodeInfo *pCifsInode = CIFS_I(inode);
112 struct timespec temp;
115 if (pCifsInode->clientCanCacheRead) {
116 /* we have the inode open somewhere else
117 no need to discard cache data */
118 goto client_can_cache;
121 /* BB need same check in cifs_create too? */
122 /* if not oplocked, invalidate inode pages if mtime or file
124 temp = cifs_NTtimeToUnix(buf->LastWriteTime);
125 if (timespec_equal(&inode->i_mtime, &temp) &&
127 (loff_t)le64_to_cpu(buf->EndOfFile))) {
128 cFYI(1, "inode unchanged on server");
130 if (inode->i_mapping) {
131 /* BB no need to lock inode until after invalidate
132 since namei code should already have it locked? */
133 rc = filemap_write_and_wait(inode->i_mapping);
134 mapping_set_error(inode->i_mapping, rc);
136 cFYI(1, "invalidating remote inode since open detected it "
138 invalidate_remote_inode(inode);
143 rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
146 rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
149 cifs_set_oplock_level(inode, oplock);
154 int cifs_posix_open(char *full_path, struct inode **pinode,
155 struct super_block *sb, int mode, unsigned int f_flags,
156 __u32 *poplock, __u16 *pnetfid, int xid)
159 FILE_UNIX_BASIC_INFO *presp_data;
160 __u32 posix_flags = 0;
161 struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
162 struct cifs_fattr fattr;
163 struct tcon_link *tlink;
164 struct cifsTconInfo *tcon;
166 cFYI(1, "posix open %s", full_path);
168 presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
169 if (presp_data == NULL)
172 tlink = cifs_sb_tlink(cifs_sb);
178 tcon = tlink_tcon(tlink);
179 mode &= ~current_umask();
181 posix_flags = cifs_posix_convert_flags(f_flags);
182 rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
183 poplock, full_path, cifs_sb->local_nls,
184 cifs_sb->mnt_cifs_flags &
185 CIFS_MOUNT_MAP_SPECIAL_CHR);
186 cifs_put_tlink(tlink);
191 if (presp_data->Type == cpu_to_le32(-1))
192 goto posix_open_ret; /* open ok, caller does qpathinfo */
195 goto posix_open_ret; /* caller does not need info */
197 cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);
199 /* get new inode and set it up */
200 if (*pinode == NULL) {
201 cifs_fill_uniqueid(sb, &fattr);
202 *pinode = cifs_iget(sb, &fattr);
208 cifs_fattr_to_inode(*pinode, &fattr);
216 struct cifsFileInfo *
217 cifs_new_fileinfo(__u16 fileHandle, struct file *file,
218 struct tcon_link *tlink, __u32 oplock)
220 struct dentry *dentry = file->f_path.dentry;
221 struct inode *inode = dentry->d_inode;
222 struct cifsInodeInfo *pCifsInode = CIFS_I(inode);
223 struct cifsFileInfo *pCifsFile;
225 pCifsFile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
226 if (pCifsFile == NULL)
229 pCifsFile->count = 1;
230 pCifsFile->netfid = fileHandle;
231 pCifsFile->pid = current->tgid;
232 pCifsFile->uid = current_fsuid();
233 pCifsFile->dentry = dget(dentry);
234 pCifsFile->f_flags = file->f_flags;
235 pCifsFile->invalidHandle = false;
236 pCifsFile->tlink = cifs_get_tlink(tlink);
237 mutex_init(&pCifsFile->fh_mutex);
238 mutex_init(&pCifsFile->lock_mutex);
239 INIT_LIST_HEAD(&pCifsFile->llist);
240 INIT_WORK(&pCifsFile->oplock_break, cifs_oplock_break);
242 spin_lock(&cifs_file_list_lock);
243 list_add(&pCifsFile->tlist, &(tlink_tcon(tlink)->openFileList));
244 /* if readable file instance put first in list*/
245 if (file->f_mode & FMODE_READ)
246 list_add(&pCifsFile->flist, &pCifsInode->openFileList);
248 list_add_tail(&pCifsFile->flist, &pCifsInode->openFileList);
249 spin_unlock(&cifs_file_list_lock);
251 cifs_set_oplock_level(inode, oplock);
253 file->private_data = pCifsFile;
258 * Release a reference on the file private data. This may involve closing
259 * the filehandle out on the server. Must be called without holding
260 * cifs_file_list_lock.
262 void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
264 struct inode *inode = cifs_file->dentry->d_inode;
265 struct cifsTconInfo *tcon = tlink_tcon(cifs_file->tlink);
266 struct cifsInodeInfo *cifsi = CIFS_I(inode);
267 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
268 struct cifsLockInfo *li, *tmp;
270 spin_lock(&cifs_file_list_lock);
271 if (--cifs_file->count > 0) {
272 spin_unlock(&cifs_file_list_lock);
276 /* remove it from the lists */
277 list_del(&cifs_file->flist);
278 list_del(&cifs_file->tlist);
280 if (list_empty(&cifsi->openFileList)) {
281 cFYI(1, "closing last open instance for inode %p",
282 cifs_file->dentry->d_inode);
283 cifs_set_oplock_level(inode, 0);
285 spin_unlock(&cifs_file_list_lock);
287 if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
291 rc = CIFSSMBClose(xid, tcon, cifs_file->netfid);
295 /* Delete any outstanding lock records. We'll lose them when the file
298 mutex_lock(&cifs_file->lock_mutex);
299 list_for_each_entry_safe(li, tmp, &cifs_file->llist, llist) {
300 list_del(&li->llist);
303 mutex_unlock(&cifs_file->lock_mutex);
305 cifs_put_tlink(cifs_file->tlink);
306 dput(cifs_file->dentry);
/*
 * cifs_open - VFS ->open handler for regular files on a CIFS mount.
 *
 * Strategy (visible in the code below): if the server supports the CIFS
 * Unix/POSIX path-operation extensions and posix open is not known to be
 * broken, try cifs_posix_open() first; on -EINVAL/-EOPNOTSUPP mark posix
 * open broken and fall back to the classic NT open (CIFSSMBOpen) or, on
 * pre-NT servers, the legacy OpenX (SMBLegacyOpen).  After a successful
 * open the inode is revalidated via cifs_open_inode_helper() and a
 * cifsFileInfo is attached to the file.
 *
 * NOTE(review): this extract is missing interior lines (the embedded
 * original line numbers jump), so braces, locals (rc/xid/oplock/netfid/
 * desiredAccess/disposition) and several error paths are not visible
 * here; comments below describe only what is shown.
 */
310 int cifs_open(struct inode *inode, struct file *file)
315 struct cifs_sb_info *cifs_sb;
316 struct cifsTconInfo *tcon;
317 struct tcon_link *tlink;
318 struct cifsFileInfo *pCifsFile = NULL;
319 struct cifsInodeInfo *pCifsInode;
320 char *full_path = NULL;
324 FILE_ALL_INFO *buf = NULL;
328 cifs_sb = CIFS_SB(inode->i_sb);
329 tlink = cifs_sb_tlink(cifs_sb);
/* tlink error path: propagate PTR_ERR from cifs_sb_tlink */
332 return PTR_ERR(tlink);
334 tcon = tlink_tcon(tlink);
336 pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
338 full_path = build_path_from_dentry(file->f_path.dentry);
339 if (full_path == NULL) {
344 cFYI(1, "inode = 0x%p file flags are 0x%x for %s",
345 inode, file->f_flags, full_path);
/* Fast path: POSIX open via the Unix extensions, when supported and
   not previously observed to be broken on this server. */
352 if (!tcon->broken_posix_open && tcon->unix_ext &&
353 (tcon->ses->capabilities & CAP_UNIX) &&
354 (CIFS_UNIX_POSIX_PATH_OPS_CAP &
355 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
356 /* can not refresh inode info since size could be stale */
357 rc = cifs_posix_open(full_path, &inode, inode->i_sb,
358 cifs_sb->mnt_file_mode /* ignored */,
359 file->f_flags, &oplock, &netfid, xid);
361 cFYI(1, "posix open succeeded");
363 pCifsFile = cifs_new_fileinfo(netfid, file, tlink,
/* if fileinfo allocation fails, close the just-opened handle */
365 if (pCifsFile == NULL) {
366 CIFSSMBClose(xid, tcon, netfid);
370 cifs_fscache_set_inode_cookie(inode, file);
373 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
374 if (tcon->ses->serverNOS)
375 cERROR(1, "server %s of type %s returned"
376 " unexpected error on SMB posix open"
377 ", disabling posix open support."
378 " Check if server update available.",
379 tcon->ses->serverName,
380 tcon->ses->serverNOS);
/* remember not to retry posix open on this tcon */
381 tcon->broken_posix_open = true;
382 } else if ((rc != -EIO) && (rc != -EREMOTE) &&
383 (rc != -EOPNOTSUPP)) /* path not found or net err */
385 /* else fallthrough to retry open the old way on network i/o
389 desiredAccess = cifs_convert_flags(file->f_flags);
391 /*********************************************************************
392 * open flag mapping table:
394 * POSIX Flag CIFS Disposition
395 * ---------- ----------------
396 * O_CREAT FILE_OPEN_IF
397 * O_CREAT | O_EXCL FILE_CREATE
398 * O_CREAT | O_TRUNC FILE_OVERWRITE_IF
399 * O_TRUNC FILE_OVERWRITE
400 * none of the above FILE_OPEN
402 * Note that there is not a direct match between disposition
403 * FILE_SUPERSEDE (ie create whether or not file exists although
404 * O_CREAT | O_TRUNC is similar but truncates the existing
405 * file rather than creating a new file as FILE_SUPERSEDE does
406 * (which uses the attributes / metadata passed in on open call)
408 *? O_SYNC is a reasonable match to CIFS writethrough flag
409 *? and the read write flags match reasonably. O_LARGEFILE
410 *? is irrelevant because largefile support is always used
411 *? by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
412 * O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
413 *********************************************************************/
415 disposition = cifs_get_disposition(file->f_flags);
417 /* BB pass O_SYNC flag through on file attributes .. BB */
419 /* Also refresh inode by passing in file_info buf returned by SMBOpen
420 and calling get_inode_info with returned buf (at least helps
421 non-Unix server case) */
423 /* BB we can not do this if this is the second open of a file
424 and the first handle has writebehind data, we might be
425 able to simply do a filemap_fdatawrite/filemap_fdatawait first */
426 buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
/* NT-capable servers use the full NT create/open; others get -EIO here
   so the legacy OpenX retry below kicks in */
432 if (tcon->ses->capabilities & CAP_NT_SMBS)
433 rc = CIFSSMBOpen(xid, tcon, full_path, disposition,
434 desiredAccess, CREATE_NOT_DIR, &netfid, &oplock, buf,
435 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
436 & CIFS_MOUNT_MAP_SPECIAL_CHR);
438 rc = -EIO; /* no NT SMB support fall into legacy open below */
441 /* Old server, try legacy style OpenX */
442 rc = SMBLegacyOpen(xid, tcon, full_path, disposition,
443 desiredAccess, CREATE_NOT_DIR, &netfid, &oplock, buf,
444 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
445 & CIFS_MOUNT_MAP_SPECIAL_CHR);
448 cFYI(1, "cifs_open returned 0x%x", rc);
/* revalidate cached pages/metadata against the open response */
452 rc = cifs_open_inode_helper(inode, tcon, oplock, buf, full_path, xid);
456 pCifsFile = cifs_new_fileinfo(netfid, file, tlink, oplock);
457 if (pCifsFile == NULL) {
462 cifs_fscache_set_inode_cookie(inode, file);
/* if the open actually created the file, set the mode now — could not
   be set earlier because new read-only files could not be written */
464 if (oplock & CIFS_CREATE_ACTION) {
465 /* time to set mode which we can not set earlier due to
466 problems creating new read-only files */
467 if (tcon->unix_ext) {
468 struct cifs_unix_set_info_args args = {
469 .mode = inode->i_mode,
472 .ctime = NO_CHANGE_64,
473 .atime = NO_CHANGE_64,
474 .mtime = NO_CHANGE_64,
477 CIFSSMBUnixSetPathInfo(xid, tcon, full_path, &args,
479 cifs_sb->mnt_cifs_flags &
480 CIFS_MOUNT_MAP_SPECIAL_CHR);
488 cifs_put_tlink(tlink);
/*
 * Try to reacquire byte range locks that were released when the session
 * to the server was lost.  Currently a stub: lock re-acquisition after
 * reconnect is not yet implemented (see BB comment).
 */
static int cifs_relock_file(struct cifsFileInfo *cifsFile)
{
	int rc = 0;

	/* BB list all locks open on this file and relock */

	return rc;
}
/*
 * cifs_reopen_file - re-open a file whose server handle was invalidated
 * (typically after reconnecting a dropped session).
 * @pCifsFile: the open-file instance to revive
 * @can_flush: true when it is safe to flush dirty pages and refresh the
 *             inode from the server; false from writeback paths where a
 *             flush could deadlock (see trailing comment below).
 *
 * NOTE(review): this extract is missing interior lines (the embedded
 * original line numbers jump) — locals (rc/xid/inode/oplock/netfid/
 * desiredAccess), some braces and error labels are not visible.
 */
503 static int cifs_reopen_file(struct cifsFileInfo *pCifsFile, bool can_flush)
508 struct cifs_sb_info *cifs_sb;
509 struct cifsTconInfo *tcon;
510 struct cifsInodeInfo *pCifsInode;
512 char *full_path = NULL;
514 int disposition = FILE_OPEN;
/* fh_mutex serializes reopen attempts on this handle; if another task
   already revived it, there is nothing to do */
518 mutex_lock(&pCifsFile->fh_mutex);
519 if (!pCifsFile->invalidHandle) {
520 mutex_unlock(&pCifsFile->fh_mutex);
526 inode = pCifsFile->dentry->d_inode;
527 cifs_sb = CIFS_SB(inode->i_sb);
528 tcon = tlink_tcon(pCifsFile->tlink);
530 /* can not grab rename sem here because various ops, including
531 those that already have the rename sem can end up causing writepage
532 to get called and if the server was down that means we end up here,
533 and we can never tell if the caller already has the rename_sem */
534 full_path = build_path_from_dentry(pCifsFile->dentry);
535 if (full_path == NULL) {
537 mutex_unlock(&pCifsFile->fh_mutex);
542 cFYI(1, "inode = 0x%p file flags 0x%x for %s",
543 inode, pCifsFile->f_flags, full_path);
/* Prefer a POSIX reopen when the Unix extensions are available */
550 if (tcon->unix_ext && (tcon->ses->capabilities & CAP_UNIX) &&
551 (CIFS_UNIX_POSIX_PATH_OPS_CAP &
552 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
555 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
556 * original open. Must mask them off for a reopen.
558 unsigned int oflags = pCifsFile->f_flags &
559 ~(O_CREAT | O_EXCL | O_TRUNC);
561 rc = cifs_posix_open(full_path, NULL, inode->i_sb,
562 cifs_sb->mnt_file_mode /* ignored */,
563 oflags, &oplock, &netfid, xid);
565 cFYI(1, "posix reopen succeeded");
568 /* fallthrough to retry open the old way on errors, especially
569 in the reconnect path it is important to retry hard */
572 desiredAccess = cifs_convert_flags(pCifsFile->f_flags);
574 /* Can not refresh inode by passing in file_info buf to be returned
575 by SMBOpen and then calling get_inode_info with returned buf
576 since file might have write behind data that needs to be flushed
577 and server version of file size can be stale. If we knew for sure
578 that inode was not dirty locally we could do this */
580 rc = CIFSSMBOpen(xid, tcon, full_path, disposition, desiredAccess,
581 CREATE_NOT_DIR, &netfid, &oplock, NULL,
582 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
583 CIFS_MOUNT_MAP_SPECIAL_CHR);
/* open failed: drop the mutex and bail through the error label */
585 mutex_unlock(&pCifsFile->fh_mutex);
586 cFYI(1, "cifs_open returned 0x%x", rc);
587 cFYI(1, "oplock: %d", oplock);
588 goto reopen_error_exit;
/* success: publish the new netfid and mark the handle valid */
592 pCifsFile->netfid = netfid;
593 pCifsFile->invalidHandle = false;
594 mutex_unlock(&pCifsFile->fh_mutex);
595 pCifsInode = CIFS_I(inode);
/* can_flush path: write back dirty pages, then refresh the inode */
598 rc = filemap_write_and_wait(inode->i_mapping);
599 mapping_set_error(inode->i_mapping, rc);
602 rc = cifs_get_inode_info_unix(&inode,
603 full_path, inode->i_sb, xid);
605 rc = cifs_get_inode_info(&inode,
606 full_path, NULL, inode->i_sb,
608 } /* else we are writing out data to server already
609 and could deadlock if we tried to flush data, and
610 since we do not know if we have data that would
611 invalidate the current end of file on the server
612 we can not go to the server to get the new inod
615 cifs_set_oplock_level(inode, oplock);
617 cifs_relock_file(pCifsFile);
625 int cifs_close(struct inode *inode, struct file *file)
627 cifsFileInfo_put(file->private_data);
628 file->private_data = NULL;
630 /* return code from the ->release op is always ignored */
634 int cifs_closedir(struct inode *inode, struct file *file)
638 struct cifsFileInfo *pCFileStruct = file->private_data;
641 cFYI(1, "Closedir inode = 0x%p", inode);
646 struct cifsTconInfo *pTcon = tlink_tcon(pCFileStruct->tlink);
648 cFYI(1, "Freeing private data in close dir");
649 spin_lock(&cifs_file_list_lock);
650 if (!pCFileStruct->srch_inf.endOfSearch &&
651 !pCFileStruct->invalidHandle) {
652 pCFileStruct->invalidHandle = true;
653 spin_unlock(&cifs_file_list_lock);
654 rc = CIFSFindClose(xid, pTcon, pCFileStruct->netfid);
655 cFYI(1, "Closing uncompleted readdir with rc %d",
657 /* not much we can do if it fails anyway, ignore rc */
660 spin_unlock(&cifs_file_list_lock);
661 ptmp = pCFileStruct->srch_inf.ntwrk_buf_start;
663 cFYI(1, "closedir free smb buf in srch struct");
664 pCFileStruct->srch_inf.ntwrk_buf_start = NULL;
665 if (pCFileStruct->srch_inf.smallBuf)
666 cifs_small_buf_release(ptmp);
668 cifs_buf_release(ptmp);
670 cifs_put_tlink(pCFileStruct->tlink);
671 kfree(file->private_data);
672 file->private_data = NULL;
674 /* BB can we lock the filestruct while this is going on? */
679 static int store_file_lock(struct cifsFileInfo *fid, __u64 len,
680 __u64 offset, __u8 lockType)
682 struct cifsLockInfo *li =
683 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
689 mutex_lock(&fid->lock_mutex);
690 list_add(&li->llist, &fid->llist);
691 mutex_unlock(&fid->lock_mutex);
/*
 * cifs_lock - VFS ->lock handler: implements fcntl/flock byte-range locks.
 *
 * Two protocol paths are visible below: CIFSSMBPosixLock when the server
 * advertises the Unix FCNTL capability (and posix brlocks are not mounted
 * off), otherwise Windows-style CIFSSMBLock.  For F_GETLK the code probes
 * by taking and immediately releasing a lock.  Windows-style locks that
 * are granted get recorded via store_file_lock() so they can be undone;
 * unlock walks that list and releases every fully-covered record.
 *
 * NOTE(review): this extract is missing interior lines (the embedded
 * original line numbers jump) — locals (rc/xid/netfid/numLock/numUnlock/
 * posix_lock_type/stored_rc/length declaration), some braces and else
 * arms are not visible.
 */
695 int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
701 bool wait_flag = false;
702 struct cifs_sb_info *cifs_sb;
703 struct cifsTconInfo *tcon;
705 __u8 lockType = LOCKING_ANDX_LARGE_FILES;
706 bool posix_locking = 0;
/* fl_end is inclusive, hence the +1 */
708 length = 1 + pfLock->fl_end - pfLock->fl_start;
712 cFYI(1, "Lock parm: 0x%x flockflags: "
713 "0x%x flocktype: 0x%x start: %lld end: %lld",
714 cmd, pfLock->fl_flags, pfLock->fl_type, pfLock->fl_start,
717 if (pfLock->fl_flags & FL_POSIX)
719 if (pfLock->fl_flags & FL_FLOCK)
721 if (pfLock->fl_flags & FL_SLEEP) {
722 cFYI(1, "Blocking lock");
725 if (pfLock->fl_flags & FL_ACCESS)
726 cFYI(1, "Process suspended by mandatory locking - "
727 "not implemented yet");
728 if (pfLock->fl_flags & FL_LEASE)
729 cFYI(1, "Lease on file - not implemented yet");
730 if (pfLock->fl_flags &
731 (~(FL_POSIX | FL_FLOCK | FL_SLEEP | FL_ACCESS | FL_LEASE)))
732 cFYI(1, "Unknown lock flags 0x%x", pfLock->fl_flags);
/* classify the request into lock/unlock counts and ANDX type bits */
734 if (pfLock->fl_type == F_WRLCK) {
737 } else if (pfLock->fl_type == F_UNLCK) {
740 /* Check if unlock includes more than
742 } else if (pfLock->fl_type == F_RDLCK) {
744 lockType |= LOCKING_ANDX_SHARED_LOCK;
746 } else if (pfLock->fl_type == F_EXLCK) {
749 } else if (pfLock->fl_type == F_SHLCK) {
751 lockType |= LOCKING_ANDX_SHARED_LOCK;
754 cFYI(1, "Unknown type of lock");
756 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
/* NOTE(review): file->private_data is dereferenced on the next line
   but only NULL-checked on the line after — the check should come
   first; confirm ordering against the upstream tree. */
757 tcon = tlink_tcon(((struct cifsFileInfo *)file->private_data)->tlink);
759 if (file->private_data == NULL) {
764 netfid = ((struct cifsFileInfo *)file->private_data)->netfid;
/* decide whether POSIX (Unix-extension) byte-range locks can be used */
766 if ((tcon->ses->capabilities & CAP_UNIX) &&
767 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
768 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
770 /* BB add code here to normalize offset and length to
771 account for negative length which we can not accept over the
/* F_GETLK probe, POSIX flavor */
776 if (lockType & LOCKING_ANDX_SHARED_LOCK)
777 posix_lock_type = CIFS_RDLCK;
779 posix_lock_type = CIFS_WRLCK;
780 rc = CIFSSMBPosixLock(xid, tcon, netfid, 1 /* get */,
782 posix_lock_type, wait_flag);
/* F_GETLK probe, Windows flavor: try to take the lock, then undo it */
787 /* BB we could chain these into one lock request BB */
788 rc = CIFSSMBLock(xid, tcon, netfid, length, pfLock->fl_start,
789 0, 1, lockType, 0 /* wait flag */ );
791 rc = CIFSSMBLock(xid, tcon, netfid, length,
792 pfLock->fl_start, 1 /* numUnlock */ ,
793 0 /* numLock */ , lockType,
795 pfLock->fl_type = F_UNLCK;
797 cERROR(1, "Error unlocking previously locked "
798 "range %d during test of lock", rc);
802 /* if rc == ERR_SHARING_VIOLATION ? */
/* shared probe failed: retry as exclusive to report the blocker type */
805 if (lockType & LOCKING_ANDX_SHARED_LOCK) {
806 pfLock->fl_type = F_WRLCK;
808 rc = CIFSSMBLock(xid, tcon, netfid, length,
809 pfLock->fl_start, 0, 1,
810 lockType | LOCKING_ANDX_SHARED_LOCK,
813 rc = CIFSSMBLock(xid, tcon, netfid,
814 length, pfLock->fl_start, 1, 0,
816 LOCKING_ANDX_SHARED_LOCK,
818 pfLock->fl_type = F_RDLCK;
820 cERROR(1, "Error unlocking "
821 "previously locked range %d "
822 "during test of lock", rc);
825 pfLock->fl_type = F_WRLCK;
835 if (!numLock && !numUnlock) {
836 /* if no lock or unlock then nothing
837 to do since we do not know what it is */
/* F_SETLK/F_SETLKW, POSIX flavor */
844 if (lockType & LOCKING_ANDX_SHARED_LOCK)
845 posix_lock_type = CIFS_RDLCK;
847 posix_lock_type = CIFS_WRLCK;
850 posix_lock_type = CIFS_UNLCK;
852 rc = CIFSSMBPosixLock(xid, tcon, netfid, 0 /* set */,
854 posix_lock_type, wait_flag);
/* F_SETLK/F_SETLKW, Windows flavor */
856 struct cifsFileInfo *fid = file->private_data;
859 rc = CIFSSMBLock(xid, tcon, netfid, length,
861 0, numLock, lockType, wait_flag);
864 /* For Windows locks we must store them. */
865 rc = store_file_lock(fid, length,
866 pfLock->fl_start, lockType);
868 } else if (numUnlock) {
869 /* For each stored lock that this unlock overlaps
870 completely, unlock it. */
872 struct cifsLockInfo *li, *tmp;
875 mutex_lock(&fid->lock_mutex);
876 list_for_each_entry_safe(li, tmp, &fid->llist, llist) {
877 if (pfLock->fl_start <= li->offset &&
878 (pfLock->fl_start + length) >=
879 (li->offset + li->length)) {
880 stored_rc = CIFSSMBLock(xid, tcon,
882 li->length, li->offset,
883 1, 0, li->type, false);
887 list_del(&li->llist);
892 mutex_unlock(&fid->lock_mutex);
/* keep the local (VFS) posix lock state in sync with the server */
896 if (pfLock->fl_flags & FL_POSIX)
897 posix_lock_file_wait(file, pfLock);
903 * Set the timeout on write requests past EOF. For some servers (Windows)
904 * these calls can be very long.
906 * If we're writing >10M past the EOF we give a 180s timeout. Anything less
907 * than that gets a 45s timeout. Writes not past EOF get 15s timeouts.
908 * The 10M cutoff is totally arbitrary. A better scheme for this would be
909 * welcome if someone wants to suggest one.
911 * We may be able to do a better job with this if there were some way to
912 * declare that a file should be sparse.
915 cifs_write_timeout(struct cifsInodeInfo *cifsi, loff_t offset)
917 if (offset <= cifsi->server_eof)
919 else if (offset > (cifsi->server_eof + (10 * 1024 * 1024)))
920 return CIFS_VLONG_OP;
925 /* update the file size (if needed) after a write */
927 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
928 unsigned int bytes_written)
930 loff_t end_of_write = offset + bytes_written;
932 if (end_of_write > cifsi->server_eof)
933 cifsi->server_eof = end_of_write;
/*
 * cifs_user_write - write data from a userspace buffer to the server.
 * @file: open CIFS file
 * @write_data: __user source buffer (passed straight to CIFSSMBWrite,
 *              which handles the user copy)
 * @write_size: number of bytes requested
 * @poffset: in/out file offset; advanced by the bytes actually written
 *
 * Loops issuing wsize-bounded SMB writes; -EAGAIN triggers a reopen of an
 * invalidated handle and a retry.  On success, grows the cached server
 * EOF and, under i_lock, the local i_size.
 *
 * NOTE(review): this extract is missing interior lines (the embedded
 * original line numbers jump) — locals (rc/xid/long_op), loop braces and
 * some error returns are not visible here.
 */
936 ssize_t cifs_user_write(struct file *file, const char __user *write_data,
937 size_t write_size, loff_t *poffset)
939 struct inode *inode = file->f_path.dentry->d_inode;
941 unsigned int bytes_written = 0;
942 unsigned int total_written;
943 struct cifs_sb_info *cifs_sb;
944 struct cifsTconInfo *pTcon;
946 struct cifsFileInfo *open_file;
947 struct cifsInodeInfo *cifsi = CIFS_I(inode);
949 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
951 /* cFYI(1, " write %d bytes to offset %lld of %s", write_size,
952 *poffset, file->f_path.dentry->d_name.name); */
953 if (file->private_data == NULL)
957 open_file = file->private_data;
958 pTcon = tlink_tcon(open_file->tlink);
960 rc = generic_write_checks(file, poffset, &write_size, 0);
/* first write past EOF may need a long server timeout; see
   cifs_write_timeout() above */
966 long_op = cifs_write_timeout(cifsi, *poffset);
967 for (total_written = 0; write_size > total_written;
968 total_written += bytes_written) {
970 while (rc == -EAGAIN) {
971 if (file->private_data == NULL) {
972 /* file has been closed on us */
974 /* if we have gotten here we have written some data
975 and blocked, and the file has been freed on us while
976 we blocked so return what we managed to write */
977 return total_written;
979 if (open_file->invalidHandle) {
980 /* we could deadlock if we called
981 filemap_fdatawait from here so tell
982 reopen_file not to flush data to server
984 rc = cifs_reopen_file(open_file, false);
/* each iteration writes at most wsize bytes */
989 rc = CIFSSMBWrite(xid, pTcon,
991 min_t(const int, cifs_sb->wsize,
992 write_size - total_written),
993 *poffset, &bytes_written,
994 NULL, write_data + total_written, long_op);
996 if (rc || (bytes_written == 0)) {
1004 cifs_update_eof(cifsi, *poffset, bytes_written);
1005 *poffset += bytes_written;
1007 long_op = CIFS_STD_OP; /* subsequent writes fast -
1008 15 seconds is plenty */
1011 cifs_stats_bytes_written(pTcon, total_written);
1013 /* Do not update local mtime - server will set its actual value on write
1014 * inode->i_ctime = inode->i_mtime =
1015 * current_fs_time(inode->i_sb);*/
1016 if (total_written > 0) {
1017 spin_lock(&inode->i_lock);
1018 if (*poffset > inode->i_size)
1019 i_size_write(inode, *poffset);
1020 spin_unlock(&inode->i_lock);
1022 mark_inode_dirty_sync(inode);
1025 return total_written;
/*
 * cifs_write - write data from a kernel buffer to the server through an
 * existing open-file instance (used e.g. by page writeback).
 * @open_file: the cifsFileInfo to write through
 * @write_data: kernel source buffer
 * @write_size: number of bytes requested
 *
 * Same retry/reopen loop as cifs_user_write(), but chooses between the
 * vectored CIFSSMBWrite2 (when the experimental path or signing is in
 * play, per the visible condition) and plain CIFSSMBWrite.
 *
 * NOTE(review): this extract is missing interior lines (the embedded
 * original line numbers jump) — locals (rc/xid/long_op/len/iov/poffset
 * parameter line), loop braces and some branches are not visible here.
 */
1028 static ssize_t cifs_write(struct cifsFileInfo *open_file,
1029 const char *write_data, size_t write_size,
1033 unsigned int bytes_written = 0;
1034 unsigned int total_written;
1035 struct cifs_sb_info *cifs_sb;
1036 struct cifsTconInfo *pTcon;
1038 struct dentry *dentry = open_file->dentry;
1039 struct cifsInodeInfo *cifsi = CIFS_I(dentry->d_inode);
1041 cifs_sb = CIFS_SB(dentry->d_sb);
1043 cFYI(1, "write %zd bytes to offset %lld of %s", write_size,
1044 *poffset, dentry->d_name.name);
1046 pTcon = tlink_tcon(open_file->tlink);
/* longer server timeout for writes extending past the cached EOF */
1050 long_op = cifs_write_timeout(cifsi, *poffset);
1051 for (total_written = 0; write_size > total_written;
1052 total_written += bytes_written) {
1054 while (rc == -EAGAIN) {
1055 if (open_file->invalidHandle) {
1056 /* we could deadlock if we called
1057 filemap_fdatawait from here so tell
1058 reopen_file not to flush data to
1060 rc = cifs_reopen_file(open_file, false);
/* vectored write path (iov[0] reserved for the SMB header) */
1064 if (experimEnabled || (pTcon->ses->server &&
1065 ((pTcon->ses->server->secMode &
1066 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
1071 len = min((size_t)cifs_sb->wsize,
1072 write_size - total_written);
1073 /* iov[0] is reserved for smb header */
1074 iov[1].iov_base = (char *)write_data +
1076 iov[1].iov_len = len;
1077 rc = CIFSSMBWrite2(xid, pTcon,
1078 open_file->netfid, len,
1079 *poffset, &bytes_written,
/* non-vectored fallback */
1082 rc = CIFSSMBWrite(xid, pTcon,
1084 min_t(const int, cifs_sb->wsize,
1085 write_size - total_written),
1086 *poffset, &bytes_written,
1087 write_data + total_written,
1090 if (rc || (bytes_written == 0)) {
1098 cifs_update_eof(cifsi, *poffset, bytes_written);
1099 *poffset += bytes_written;
1101 long_op = CIFS_STD_OP; /* subsequent writes fast -
1102 15 seconds is plenty */
1105 cifs_stats_bytes_written(pTcon, total_written);
1107 if (total_written > 0) {
1108 spin_lock(&dentry->d_inode->i_lock);
1109 if (*poffset > dentry->d_inode->i_size)
1110 i_size_write(dentry->d_inode, *poffset);
1111 spin_unlock(&dentry->d_inode->i_lock);
1113 mark_inode_dirty_sync(dentry->d_inode);
1115 return total_written;
1118 #ifdef CONFIG_CIFS_EXPERIMENTAL
1119 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1122 struct cifsFileInfo *open_file = NULL;
1123 struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1125 /* only filter by fsuid on multiuser mounts */
1126 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1129 spin_lock(&cifs_file_list_lock);
1130 /* we could simply get the first_list_entry since write-only entries
1131 are always at the end of the list but since the first entry might
1132 have a close pending, we go through the whole list */
1133 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1134 if (fsuid_only && open_file->uid != current_fsuid())
1136 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1137 if (!open_file->invalidHandle) {
1138 /* found a good file */
1139 /* lock it so it will not be closed on us */
1140 cifsFileInfo_get(open_file);
1141 spin_unlock(&cifs_file_list_lock);
1143 } /* else might as well continue, and look for
1144 another, or simply have the caller reopen it
1145 again rather than trying to fix this handle */
1146 } else /* write only file */
1147 break; /* write only files are last so must be done */
1149 spin_unlock(&cifs_file_list_lock);
1154 struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
1157 struct cifsFileInfo *open_file;
1158 struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1159 bool any_available = false;
1162 /* Having a null inode here (because mapping->host was set to zero by
1163 the VFS or MM) should not happen but we had reports of on oops (due to
1164 it being zero) during stress testcases so we need to check for it */
1166 if (cifs_inode == NULL) {
1167 cERROR(1, "Null inode passed to cifs_writeable_file");
1172 /* only filter by fsuid on multiuser mounts */
1173 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1176 spin_lock(&cifs_file_list_lock);
1178 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1179 if (!any_available && open_file->pid != current->tgid)
1181 if (fsuid_only && open_file->uid != current_fsuid())
1183 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
1184 cifsFileInfo_get(open_file);
1186 if (!open_file->invalidHandle) {
1187 /* found a good writable file */
1188 spin_unlock(&cifs_file_list_lock);
1192 spin_unlock(&cifs_file_list_lock);
1194 /* Had to unlock since following call can block */
1195 rc = cifs_reopen_file(open_file, false);
1199 /* if it fails, try another handle if possible */
1200 cFYI(1, "wp failed on reopen file");
1201 cifsFileInfo_put(open_file);
1203 spin_lock(&cifs_file_list_lock);
1205 /* else we simply continue to the next entry. Thus
1206 we do not loop on reopen errors. If we
1207 can not reopen the file, for example if we
1208 reconnected to a server with another client
1209 racing to delete or lock the file we would not
1210 make progress if we restarted before the beginning
1211 of the loop here. */
1214 /* couldn't find useable FH with same pid, try any available */
1215 if (!any_available) {
1216 any_available = true;
1217 goto refind_writable;
1219 spin_unlock(&cifs_file_list_lock);
1223 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1225 struct address_space *mapping = page->mapping;
1226 loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
1229 int bytes_written = 0;
1230 struct cifs_sb_info *cifs_sb;
1231 struct inode *inode;
1232 struct cifsFileInfo *open_file;
1234 if (!mapping || !mapping->host)
1237 inode = page->mapping->host;
1238 cifs_sb = CIFS_SB(inode->i_sb);
1240 offset += (loff_t)from;
1241 write_data = kmap(page);
1244 if ((to > PAGE_CACHE_SIZE) || (from > to)) {
1249 /* racing with truncate? */
1250 if (offset > mapping->host->i_size) {
1252 return 0; /* don't care */
1255 /* check to make sure that we are not extending the file */
1256 if (mapping->host->i_size - offset < (loff_t)to)
1257 to = (unsigned)(mapping->host->i_size - offset);
1259 open_file = find_writable_file(CIFS_I(mapping->host), false);
1261 bytes_written = cifs_write(open_file, write_data,
1262 to - from, &offset);
1263 cifsFileInfo_put(open_file);
1264 /* Does mm or vfs already set times? */
1265 inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb);
1266 if ((bytes_written > 0) && (offset))
1268 else if (bytes_written < 0)
1271 cFYI(1, "No writeable filehandles for inode");
/*
 * cifs_writepages - address_space_operations.writepages for CIFS.
 *
 * Gathers runs of contiguous dirty pages with pagevec_lookup_tag(), maps
 * each into an iovec (iov[0] is reserved for the SMB header) and sends the
 * run in a single CIFSSMBWrite2 call, falling back to generic_writepages()
 * (one page at a time via cifs_writepage) when the negotiated wsize is
 * smaller than a page, when the iov cannot be allocated, when no writable
 * handle exists, or when signing is required without the experimental path.
 * NOTE(review): many lines of this extract are elided; the loop-control and
 * error paths below are only partially visible.
 */
1279 static int cifs_writepages(struct address_space *mapping,
1280 struct writeback_control *wbc)
1282 unsigned int bytes_to_write;
1283 unsigned int bytes_written;
1284 struct cifs_sb_info *cifs_sb;
1288 int range_whole = 0;
1295 struct cifsFileInfo *open_file;
1296 struct cifsTconInfo *tcon;
1297 struct cifsInodeInfo *cifsi = CIFS_I(mapping->host);
1299 struct pagevec pvec;
1304 cifs_sb = CIFS_SB(mapping->host->i_sb);
1307 * If wsize is smaller that the page cache size, default to writing
1308 * one page at a time via cifs_writepage
1310 if (cifs_sb->wsize < PAGE_CACHE_SIZE)
1311 return generic_writepages(mapping, wbc);
/* scratch iovec array; slot 0 is kept for the SMB header */
1313 iov = kmalloc(32 * sizeof(struct kvec), GFP_KERNEL)?;
1315 return generic_writepages(mapping, wbc);
1318 * if there's no open file, then this is likely to fail too,
1319 * but it'll at least handle the return. Maybe it should be
1322 open_file = find_writable_file(CIFS_I(mapping->host), false);
1325 return generic_writepages(mapping, wbc);
1328 tcon = tlink_tcon(open_file->tlink);
/* signed traffic cannot use the multipage path unless experimental */
1329 if (!experimEnabled && tcon->ses->server->secMode &
1330 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) {
1331 cifsFileInfo_put(open_file);
1333 return generic_writepages(mapping, wbc);
1335 cifsFileInfo_put(open_file);
1339 pagevec_init(&pvec, 0);
1340 if (wbc->range_cyclic) {
1341 index = mapping->writeback_index; /* Start from prev offset */
1344 index = wbc->range_start >> PAGE_CACHE_SHIFT;
1345 end = wbc->range_end >> PAGE_CACHE_SHIFT;
1346 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
/* main scan loop: collect dirty pages a pagevec at a time */
1351 while (!done && (index <= end) &&
1352 (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
1353 PAGECACHE_TAG_DIRTY,
1354 min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1))) {
1363 for (i = 0; i < nr_pages; i++) {
1364 page = pvec.pages[i];
1366 * At this point we hold neither mapping->tree_lock nor
1367 * lock on the page itself: the page may be truncated or
1368 * invalidated (changing page->mapping to NULL), or even
1369 * swizzled back from swapper_space to tmpfs file
1375 else if (!trylock_page(page))
/* page moved to another mapping while unlocked: skip it */
1378 if (unlikely(page->mapping != mapping)) {
1383 if (!wbc->range_cyclic && page->index > end) {
/* only batch strictly consecutive pages into one SMB write */
1389 if (next && (page->index != next)) {
1390 /* Not next consecutive page */
1395 if (wbc->sync_mode != WB_SYNC_NONE)
1396 wait_on_page_writeback(page);
1398 if (PageWriteback(page) ||
1399 !clear_page_dirty_for_io(page)) {
1405 * This actually clears the dirty bit in the radix tree.
1406 * See cifs_writepage() for more commentary.
1408 set_page_writeback(page);
/* page lies entirely beyond EOF: nothing to send */
1410 if (page_offset(page) >= mapping->host->i_size) {
1413 end_page_writeback(page);
1418 * BB can we get rid of this? pages are held by pvec
1420 page_cache_get(page);
1422 len = min(mapping->host->i_size - page_offset(page),
1423 (loff_t)PAGE_CACHE_SIZE);
1425 /* reserve iov[0] for the smb header */
1427 iov[n_iov].iov_base = kmap(page);
1428 iov[n_iov].iov_len = len;
1429 bytes_to_write += len;
1433 offset = page_offset(page);
1435 next = page->index + 1;
/* stop batching once another page would exceed wsize */
1436 if (bytes_to_write + PAGE_CACHE_SIZE > cifs_sb->wsize)
1440 open_file = find_writable_file(CIFS_I(mapping->host),
1443 cERROR(1, "No writable handles for inode");
1446 long_op = cifs_write_timeout(cifsi, offset);
1447 rc = CIFSSMBWrite2(xid, tcon, open_file->netfid,
1448 bytes_to_write, offset,
1449 &bytes_written, iov, n_iov,
1451 cifsFileInfo_put(open_file);
1452 cifs_update_eof(cifsi, offset, bytes_written);
/* short write or error: record it on the mapping */
1455 if (rc || bytes_written < bytes_to_write) {
1456 cERROR(1, "Write2 ret %d, wrote %d",
1458 mapping_set_error(mapping, rc);
1460 cifs_stats_bytes_written(tcon, bytes_written);
/* unmap, finish writeback on, and release every batched page */
1463 for (i = 0; i < n_iov; i++) {
1464 page = pvec.pages[first + i];
1465 /* Should we also set page error on
1466 success rc but too little data written? */
1467 /* BB investigate retry logic on temporary
1468 server crash cases and how recovery works
1469 when page marked as error */
1474 end_page_writeback(page);
1475 page_cache_release(page);
1477 if ((wbc->nr_to_write -= n_iov) <= 0)
1481 /* Need to re-find the pages we skipped */
1482 index = pvec.pages[0]->index + 1;
1484 pagevec_release(&pvec);
1486 if (!scanned && !done) {
1488 * We hit the last page and there is more work to be done: wrap
1489 * back to the start of the file
1495 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
1496 mapping->writeback_index = index;
/*
 * cifs_writepage - address_space_operations.writepage for CIFS.
 *
 * Writes a single locked page back to the server via
 * cifs_partialpagewrite(), bracketing the write with the writeback flag.
 * NOTE(review): rc from cifs_partialpagewrite is not visibly checked
 * before SetPageUptodate (the BB comment below flags the same concern).
 */
1503 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
1509 /* BB add check for wbc flags */
1510 page_cache_get(page);
1511 if (!PageUptodate(page))
1512 cFYI(1, "ppw - page not up to date");
1515 * Set the "writeback" flag, and clear "dirty" in the radix tree.
1517 * A writepage() implementation always needs to do either this,
1518 * or re-dirty the page with "redirty_page_for_writepage()" in
1519 * the case of a failure.
1521 * Just unlocking the page will cause the radix tree tag-bits
1522 * to fail to update with the state of the page correctly.
1524 set_page_writeback(page);
1525 rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE);
1526 SetPageUptodate(page); /* BB add check for error and Clearuptodate? */
1528 end_page_writeback(page);
1529 page_cache_release(page);
/*
 * cifs_write_end - address_space_operations.write_end for CIFS.
 *
 * Completes a buffered write begun by cifs_write_begin: marks the page
 * uptodate when the copy filled it (or when PageChecked says the rest is
 * already valid), otherwise falls back to a synchronous cifs_write() of
 * just the copied bytes.  Extends i_size under i_lock when the write grew
 * the file, then dirties and releases the page.
 */
1534 static int cifs_write_end(struct file *file, struct address_space *mapping,
1535 loff_t pos, unsigned len, unsigned copied,
1536 struct page *page, void *fsdata)
1539 struct inode *inode = mapping->host;
1541 cFYI(1, "write_end for page %p from pos %lld with %d bytes",
/* PageChecked set by write_begin: untouched parts already valid */
1544 if (PageChecked(page)) {
1546 SetPageUptodate(page);
1547 ClearPageChecked(page);
1548 } else if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
1549 SetPageUptodate(page);
/* partial copy onto a non-uptodate page: push it synchronously */
1551 if (!PageUptodate(page)) {
1553 unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
1557 /* this is probably better than directly calling
1558 partialpage_write since in this function the file handle is
1559 known which we might as well leverage */
1560 /* BB check if anything else missing out of ppw
1561 such as updating last write time */
1562 page_data = kmap(page);
1563 rc = cifs_write(file->private_data, page_data + offset,
1565 /* if (rc < 0) should we set writebehind rc? */
1572 set_page_dirty(page);
/* grow i_size if this write extended the file */
1576 spin_lock(&inode->i_lock);
1577 if (pos > inode->i_size)
1578 i_size_write(inode, pos);
1579 spin_unlock(&inode->i_lock);
1583 page_cache_release(page);
/*
 * cifs_fsync - flush dirty pages and ask the server to flush its cache.
 *
 * Writes back and waits on the inode's mapping, then (unless the mount
 * set CIFS_MOUNT_NOSSYNC) sends an SMB Flush on this file's handle so
 * the server commits its buffered data.
 */
1588 int cifs_fsync(struct file *file, int datasync)
1592 struct cifsTconInfo *tcon;
1593 struct cifsFileInfo *smbfile = file->private_data;
1594 struct inode *inode = file->f_path.dentry->d_inode;
1598 cFYI(1, "Sync file - name: %s datasync: 0x%x",
1599 file->f_path.dentry->d_name.name, datasync);
1601 rc = filemap_write_and_wait(inode->i_mapping);
1603 struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
1605 tcon = tlink_tcon(smbfile->tlink);
1606 if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC))
1607 rc = CIFSSMBFlush(xid, tcon, smbfile->netfid);
1614 /* static void cifs_sync_page(struct page *page)
1616 struct address_space *mapping;
1617 struct inode *inode;
1618 unsigned long index = page->index;
1619 unsigned int rpages = 0;
1622 cFYI(1, "sync page %p", page);
1623 mapping = page->mapping;
1626 inode = mapping->host;
1630 /* fill in rpages then
1631 result = cifs_pagein_inode(inode, index, rpages); */ /* BB finish */
1633 /* cFYI(1, "rpages is %d for sync page of Index %ld", rpages, index);
1643 * As file closes, flush all cached write data for this inode checking
1644 * for write behind errors.
/* file_operations.flush: only writable opens need the writeback */
1646 int cifs_flush(struct file *file, fl_owner_t id)
1648 struct inode *inode = file->f_path.dentry->d_inode;
1651 if (file->f_mode & FMODE_WRITE)
1652 rc = filemap_write_and_wait(inode->i_mapping);
1654 cFYI(1, "Flush inode %p file %p rc %d", inode, file, rc);
/*
 * cifs_user_read - read directly into a userspace buffer.
 *
 * Loops issuing CIFSSMBRead into a kernel SMB response buffer and
 * copy_to_user()s the payload (skipping the 4-byte RFC1001 length and the
 * response DataOffset) into @read_data.  Reopens an invalidated handle and
 * retries on -EAGAIN.  Advances *poffset by the bytes read.
 * NOTE(review): elided lines include the return path; verify the
 * total_read/rc return convention against the full source.
 */
1659 ssize_t cifs_user_read(struct file *file, char __user *read_data,
1660 size_t read_size, loff_t *poffset)
1663 unsigned int bytes_read = 0;
1664 unsigned int total_read = 0;
1665 unsigned int current_read_size;
1666 struct cifs_sb_info *cifs_sb;
1667 struct cifsTconInfo *pTcon;
1669 struct cifsFileInfo *open_file;
1670 char *smb_read_data;
1671 char __user *current_offset;
1672 struct smb_com_read_rsp *pSMBr;
1675 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1677 if (file->private_data == NULL) {
1682 open_file = file->private_data;
1683 pTcon = tlink_tcon(open_file->tlink);
1685 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
1686 cFYI(1, "attempting read on write only file instance");
/* read in chunks until the request is satisfied or EOF/error */
1688 for (total_read = 0, current_offset = read_data;
1689 read_size > total_read;
1690 total_read += bytes_read, current_offset += bytes_read) {
1691 current_read_size = min_t(const int, read_size - total_read,
1694 smb_read_data = NULL;
/* retry loop: reopen a stale handle and reissue on -EAGAIN */
1695 while (rc == -EAGAIN) {
1696 int buf_type = CIFS_NO_BUFFER;
1697 if (open_file->invalidHandle) {
1698 rc = cifs_reopen_file(open_file, true);
1702 rc = CIFSSMBRead(xid, pTcon,
1704 current_read_size, *poffset,
1705 &bytes_read, &smb_read_data,
1707 pSMBr = (struct smb_com_read_rsp *)smb_read_data;
1708 if (smb_read_data) {
/* payload begins after the RFC1001 header + DataOffset */
1709 if (copy_to_user(current_offset,
1711 4 /* RFC1001 length field */ +
1712 le16_to_cpu(pSMBr->DataOffset),
/* release whichever buffer pool the response came from */
1716 if (buf_type == CIFS_SMALL_BUFFER)
1717 cifs_small_buf_release(smb_read_data);
1718 else if (buf_type == CIFS_LARGE_BUFFER)
1719 cifs_buf_release(smb_read_data);
1720 smb_read_data = NULL;
1723 if (rc || (bytes_read == 0)) {
1731 cifs_stats_bytes_read(pTcon, bytes_read);
1732 *poffset += bytes_read;
/*
 * cifs_read - read into a kernel buffer (used by readpage paths).
 *
 * Like cifs_user_read but the destination is kernel memory, so
 * CIFSSMBRead can be pointed straight at current_offset instead of
 * copying out of an SMB response buffer.  Caps the chunk size for
 * servers without CAP_LARGE_FILES (Win9x/ME refuse reads larger than
 * the negotiated buffer), reopens an invalidated handle and retries on
 * -EAGAIN, and advances *poffset by the bytes read.
 *
 * Fix: line 1790 contained the mojibake "¤t_offset" -- the "&curren"
 * prefix of "&current_offset" had been mis-decoded as the currency-sign
 * HTML entity.  Restored to &current_offset.
 */
1740 static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
1744 unsigned int bytes_read = 0;
1745 unsigned int total_read;
1746 unsigned int current_read_size;
1747 struct cifs_sb_info *cifs_sb;
1748 struct cifsTconInfo *pTcon;
1750 char *current_offset;
1751 struct cifsFileInfo *open_file;
1752 int buf_type = CIFS_NO_BUFFER;
1755 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1757 if (file->private_data == NULL) {
1762 open_file = file->private_data;
1763 pTcon = tlink_tcon(open_file->tlink);
1765 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
1766 cFYI(1, "attempting read on write only file instance");
/* read in chunks until the request is satisfied or EOF/error */
1768 for (total_read = 0, current_offset = read_data;
1769 read_size > total_read;
1770 total_read += bytes_read, current_offset += bytes_read) {
1771 current_read_size = min_t(const int, read_size - total_read,
1773 /* For windows me and 9x we do not want to request more
1774 than it negotiated since it will refuse the read then */
1776 !(pTcon->ses->capabilities & CAP_LARGE_FILES)) {
1777 current_read_size = min_t(const int, current_read_size,
1778 pTcon->ses->server->maxBuf - 128);
/* retry loop: reopen a stale handle and reissue on -EAGAIN */
1781 while (rc == -EAGAIN) {
1782 if (open_file->invalidHandle) {
1783 rc = cifs_reopen_file(open_file, true);
1787 rc = CIFSSMBRead(xid, pTcon,
1789 current_read_size, *poffset,
1790 &bytes_read, &current_offset,
1793 if (rc || (bytes_read == 0)) {
1801 cifs_stats_bytes_read(pTcon, total_read);
1802 *poffset += bytes_read;
/*
 * cifs_file_mmap - revalidate cached data with the server, then fall
 * through to generic_file_mmap().  A failed revalidation is logged.
 */
1809 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
1814 rc = cifs_revalidate_file(file);
1816 cFYI(1, "Validation prior to mmap failed, error=%d", rc);
1820 rc = generic_file_mmap(file, vma);
/*
 * cifs_copy_cache_pages - fan one contiguous read buffer out into pages.
 *
 * Pops pages off @pages (tail first), inserts each into the page cache,
 * copies PAGE_CACHE_SIZE (or the remaining partial tail, zero-padded)
 * from @data, marks the page uptodate and hands it to FS-Cache.
 * Pages that fail add_to_page_cache_lru are skipped but the data cursor
 * still advances, keeping buffer and page list in step.
 */
1826 static void cifs_copy_cache_pages(struct address_space *mapping,
1827 struct list_head *pages, int bytes_read, char *data)
1832 while (bytes_read > 0) {
1833 if (list_empty(pages))
1836 page = list_entry(pages->prev, struct page, lru);
1837 list_del(&page->lru);
1839 if (add_to_page_cache_lru(page, mapping, page->index,
1841 page_cache_release(page);
1842 cFYI(1, "Add page cache failed");
/* still consume this page's worth of buffer to stay aligned */
1843 data += PAGE_CACHE_SIZE;
1844 bytes_read -= PAGE_CACHE_SIZE;
1847 page_cache_release(page);
1849 target = kmap_atomic(page, KM_USER0);
1851 if (PAGE_CACHE_SIZE > bytes_read) {
1852 memcpy(target, data, bytes_read);
1853 /* zero the tail end of this partial page */
1854 memset(target + bytes_read, 0,
1855 PAGE_CACHE_SIZE - bytes_read);
1858 memcpy(target, data, PAGE_CACHE_SIZE);
1859 bytes_read -= PAGE_CACHE_SIZE;
1861 kunmap_atomic(target, KM_USER0);
1863 flush_dcache_page(page);
1864 SetPageUptodate(page);
1866 data += PAGE_CACHE_SIZE;
1868 /* add page to FS-Cache */
1869 cifs_readpage_to_fscache(mapping->host, page);
/*
 * cifs_readpages - address_space_operations.readpages for CIFS.
 *
 * First tries FS-Cache for the whole list; for what remains, finds runs
 * of pages with adjacent indices, issues one CIFSSMBRead per run (sized
 * to a page-aligned multiple capped at rsize), and scatters the response
 * payload into the page cache via cifs_copy_cache_pages().  SMB response
 * buffers are released on every exit path.
 * NOTE(review): elided lines include loop-exit handling; verify against
 * the full source before changing control flow.
 */
1874 static int cifs_readpages(struct file *file, struct address_space *mapping,
1875 struct list_head *page_list, unsigned num_pages)
1881 struct cifs_sb_info *cifs_sb;
1882 struct cifsTconInfo *pTcon;
1883 unsigned int bytes_read = 0;
1884 unsigned int read_size, i;
1885 char *smb_read_data = NULL;
1886 struct smb_com_read_rsp *pSMBr;
1887 struct cifsFileInfo *open_file;
1888 int buf_type = CIFS_NO_BUFFER;
1891 if (file->private_data == NULL) {
1896 open_file = file->private_data;
1897 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1898 pTcon = tlink_tcon(open_file->tlink);
1901 * Reads as many pages as possible from fscache. Returns -ENOBUFS
1902 * immediately if the cookie is negative
1904 rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
1909 cFYI(DBG2, "rpages: num pages %d", num_pages);
1910 for (i = 0; i < num_pages; ) {
1911 unsigned contig_pages;
1912 struct page *tmp_page;
1913 unsigned long expected_index;
1915 if (list_empty(page_list))
/* list is kept tail-first: the next page to read is at ->prev */
1918 page = list_entry(page_list->prev, struct page, lru);
1919 offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
1921 /* count adjacent pages that we will read into */
1924 list_entry(page_list->prev, struct page, lru)->index;
1925 list_for_each_entry_reverse(tmp_page, page_list, lru) {
1926 if (tmp_page->index == expected_index) {
1932 if (contig_pages + i > num_pages)
1933 contig_pages = num_pages - i;
1935 /* for reads over a certain size could initiate async
1938 read_size = contig_pages * PAGE_CACHE_SIZE;
1939 /* Read size needs to be in multiples of one page */
1940 read_size = min_t(const unsigned int, read_size,
1941 cifs_sb->rsize & PAGE_CACHE_MASK);
1942 cFYI(DBG2, "rpages: read size 0x%x contiguous pages %d",
1943 read_size, contig_pages);
/* retry loop: reopen a stale handle and reissue on -EAGAIN */
1945 while (rc == -EAGAIN) {
1946 if (open_file->invalidHandle) {
1947 rc = cifs_reopen_file(open_file, true);
1952 rc = CIFSSMBRead(xid, pTcon,
1955 &bytes_read, &smb_read_data,
1957 /* BB more RC checks ? */
1958 if (rc == -EAGAIN) {
/* drop the stale response buffer before retrying */
1959 if (smb_read_data) {
1960 if (buf_type == CIFS_SMALL_BUFFER)
1961 cifs_small_buf_release(smb_read_data);
1962 else if (buf_type == CIFS_LARGE_BUFFER)
1963 cifs_buf_release(smb_read_data);
1964 smb_read_data = NULL;
1968 if ((rc < 0) || (smb_read_data == NULL)) {
1969 cFYI(1, "Read error in readpages: %d", rc);
1971 } else if (bytes_read > 0) {
1972 task_io_account_read(bytes_read);
1973 pSMBr = (struct smb_com_read_rsp *)smb_read_data;
/* payload starts after RFC1001 header + DataOffset */
1974 cifs_copy_cache_pages(mapping, page_list, bytes_read,
1975 smb_read_data + 4 /* RFC1001 hdr */ +
1976 le16_to_cpu(pSMBr->DataOffset));
1978 i += bytes_read >> PAGE_CACHE_SHIFT;
1979 cifs_stats_bytes_read(pTcon, bytes_read);
1980 if ((bytes_read & PAGE_CACHE_MASK) != bytes_read) {
1981 i++; /* account for partial page */
1983 /* server copy of file can have smaller size
1985 /* BB do we need to verify this common case ?
1986 this case is ok - if we are at server EOF
1987 we will hit it on next read */
1992 cFYI(1, "No bytes read (%d) at offset %lld . "
1993 "Cleaning remaining pages from readahead list",
1994 bytes_read, offset);
1995 /* BB turn off caching and do new lookup on
1996 file size at server? */
1999 if (smb_read_data) {
2000 if (buf_type == CIFS_SMALL_BUFFER)
2001 cifs_small_buf_release(smb_read_data);
2002 else if (buf_type == CIFS_LARGE_BUFFER)
2003 cifs_buf_release(smb_read_data);
2004 smb_read_data = NULL;
2009 /* need to free smb_read_data buf before exit */
2010 if (smb_read_data) {
2011 if (buf_type == CIFS_SMALL_BUFFER)
2012 cifs_small_buf_release(smb_read_data);
2013 else if (buf_type == CIFS_LARGE_BUFFER)
2014 cifs_buf_release(smb_read_data);
2015 smb_read_data = NULL;
/*
 * cifs_readpage_worker - fill one page, preferring FS-Cache.
 *
 * Tries cifs_readpage_from_fscache() first; on a miss, reads from the
 * server with cifs_read(), zero-pads a short read, marks the page
 * uptodate and pushes it into FS-Cache.  Updates the inode's atime.
 */
2023 static int cifs_readpage_worker(struct file *file, struct page *page,
2029 /* Is the page cached? */
2030 rc = cifs_readpage_from_fscache(file->f_path.dentry->d_inode, page);
2034 page_cache_get(page);
2035 read_data = kmap(page);
2036 /* for reads over a certain size could initiate async read ahead */
2038 rc = cifs_read(file, read_data, PAGE_CACHE_SIZE, poffset);
2043 cFYI(1, "Bytes read %d", rc);
2045 file->f_path.dentry->d_inode->i_atime =
2046 current_fs_time(file->f_path.dentry->d_inode->i_sb);
/* short read: zero the remainder of the page */
2048 if (PAGE_CACHE_SIZE > rc)
2049 memset(read_data + rc, 0, PAGE_CACHE_SIZE - rc);
2051 flush_dcache_page(page);
2052 SetPageUptodate(page);
2054 /* send this page to the cache */
2055 cifs_readpage_to_fscache(file->f_path.dentry->d_inode, page);
2061 page_cache_release(page);
/*
 * cifs_readpage - address_space_operations.readpage: compute the file
 * offset from the page index and delegate to cifs_readpage_worker().
 */
2067 static int cifs_readpage(struct file *file, struct page *page)
2069 loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
2075 if (file->private_data == NULL) {
2081 cFYI(1, "readpage %p at offset %d 0x%x\n",
2082 page, (int)offset, (int)offset);
2084 rc = cifs_readpage_worker(file, page, &offset);
/*
 * is_inode_writable - true if any open file handle on this inode was
 * opened with write access.  Walks openFileList under
 * cifs_file_list_lock, dropping the lock before returning on a match.
 */
2092 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
2094 struct cifsFileInfo *open_file;
2096 spin_lock(&cifs_file_list_lock);
2097 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2098 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
2099 spin_unlock(&cifs_file_list_lock);
2103 spin_unlock(&cifs_file_list_lock);
2107 /* We do not want to update the file size from server for inodes
2108 open for write - to avoid races with writepage extending
2109 the file - in the future we could consider allowing
2110 refreshing the inode only on increases in the file size
2111 but this is tricky to do without racing with writebehind
2112 page caching in the current Linux kernel design */
/*
 * Returns whether it is safe to adopt @end_of_file (the server's idea of
 * the file size) into the local inode.  Direct-I/O mounts are always
 * safe since there is no page cache to corrupt.
 */
2113 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
2118 if (is_inode_writable(cifsInode)) {
2119 /* This inode is open for write at least once */
2120 struct cifs_sb_info *cifs_sb;
2122 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
2123 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
2124 /* since no page cache to corrupt on directio
2125 we can change size safely */
/* only allow shrinking checks: safe if server size is larger */
2129 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
/*
 * cifs_write_begin - address_space_operations.write_begin for CIFS.
 *
 * Grabs (and locks) the target page and decides whether it must be read
 * from the server first.  The read is skipped when the page is already
 * uptodate, when the write covers the whole page, or -- with a read
 * oplock -- when the page lies at/past EOF or the write will cover all
 * existing data (in which case the untouched ranges are zeroed and
 * PageChecked is set for cifs_write_end to consume).
 */
2137 static int cifs_write_begin(struct file *file, struct address_space *mapping,
2138 loff_t pos, unsigned len, unsigned flags,
2139 struct page **pagep, void **fsdata)
2141 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
2142 loff_t offset = pos & (PAGE_CACHE_SIZE - 1);
2143 loff_t page_start = pos & PAGE_MASK;
2148 cFYI(1, "write_begin from %lld len %d", (long long)pos, len);
2150 page = grab_cache_page_write_begin(mapping, index, flags);
2156 if (PageUptodate(page))
2160 * If we write a full page it will be up to date, no need to read from
2161 * the server. If the write is short, we'll end up doing a sync write
2164 if (len == PAGE_CACHE_SIZE)
2168 * optimize away the read when we have an oplock, and we're not
2169 * expecting to use any of the data we'd be reading in. That
2170 * is, when the page lies beyond the EOF, or straddles the EOF
2171 * and the write will cover all of the existing data.
2173 if (CIFS_I(mapping->host)->clientCanCacheRead) {
2174 i_size = i_size_read(mapping->host);
2175 if (page_start >= i_size ||
2176 (offset == 0 && (pos + len) >= i_size)) {
2177 zero_user_segments(page, 0, offset,
2181 * PageChecked means that the parts of the page
2182 * to which we're not writing are considered up
2183 * to date. Once the data is copied to the
2184 * page, it can be set uptodate.
2186 SetPageChecked(page);
2191 if ((file->f_flags & O_ACCMODE) != O_WRONLY) {
2193 * might as well read a page, it is fast enough. If we get
2194 * an error, we don't need to return it. cifs_write_end will
2195 * do a sync write instead since PG_uptodate isn't set.
2197 cifs_readpage_worker(file, page, &page_start);
2199 /* we could try using another file handle if there is one -
2200 but how would we lock it to prevent close of that handle
2201 racing with this read? In any case
2202 this will be written out by write_end so is fine */
/*
 * cifs_release_page - refuse release of pages with private data;
 * otherwise let FS-Cache decide whether the page may go.
 */
2209 static int cifs_release_page(struct page *page, gfp_t gfp)
2211 if (PagePrivate(page))
2214 return cifs_fscache_release_page(page, gfp);
/* cifs_invalidate_page - propagate page invalidation into FS-Cache. */
2217 static void cifs_invalidate_page(struct page *page, unsigned long offset)
2219 struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
2222 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
/*
 * cifs_oplock_break - workqueue handler run when the server breaks an
 * oplock on this file.
 *
 * Breaks any local lease, writes back dirty data, and -- when read
 * caching was lost -- waits for the writeback and invalidates the local
 * cache.  Finally acknowledges the break to the server (unless the
 * break was cancelled, e.g. the session reconnected and the old handle
 * is stale) and drops the reference taken by is_valid_oplock_break().
 */
2225 void cifs_oplock_break(struct work_struct *work)
2227 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
2229 struct inode *inode = cfile->dentry->d_inode;
2230 struct cifsInodeInfo *cinode = CIFS_I(inode);
2233 if (inode && S_ISREG(inode->i_mode)) {
2234 if (cinode->clientCanCacheRead)
2235 break_lease(inode, O_RDONLY);
2237 break_lease(inode, O_WRONLY);
2238 rc = filemap_fdatawrite(inode->i_mapping);
2239 if (cinode->clientCanCacheRead == 0) {
2240 rc = filemap_fdatawait(inode->i_mapping);
2241 mapping_set_error(inode->i_mapping, rc);
2242 invalidate_remote_inode(inode);
2244 cFYI(1, "Oplock flush inode %p rc %d", inode, rc);
2248 * releasing stale oplock after recent reconnect of smb session using
2249 * a now incorrect file handle is not a data integrity issue but do
2250 * not bother sending an oplock release if session to server still is
2251 * disconnected since oplock already released by the server
2253 if (!cfile->oplock_break_cancelled) {
2254 rc = CIFSSMBLock(0, tlink_tcon(cfile->tlink), cfile->netfid, 0,
2255 0, 0, 0, LOCKING_ANDX_OPLOCK_RELEASE, false);
2256 cFYI(1, "Oplock release rc = %d", rc);
2260 * We might have kicked in before is_valid_oplock_break()
2261 * finished grabbing reference for us. Make sure it's done by
2262 * waiting for cifs_file_list_lock.
2264 spin_lock(&cifs_file_list_lock);
2265 spin_unlock(&cifs_file_list_lock);
2267 cifs_oplock_break_put(cfile);
2270 /* must be called while holding cifs_file_list_lock */
/* Pin the superblock and the file handle for a pending oplock break. */
2271 void cifs_oplock_break_get(struct cifsFileInfo *cfile)
2273 cifs_sb_active(cfile->dentry->d_sb);
2274 cifsFileInfo_get(cfile);
/* Drop the references taken by cifs_oplock_break_get(), in reverse order. */
2277 void cifs_oplock_break_put(struct cifsFileInfo *cfile)
2279 cifsFileInfo_put(cfile);
2280 cifs_sb_deactive(cfile->dentry->d_sb);
/*
 * Address-space operations used when the server buffer is large enough
 * for multipage reads (see cifs_addr_ops_smallbuf for the fallback).
 */
2283 const struct address_space_operations cifs_addr_ops = {
2284 .readpage = cifs_readpage,
2285 .readpages = cifs_readpages,
2286 .writepage = cifs_writepage,
2287 .writepages = cifs_writepages,
2288 .write_begin = cifs_write_begin,
2289 .write_end = cifs_write_end,
2290 .set_page_dirty = __set_page_dirty_nobuffers,
2291 .releasepage = cifs_release_page,
2292 .invalidatepage = cifs_invalidate_page,
2293 /* .sync_page = cifs_sync_page, */
2298 * cifs_readpages requires the server to support a buffer large enough to
2299 * contain the header plus one complete page of data. Otherwise, we need
2300 * to leave cifs_readpages out of the address space operations.
2302 const struct address_space_operations cifs_addr_ops_smallbuf = {
2303 .readpage = cifs_readpage,
2304 .writepage = cifs_writepage,
2305 .writepages = cifs_writepages,
2306 .write_begin = cifs_write_begin,
2307 .write_end = cifs_write_end,
2308 .set_page_dirty = __set_page_dirty_nobuffers,
2309 .releasepage = cifs_release_page,
2310 .invalidatepage = cifs_invalidate_page,
2311 /* .sync_page = cifs_sync_page, */