4 * vfs operations that deal with files
6 * Copyright (C) International Business Machines Corp., 2002,2007
7 * Author(s): Steve French (sfrench@us.ibm.com)
8 * Jeremy Allison (jra@samba.org)
10 * This library is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU Lesser General Public License as published
12 * by the Free Software Foundation; either version 2.1 of the License, or
13 * (at your option) any later version.
15 * This library is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
18 * the GNU Lesser General Public License for more details.
20 * You should have received a copy of the GNU Lesser General Public License
21 * along with this library; if not, write to the Free Software
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
25 #include <linux/backing-dev.h>
26 #include <linux/stat.h>
27 #include <linux/fcntl.h>
28 #include <linux/pagemap.h>
29 #include <linux/pagevec.h>
30 #include <linux/writeback.h>
31 #include <linux/task_io_accounting_ops.h>
32 #include <linux/delay.h>
33 #include <linux/mount.h>
34 #include <asm/div64.h>
38 #include "cifsproto.h"
39 #include "cifs_unicode.h"
40 #include "cifs_debug.h"
41 #include "cifs_fs_sb.h"
/*
 * Map the POSIX open-flag access mode (O_RDONLY/O_WRONLY/O_RDWR) to the
 * SMB desired-access mask requested on the wire when opening the file.
 * NOTE(review): this chunk is truncated -- the return statements for the
 * O_RDONLY and O_WRONLY branches are not visible here; confirm against
 * the complete file before relying on this summary.
 */
43 static inline int cifs_convert_flags(unsigned int flags)
45 if ((flags & O_ACCMODE) == O_RDONLY)
47 else if ((flags & O_ACCMODE) == O_WRONLY)
49 else if ((flags & O_ACCMODE) == O_RDWR) {
50 /* GENERIC_ALL is too much permission to request
51 can cause unnecessary access denied on create */
52 /* return GENERIC_ALL; */
53 return (GENERIC_READ | GENERIC_WRITE);
/* default: request the individual access rights needed rather than a
   GENERIC_* mask (see the GENERIC_ALL comment above) */
56 return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
57 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
/*
 * Build the fmode_t flag set used for a POSIX (CIFS Unix extensions)
 * open from the VFS open flags.  Access mode maps to FMODE_READ/WRITE;
 * O_CREAT/O_EXCL/O_TRUNC are deliberately not mapped (see the reopen
 * comment below).
 * NOTE(review): O_APPEND/O_SYNC/O_DIRECTORY/O_NOFOLLOW/O_DIRECT values
 * are cast into an fmode_t here, mixing two flag namespaces -- verify
 * this matches what cifs_posix_open expects on the wire.
 */
61 static inline fmode_t cifs_posix_convert_flags(unsigned int flags)
63 fmode_t posix_flags = 0;
65 if ((flags & O_ACCMODE) == O_RDONLY)
66 posix_flags = FMODE_READ;
67 else if ((flags & O_ACCMODE) == O_WRONLY)
68 posix_flags = FMODE_WRITE;
69 else if ((flags & O_ACCMODE) == O_RDWR) {
70 /* GENERIC_ALL is too much permission to request
71 can cause unnecessary access denied on create */
72 /* return GENERIC_ALL; */
73 posix_flags = FMODE_READ | FMODE_WRITE;
75 /* can not map O_CREAT or O_EXCL or O_TRUNC flags when
76 reopening a file. They had their effect on the original open */
78 posix_flags |= (fmode_t)O_APPEND;
80 posix_flags |= (fmode_t)O_SYNC;
81 if (flags & O_DIRECTORY)
82 posix_flags |= (fmode_t)O_DIRECTORY;
83 if (flags & O_NOFOLLOW)
84 posix_flags |= (fmode_t)O_NOFOLLOW;
86 posix_flags |= (fmode_t)O_DIRECT;
/*
 * Translate the POSIX create/truncate flags into the SMB create
 * disposition (see also the mapping table inside cifs_open):
 *   O_CREAT|O_EXCL  -> FILE_CREATE
 *   O_CREAT|O_TRUNC -> FILE_OVERWRITE_IF
 *   O_CREAT         -> FILE_OPEN_IF
 *   O_TRUNC         -> FILE_OVERWRITE
 *   otherwise       -> FILE_OPEN
 * (Some of the return lines fall outside this truncated view.)
 */
91 static inline int cifs_get_disposition(unsigned int flags)
93 if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
95 else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
96 return FILE_OVERWRITE_IF;
97 else if ((flags & O_CREAT) == O_CREAT)
99 else if ((flags & O_TRUNC) == O_TRUNC)
100 return FILE_OVERWRITE;
105 /* all arguments to this function must be checked for validity in caller */
/*
 * Post-open bookkeeping after a successful POSIX open: re-derive the
 * cifsInodeInfo under GlobalSMBSeslock and record what the granted
 * oplock level allows the client to cache (all vs read).  If the inode
 * is already readable-cacheable, cached pages are kept; the stale-mtime
 * invalidation pass below is currently commented out (BB FIXME).
 */
107 cifs_posix_open_inode_helper(struct inode *inode, struct file *file,
108 struct cifsInodeInfo *pCifsInode,
109 struct cifsFileInfo *pCifsFile, __u32 oplock,
113 write_lock(&GlobalSMBSeslock);
115 pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
116 if (pCifsInode == NULL) {
117 write_unlock(&GlobalSMBSeslock);
121 if (pCifsInode->clientCanCacheRead) {
122 /* we have the inode open somewhere else
123 no need to discard cache data */
124 goto psx_client_can_cache;
127 /* BB FIXME need to fix this check to move it earlier into posix_open
128 BB fIX following section BB FIXME */
130 /* if not oplocked, invalidate inode pages if mtime or file
132 /* temp = cifs_NTtimeToUnix(le64_to_cpu(buf->LastWriteTime));
133 if (timespec_equal(&file->f_path.dentry->d_inode->i_mtime, &temp) &&
134 (file->f_path.dentry->d_inode->i_size ==
135 (loff_t)le64_to_cpu(buf->EndOfFile))) {
136 cFYI(1, ("inode unchanged on server"));
138 if (file->f_path.dentry->d_inode->i_mapping) {
139 rc = filemap_write_and_wait(file->f_path.dentry->d_inode->i_mapping);
141 CIFS_I(file->f_path.dentry->d_inode)->write_behind_rc = rc;
143 cFYI(1, ("invalidating remote inode since open detected it "
145 invalidate_remote_inode(file->f_path.dentry->d_inode);
148 psx_client_can_cache:
/* the oplock low nibble encodes the level the server granted */
149 if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) {
150 pCifsInode->clientCanCacheAll = true;
151 pCifsInode->clientCanCacheRead = true;
152 cFYI(1, ("Exclusive Oplock granted on inode %p",
153 file->f_path.dentry->d_inode));
154 } else if ((oplock & 0xF) == OPLOCK_READ)
155 pCifsInode->clientCanCacheRead = true;
157 /* will have to change the unlock if we reenable the
158 filemap_fdatawrite (which does not seem necessary */
159 write_unlock(&GlobalSMBSeslock);
/*
 * Look up a cifsFileInfo already created for this inode (entries made
 * by cifs_create have pfile == NULL and are matched by the opener's
 * tgid), attach it to file->private_data and return it.  A NULL result
 * is only logged as an error for O_CREAT|O_EXCL opens, where create
 * should have left an entry on the list.
 */
163 static struct cifsFileInfo *
164 cifs_fill_filedata(struct file *file)
166 struct list_head *tmp;
167 struct cifsFileInfo *pCifsFile = NULL;
168 struct cifsInodeInfo *pCifsInode = NULL;
170 /* search inode for this file and fill in file->private_data */
171 pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
172 read_lock(&GlobalSMBSeslock);
173 list_for_each(tmp, &pCifsInode->openFileList) {
174 pCifsFile = list_entry(tmp, struct cifsFileInfo, flist);
175 if ((pCifsFile->pfile == NULL) &&
176 (pCifsFile->pid == current->tgid)) {
177 /* mode set in cifs_create */
179 /* needed for writepage */
180 pCifsFile->pfile = file;
181 file->private_data = pCifsFile;
185 read_unlock(&GlobalSMBSeslock);
187 if (file->private_data != NULL) {
189 } else if ((file->f_flags & O_CREAT) && (file->f_flags & O_EXCL))
190 cERROR(1, ("could not find file instance for "
191 "new file %p", file));
195 /* all arguments to this function must be checked for validity in caller */
/*
 * Post-open bookkeeping for a regular (non-POSIX) open: insert the new
 * handle on the inode's open list (write-only handles to the tail so
 * readable ones are found first by write_begin), flush/invalidate
 * cached pages when the server copy looks changed, refresh inode
 * metadata, and record the caching rights the granted oplock allows.
 */
196 static inline int cifs_open_inode_helper(struct inode *inode, struct file *file,
197 struct cifsInodeInfo *pCifsInode, struct cifsFileInfo *pCifsFile,
198 struct cifsTconInfo *pTcon, int *oplock, FILE_ALL_INFO *buf,
199 char *full_path, int xid)
201 struct timespec temp;
204 /* want handles we can use to read with first
205 in the list so we do not have to walk the
206 list to search for one in write_begin */
207 if ((file->f_flags & O_ACCMODE) == O_WRONLY) {
208 list_add_tail(&pCifsFile->flist,
209 &pCifsInode->openFileList);
211 list_add(&pCifsFile->flist,
212 &pCifsInode->openFileList);
214 write_unlock(&GlobalSMBSeslock);
215 if (pCifsInode->clientCanCacheRead) {
216 /* we have the inode open somewhere else
217 no need to discard cache data */
218 goto client_can_cache;
221 /* BB need same check in cifs_create too? */
222 /* if not oplocked, invalidate inode pages if mtime or file
224 temp = cifs_NTtimeToUnix(buf->LastWriteTime);
225 if (timespec_equal(&file->f_path.dentry->d_inode->i_mtime, &temp) &&
226 (file->f_path.dentry->d_inode->i_size ==
227 (loff_t)le64_to_cpu(buf->EndOfFile))) {
228 cFYI(1, ("inode unchanged on server"));
230 if (file->f_path.dentry->d_inode->i_mapping) {
231 /* BB no need to lock inode until after invalidate
232 since namei code should already have it locked? */
233 rc = filemap_write_and_wait(file->f_path.dentry->d_inode->i_mapping);
235 CIFS_I(file->f_path.dentry->d_inode)->write_behind_rc = rc;
237 cFYI(1, ("invalidating remote inode since open detected it "
239 invalidate_remote_inode(file->f_path.dentry->d_inode);
/* refresh inode attributes: the Unix-extensions path when available,
   otherwise from the FILE_ALL_INFO buf returned by the open */
244 rc = cifs_get_inode_info_unix(&file->f_path.dentry->d_inode,
245 full_path, inode->i_sb, xid);
247 rc = cifs_get_inode_info(&file->f_path.dentry->d_inode,
248 full_path, buf, inode->i_sb, xid, NULL);
250 if ((*oplock & 0xF) == OPLOCK_EXCLUSIVE) {
251 pCifsInode->clientCanCacheAll = true;
252 pCifsInode->clientCanCacheRead = true;
253 cFYI(1, ("Exclusive Oplock granted on inode %p",
254 file->f_path.dentry->d_inode));
255 } else if ((*oplock & 0xF) == OPLOCK_READ)
256 pCifsInode->clientCanCacheRead = true;
/*
 * VFS ->open() for cifs.  Tries a POSIX open first when the tcon
 * advertises the Unix-extension path-ops capability (and a prior
 * attempt has not flagged posix open as broken on this server);
 * otherwise maps the VFS flags to an NT create via CIFSSMBOpen, or a
 * legacy OpenX for pre-NT servers.  On success it allocates the
 * cifsFileInfo, runs cifs_open_inode_helper, and for newly created
 * files (CIFS_CREATE_ACTION) pushes the mode to the server, which
 * could not be set at create time for read-only files.
 */
261 int cifs_open(struct inode *inode, struct file *file)
266 struct cifs_sb_info *cifs_sb;
267 struct cifsTconInfo *tcon;
268 struct cifsFileInfo *pCifsFile;
269 struct cifsInodeInfo *pCifsInode;
270 char *full_path = NULL;
274 FILE_ALL_INFO *buf = NULL;
278 cifs_sb = CIFS_SB(inode->i_sb);
279 tcon = cifs_sb->tcon;
281 pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
282 pCifsFile = cifs_fill_filedata(file);
289 full_path = build_path_from_dentry(file->f_path.dentry);
290 if (full_path == NULL) {
296 cFYI(1, ("inode = 0x%p file flags are 0x%x for %s",
297 inode, file->f_flags, full_path));
/* preferred path: POSIX open via the CIFS Unix extensions */
304 if (!tcon->broken_posix_open && tcon->unix_ext &&
305 (tcon->ses->capabilities & CAP_UNIX) &&
306 (CIFS_UNIX_POSIX_PATH_OPS_CAP &
307 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
308 int oflags = (int) cifs_posix_convert_flags(file->f_flags);
309 /* can not refresh inode info since size could be stale */
310 rc = cifs_posix_open(full_path, &inode, file->f_path.mnt,
311 cifs_sb->mnt_file_mode /* ignored */,
312 oflags, &oplock, &netfid, xid);
314 cFYI(1, ("posix open succeeded"));
315 /* no need for special case handling of setting mode
316 on read only files needed here */
318 pCifsFile = cifs_fill_filedata(file);
319 cifs_posix_open_inode_helper(inode, file, pCifsInode,
320 pCifsFile, oplock, netfid);
322 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
/* server advertised posix open but rejected it: disable it
   for the rest of this tcon's lifetime and fall back */
323 if (tcon->ses->serverNOS)
324 cERROR(1, ("server %s of type %s returned"
325 " unexpected error on SMB posix open"
326 ", disabling posix open support."
327 " Check if server update available.",
328 tcon->ses->serverName,
329 tcon->ses->serverNOS));
330 tcon->broken_posix_open = true;
331 } else if ((rc != -EIO) && (rc != -EREMOTE) &&
332 (rc != -EOPNOTSUPP)) /* path not found or net err */
334 /* else fallthrough to retry open the old way on network i/o
338 desiredAccess = cifs_convert_flags(file->f_flags);
340 /*********************************************************************
341 * open flag mapping table:
343 * POSIX Flag CIFS Disposition
344 * ---------- ----------------
345 * O_CREAT FILE_OPEN_IF
346 * O_CREAT | O_EXCL FILE_CREATE
347 * O_CREAT | O_TRUNC FILE_OVERWRITE_IF
348 * O_TRUNC FILE_OVERWRITE
349 * none of the above FILE_OPEN
351 * Note that there is not a direct match between disposition
352 * FILE_SUPERSEDE (ie create whether or not file exists although
353 * O_CREAT | O_TRUNC is similar but truncates the existing
354 * file rather than creating a new file as FILE_SUPERSEDE does
355 * (which uses the attributes / metadata passed in on open call)
357 *? O_SYNC is a reasonable match to CIFS writethrough flag
358 *? and the read write flags match reasonably. O_LARGEFILE
359 *? is irrelevant because largefile support is always used
360 *? by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
361 * O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
362 *********************************************************************/
364 disposition = cifs_get_disposition(file->f_flags);
366 /* BB pass O_SYNC flag through on file attributes .. BB */
368 /* Also refresh inode by passing in file_info buf returned by SMBOpen
369 and calling get_inode_info with returned buf (at least helps
370 non-Unix server case) */
372 /* BB we can not do this if this is the second open of a file
373 and the first handle has writebehind data, we might be
374 able to simply do a filemap_fdatawrite/filemap_fdatawait first */
375 buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
381 if (cifs_sb->tcon->ses->capabilities & CAP_NT_SMBS)
382 rc = CIFSSMBOpen(xid, tcon, full_path, disposition,
383 desiredAccess, CREATE_NOT_DIR, &netfid, &oplock, buf,
384 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
385 & CIFS_MOUNT_MAP_SPECIAL_CHR);
387 rc = -EIO; /* no NT SMB support fall into legacy open below */
390 /* Old server, try legacy style OpenX */
391 rc = SMBLegacyOpen(xid, tcon, full_path, disposition,
392 desiredAccess, CREATE_NOT_DIR, &netfid, &oplock, buf,
393 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
394 & CIFS_MOUNT_MAP_SPECIAL_CHR);
397 cFYI(1, ("cifs_open returned 0x%x", rc));
400 pCifsFile = cifs_new_fileinfo(inode, netfid, file, file->f_path.mnt,
402 file->private_data = pCifsFile;
403 if (file->private_data == NULL) {
408 pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
410 rc = cifs_open_inode_helper(inode, file, pCifsInode,
412 &oplock, buf, full_path, xid);
414 write_unlock(&GlobalSMBSeslock);
417 if (oplock & CIFS_CREATE_ACTION) {
418 /* time to set mode which we can not set earlier due to
419 problems creating new read-only files */
420 if (tcon->unix_ext) {
421 struct cifs_unix_set_info_args args = {
422 .mode = inode->i_mode,
425 .ctime = NO_CHANGE_64,
426 .atime = NO_CHANGE_64,
427 .mtime = NO_CHANGE_64,
430 CIFSSMBUnixSetPathInfo(xid, tcon, full_path, &args,
432 cifs_sb->mnt_cifs_flags &
433 CIFS_MOUNT_MAP_SPECIAL_CHR);
444 /* Try to reacquire byte range locks that were released when session */
445 /* to server was lost */
/* Currently a stub: the relock pass itself is not implemented yet
   (only the BB placeholder below); called from cifs_reopen_file. */
446 static int cifs_relock_file(struct cifsFileInfo *cifsFile)
450 /* BB list all locks open on this file and relock */
/*
 * Re-open a file whose network handle was invalidated (typically after
 * a session reconnect).  Serialized per-handle by fh_mutex; bails out
 * early if another task already revalidated the handle.  Tries a POSIX
 * open first when the tcon supports it, else CIFSSMBOpen with
 * disposition FILE_OPEN.  When can_flush, write-behind data is flushed
 * and inode info refreshed; caching rights are then re-derived from the
 * newly granted oplock and byte-range locks are re-acquired via
 * cifs_relock_file.
 */
455 static int cifs_reopen_file(struct file *file, bool can_flush)
460 struct cifs_sb_info *cifs_sb;
461 struct cifsTconInfo *tcon;
462 struct cifsFileInfo *pCifsFile;
463 struct cifsInodeInfo *pCifsInode;
465 char *full_path = NULL;
467 int disposition = FILE_OPEN;
470 if (file->private_data)
471 pCifsFile = (struct cifsFileInfo *)file->private_data;
476 mutex_lock(&pCifsFile->fh_mutex);
477 if (!pCifsFile->invalidHandle) {
/* someone else reopened the handle while we waited on the mutex */
478 mutex_unlock(&pCifsFile->fh_mutex);
484 if (file->f_path.dentry == NULL) {
485 cERROR(1, ("no valid name if dentry freed"));
488 goto reopen_error_exit;
491 inode = file->f_path.dentry->d_inode;
493 cERROR(1, ("inode not valid"));
496 goto reopen_error_exit;
499 cifs_sb = CIFS_SB(inode->i_sb);
500 tcon = cifs_sb->tcon;
502 /* can not grab rename sem here because various ops, including
503 those that already have the rename sem can end up causing writepage
504 to get called and if the server was down that means we end up here,
505 and we can never tell if the caller already has the rename_sem */
506 full_path = build_path_from_dentry(file->f_path.dentry);
507 if (full_path == NULL) {
510 mutex_unlock(&pCifsFile->fh_mutex);
515 cFYI(1, ("inode = 0x%p file flags 0x%x for %s",
516 inode, file->f_flags, full_path));
523 if (tcon->unix_ext && (tcon->ses->capabilities & CAP_UNIX) &&
524 (CIFS_UNIX_POSIX_PATH_OPS_CAP &
525 le64_to_cpu(tcon->fsUnixInfo.Capability))) {
526 int oflags = (int) cifs_posix_convert_flags(file->f_flags);
527 /* can not refresh inode info since size could be stale */
528 rc = cifs_posix_open(full_path, NULL, file->f_path.mnt,
529 cifs_sb->mnt_file_mode /* ignored */,
530 oflags, &oplock, &netfid, xid);
532 cFYI(1, ("posix reopen succeeded"));
535 /* fallthrough to retry open the old way on errors, especially
536 in the reconnect path it is important to retry hard */
539 desiredAccess = cifs_convert_flags(file->f_flags);
541 /* Can not refresh inode by passing in file_info buf to be returned
542 by SMBOpen and then calling get_inode_info with returned buf
543 since file might have write behind data that needs to be flushed
544 and server version of file size can be stale. If we knew for sure
545 that inode was not dirty locally we could do this */
547 rc = CIFSSMBOpen(xid, tcon, full_path, disposition, desiredAccess,
548 CREATE_NOT_DIR, &netfid, &oplock, NULL,
549 cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
550 CIFS_MOUNT_MAP_SPECIAL_CHR);
552 mutex_unlock(&pCifsFile->fh_mutex);
553 cFYI(1, ("cifs_open returned 0x%x", rc));
554 cFYI(1, ("oplock: %d", oplock));
557 pCifsFile->netfid = netfid;
558 pCifsFile->invalidHandle = false;
559 mutex_unlock(&pCifsFile->fh_mutex);
560 pCifsInode = CIFS_I(inode);
563 rc = filemap_write_and_wait(inode->i_mapping);
565 CIFS_I(inode)->write_behind_rc = rc;
566 /* temporarily disable caching while we
567 go to server to get inode info */
568 pCifsInode->clientCanCacheAll = false;
569 pCifsInode->clientCanCacheRead = false;
571 rc = cifs_get_inode_info_unix(&inode,
572 full_path, inode->i_sb, xid);
574 rc = cifs_get_inode_info(&inode,
575 full_path, NULL, inode->i_sb,
577 } /* else we are writing out data to server already
578 and could deadlock if we tried to flush data, and
579 since we do not know if we have data that would
580 invalidate the current end of file on the server
581 we can not go to the server to get the new inod
583 if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) {
584 pCifsInode->clientCanCacheAll = true;
585 pCifsInode->clientCanCacheRead = true;
586 cFYI(1, ("Exclusive Oplock granted on inode %p",
587 file->f_path.dentry->d_inode));
588 } else if ((oplock & 0xF) == OPLOCK_READ) {
589 pCifsInode->clientCanCacheRead = true;
590 pCifsInode->clientCanCacheAll = false;
592 pCifsInode->clientCanCacheRead = false;
593 pCifsInode->clientCanCacheAll = false;
595 cifs_relock_file(pCifsFile);
/*
 * VFS ->release() for cifs.  Marks the handle closePend, waits with a
 * bounded backoff (timeout <= 2048) for in-flight writers still holding
 * references, sends the SMB close unless the tcon needs reconnect or
 * the handle is already invalid, deletes stored byte-range lock
 * records, unlinks the handle from the inode and tcon lists and drops
 * its reference.  When this was the last open handle the cached
 * read/write rights are cleared, and any deferred write_behind_rc is
 * surfaced as the return value.
 */
603 int cifs_close(struct inode *inode, struct file *file)
607 struct cifs_sb_info *cifs_sb;
608 struct cifsTconInfo *pTcon;
609 struct cifsFileInfo *pSMBFile =
610 (struct cifsFileInfo *)file->private_data;
614 cifs_sb = CIFS_SB(inode->i_sb);
615 pTcon = cifs_sb->tcon;
617 struct cifsLockInfo *li, *tmp;
618 write_lock(&GlobalSMBSeslock);
619 pSMBFile->closePend = true;
621 /* no sense reconnecting to close a file that is
623 if (!pTcon->need_reconnect) {
624 write_unlock(&GlobalSMBSeslock);
626 while ((atomic_read(&pSMBFile->count) != 1)
627 && (timeout <= 2048)) {
628 /* Give write a better chance to get to
629 server ahead of the close. We do not
630 want to add a wait_q here as it would
631 increase the memory utilization as
632 the struct would be in each open file,
633 but this should give enough time to
636 ("close delay, write pending"));
640 if (!pTcon->need_reconnect &&
641 !pSMBFile->invalidHandle)
642 rc = CIFSSMBClose(xid, pTcon,
645 write_unlock(&GlobalSMBSeslock);
647 write_unlock(&GlobalSMBSeslock);
649 /* Delete any outstanding lock records.
650 We'll lose them when the file is closed anyway. */
651 mutex_lock(&pSMBFile->lock_mutex);
652 list_for_each_entry_safe(li, tmp, &pSMBFile->llist, llist) {
653 list_del(&li->llist);
656 mutex_unlock(&pSMBFile->lock_mutex);
658 write_lock(&GlobalSMBSeslock);
659 list_del(&pSMBFile->flist);
660 list_del(&pSMBFile->tlist);
661 write_unlock(&GlobalSMBSeslock);
662 cifsFileInfo_put(file->private_data);
663 file->private_data = NULL;
667 read_lock(&GlobalSMBSeslock);
668 if (list_empty(&(CIFS_I(inode)->openFileList))) {
669 cFYI(1, ("closing last open instance for inode %p", inode));
670 /* if the file is not open we do not know if we can cache info
671 on this inode, much less write behind and read ahead */
672 CIFS_I(inode)->clientCanCacheRead = false;
673 CIFS_I(inode)->clientCanCacheAll = false;
675 read_unlock(&GlobalSMBSeslock);
676 if ((rc == 0) && CIFS_I(inode)->write_behind_rc)
677 rc = CIFS_I(inode)->write_behind_rc;
/*
 * VFS ->release() for directories.  For an uncompleted readdir the
 * search handle is invalidated and CIFSFindClose issued (its rc is
 * deliberately ignored); the network buffer cached in srch_inf is then
 * released (small vs regular SMB buffer) and the private cifsFileInfo
 * freed.
 */
682 int cifs_closedir(struct inode *inode, struct file *file)
686 struct cifsFileInfo *pCFileStruct =
687 (struct cifsFileInfo *)file->private_data;
690 cFYI(1, ("Closedir inode = 0x%p", inode));
695 struct cifsTconInfo *pTcon;
696 struct cifs_sb_info *cifs_sb =
697 CIFS_SB(file->f_path.dentry->d_sb);
699 pTcon = cifs_sb->tcon;
701 cFYI(1, ("Freeing private data in close dir"));
702 write_lock(&GlobalSMBSeslock);
703 if (!pCFileStruct->srch_inf.endOfSearch &&
704 !pCFileStruct->invalidHandle) {
705 pCFileStruct->invalidHandle = true;
706 write_unlock(&GlobalSMBSeslock);
707 rc = CIFSFindClose(xid, pTcon, pCFileStruct->netfid);
708 cFYI(1, ("Closing uncompleted readdir with rc %d",
710 /* not much we can do if it fails anyway, ignore rc */
713 write_unlock(&GlobalSMBSeslock);
714 ptmp = pCFileStruct->srch_inf.ntwrk_buf_start;
716 cFYI(1, ("closedir free smb buf in srch struct"));
717 pCFileStruct->srch_inf.ntwrk_buf_start = NULL;
718 if (pCFileStruct->srch_inf.smallBuf)
719 cifs_small_buf_release(ptmp);
721 cifs_buf_release(ptmp);
723 kfree(file->private_data);
724 file->private_data = NULL;
726 /* BB can we lock the filestruct while this is going on? */
/*
 * Remember a granted Windows-style byte-range lock on the open file's
 * llist so it can be released on a matching unlock (see cifs_lock) or
 * discarded at close.
 * NOTE(review): truncated view -- the kmalloc NULL check and the li
 * field assignments (offset/length/type) are not visible here; confirm
 * against the full source.
 */
731 static int store_file_lock(struct cifsFileInfo *fid, __u64 len,
732 __u64 offset, __u8 lockType)
734 struct cifsLockInfo *li =
735 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
741 mutex_lock(&fid->lock_mutex);
742 list_add(&li->llist, &fid->llist);
743 mutex_unlock(&fid->lock_mutex);
/*
 * VFS ->lock() for cifs.  Maps fcntl/flock requests onto either POSIX
 * byte-range locks (CIFSSMBPosixLock, used when the Unix-extension
 * FCNTL capability is present and posix brl is not disabled via the
 * CIFS_MOUNT_NOPOSIXBRL mount flag) or Windows-style LockingAndX
 * (CIFSSMBLock).  Windows-style grants are recorded with
 * store_file_lock so a later unlock can release every stored lock the
 * unlock range fully covers; posix-path locks are also registered
 * locally via posix_lock_file_wait.
 */
747 int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
753 bool wait_flag = false;
754 struct cifs_sb_info *cifs_sb;
755 struct cifsTconInfo *tcon;
757 __u8 lockType = LOCKING_ANDX_LARGE_FILES;
758 bool posix_locking = 0;
760 length = 1 + pfLock->fl_end - pfLock->fl_start;
764 cFYI(1, ("Lock parm: 0x%x flockflags: "
765 "0x%x flocktype: 0x%x start: %lld end: %lld",
766 cmd, pfLock->fl_flags, pfLock->fl_type, pfLock->fl_start,
769 if (pfLock->fl_flags & FL_POSIX)
771 if (pfLock->fl_flags & FL_FLOCK)
773 if (pfLock->fl_flags & FL_SLEEP) {
774 cFYI(1, ("Blocking lock"))
777 if (pfLock->fl_flags & FL_ACCESS)
778 cFYI(1, ("Process suspended by mandatory locking - "
779 "not implemented yet"));
780 if (pfLock->fl_flags & FL_LEASE)
781 cFYI(1, ("Lease on file - not implemented yet"));
782 if (pfLock->fl_flags &
783 (~(FL_POSIX | FL_FLOCK | FL_SLEEP | FL_ACCESS | FL_LEASE)))
784 cFYI(1, ("Unknown lock flags 0x%x", pfLock->fl_flags));
786 if (pfLock->fl_type == F_WRLCK) {
787 cFYI(1, ("F_WRLCK "));
789 } else if (pfLock->fl_type == F_UNLCK) {
790 cFYI(1, ("F_UNLCK"));
792 /* Check if unlock includes more than
794 } else if (pfLock->fl_type == F_RDLCK) {
795 cFYI(1, ("F_RDLCK"));
796 lockType |= LOCKING_ANDX_SHARED_LOCK;
798 } else if (pfLock->fl_type == F_EXLCK) {
799 cFYI(1, ("F_EXLCK"));
801 } else if (pfLock->fl_type == F_SHLCK) {
802 cFYI(1, ("F_SHLCK"));
803 lockType |= LOCKING_ANDX_SHARED_LOCK;
806 cFYI(1, ("Unknown type of lock"));
808 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
809 tcon = cifs_sb->tcon;
811 if (file->private_data == NULL) {
816 netfid = ((struct cifsFileInfo *)file->private_data)->netfid;
818 if ((tcon->ses->capabilities & CAP_UNIX) &&
819 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
820 ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
822 /* BB add code here to normalize offset and length to
823 account for negative length which we can not accept over the
/* lock-test (F_GETLK-style) path: probe the range on the server */
828 if (lockType & LOCKING_ANDX_SHARED_LOCK)
829 posix_lock_type = CIFS_RDLCK;
831 posix_lock_type = CIFS_WRLCK;
832 rc = CIFSSMBPosixLock(xid, tcon, netfid, 1 /* get */,
834 posix_lock_type, wait_flag);
839 /* BB we could chain these into one lock request BB */
840 rc = CIFSSMBLock(xid, tcon, netfid, length, pfLock->fl_start,
841 0, 1, lockType, 0 /* wait flag */ );
843 rc = CIFSSMBLock(xid, tcon, netfid, length,
844 pfLock->fl_start, 1 /* numUnlock */ ,
845 0 /* numLock */ , lockType,
847 pfLock->fl_type = F_UNLCK;
849 cERROR(1, ("Error unlocking previously locked "
850 "range %d during test of lock", rc));
854 /* if rc == ERR_SHARING_VIOLATION ? */
855 rc = 0; /* do not change lock type to unlock
856 since range in use */
863 if (!numLock && !numUnlock) {
864 /* if no lock or unlock then nothing
865 to do since we do not know what it is */
872 if (lockType & LOCKING_ANDX_SHARED_LOCK)
873 posix_lock_type = CIFS_RDLCK;
875 posix_lock_type = CIFS_WRLCK;
878 posix_lock_type = CIFS_UNLCK;
880 rc = CIFSSMBPosixLock(xid, tcon, netfid, 0 /* set */,
882 posix_lock_type, wait_flag);
884 struct cifsFileInfo *fid =
885 (struct cifsFileInfo *)file->private_data;
888 rc = CIFSSMBLock(xid, tcon, netfid, length,
890 0, numLock, lockType, wait_flag);
893 /* For Windows locks we must store them. */
894 rc = store_file_lock(fid, length,
895 pfLock->fl_start, lockType);
897 } else if (numUnlock) {
898 /* For each stored lock that this unlock overlaps
899 completely, unlock it. */
901 struct cifsLockInfo *li, *tmp;
904 mutex_lock(&fid->lock_mutex);
905 list_for_each_entry_safe(li, tmp, &fid->llist, llist) {
906 if (pfLock->fl_start <= li->offset &&
907 (pfLock->fl_start + length) >=
908 (li->offset + li->length)) {
909 stored_rc = CIFSSMBLock(xid, tcon,
911 li->length, li->offset,
912 1, 0, li->type, false);
916 list_del(&li->llist);
920 mutex_unlock(&fid->lock_mutex);
924 if (pfLock->fl_flags & FL_POSIX)
925 posix_lock_file_wait(file, pfLock);
931 * Set the timeout on write requests past EOF. For some servers (Windows)
932 * these calls can be very long.
934 * If we're writing >10M past the EOF we give a 180s timeout. Anything less
935 * than that gets a 45s timeout. Writes not past EOF get 15s timeouts.
936 * The 10M cutoff is totally arbitrary. A better scheme for this would be
937 * welcome if someone wants to suggest one.
939 * We may be able to do a better job with this if there were some way to
940 * declare that a file should be sparse.
/* (truncated view: the return lines for the <= server_eof and middle
   cases are not visible -- per the comment above they are the standard
   and long-op timeouts respectively; confirm in the full source) */
943 cifs_write_timeout(struct cifsInodeInfo *cifsi, loff_t offset)
945 if (offset <= cifsi->server_eof)
947 else if (offset > (cifsi->server_eof + (10 * 1024 * 1024)))
948 return CIFS_VLONG_OP;
953 /* update the file size (if needed) after a write */
/* Advance the locally cached server EOF when a write extends the file;
   the cached value is never shrunk here. */
955 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
956 unsigned int bytes_written)
958 loff_t end_of_write = offset + bytes_written;
960 if (end_of_write > cifsi->server_eof)
961 cifsi->server_eof = end_of_write;
/*
 * Write data from a userspace buffer to the server via CIFSSMBWrite,
 * looping in chunks of at most cifs_sb->wsize.  On reconnect (-EAGAIN)
 * the handle is reopened WITHOUT flushing (deadlock avoidance -- see
 * the invalidHandle comment).  The SMB timeout is chosen up front by
 * cifs_write_timeout and relaxed to CIFS_STD_OP after the first chunk.
 * Updates the cached server EOF, the local i_size when the write
 * extended the file, and the tcon write statistics.  Returns the total
 * bytes written, which may be short if the handle went away mid-write.
 */
964 ssize_t cifs_user_write(struct file *file, const char __user *write_data,
965 size_t write_size, loff_t *poffset)
968 unsigned int bytes_written = 0;
969 unsigned int total_written;
970 struct cifs_sb_info *cifs_sb;
971 struct cifsTconInfo *pTcon;
973 struct cifsFileInfo *open_file;
974 struct cifsInodeInfo *cifsi = CIFS_I(file->f_path.dentry->d_inode);
976 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
978 pTcon = cifs_sb->tcon;
981 (" write %d bytes to offset %lld of %s", write_size,
982 *poffset, file->f_path.dentry->d_name.name)); */
984 if (file->private_data == NULL)
986 open_file = (struct cifsFileInfo *) file->private_data;
988 rc = generic_write_checks(file, poffset, &write_size, 0);
994 long_op = cifs_write_timeout(cifsi, *poffset);
995 for (total_written = 0; write_size > total_written;
996 total_written += bytes_written) {
998 while (rc == -EAGAIN) {
999 if (file->private_data == NULL) {
1000 /* file has been closed on us */
1002 /* if we have gotten here we have written some data
1003 and blocked, and the file has been freed on us while
1004 we blocked so return what we managed to write */
1005 return total_written;
1007 if (open_file->closePend) {
1010 return total_written;
1014 if (open_file->invalidHandle) {
1015 /* we could deadlock if we called
1016 filemap_fdatawait from here so tell
1017 reopen_file not to flush data to server
1019 rc = cifs_reopen_file(file, false);
1024 rc = CIFSSMBWrite(xid, pTcon,
1026 min_t(const int, cifs_sb->wsize,
1027 write_size - total_written),
1028 *poffset, &bytes_written,
1029 NULL, write_data + total_written, long_op);
1031 if (rc || (bytes_written == 0)) {
1039 cifs_update_eof(cifsi, *poffset, bytes_written);
1040 *poffset += bytes_written;
1042 long_op = CIFS_STD_OP; /* subsequent writes fast -
1043 15 seconds is plenty */
1046 cifs_stats_bytes_written(pTcon, total_written);
1048 /* since the write may have blocked check these pointers again */
1049 if ((file->f_path.dentry) && (file->f_path.dentry->d_inode)) {
1050 struct inode *inode = file->f_path.dentry->d_inode;
1051 /* Do not update local mtime - server will set its actual value on write
1052 * inode->i_ctime = inode->i_mtime =
1053 * current_fs_time(inode->i_sb);*/
1054 if (total_written > 0) {
1055 spin_lock(&inode->i_lock);
1056 if (*poffset > file->f_path.dentry->d_inode->i_size)
1057 i_size_write(file->f_path.dentry->d_inode,
1059 spin_unlock(&inode->i_lock);
1061 mark_inode_dirty_sync(file->f_path.dentry->d_inode);
1064 return total_written;
/*
 * Kernel-buffer variant of the write path (data already in kernel
 * memory, e.g. from writepage).  When experimEnabled is set or the
 * server requires/enables signing it uses the vectored CIFSSMBWrite2
 * (iov[0] reserved for the SMB header), otherwise plain CIFSSMBWrite.
 * Reconnect handling, timeout selection, EOF/i_size update and stats
 * mirror cifs_user_write above.
 */
1067 static ssize_t cifs_write(struct file *file, const char *write_data,
1068 size_t write_size, loff_t *poffset)
1071 unsigned int bytes_written = 0;
1072 unsigned int total_written;
1073 struct cifs_sb_info *cifs_sb;
1074 struct cifsTconInfo *pTcon;
1076 struct cifsFileInfo *open_file;
1077 struct cifsInodeInfo *cifsi = CIFS_I(file->f_path.dentry->d_inode);
1079 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1081 pTcon = cifs_sb->tcon;
1083 cFYI(1, ("write %zd bytes to offset %lld of %s", write_size,
1084 *poffset, file->f_path.dentry->d_name.name));
1086 if (file->private_data == NULL)
1088 open_file = (struct cifsFileInfo *)file->private_data;
1092 long_op = cifs_write_timeout(cifsi, *poffset);
1093 for (total_written = 0; write_size > total_written;
1094 total_written += bytes_written) {
1096 while (rc == -EAGAIN) {
1097 if (file->private_data == NULL) {
1098 /* file has been closed on us */
1100 /* if we have gotten here we have written some data
1101 and blocked, and the file has been freed on us
1102 while we blocked so return what we managed to
1104 return total_written;
1106 if (open_file->closePend) {
1109 return total_written;
1113 if (open_file->invalidHandle) {
1114 /* we could deadlock if we called
1115 filemap_fdatawait from here so tell
1116 reopen_file not to flush data to
1118 rc = cifs_reopen_file(file, false);
1122 if (experimEnabled || (pTcon->ses->server &&
1123 ((pTcon->ses->server->secMode &
1124 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
1129 len = min((size_t)cifs_sb->wsize,
1130 write_size - total_written);
1131 /* iov[0] is reserved for smb header */
1132 iov[1].iov_base = (char *)write_data +
1134 iov[1].iov_len = len;
1135 rc = CIFSSMBWrite2(xid, pTcon,
1136 open_file->netfid, len,
1137 *poffset, &bytes_written,
1140 rc = CIFSSMBWrite(xid, pTcon,
1142 min_t(const int, cifs_sb->wsize,
1143 write_size - total_written),
1144 *poffset, &bytes_written,
1145 write_data + total_written,
1148 if (rc || (bytes_written == 0)) {
1156 cifs_update_eof(cifsi, *poffset, bytes_written);
1157 *poffset += bytes_written;
1159 long_op = CIFS_STD_OP; /* subsequent writes fast -
1160 15 seconds is plenty */
1163 cifs_stats_bytes_written(pTcon, total_written);
1165 /* since the write may have blocked check these pointers again */
1166 if ((file->f_path.dentry) && (file->f_path.dentry->d_inode)) {
1167 /*BB We could make this contingent on superblock ATIME flag too */
1168 /* file->f_path.dentry->d_inode->i_ctime =
1169 file->f_path.dentry->d_inode->i_mtime = CURRENT_TIME;*/
1170 if (total_written > 0) {
1171 spin_lock(&file->f_path.dentry->d_inode->i_lock);
1172 if (*poffset > file->f_path.dentry->d_inode->i_size)
1173 i_size_write(file->f_path.dentry->d_inode,
1175 spin_unlock(&file->f_path.dentry->d_inode->i_lock);
1177 mark_inode_dirty_sync(file->f_path.dentry->d_inode);
1180 return total_written;
1183 #ifdef CONFIG_CIFS_EXPERIMENTAL
/*
 * Walk the inode's open-handle list (readable handles first, write-only
 * at the tail) and return a referenced, valid handle usable for
 * reading; the caller must drop the reference.  (The function's tail,
 * including the fallthrough return, lies outside this truncated view.)
 * NOTE(review): O_RDONLY is 0 on Linux, so (f_flags & O_RDONLY) is
 * always false and only the O_RDWR test is effective here -- later
 * kernels compare (f_flags & O_ACCMODE) instead; confirm intent.
 */
1184 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode)
1186 struct cifsFileInfo *open_file = NULL;
1188 read_lock(&GlobalSMBSeslock);
1189 /* we could simply get the first_list_entry since write-only entries
1190 are always at the end of the list but since the first entry might
1191 have a close pending, we go through the whole list */
1192 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1193 if (open_file->closePend)
1195 if (open_file->pfile && ((open_file->pfile->f_flags & O_RDWR) ||
1196 (open_file->pfile->f_flags & O_RDONLY))) {
1197 if (!open_file->invalidHandle) {
1198 /* found a good file */
1199 /* lock it so it will not be closed on us */
1200 cifsFileInfo_get(open_file);
1201 read_unlock(&GlobalSMBSeslock);
1203 } /* else might as well continue, and look for
1204 another, or simply have the caller reopen it
1205 again rather than trying to fix this handle */
1206 } else /* write only file */
1207 break; /* write only files are last so must be done */
1209 read_unlock(&GlobalSMBSeslock);
/*
 * find_writable_file - locate (reopening if necessary) a writable handle
 *
 * First pass only accepts handles belonging to the current thread group
 * (open_file->pid == current->tgid); if none is usable, a second pass
 * (any_available == true) accepts any pid.  An invalidated handle is
 * reopened via cifs_reopen_file(), which can block, so GlobalSMBSeslock
 * must be dropped first; since the list may change while unlocked, the
 * scan restarts (goto refind_writable) when that race is detected.
 * The returned handle carries a reference from cifsFileInfo_get();
 * the caller must release it with cifsFileInfo_put().
 */
1214 struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode)
1216 struct cifsFileInfo *open_file;
1217 bool any_available = false;
1220 /* Having a null inode here (because mapping->host was set to zero by
1221 the VFS or MM) should not happen but we had reports of on oops (due to
1222 it being zero) during stress testcases so we need to check for it */
1224 if (cifs_inode == NULL) {
1225 cERROR(1, ("Null inode passed to cifs_writeable_file"));
1230 read_lock(&GlobalSMBSeslock);
1232 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1233 if (open_file->closePend ||
1234 (!any_available && open_file->pid != current->tgid))
1237 if (open_file->pfile &&
1238 ((open_file->pfile->f_flags & O_RDWR) ||
1239 (open_file->pfile->f_flags & O_WRONLY))) {
/* pin the handle before we may have to drop the lock below */
1240 cifsFileInfo_get(open_file);
1242 if (!open_file->invalidHandle) {
1243 /* found a good writable file */
1244 read_unlock(&GlobalSMBSeslock);
1248 read_unlock(&GlobalSMBSeslock);
1249 /* Had to unlock since following call can block */
1250 rc = cifs_reopen_file(open_file->pfile, false);
1252 if (!open_file->closePend)
1254 else { /* start over in case this was deleted */
1255 /* since the list could be modified */
1256 read_lock(&GlobalSMBSeslock);
1257 cifsFileInfo_put(open_file);
1258 goto refind_writable;
1262 /* if it fails, try another handle if possible -
1263 (we can not do this if closePending since
1264 loop could be modified - in which case we
1265 have to start at the beginning of the list
1266 again. Note that it would be bad
1267 to hold up writepages here (rather than
1268 in caller) with continuous retries */
1269 cFYI(1, ("wp failed on reopen file"));
1270 read_lock(&GlobalSMBSeslock);
1271 /* can not use this handle, no write
1272 pending on this one after all */
1273 cifsFileInfo_put(open_file);
1275 if (open_file->closePend) /* list could have changed */
1276 goto refind_writable;
1277 /* else we simply continue to the next entry. Thus
1278 we do not loop on reopen errors. If we
1279 can not reopen the file, for example if we
1280 reconnected to a server with another client
1281 racing to delete or lock the file we would not
1282 make progress if we restarted before the beginning
1283 of the loop here. */
1286 /* couldn't find useable FH with same pid, try any available */
1287 if (!any_available) {
1288 any_available = true;
1289 goto refind_writable;
1291 read_unlock(&GlobalSMBSeslock);
/*
 * cifs_partialpagewrite - write the [from, to) byte range of a page cache
 * page to the server, using any writable handle found for the inode.
 *
 * The page is kmapped and the data sent through cifs_write().  Races
 * with truncate (page offset now past i_size) are tolerated by simply
 * returning 0; the range is also clamped so this path never extends
 * the file.  Logs and fails if no writable filehandle exists.
 */
1295 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1297 struct address_space *mapping = page->mapping;
1298 loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
1301 int bytes_written = 0;
1302 struct cifs_sb_info *cifs_sb;
1303 struct cifsTconInfo *pTcon;
1304 struct inode *inode;
1305 struct cifsFileInfo *open_file;
1307 if (!mapping || !mapping->host)
1310 inode = page->mapping->host;
1311 cifs_sb = CIFS_SB(inode->i_sb);
1312 pTcon = cifs_sb->tcon;
/* absolute file offset of the first byte to write */
1314 offset += (loff_t)from;
1315 write_data = kmap(page);
1318 if ((to > PAGE_CACHE_SIZE) || (from > to)) {
1323 /* racing with truncate? */
1324 if (offset > mapping->host->i_size) {
1326 return 0; /* don't care */
1329 /* check to make sure that we are not extending the file */
1330 if (mapping->host->i_size - offset < (loff_t)to)
1331 to = (unsigned)(mapping->host->i_size - offset);
1333 open_file = find_writable_file(CIFS_I(mapping->host));
1335 bytes_written = cifs_write(open_file->pfile, write_data,
1337 cifsFileInfo_put(open_file);
1338 /* Does mm or vfs already set times? */
1339 inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb);
1340 if ((bytes_written > 0) && (offset))
1342 else if (bytes_written < 0)
1345 cFYI(1, ("No writeable filehandles for inode"));
/*
 * cifs_writepages - ->writepages: write back a range of dirty pages
 *
 * Collects runs of consecutive dirty pages (found via
 * pagevec_lookup_tag on PAGECACHE_TAG_DIRTY) into a kvec array, with
 * iov[0] reserved for the SMB header, and sends each batch with a
 * single CIFSSMBWrite2 call of up to cifs_sb->wsize bytes.
 *
 * Falls back to generic_writepages() (one page at a time through
 * cifs_writepage) when: wsize is smaller than a page, signing is
 * enabled/required on the server but experimEnabled is off, or the
 * kvec array cannot be allocated.  On write failure the mapping is
 * flagged AS_ENOSPC or AS_EIO as appropriate.
 */
1353 static int cifs_writepages(struct address_space *mapping,
1354 struct writeback_control *wbc)
1356 struct backing_dev_info *bdi = mapping->backing_dev_info;
1357 unsigned int bytes_to_write;
1358 unsigned int bytes_written;
1359 struct cifs_sb_info *cifs_sb;
1363 int range_whole = 0;
1370 struct cifsFileInfo *open_file;
1371 struct cifsInodeInfo *cifsi = CIFS_I(mapping->host);
1373 struct pagevec pvec;
1378 cifs_sb = CIFS_SB(mapping->host->i_sb);
1381 * If wsize is smaller that the page cache size, default to writing
1382 * one page at a time via cifs_writepage
1384 if (cifs_sb->wsize < PAGE_CACHE_SIZE)
1385 return generic_writepages(mapping, wbc);
1387 if ((cifs_sb->tcon->ses) && (cifs_sb->tcon->ses->server))
1388 if (cifs_sb->tcon->ses->server->secMode &
1389 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
1390 if (!experimEnabled)
1391 return generic_writepages(mapping, wbc);
1393 iov = kmalloc(32 * sizeof(struct kvec), GFP_KERNEL);
1395 return generic_writepages(mapping, wbc);
1399 * BB: Is this meaningful for a non-block-device file system?
1400 * If it is, we should test it again after we do I/O
1402 if (wbc->nonblocking && bdi_write_congested(bdi)) {
1403 wbc->encountered_congestion = 1;
1410 pagevec_init(&pvec, 0);
1411 if (wbc->range_cyclic) {
1412 index = mapping->writeback_index; /* Start from prev offset */
1415 index = wbc->range_start >> PAGE_CACHE_SHIFT;
1416 end = wbc->range_end >> PAGE_CACHE_SHIFT;
1417 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
/* outer loop: one pagevec lookup of dirty pages per iteration */
1422 while (!done && (index <= end) &&
1423 (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
1424 PAGECACHE_TAG_DIRTY,
1425 min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1))) {
1434 for (i = 0; i < nr_pages; i++) {
1435 page = pvec.pages[i];
1437 * At this point we hold neither mapping->tree_lock nor
1438 * lock on the page itself: the page may be truncated or
1439 * invalidated (changing page->mapping to NULL), or even
1440 * swizzled back from swapper_space to tmpfs file
1446 else if (!trylock_page(page))
1449 if (unlikely(page->mapping != mapping)) {
1454 if (!wbc->range_cyclic && page->index > end) {
1460 if (next && (page->index != next)) {
1461 /* Not next consecutive page */
1466 if (wbc->sync_mode != WB_SYNC_NONE)
1467 wait_on_page_writeback(page);
1469 if (PageWriteback(page) ||
1470 !clear_page_dirty_for_io(page)) {
1476 * This actually clears the dirty bit in the radix tree.
1477 * See cifs_writepage() for more commentary.
1479 set_page_writeback(page);
1481 if (page_offset(page) >= mapping->host->i_size) {
1484 end_page_writeback(page);
1489 * BB can we get rid of this? pages are held by pvec
1491 page_cache_get(page);
/* clamp the last page's length so we never write past EOF */
1493 len = min(mapping->host->i_size - page_offset(page),
1494 (loff_t)PAGE_CACHE_SIZE);
1496 /* reserve iov[0] for the smb header */
1498 iov[n_iov].iov_base = kmap(page);
1499 iov[n_iov].iov_len = len;
1500 bytes_to_write += len;
1504 offset = page_offset(page);
1506 next = page->index + 1;
1507 if (bytes_to_write + PAGE_CACHE_SIZE > cifs_sb->wsize)
1511 /* Search for a writable handle every time we call
1512 * CIFSSMBWrite2. We can't rely on the last handle
1513 * we used to still be valid
1515 open_file = find_writable_file(CIFS_I(mapping->host));
1517 cERROR(1, ("No writable handles for inode"));
1520 long_op = cifs_write_timeout(cifsi, offset);
1521 rc = CIFSSMBWrite2(xid, cifs_sb->tcon,
1523 bytes_to_write, offset,
1524 &bytes_written, iov, n_iov,
1526 cifsFileInfo_put(open_file);
1527 cifs_update_eof(cifsi, offset, bytes_written);
1529 if (rc || bytes_written < bytes_to_write) {
1530 cERROR(1, ("Write2 ret %d, wrote %d",
1531 rc, bytes_written));
1532 /* BB what if continued retry is
1533 requested via mount flags? */
1535 set_bit(AS_ENOSPC, &mapping->flags);
1537 set_bit(AS_EIO, &mapping->flags);
1539 cifs_stats_bytes_written(cifs_sb->tcon,
/* kunmap/unlock/end-writeback every page that was in the batch */
1543 for (i = 0; i < n_iov; i++) {
1544 page = pvec.pages[first + i];
1545 /* Should we also set page error on
1546 success rc but too little data written? */
1547 /* BB investigate retry logic on temporary
1548 server crash cases and how recovery works
1549 when page marked as error */
1554 end_page_writeback(page);
1555 page_cache_release(page);
1557 if ((wbc->nr_to_write -= n_iov) <= 0)
1561 /* Need to re-find the pages we skipped */
1562 index = pvec.pages[0]->index + 1;
1564 pagevec_release(&pvec);
1566 if (!scanned && !done) {
1568 * We hit the last page and there is more work to be done: wrap
1569 * back to the start of the file
1575 if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
1576 mapping->writeback_index = index;
/*
 * cifs_writepage - ->writepage: write a single page synchronously via
 * cifs_partialpagewrite(0, PAGE_CACHE_SIZE).
 *
 * Takes an extra page reference across the operation and brackets the
 * write with set_page_writeback()/end_page_writeback() so the radix
 * tree dirty/writeback tags stay consistent.
 */
1583 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
1589 /* BB add check for wbc flags */
1590 page_cache_get(page);
1591 if (!PageUptodate(page))
1592 cFYI(1, ("ppw - page not up to date"));
1595 * Set the "writeback" flag, and clear "dirty" in the radix tree.
1597 * A writepage() implementation always needs to do either this,
1598 * or re-dirty the page with "redirty_page_for_writepage()" in
1599 * the case of a failure.
1601 * Just unlocking the page will cause the radix tree tag-bits
1602 * to fail to update with the state of the page correctly.
1604 set_page_writeback(page);
1605 rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE);
1606 SetPageUptodate(page); /* BB add check for error and Clearuptodate? */
1608 end_page_writeback(page);
1609 page_cache_release(page);
/*
 * cifs_write_end - ->write_end: complete a buffered write begun by
 * cifs_write_begin.
 *
 * If PageChecked was set (write_begin decided the uncopied parts count
 * as up to date), or a full page was copied, the page is marked
 * uptodate and simply dirtied for later writeback.  A short copy into
 * a non-uptodate page is instead written synchronously through
 * cifs_write() using this file's handle.  If the write extended the
 * file, i_size is advanced under i_lock.
 */
1614 static int cifs_write_end(struct file *file, struct address_space *mapping,
1615 loff_t pos, unsigned len, unsigned copied,
1616 struct page *page, void *fsdata)
1619 struct inode *inode = mapping->host;
1621 cFYI(1, ("write_end for page %p from pos %lld with %d bytes",
1622 page, pos, copied));
1624 if (PageChecked(page)) {
1626 SetPageUptodate(page);
1627 ClearPageChecked(page);
1628 } else if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
1629 SetPageUptodate(page);
1631 if (!PageUptodate(page)) {
1633 unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
1637 /* this is probably better than directly calling
1638 partialpage_write since in this function the file handle is
1639 known which we might as well leverage */
1640 /* BB check if anything else missing out of ppw
1641 such as updating last write time */
1642 page_data = kmap(page);
1643 rc = cifs_write(file, page_data + offset, copied, &pos);
1644 /* if (rc < 0) should we set writebehind rc? */
1651 set_page_dirty(page);
1655 spin_lock(&inode->i_lock);
1656 if (pos > inode->i_size)
1657 i_size_write(inode, pos);
1658 spin_unlock(&inode->i_lock);
1662 page_cache_release(page);
/*
 * cifs_fsync - flush dirty pages for the inode and, unless the mount
 * has CIFS_MOUNT_NOSSYNC set, issue a server-side CIFSSMBFlush on the
 * file's netfid.  A stored writebehind error (write_behind_rc) is
 * reported to the caller once and then cleared.
 */
1667 int cifs_fsync(struct file *file, struct dentry *dentry, int datasync)
1671 struct cifsTconInfo *tcon;
1672 struct cifsFileInfo *smbfile =
1673 (struct cifsFileInfo *)file->private_data;
1674 struct inode *inode = file->f_path.dentry->d_inode;
1678 cFYI(1, ("Sync file - name: %s datasync: 0x%x",
1679 dentry->d_name.name, datasync));
1681 rc = filemap_write_and_wait(inode->i_mapping);
1683 rc = CIFS_I(inode)->write_behind_rc;
1684 CIFS_I(inode)->write_behind_rc = 0;
1685 tcon = CIFS_SB(inode->i_sb)->tcon;
1686 if (!rc && tcon && smbfile &&
1687 !(CIFS_SB(inode->i_sb)->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC))
1688 rc = CIFSSMBFlush(xid, tcon, smbfile->netfid);
1695 /* static void cifs_sync_page(struct page *page)
1697 struct address_space *mapping;
1698 struct inode *inode;
1699 unsigned long index = page->index;
1700 unsigned int rpages = 0;
1703 cFYI(1, ("sync page %p",page));
1704 mapping = page->mapping;
1707 inode = mapping->host;
1711 /* fill in rpages then
1712 result = cifs_pagein_inode(inode, index, rpages); */ /* BB finish */
1714 /* cFYI(1, ("rpages is %d for sync page of Index %ld", rpages, index));
1724 * As file closes, flush all cached write data for this inode checking
1725 * for write behind errors.
1727 int cifs_flush(struct file *file, fl_owner_t id)
1729 struct inode *inode = file->f_path.dentry->d_inode;
1732 /* Rather than do the steps manually:
1733 lock the inode for writing
1734 loop through pages looking for write behind data (dirty pages)
1735 coalesce into contiguous 16K (or smaller) chunks to write to server
1736 send to server (prefer in parallel)
1737 deal with writebehind errors
1738 unlock inode for writing
1739 filemapfdatawrite appears easier for the time being */
1741 rc = filemap_fdatawrite(inode->i_mapping);
1742 /* reset wb rc if we were able to write out dirty pages */
/* report any stashed writebehind error exactly once, then clear it */
1744 rc = CIFS_I(inode)->write_behind_rc;
1745 CIFS_I(inode)->write_behind_rc = 0;
1748 cFYI(1, ("Flush inode %p file %p rc %d", inode, file, rc));
/*
 * cifs_user_read - read from the server into a user-space buffer
 *
 * Loops issuing CIFSSMBRead in chunks bounded by read_size - total_read,
 * then copy_to_user()s the payload out of the SMB response buffer
 * (skipping the 4-byte RFC1001 length field plus the DataOffset from
 * the read response header).  An invalidated-but-not-closing handle is
 * reopened and the request retried while rc == -EAGAIN.  The response
 * buffer is released according to its buf_type after each chunk.
 * *poffset is advanced and per-tcon read stats updated as data arrives.
 */
1753 ssize_t cifs_user_read(struct file *file, char __user *read_data,
1754 size_t read_size, loff_t *poffset)
1757 unsigned int bytes_read = 0;
1758 unsigned int total_read = 0;
1759 unsigned int current_read_size;
1760 struct cifs_sb_info *cifs_sb;
1761 struct cifsTconInfo *pTcon;
1763 struct cifsFileInfo *open_file;
1764 char *smb_read_data;
1765 char __user *current_offset;
1766 struct smb_com_read_rsp *pSMBr;
1769 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1770 pTcon = cifs_sb->tcon;
1772 if (file->private_data == NULL) {
1777 open_file = (struct cifsFileInfo *)file->private_data;
1779 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
1780 cFYI(1, ("attempting read on write only file instance"));
1782 for (total_read = 0, current_offset = read_data;
1783 read_size > total_read;
1784 total_read += bytes_read, current_offset += bytes_read) {
1785 current_read_size = min_t(const int, read_size - total_read,
1788 smb_read_data = NULL;
1789 while (rc == -EAGAIN) {
1790 int buf_type = CIFS_NO_BUFFER;
1791 if ((open_file->invalidHandle) &&
1792 (!open_file->closePend)) {
1793 rc = cifs_reopen_file(file, true);
1797 rc = CIFSSMBRead(xid, pTcon,
1799 current_read_size, *poffset,
1800 &bytes_read, &smb_read_data,
1802 pSMBr = (struct smb_com_read_rsp *)smb_read_data;
1803 if (smb_read_data) {
1804 if (copy_to_user(current_offset,
1806 4 /* RFC1001 length field */ +
1807 le16_to_cpu(pSMBr->DataOffset),
1811 if (buf_type == CIFS_SMALL_BUFFER)
1812 cifs_small_buf_release(smb_read_data)
1813 else if (buf_type == CIFS_LARGE_BUFFER)
1814 cifs_buf_release(smb_read_data)
1815 smb_read_data = NULL;
1818 if (rc || (bytes_read == 0)) {
1826 cifs_stats_bytes_read(pTcon, bytes_read);
1827 *poffset += bytes_read;
/*
 * cifs_read - read from the server directly into a kernel buffer
 *
 * Like cifs_user_read() but the destination is a kernel pointer, so
 * CIFSSMBRead() can place the payload straight into the caller's
 * buffer (current_offset) with no intermediate response-buffer copy.
 * The per-request size is additionally clamped to the server's
 * negotiated maxBuf - 128 when the session lacks CAP_LARGE_FILES
 * (Windows ME/9x servers refuse larger reads).  An invalidated handle
 * that is not close-pending is reopened and the request retried while
 * rc == -EAGAIN.  *poffset is advanced and per-tcon read statistics
 * are updated as data arrives.
 */
1835 static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
1839 unsigned int bytes_read = 0;
1840 unsigned int total_read;
1841 unsigned int current_read_size;
1842 struct cifs_sb_info *cifs_sb;
1843 struct cifsTconInfo *pTcon;
1845 char *current_offset;
1846 struct cifsFileInfo *open_file;
1847 int buf_type = CIFS_NO_BUFFER;
1850 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1851 pTcon = cifs_sb->tcon;
1853 if (file->private_data == NULL) {
1858 open_file = (struct cifsFileInfo *)file->private_data;
1860 if ((file->f_flags & O_ACCMODE) == O_WRONLY)
1861 cFYI(1, ("attempting read on write only file instance"));
1863 for (total_read = 0, current_offset = read_data;
1864 read_size > total_read;
1865 total_read += bytes_read, current_offset += bytes_read) {
1866 current_read_size = min_t(const int, read_size - total_read,
1868 /* For windows me and 9x we do not want to request more
1869 than it negotiated since it will refuse the read then */
1871 !(pTcon->ses->capabilities & CAP_LARGE_FILES)) {
1872 current_read_size = min_t(const int, current_read_size,
1873 pTcon->ses->server->maxBuf - 128);
1876 while (rc == -EAGAIN) {
1877 if ((open_file->invalidHandle) &&
1878 (!open_file->closePend)) {
1879 rc = cifs_reopen_file(file, true);
1883 rc = CIFSSMBRead(xid, pTcon,
1885 current_read_size, *poffset,
1886 &bytes_read, &current_offset,
1889 if (rc || (bytes_read == 0)) {
1897 cifs_stats_bytes_read(pTcon, total_read);
1898 *poffset += bytes_read;
/*
 * cifs_file_mmap - revalidate cached metadata for the dentry (a failed
 * revalidation is logged), then delegate to generic_file_mmap().
 */
1905 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
1907 struct dentry *dentry = file->f_path.dentry;
1911 rc = cifs_revalidate(dentry);
1913 cFYI(1, ("Validation prior to mmap failed, error=%d", rc));
1917 rc = generic_file_mmap(file, vma);
/*
 * cifs_copy_cache_pages - copy freshly-read data into readahead pages
 *
 * Pops pages off the tail of the readahead list, inserts each into the
 * page cache, and memcpy()s bytes_read worth of data into them,
 * zero-filling the tail of a final partial page.  Each filled page is
 * flushed, marked uptodate, and batched onto the file LRU via
 * plru_pvec.  If add_to_page_cache() fails, the page is released and
 * that page-sized chunk of data is skipped.
 */
1923 static void cifs_copy_cache_pages(struct address_space *mapping,
1924 struct list_head *pages, int bytes_read, char *data,
1925 struct pagevec *plru_pvec)
1930 while (bytes_read > 0) {
1931 if (list_empty(pages))
1934 page = list_entry(pages->prev, struct page, lru);
1935 list_del(&page->lru);
1937 if (add_to_page_cache(page, mapping, page->index,
1939 page_cache_release(page);
1940 cFYI(1, ("Add page cache failed"));
1941 data += PAGE_CACHE_SIZE;
1942 bytes_read -= PAGE_CACHE_SIZE;
/* atomic kmap: no sleeping allowed until kunmap_atomic below */
1946 target = kmap_atomic(page, KM_USER0);
1948 if (PAGE_CACHE_SIZE > bytes_read) {
1949 memcpy(target, data, bytes_read);
1950 /* zero the tail end of this partial page */
1951 memset(target + bytes_read, 0,
1952 PAGE_CACHE_SIZE - bytes_read);
1955 memcpy(target, data, PAGE_CACHE_SIZE);
1956 bytes_read -= PAGE_CACHE_SIZE;
1958 kunmap_atomic(target, KM_USER0);
1960 flush_dcache_page(page);
1961 SetPageUptodate(page);
/* drain the pagevec to the file LRU when it fills */
1963 if (!pagevec_add(plru_pvec, page))
1964 __pagevec_lru_add_file(plru_pvec);
1965 data += PAGE_CACHE_SIZE;
/*
 * cifs_readpages - ->readpages: populate a list of readahead pages
 *
 * Groups pages whose indices are contiguous and issues one CIFSSMBRead
 * per group, with the read size bounded by cifs_sb->rsize and kept
 * page-aligned.  The response payload (past the 4-byte RFC1001 header
 * plus DataOffset) is copied into the page cache via
 * cifs_copy_cache_pages().  Requests hitting -EAGAIN release the
 * response buffer, reopen an invalidated handle if needed, and retry.
 * A failed or zero-byte read ends the loop; remaining readahead pages
 * are cleaned up (the server copy of the file may simply be smaller).
 */
1970 static int cifs_readpages(struct file *file, struct address_space *mapping,
1971 struct list_head *page_list, unsigned num_pages)
1977 struct cifs_sb_info *cifs_sb;
1978 struct cifsTconInfo *pTcon;
1979 unsigned int bytes_read = 0;
1980 unsigned int read_size, i;
1981 char *smb_read_data = NULL;
1982 struct smb_com_read_rsp *pSMBr;
1983 struct pagevec lru_pvec;
1984 struct cifsFileInfo *open_file;
1985 int buf_type = CIFS_NO_BUFFER;
1988 if (file->private_data == NULL) {
1993 open_file = (struct cifsFileInfo *)file->private_data;
1994 cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1995 pTcon = cifs_sb->tcon;
1997 pagevec_init(&lru_pvec, 0);
1998 cFYI(DBG2, ("rpages: num pages %d", num_pages));
1999 for (i = 0; i < num_pages; ) {
2000 unsigned contig_pages;
2001 struct page *tmp_page;
2002 unsigned long expected_index;
2004 if (list_empty(page_list))
2007 page = list_entry(page_list->prev, struct page, lru);
2008 offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
2010 /* count adjacent pages that we will read into */
2013 list_entry(page_list->prev, struct page, lru)->index;
2014 list_for_each_entry_reverse(tmp_page, page_list, lru) {
2015 if (tmp_page->index == expected_index) {
2021 if (contig_pages + i > num_pages)
2022 contig_pages = num_pages - i;
2024 /* for reads over a certain size could initiate async
2027 read_size = contig_pages * PAGE_CACHE_SIZE;
2028 /* Read size needs to be in multiples of one page */
2029 read_size = min_t(const unsigned int, read_size,
2030 cifs_sb->rsize & PAGE_CACHE_MASK);
2031 cFYI(DBG2, ("rpages: read size 0x%x contiguous pages %d",
2032 read_size, contig_pages));
2034 while (rc == -EAGAIN) {
2035 if ((open_file->invalidHandle) &&
2036 (!open_file->closePend)) {
2037 rc = cifs_reopen_file(file, true);
2042 rc = CIFSSMBRead(xid, pTcon,
2045 &bytes_read, &smb_read_data,
2047 /* BB more RC checks ? */
2048 if (rc == -EAGAIN) {
2049 if (smb_read_data) {
2050 if (buf_type == CIFS_SMALL_BUFFER)
2051 cifs_small_buf_release(smb_read_data);
2052 else if (buf_type == CIFS_LARGE_BUFFER)
2053 cifs_buf_release(smb_read_data);
2054 smb_read_data = NULL;
2058 if ((rc < 0) || (smb_read_data == NULL)) {
2059 cFYI(1, ("Read error in readpages: %d", rc));
2061 } else if (bytes_read > 0) {
2062 task_io_account_read(bytes_read);
2063 pSMBr = (struct smb_com_read_rsp *)smb_read_data;
2064 cifs_copy_cache_pages(mapping, page_list, bytes_read,
2065 smb_read_data + 4 /* RFC1001 hdr */ +
2066 le16_to_cpu(pSMBr->DataOffset), &lru_pvec);
2068 i += bytes_read >> PAGE_CACHE_SHIFT;
2069 cifs_stats_bytes_read(pTcon, bytes_read);
2070 if ((bytes_read & PAGE_CACHE_MASK) != bytes_read) {
2071 i++; /* account for partial page */
2073 /* server copy of file can have smaller size
2075 /* BB do we need to verify this common case ?
2076 this case is ok - if we are at server EOF
2077 we will hit it on next read */
2082 cFYI(1, ("No bytes read (%d) at offset %lld . "
2083 "Cleaning remaining pages from readahead list",
2084 bytes_read, offset));
2085 /* BB turn off caching and do new lookup on
2086 file size at server? */
2089 if (smb_read_data) {
2090 if (buf_type == CIFS_SMALL_BUFFER)
2091 cifs_small_buf_release(smb_read_data);
2092 else if (buf_type == CIFS_LARGE_BUFFER)
2093 cifs_buf_release(smb_read_data);
2094 smb_read_data = NULL;
2099 pagevec_lru_add_file(&lru_pvec);
2101 /* need to free smb_read_data buf before exit */
2102 if (smb_read_data) {
2103 if (buf_type == CIFS_SMALL_BUFFER)
2104 cifs_small_buf_release(smb_read_data);
2105 else if (buf_type == CIFS_LARGE_BUFFER)
2106 cifs_buf_release(smb_read_data);
2107 smb_read_data = NULL;
/*
 * cifs_readpage_worker - synchronously fill one page from the server
 *
 * kmaps the page, reads up to PAGE_CACHE_SIZE bytes at *poffset via
 * cifs_read(), zero-fills the remainder on a short read, updates the
 * inode's atime, then flushes the dcache and marks the page uptodate.
 */
2114 static int cifs_readpage_worker(struct file *file, struct page *page,
2120 page_cache_get(page);
2121 read_data = kmap(page);
2122 /* for reads over a certain size could initiate async read ahead */
2124 rc = cifs_read(file, read_data, PAGE_CACHE_SIZE, poffset);
2129 cFYI(1, ("Bytes read %d", rc));
2131 file->f_path.dentry->d_inode->i_atime =
2132 current_fs_time(file->f_path.dentry->d_inode->i_sb);
2134 if (PAGE_CACHE_SIZE > rc)
2135 memset(read_data + rc, 0, PAGE_CACHE_SIZE - rc);
2137 flush_dcache_page(page);
2138 SetPageUptodate(page);
2143 page_cache_release(page);
/*
 * cifs_readpage - ->readpage: verify the file has private handle data,
 * then delegate the actual read to cifs_readpage_worker().
 */
2147 static int cifs_readpage(struct file *file, struct page *page)
2149 loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
2155 if (file->private_data == NULL) {
2161 cFYI(1, ("readpage %p at offset %d 0x%x\n",
2162 page, (int)offset, (int)offset));
2164 rc = cifs_readpage_worker(file, page, &offset);
/*
 * is_inode_writable - return whether any open handle on this inode
 * (ignoring those with a close pending) was opened with write access
 * (O_RDWR or O_WRONLY).  List walk is protected by GlobalSMBSeslock.
 */
2172 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
2174 struct cifsFileInfo *open_file;
2176 read_lock(&GlobalSMBSeslock);
2177 list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2178 if (open_file->closePend)
2180 if (open_file->pfile &&
2181 ((open_file->pfile->f_flags & O_RDWR) ||
2182 (open_file->pfile->f_flags & O_WRONLY))) {
2183 read_unlock(&GlobalSMBSeslock);
2187 read_unlock(&GlobalSMBSeslock);
2191 /* We do not want to update the file size from server for inodes
2192 open for write - to avoid races with writepage extending
2193 the file - in the future we could consider allowing
2194 refreshing the inode only on increases in the file size
2195 but this is tricky to do without racing with writebehind
2196 page caching in the current Linux kernel design */
/*
 * With a direct-I/O mount there are no cached pages to corrupt, so the
 * size can always be refreshed; otherwise, when the inode is open for
 * write, only allow updates that do not shrink below the server EOF.
 */
2197 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
2202 if (is_inode_writable(cifsInode)) {
2203 /* This inode is open for write at least once */
2204 struct cifs_sb_info *cifs_sb;
2206 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
2207 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
2208 /* since no page cache to corrupt on directio
2209 we can change size safely */
2213 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
/*
 * cifs_write_begin - ->write_begin: lock the target page and decide
 * whether its current contents must be read from the server first.
 *
 * The read-in is skipped when the page is already uptodate or the
 * write covers a full page.  With a read oplock, if the page lies at
 * or beyond EOF (or straddles EOF and the write covers all existing
 * data), the unwritten parts are zeroed and PageChecked is set so
 * cifs_write_end() can mark the page uptodate after the copy.
 * Otherwise, if the file is open for reading, the page is populated
 * via cifs_readpage_worker() — errors there are deliberately ignored,
 * since cifs_write_end() falls back to a synchronous write whenever
 * PG_uptodate remains unset.
 */
2221 static int cifs_write_begin(struct file *file, struct address_space *mapping,
2222 loff_t pos, unsigned len, unsigned flags,
2223 struct page **pagep, void **fsdata)
2225 pgoff_t index = pos >> PAGE_CACHE_SHIFT;
2226 loff_t offset = pos & (PAGE_CACHE_SIZE - 1);
2227 loff_t page_start = pos & PAGE_MASK;
2232 cFYI(1, ("write_begin from %lld len %d", (long long)pos, len));
2234 page = grab_cache_page_write_begin(mapping, index, flags);
2240 if (PageUptodate(page))
2244 * If we write a full page it will be up to date, no need to read from
2245 * the server. If the write is short, we'll end up doing a sync write
2248 if (len == PAGE_CACHE_SIZE)
2252 * optimize away the read when we have an oplock, and we're not
2253 * expecting to use any of the data we'd be reading in. That
2254 * is, when the page lies beyond the EOF, or straddles the EOF
2255 * and the write will cover all of the existing data.
2257 if (CIFS_I(mapping->host)->clientCanCacheRead) {
2258 i_size = i_size_read(mapping->host);
2259 if (page_start >= i_size ||
2260 (offset == 0 && (pos + len) >= i_size)) {
2261 zero_user_segments(page, 0, offset,
2265 * PageChecked means that the parts of the page
2266 * to which we're not writing are considered up
2267 * to date. Once the data is copied to the
2268 * page, it can be set uptodate.
2270 SetPageChecked(page);
2275 if ((file->f_flags & O_ACCMODE) != O_WRONLY) {
2277 * might as well read a page, it is fast enough. If we get
2278 * an error, we don't need to return it. cifs_write_end will
2279 * do a sync write instead since PG_uptodate isn't set.
2281 cifs_readpage_worker(file, page, &page_start);
2283 /* we could try using another file handle if there is one -
2284 but how would we lock it to prevent close of that handle
2285 racing with this read? In any case
2286 this will be written out by write_end so is fine */
/*
 * cifs_oplock_break - slow_work handler run when the server breaks our
 * oplock on a file.
 *
 * For regular files, flushes dirty pages; if the read cache is also
 * being lost (clientCanCacheRead == 0), waits for the flush and
 * invalidates the remotely-cached inode data.  A flush error is
 * stashed in write_behind_rc for later reporting.  Finally the break
 * is acknowledged to the server with a LOCKING_ANDX_OPLOCK_RELEASE
 * request — skipped when the handle is close-pending or the break was
 * cancelled (e.g. the session reconnected and the server already
 * considers the oplock gone).
 */
2294 cifs_oplock_break(struct slow_work *work)
2296 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
2298 struct inode *inode = cfile->pInode;
2299 struct cifsInodeInfo *cinode = CIFS_I(inode);
2300 struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->mnt->mnt_sb);
2303 if (inode && S_ISREG(inode->i_mode)) {
2304 #ifdef CONFIG_CIFS_EXPERIMENTAL
2305 if (cinode->clientCanCacheAll == 0)
2306 break_lease(inode, FMODE_READ);
2307 else if (cinode->clientCanCacheRead == 0)
2308 break_lease(inode, FMODE_WRITE);
2310 rc = filemap_fdatawrite(inode->i_mapping);
2311 if (cinode->clientCanCacheRead == 0) {
2312 waitrc = filemap_fdatawait(inode->i_mapping);
2313 invalidate_remote_inode(inode);
2318 cinode->write_behind_rc = rc;
2319 cFYI(1, ("Oplock flush inode %p rc %d", inode, rc));
2323 * releasing stale oplock after recent reconnect of smb session using
2324 * a now incorrect file handle is not a data integrity issue but do
2325 * not bother sending an oplock release if session to server still is
2326 * disconnected since oplock already released by the server
2328 if (!cfile->closePend && !cfile->oplock_break_cancelled) {
2329 rc = CIFSSMBLock(0, cifs_sb->tcon, cfile->netfid, 0, 0, 0, 0,
2330 LOCKING_ANDX_OPLOCK_RELEASE, false);
2331 cFYI(1, ("Oplock release rc = %d", rc));
/* slow_work get_ref callback: pin the cifsFileInfo while the oplock
   break work item is queued/running */
2336 cifs_oplock_break_get(struct slow_work *work)
2338 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
2341 cifsFileInfo_get(cfile);
/* slow_work put_ref callback: drop the reference taken by
   cifs_oplock_break_get() */
2346 cifs_oplock_break_put(struct slow_work *work)
2348 struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
2351 cifsFileInfo_put(cfile);
/* slow_work operations table for oplock break handling */
2354 const struct slow_work_ops cifs_oplock_break_ops = {
2355 .get_ref = cifs_oplock_break_get,
2356 .put_ref = cifs_oplock_break_put,
2357 .execute = cifs_oplock_break,
/* address_space operations used when the server buffer is large enough
   for cifs_readpages (see cifs_addr_ops_smallbuf for the alternative) */
2360 const struct address_space_operations cifs_addr_ops = {
2361 .readpage = cifs_readpage,
2362 .readpages = cifs_readpages,
2363 .writepage = cifs_writepage,
2364 .writepages = cifs_writepages,
2365 .write_begin = cifs_write_begin,
2366 .write_end = cifs_write_end,
2367 .set_page_dirty = __set_page_dirty_nobuffers,
2368 /* .sync_page = cifs_sync_page, */
2373 * cifs_readpages requires the server to support a buffer large enough to
2374 * contain the header plus one complete page of data. Otherwise, we need
2375 * to leave cifs_readpages out of the address space operations.
2377 const struct address_space_operations cifs_addr_ops_smallbuf = {
2378 .readpage = cifs_readpage,
2379 .writepage = cifs_writepage,
2380 .writepages = cifs_writepages,
2381 .write_begin = cifs_write_begin,
2382 .write_end = cifs_write_end,
2383 .set_page_dirty = __set_page_dirty_nobuffers,
2384 /* .sync_page = cifs_sync_page, */