1 /*
2  *   fs/cifs/file.c
3  *
4  *   vfs operations that deal with files
5  *
6  *   Copyright (C) International Business Machines  Corp., 2002,2010
7  *   Author(s): Steve French (sfrench@us.ibm.com)
8  *              Jeremy Allison (jra@samba.org)
9  *
10  *   This library is free software; you can redistribute it and/or modify
11  *   it under the terms of the GNU Lesser General Public License as published
12  *   by the Free Software Foundation; either version 2.1 of the License, or
13  *   (at your option) any later version.
14  *
15  *   This library is distributed in the hope that it will be useful,
16  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
17  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
18  *   the GNU Lesser General Public License for more details.
19  *
20  *   You should have received a copy of the GNU Lesser General Public License
21  *   along with this library; if not, write to the Free Software
22  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23  */
24 #include <linux/fs.h>
25 #include <linux/backing-dev.h>
26 #include <linux/stat.h>
27 #include <linux/fcntl.h>
28 #include <linux/pagemap.h>
29 #include <linux/pagevec.h>
30 #include <linux/writeback.h>
31 #include <linux/task_io_accounting_ops.h>
32 #include <linux/delay.h>
33 #include <linux/mount.h>
34 #include <linux/slab.h>
35 #include <asm/div64.h>
36 #include "cifsfs.h"
37 #include "cifspdu.h"
38 #include "cifsglob.h"
39 #include "cifsproto.h"
40 #include "cifs_unicode.h"
41 #include "cifs_debug.h"
42 #include "cifs_fs_sb.h"
43 #include "fscache.h"
44
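/*
 * Map the access mode bits in the VFS open flags to the NT desired
 * access bits requested in the SMB open (GENERIC_READ, GENERIC_WRITE,
 * or both for O_RDWR).
 */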
45 static inline int cifs_convert_flags(unsigned int flags)
46 {
47         if ((flags & O_ACCMODE) == O_RDONLY)
48                 return GENERIC_READ;
49         else if ((flags & O_ACCMODE) == O_WRONLY)
50                 return GENERIC_WRITE;
51         else if ((flags & O_ACCMODE) == O_RDWR) {
52         /* GENERIC_ALL is too much permission to request; it can
53            cause unnecessary access denied errors on create */
54                 /* return GENERIC_ALL; */
55                 return (GENERIC_READ | GENERIC_WRITE);
56         }
57
58         return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
59                 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
60                 FILE_READ_DATA);
61 }
62
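/*
 * Map VFS open flags to the flag set used for SMB POSIX opens: the
 * access mode becomes FMODE_READ/FMODE_WRITE and a few other flags
 * (append, sync, direct, etc.) are passed through unchanged.
 */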
63 static inline fmode_t cifs_posix_convert_flags(unsigned int flags)
64 {
65         fmode_t posix_flags = 0;
66
67         if ((flags & O_ACCMODE) == O_RDONLY)
68                 posix_flags = FMODE_READ;
69         else if ((flags & O_ACCMODE) == O_WRONLY)
70                 posix_flags = FMODE_WRITE;
71         else if ((flags & O_ACCMODE) == O_RDWR) {
72                 /* GENERIC_ALL is too much permission to request; it can
73                    cause unnecessary access denied errors on create */
74                 /* return GENERIC_ALL; */
75                 posix_flags = FMODE_READ | FMODE_WRITE;
76         }
77         /* can not map O_CREAT or O_EXCL or O_TRUNC flags when
78            reopening a file.  They had their effect on the original open */
79         if (flags & O_APPEND)
80                 posix_flags |= (fmode_t)O_APPEND;
81         if (flags & O_DSYNC)
82                 posix_flags |= (fmode_t)O_DSYNC;
83         if (flags & __O_SYNC)
84                 posix_flags |= (fmode_t)__O_SYNC;
85         if (flags & O_DIRECTORY)
86                 posix_flags |= (fmode_t)O_DIRECTORY;
87         if (flags & O_NOFOLLOW)
88                 posix_flags |= (fmode_t)O_NOFOLLOW;
89         if (flags & O_DIRECT)
90                 posix_flags |= (fmode_t)O_DIRECT;
91
92         return posix_flags;
93 }
94
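/*
 * Pick the CIFS create disposition that corresponds to the
 * O_CREAT/O_EXCL/O_TRUNC combination in the open flags (see the
 * mapping table in cifs_open below).
 */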
95 static inline int cifs_get_disposition(unsigned int flags)
96 {
97         if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
98                 return FILE_CREATE;
99         else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
100                 return FILE_OVERWRITE_IF;
101         else if ((flags & O_CREAT) == O_CREAT)
102                 return FILE_OPEN_IF;
103         else if ((flags & O_TRUNC) == O_TRUNC)
104                 return FILE_OVERWRITE;
105         else
106                 return FILE_OPEN;
107 }
108
109 /* all arguments to this function must be checked for validity in caller */
110 static inline int
111 cifs_posix_open_inode_helper(struct inode *inode, struct file *file,
112                              struct cifsInodeInfo *pCifsInode, __u32 oplock,
113                              u16 netfid)
114 {
115
116         write_lock(&GlobalSMBSeslock);
117
118         pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
119         if (pCifsInode == NULL) {
120                 write_unlock(&GlobalSMBSeslock);
121                 return -EINVAL;
122         }
123
124         if (pCifsInode->clientCanCacheRead) {
125                 /* we have the inode open somewhere else;
126                    no need to discard cache data */
127                 goto psx_client_can_cache;
128         }
129
130         /* BB FIXME need to fix this check to move it earlier into posix_open
131            BB fix the following section BB FIXME */
132
133         /* if not oplocked, invalidate inode pages if mtime or file
134            size changed */
135 /*      temp = cifs_NTtimeToUnix(le64_to_cpu(buf->LastWriteTime));
136         if (timespec_equal(&file->f_path.dentry->d_inode->i_mtime, &temp) &&
137                            (file->f_path.dentry->d_inode->i_size ==
138                             (loff_t)le64_to_cpu(buf->EndOfFile))) {
139                 cFYI(1, "inode unchanged on server");
140         } else {
141                 if (file->f_path.dentry->d_inode->i_mapping) {
142                         rc = filemap_write_and_wait(file->f_path.dentry->d_inode->i_mapping);
143                         if (rc != 0)
144                                 CIFS_I(file->f_path.dentry->d_inode)->write_behind_rc = rc;
145                 }
146                 cFYI(1, "invalidating remote inode since open detected it "
147                          "changed");
148                 invalidate_remote_inode(file->f_path.dentry->d_inode);
149         } */
150
151 psx_client_can_cache:
152         if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) {
153                 pCifsInode->clientCanCacheAll = true;
154                 pCifsInode->clientCanCacheRead = true;
155                 cFYI(1, "Exclusive Oplock granted on inode %p",
156                          file->f_path.dentry->d_inode);
157         } else if ((oplock & 0xF) == OPLOCK_READ)
158                 pCifsInode->clientCanCacheRead = true;
159
160         /* will have to change the unlock if we re-enable the
161            filemap_fdatawrite (which does not seem necessary) */
162         write_unlock(&GlobalSMBSeslock);
163         return 0;
164 }
165
166 /* all arguments to this function must be checked for validity in caller */
167 static inline int cifs_open_inode_helper(struct inode *inode,
168         struct cifsTconInfo *pTcon, __u32 oplock, FILE_ALL_INFO *buf,
169         char *full_path, int xid)
170 {
171         struct cifsInodeInfo *pCifsInode = CIFS_I(inode);
172         struct timespec temp;
173         int rc;
174
175         if (pCifsInode->clientCanCacheRead) {
176                 /* we have the inode open somewhere else;
177                    no need to discard cache data */
178                 goto client_can_cache;
179         }
180
181         /* BB need same check in cifs_create too? */
182         /* if not oplocked, invalidate inode pages if mtime or file
183            size changed */
184         temp = cifs_NTtimeToUnix(buf->LastWriteTime);
185         if (timespec_equal(&inode->i_mtime, &temp) &&
186                            (inode->i_size ==
187                             (loff_t)le64_to_cpu(buf->EndOfFile))) {
188                 cFYI(1, "inode unchanged on server");
189         } else {
190                 if (inode->i_mapping) {
191                         /* BB no need to lock inode until after invalidate
192                         since namei code should already have it locked? */
193                         rc = filemap_write_and_wait(inode->i_mapping);
194                         if (rc != 0)
195                                 pCifsInode->write_behind_rc = rc;
196                 }
197                 cFYI(1, "invalidating remote inode since open detected it "
198                          "changed");
199                 invalidate_remote_inode(inode);
200         }
201
202 client_can_cache:
203         if (pTcon->unix_ext)
204                 rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
205                                               xid);
206         else
207                 rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
208                                          xid, NULL);
209
210         if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) {
211                 pCifsInode->clientCanCacheAll = true;
212                 pCifsInode->clientCanCacheRead = true;
213                 cFYI(1, "Exclusive Oplock granted on inode %p", inode);
214         } else if ((oplock & 0xF) == OPLOCK_READ)
215                 pCifsInode->clientCanCacheRead = true;
216
217         return rc;
218 }
219
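/*
 * Open a file on a cifs mount: try an SMB POSIX open when the server
 * advertises the POSIX path operations capability, fall back to the
 * NT (or legacy OpenX) open otherwise, and attach a new cifsFileInfo
 * to the struct file on success.
 */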
220 int cifs_open(struct inode *inode, struct file *file)
221 {
222         int rc = -EACCES;
223         int xid;
224         __u32 oplock;
225         struct cifs_sb_info *cifs_sb;
226         struct cifsTconInfo *tcon;
227         struct tcon_link *tlink;
228         struct cifsFileInfo *pCifsFile = NULL;
229         struct cifsInodeInfo *pCifsInode;
230         char *full_path = NULL;
231         int desiredAccess;
232         int disposition;
233         __u16 netfid;
234         FILE_ALL_INFO *buf = NULL;
235
236         xid = GetXid();
237
238         cifs_sb = CIFS_SB(inode->i_sb);
239         tlink = cifs_sb_tlink(cifs_sb);
240         if (IS_ERR(tlink)) {
241                 FreeXid(xid);
242                 return PTR_ERR(tlink);
243         }
244         tcon = tlink_tcon(tlink);
245
246         pCifsInode = CIFS_I(file->f_path.dentry->d_inode);
247
248         full_path = build_path_from_dentry(file->f_path.dentry);
249         if (full_path == NULL) {
250                 rc = -ENOMEM;
251                 goto out;
252         }
253
254         cFYI(1, "inode = 0x%p file flags are 0x%x for %s",
255                  inode, file->f_flags, full_path);
256
257         if (oplockEnabled)
258                 oplock = REQ_OPLOCK;
259         else
260                 oplock = 0;
261
262         if (!tcon->broken_posix_open && tcon->unix_ext &&
263             (tcon->ses->capabilities & CAP_UNIX) &&
264             (CIFS_UNIX_POSIX_PATH_OPS_CAP &
265                         le64_to_cpu(tcon->fsUnixInfo.Capability))) {
266                 int oflags = (int) cifs_posix_convert_flags(file->f_flags);
267                 oflags |= SMB_O_CREAT;
268                 /* can not refresh inode info since size could be stale */
269                 rc = cifs_posix_open(full_path, &inode, inode->i_sb,
270                                 cifs_sb->mnt_file_mode /* ignored */,
271                                 oflags, &oplock, &netfid, xid);
272                 if (rc == 0) {
273                         cFYI(1, "posix open succeeded");
274                         /* no need for special case handling of setting mode
275                            on read only files needed here */
276
277                         rc = cifs_posix_open_inode_helper(inode, file,
278                                         pCifsInode, oplock, netfid);
279                         if (rc != 0) {
280                                 CIFSSMBClose(xid, tcon, netfid);
281                                 goto out;
282                         }
283
284                         pCifsFile = cifs_new_fileinfo(inode, netfid, file,
285                                                         file->f_path.mnt,
286                                                         tcon, oflags, oplock);
287                         if (pCifsFile == NULL) {
288                                 CIFSSMBClose(xid, tcon, netfid);
289                                 rc = -ENOMEM;
290                         }
291
292                         cifs_fscache_set_inode_cookie(inode, file);
293
294                         goto out;
295                 } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
296                         if (tcon->ses->serverNOS)
297                                 cERROR(1, "server %s of type %s returned"
298                                            " unexpected error on SMB posix open"
299                                            ", disabling posix open support."
300                                            " Check if server update available.",
301                                            tcon->ses->serverName,
302                                            tcon->ses->serverNOS);
303                         tcon->broken_posix_open = true;
304                 } else if ((rc != -EIO) && (rc != -EREMOTE) &&
305                          (rc != -EOPNOTSUPP)) /* path not found or net err */
306                         goto out;
307                 /* else fall through to retry the open the old way on
308                    network i/o or DFS errors */
309         }
310
311         desiredAccess = cifs_convert_flags(file->f_flags);
312
313 /*********************************************************************
314  *  open flag mapping table:
315  *
316  *      POSIX Flag            CIFS Disposition
317  *      ----------            ----------------
318  *      O_CREAT               FILE_OPEN_IF
319  *      O_CREAT | O_EXCL      FILE_CREATE
320  *      O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
321  *      O_TRUNC               FILE_OVERWRITE
322  *      none of the above     FILE_OPEN
323  *
324  *      Note that there is no POSIX flag that maps directly to the
325  *      FILE_SUPERSEDE disposition (ie create whether or not the
326  *      file exists).  O_CREAT | O_TRUNC is similar, but it truncates
327  *      an existing file rather than creating a new one as
328  *      FILE_SUPERSEDE does (which uses the attributes / metadata
329  *      passed in on the open call).
330  *
331  *      O_SYNC is a reasonable match to the CIFS writethrough flag
332  *      and the read/write flags match reasonably.  O_LARGEFILE
333  *      is irrelevant because largefile support is always used
334  *      by this client.  Flags O_APPEND, O_DIRECT, O_DIRECTORY,
335  *      O_FASYNC, O_NOFOLLOW and O_NONBLOCK need further investigation.
336
337         disposition = cifs_get_disposition(file->f_flags);
338
339         /* BB pass O_SYNC flag through on file attributes .. BB */
340
341         /* Also refresh inode by passing in file_info buf returned by SMBOpen
342            and calling get_inode_info with returned buf (at least helps
343            non-Unix server case) */
344
345         /* BB we cannot do this if this is the second open of a file
346            and the first handle has writebehind data; we might be
347            able to simply do a filemap_fdatawrite/filemap_fdatawait first */
348         buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
349         if (!buf) {
350                 rc = -ENOMEM;
351                 goto out;
352         }
353
354         if (tcon->ses->capabilities & CAP_NT_SMBS)
355                 rc = CIFSSMBOpen(xid, tcon, full_path, disposition,
356                          desiredAccess, CREATE_NOT_DIR, &netfid, &oplock, buf,
357                          cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
358                                  & CIFS_MOUNT_MAP_SPECIAL_CHR);
359         else
360                 rc = -EIO; /* no NT SMB support fall into legacy open below */
361
362         if (rc == -EIO) {
363                 /* Old server, try legacy style OpenX */
364                 rc = SMBLegacyOpen(xid, tcon, full_path, disposition,
365                         desiredAccess, CREATE_NOT_DIR, &netfid, &oplock, buf,
366                         cifs_sb->local_nls, cifs_sb->mnt_cifs_flags
367                                 & CIFS_MOUNT_MAP_SPECIAL_CHR);
368         }
369         if (rc) {
370                 cFYI(1, "cifs_open returned 0x%x", rc);
371                 goto out;
372         }
373
374         rc = cifs_open_inode_helper(inode, tcon, oplock, buf, full_path, xid);
375         if (rc != 0)
376                 goto out;
377
378         pCifsFile = cifs_new_fileinfo(inode, netfid, file, file->f_path.mnt,
379                                         tcon, file->f_flags, oplock);
380         if (pCifsFile == NULL) {
381                 rc = -ENOMEM;
382                 goto out;
383         }
384
385         cifs_fscache_set_inode_cookie(inode, file);
386
387         if (oplock & CIFS_CREATE_ACTION) {
388                 /* time to set mode which we can not set earlier due to
389                    problems creating new read-only files */
390                 if (tcon->unix_ext) {
391                         struct cifs_unix_set_info_args args = {
392                                 .mode   = inode->i_mode,
393                                 .uid    = NO_CHANGE_64,
394                                 .gid    = NO_CHANGE_64,
395                                 .ctime  = NO_CHANGE_64,
396                                 .atime  = NO_CHANGE_64,
397                                 .mtime  = NO_CHANGE_64,
398                                 .device = 0,
399                         };
400                         CIFSSMBUnixSetPathInfo(xid, tcon, full_path, &args,
401                                                cifs_sb->local_nls,
402                                                cifs_sb->mnt_cifs_flags &
403                                                 CIFS_MOUNT_MAP_SPECIAL_CHR);
404                 }
405         }
406
407 out:
408         kfree(buf);
409         kfree(full_path);
410         FreeXid(xid);
411         cifs_put_tlink(tlink);
412         return rc;
413 }
414
415 /* Try to reacquire byte range locks that were released when the
416    session to the server was lost */
417 static int cifs_relock_file(struct cifsFileInfo *cifsFile)
418 {
419         int rc = 0;
420
421 /* BB list all locks open on this file and relock */
422
423         return rc;
424 }
425
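/*
 * Re-establish a server file handle after the session has been
 * reconnected.  If can_flush is set, write out cached data and
 * refresh the inode info before re-enabling caching based on the
 * oplock that was granted.
 */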
426 static int cifs_reopen_file(struct file *file, bool can_flush)
427 {
428         int rc = -EACCES;
429         int xid;
430         __u32 oplock;
431         struct cifs_sb_info *cifs_sb;
432         struct cifsTconInfo *tcon;
433         struct cifsFileInfo *pCifsFile;
434         struct cifsInodeInfo *pCifsInode;
435         struct inode *inode;
436         char *full_path = NULL;
437         int desiredAccess;
438         int disposition = FILE_OPEN;
439         __u16 netfid;
440
441         if (file->private_data)
442                 pCifsFile = file->private_data;
443         else
444                 return -EBADF;
445
446         xid = GetXid();
447         mutex_lock(&pCifsFile->fh_mutex);
448         if (!pCifsFile->invalidHandle) {
449                 mutex_unlock(&pCifsFile->fh_mutex);
450                 rc = 0;
451                 FreeXid(xid);
452                 return rc;
453         }
454
455         if (file->f_path.dentry == NULL) {
456                 cERROR(1, "no valid name if dentry freed");
457                 dump_stack();
458                 rc = -EBADF;
459                 goto reopen_error_exit;
460         }
461
462         inode = file->f_path.dentry->d_inode;
463         if (inode == NULL) {
464                 cERROR(1, "inode not valid");
465                 dump_stack();
466                 rc = -EBADF;
467                 goto reopen_error_exit;
468         }
469
470         cifs_sb = CIFS_SB(inode->i_sb);
471         tcon = pCifsFile->tcon;
472
473 /* cannot grab the rename sem here because various ops (including
474    those that already hold the rename sem) can end up causing writepage
475    to get called; if the server was down, that means we end up here,
476    and we can never tell whether the caller already holds the rename_sem */
477         full_path = build_path_from_dentry(file->f_path.dentry);
478         if (full_path == NULL) {
479                 rc = -ENOMEM;
480 reopen_error_exit:
481                 mutex_unlock(&pCifsFile->fh_mutex);
482                 FreeXid(xid);
483                 return rc;
484         }
485
486         cFYI(1, "inode = 0x%p file flags 0x%x for %s",
487                  inode, file->f_flags, full_path);
488
489         if (oplockEnabled)
490                 oplock = REQ_OPLOCK;
491         else
492                 oplock = 0;
493
494         if (tcon->unix_ext && (tcon->ses->capabilities & CAP_UNIX) &&
495             (CIFS_UNIX_POSIX_PATH_OPS_CAP &
496                         le64_to_cpu(tcon->fsUnixInfo.Capability))) {
497                 int oflags = (int) cifs_posix_convert_flags(file->f_flags);
498                 /* can not refresh inode info since size could be stale */
499                 rc = cifs_posix_open(full_path, NULL, inode->i_sb,
500                                 cifs_sb->mnt_file_mode /* ignored */,
501                                 oflags, &oplock, &netfid, xid);
502                 if (rc == 0) {
503                         cFYI(1, "posix reopen succeeded");
504                         goto reopen_success;
505                 }
506                 /* fall through to retry the open the old way on errors;
507                    in the reconnect path especially, it is important to retry hard */
508         }
509
510         desiredAccess = cifs_convert_flags(file->f_flags);
511
512         /* Can not refresh inode by passing in file_info buf to be returned
513            by SMBOpen and then calling get_inode_info with returned buf
514            since file might have write behind data that needs to be flushed
515            and server version of file size can be stale. If we knew for sure
516            that inode was not dirty locally we could do this */
517
518         rc = CIFSSMBOpen(xid, tcon, full_path, disposition, desiredAccess,
519                          CREATE_NOT_DIR, &netfid, &oplock, NULL,
520                          cifs_sb->local_nls, cifs_sb->mnt_cifs_flags &
521                                 CIFS_MOUNT_MAP_SPECIAL_CHR);
522         if (rc) {
523                 mutex_unlock(&pCifsFile->fh_mutex);
524                 cFYI(1, "cifs_open returned 0x%x", rc);
525                 cFYI(1, "oplock: %d", oplock);
526         } else {
527 reopen_success:
528                 pCifsFile->netfid = netfid;
529                 pCifsFile->invalidHandle = false;
530                 mutex_unlock(&pCifsFile->fh_mutex);
531                 pCifsInode = CIFS_I(inode);
532                 if (pCifsInode) {
533                         if (can_flush) {
534                                 rc = filemap_write_and_wait(inode->i_mapping);
535                                 if (rc != 0)
536                                         CIFS_I(inode)->write_behind_rc = rc;
537                                 /* temporarily disable caching while we
538                                    go to the server to get inode info */
539                                 pCifsInode->clientCanCacheAll = false;
540                                 pCifsInode->clientCanCacheRead = false;
541                                 if (tcon->unix_ext)
542                                         rc = cifs_get_inode_info_unix(&inode,
543                                                 full_path, inode->i_sb, xid);
544                                 else
545                                         rc = cifs_get_inode_info(&inode,
546                                                 full_path, NULL, inode->i_sb,
547                                                 xid, NULL);
548                         } /* else we are writing out data to the server already
549                              and could deadlock if we tried to flush data, and
550                              since we do not know if we have data that would
551                              invalidate the current end of file on the server
552                              we cannot go to the server to get the new inode
553                              info */
554                         if ((oplock & 0xF) == OPLOCK_EXCLUSIVE) {
555                                 pCifsInode->clientCanCacheAll = true;
556                                 pCifsInode->clientCanCacheRead = true;
557                                 cFYI(1, "Exclusive Oplock granted on inode %p",
558                                          file->f_path.dentry->d_inode);
559                         } else if ((oplock & 0xF) == OPLOCK_READ) {
560                                 pCifsInode->clientCanCacheRead = true;
561                                 pCifsInode->clientCanCacheAll = false;
562                         } else {
563                                 pCifsInode->clientCanCacheRead = false;
564                                 pCifsInode->clientCanCacheAll = false;
565                         }
566                         cifs_relock_file(pCifsFile);
567                 }
568         }
569         kfree(full_path);
570         FreeXid(xid);
571         return rc;
572 }
573
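/*
 * Close a file handle: mark it close-pending, give in-flight writes a
 * short window to reach the server, send the SMB close if the tcon
 * does not need reconnecting, and discard any byte-range locks still
 * recorded on the handle.
 */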
574 int cifs_close(struct inode *inode, struct file *file)
575 {
576         int rc = 0;
577         int xid, timeout;
578         struct cifs_sb_info *cifs_sb;
579         struct cifsTconInfo *pTcon;
580         struct cifsFileInfo *pSMBFile = file->private_data;
581
582         xid = GetXid();
583
584         cifs_sb = CIFS_SB(inode->i_sb);
585         if (pSMBFile) {
586                 struct cifsLockInfo *li, *tmp;
587                 pTcon = pSMBFile->tcon;
588                 write_lock(&GlobalSMBSeslock);
589                 pSMBFile->closePend = true;
590                 if (pTcon) {
591                         /* no sense reconnecting to close a file that is
592                            already closed */
593                         if (!pTcon->need_reconnect) {
594                                 write_unlock(&GlobalSMBSeslock);
595                                 timeout = 2;
596                                 while ((atomic_read(&pSMBFile->count) != 1)
597                                         && (timeout <= 2048)) {
598                                         /* Give write a better chance to get to
599                                         server ahead of the close.  We do not
600                                         want to add a wait_q here as it would
601                                         increase the memory utilization as
602                                         the struct would be in each open file,
603                                         but this should give enough time to
604                                         clear the socket */
605                                         cFYI(DBG2, "close delay, write pending");
606                                         msleep(timeout);
607                                         timeout *= 4;
608                                 }
609                                 if (!pTcon->need_reconnect &&
610                                     !pSMBFile->invalidHandle)
611                                         rc = CIFSSMBClose(xid, pTcon,
612                                                   pSMBFile->netfid);
613                         } else
614                                 write_unlock(&GlobalSMBSeslock);
615                 } else
616                         write_unlock(&GlobalSMBSeslock);
617
618                 /* Delete any outstanding lock records.
619                    We'll lose them when the file is closed anyway. */
620                 mutex_lock(&pSMBFile->lock_mutex);
621                 list_for_each_entry_safe(li, tmp, &pSMBFile->llist, llist) {
622                         list_del(&li->llist);
623                         kfree(li);
624                 }
625                 mutex_unlock(&pSMBFile->lock_mutex);
626
627                 write_lock(&GlobalSMBSeslock);
628                 list_del(&pSMBFile->flist);
629                 list_del(&pSMBFile->tlist);
630                 write_unlock(&GlobalSMBSeslock);
631                 cifsFileInfo_put(file->private_data);
632                 file->private_data = NULL;
633         } else
634                 rc = -EBADF;
635
636         read_lock(&GlobalSMBSeslock);
637         if (list_empty(&(CIFS_I(inode)->openFileList))) {
638                 cFYI(1, "closing last open instance for inode %p", inode);
639                 /* if the file is not open we do not know if we can cache info
640                    on this inode, much less write behind and read ahead */
641                 CIFS_I(inode)->clientCanCacheRead = false;
642                 CIFS_I(inode)->clientCanCacheAll  = false;
643         }
644         read_unlock(&GlobalSMBSeslock);
645         if ((rc == 0) && CIFS_I(inode)->write_behind_rc)
646                 rc = CIFS_I(inode)->write_behind_rc;
647         FreeXid(xid);
648         return rc;
649 }
650
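/*
 * Close a directory search handle: send CIFSFindClose for an
 * uncompleted readdir and release the buffered search results.
 */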
651 int cifs_closedir(struct inode *inode, struct file *file)
652 {
653         int rc = 0;
654         int xid;
655         struct cifsFileInfo *pCFileStruct = file->private_data;
656         char *ptmp;
657
658         cFYI(1, "Closedir inode = 0x%p", inode);
659
660         xid = GetXid();
661
662         if (pCFileStruct) {
663                 struct cifsTconInfo *pTcon = pCFileStruct->tcon;
664
665                 cFYI(1, "Freeing private data in close dir");
666                 write_lock(&GlobalSMBSeslock);
667                 if (!pCFileStruct->srch_inf.endOfSearch &&
668                     !pCFileStruct->invalidHandle) {
669                         pCFileStruct->invalidHandle = true;
670                         write_unlock(&GlobalSMBSeslock);
671                         rc = CIFSFindClose(xid, pTcon, pCFileStruct->netfid);
672                         cFYI(1, "Closing uncompleted readdir with rc %d",
673                                  rc);
674                         /* not much we can do if it fails anyway, ignore rc */
675                         rc = 0;
676                 } else
677                         write_unlock(&GlobalSMBSeslock);
678                 ptmp = pCFileStruct->srch_inf.ntwrk_buf_start;
679                 if (ptmp) {
680                         cFYI(1, "closedir free smb buf in srch struct");
681                         pCFileStruct->srch_inf.ntwrk_buf_start = NULL;
682                         if (pCFileStruct->srch_inf.smallBuf)
683                                 cifs_small_buf_release(ptmp);
684                         else
685                                 cifs_buf_release(ptmp);
686                 }
687                 kfree(file->private_data);
688                 file->private_data = NULL;
689         }
690         /* BB can we lock the filestruct while this is going on? */
691         FreeXid(xid);
692         return rc;
693 }
694
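/*
 * Record a byte-range lock on the handle's local lock list; entries
 * are removed again when the range is unlocked or the file is closed.
 */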
695 static int store_file_lock(struct cifsFileInfo *fid, __u64 len,
696                                 __u64 offset, __u8 lockType)
697 {
698         struct cifsLockInfo *li =
699                 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
700         if (li == NULL)
701                 return -ENOMEM;
702         li->offset = offset;
703         li->length = len;
704         li->type = lockType;
705         mutex_lock(&fid->lock_mutex);
706         list_add(&li->llist, &fid->llist);
707         mutex_unlock(&fid->lock_mutex);
708         return 0;
709 }
710
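/*
 * Handle byte-range lock and unlock requests (including F_GETLK
 * queries).  POSIX lock semantics are used when the server supports
 * the CIFS POSIX extensions and posix brlocks are not disabled on the
 * mount; otherwise Windows-style locks are sent and tracked locally.
 */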
711 int cifs_lock(struct file *file, int cmd, struct file_lock *pfLock)
712 {
713         int rc, xid;
714         __u32 numLock = 0;
715         __u32 numUnlock = 0;
716         __u64 length;
717         bool wait_flag = false;
718         struct cifs_sb_info *cifs_sb;
719         struct cifsTconInfo *tcon;
720         __u16 netfid;
721         __u8 lockType = LOCKING_ANDX_LARGE_FILES;
722         bool posix_locking = 0;
723
724         length = 1 + pfLock->fl_end - pfLock->fl_start;
725         rc = -EACCES;
726         xid = GetXid();
727
728         cFYI(1, "Lock parm: 0x%x flockflags: "
729                  "0x%x flocktype: 0x%x start: %lld end: %lld",
730                 cmd, pfLock->fl_flags, pfLock->fl_type, pfLock->fl_start,
731                 pfLock->fl_end);
732
733         if (pfLock->fl_flags & FL_POSIX)
734                 cFYI(1, "Posix");
735         if (pfLock->fl_flags & FL_FLOCK)
736                 cFYI(1, "Flock");
737         if (pfLock->fl_flags & FL_SLEEP) {
738                 cFYI(1, "Blocking lock");
739                 wait_flag = true;
740         }
741         if (pfLock->fl_flags & FL_ACCESS)
742                 cFYI(1, "Process suspended by mandatory locking - "
743                          "not implemented yet");
744         if (pfLock->fl_flags & FL_LEASE)
745                 cFYI(1, "Lease on file - not implemented yet");
746         if (pfLock->fl_flags &
747             (~(FL_POSIX | FL_FLOCK | FL_SLEEP | FL_ACCESS | FL_LEASE)))
748                 cFYI(1, "Unknown lock flags 0x%x", pfLock->fl_flags);
749
750         if (pfLock->fl_type == F_WRLCK) {
751                 cFYI(1, "F_WRLCK ");
752                 numLock = 1;
753         } else if (pfLock->fl_type == F_UNLCK) {
754                 cFYI(1, "F_UNLCK");
755                 numUnlock = 1;
756                 /* Check if unlock includes more than
757                 one lock range */
758         } else if (pfLock->fl_type == F_RDLCK) {
759                 cFYI(1, "F_RDLCK");
760                 lockType |= LOCKING_ANDX_SHARED_LOCK;
761                 numLock = 1;
762         } else if (pfLock->fl_type == F_EXLCK) {
763                 cFYI(1, "F_EXLCK");
764                 numLock = 1;
765         } else if (pfLock->fl_type == F_SHLCK) {
766                 cFYI(1, "F_SHLCK");
767                 lockType |= LOCKING_ANDX_SHARED_LOCK;
768                 numLock = 1;
769         } else
770                 cFYI(1, "Unknown type of lock");
771
772         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
773
774         if (file->private_data == NULL) {
775                 rc = -EBADF;
776                 FreeXid(xid);
777                 return rc;
778         }
779         tcon = ((struct cifsFileInfo *)file->private_data)->tcon;
780         netfid = ((struct cifsFileInfo *)file->private_data)->netfid;
781
782         if ((tcon->ses->capabilities & CAP_UNIX) &&
783             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
784             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
785                 posix_locking = 1;
786         /* BB add code here to normalize offset and length to
787         account for negative length which we can not accept over the
788         wire */
789         if (IS_GETLK(cmd)) {
790                 if (posix_locking) {
791                         int posix_lock_type;
792                         if (lockType & LOCKING_ANDX_SHARED_LOCK)
793                                 posix_lock_type = CIFS_RDLCK;
794                         else
795                                 posix_lock_type = CIFS_WRLCK;
796                         rc = CIFSSMBPosixLock(xid, tcon, netfid, 1 /* get */,
797                                         length, pfLock,
798                                         posix_lock_type, wait_flag);
799                         FreeXid(xid);
800                         return rc;
801                 }
802
803                 /* BB we could chain these into one lock request BB */
804                 rc = CIFSSMBLock(xid, tcon, netfid, length, pfLock->fl_start,
805                                  0, 1, lockType, 0 /* wait flag */ );
806                 if (rc == 0) {
807                         rc = CIFSSMBLock(xid, tcon, netfid, length,
808                                          pfLock->fl_start, 1 /* numUnlock */ ,
809                                          0 /* numLock */ , lockType,
810                                          0 /* wait flag */ );
811                         pfLock->fl_type = F_UNLCK;
812                         if (rc != 0)
813                                 cERROR(1, "Error unlocking previously locked "
814                                            "range %d during test of lock", rc);
815                         rc = 0;
816
817                 } else {
818                         /* if rc == ERR_SHARING_VIOLATION ? */
819                         rc = 0;
820
821                         if (lockType & LOCKING_ANDX_SHARED_LOCK) {
822                                 pfLock->fl_type = F_WRLCK;
823                         } else {
824                                 rc = CIFSSMBLock(xid, tcon, netfid, length,
825                                         pfLock->fl_start, 0, 1,
826                                         lockType | LOCKING_ANDX_SHARED_LOCK,
827                                         0 /* wait flag */);
828                                 if (rc == 0) {
829                                         rc = CIFSSMBLock(xid, tcon, netfid,
830                                                 length, pfLock->fl_start, 1, 0,
831                                                 lockType |
832                                                 LOCKING_ANDX_SHARED_LOCK,
833                                                 0 /* wait flag */);
834                                         pfLock->fl_type = F_RDLCK;
835                                         if (rc != 0)
836                                                 cERROR(1, "Error unlocking "
837                                                 "previously locked range %d "
838                                                 "during test of lock", rc);
839                                         rc = 0;
840                                 } else {
841                                         pfLock->fl_type = F_WRLCK;
842                                         rc = 0;
843                                 }
844                         }
845                 }
846
847                 FreeXid(xid);
848                 return rc;
849         }
850
851         if (!numLock && !numUnlock) {
852                 /* if no lock or unlock then nothing
853                 to do since we do not know what it is */
854                 FreeXid(xid);
855                 return -EOPNOTSUPP;
856         }
857
858         if (posix_locking) {
859                 int posix_lock_type;
860                 if (lockType & LOCKING_ANDX_SHARED_LOCK)
861                         posix_lock_type = CIFS_RDLCK;
862                 else
863                         posix_lock_type = CIFS_WRLCK;
864
865                 if (numUnlock == 1)
866                         posix_lock_type = CIFS_UNLCK;
867
868                 rc = CIFSSMBPosixLock(xid, tcon, netfid, 0 /* set */,
869                                       length, pfLock,
870                                       posix_lock_type, wait_flag);
871         } else {
872                 struct cifsFileInfo *fid = file->private_data;
873
874                 if (numLock) {
875                         rc = CIFSSMBLock(xid, tcon, netfid, length,
876                                         pfLock->fl_start,
877                                         0, numLock, lockType, wait_flag);
878
879                         if (rc == 0) {
880                                 /* For Windows locks we must store them. */
881                                 rc = store_file_lock(fid, length,
882                                                 pfLock->fl_start, lockType);
883                         }
884                 } else if (numUnlock) {
885                         /* For each stored lock that this unlock overlaps
886                            completely, unlock it. */
887                         int stored_rc = 0;
888                         struct cifsLockInfo *li, *tmp;
889
890                         rc = 0;
891                         mutex_lock(&fid->lock_mutex);
892                         list_for_each_entry_safe(li, tmp, &fid->llist, llist) {
893                                 if (pfLock->fl_start <= li->offset &&
894                                                 (pfLock->fl_start + length) >=
895                                                 (li->offset + li->length)) {
896                                         stored_rc = CIFSSMBLock(xid, tcon,
897                                                         netfid,
898                                                         li->length, li->offset,
899                                                         1, 0, li->type, false);
900                                         if (stored_rc)
901                                                 rc = stored_rc;
902                                         else {
903                                                 list_del(&li->llist);
904                                                 kfree(li);
905                                         }
906                                 }
907                         }
908                         mutex_unlock(&fid->lock_mutex);
909                 }
910         }
911
912         if (pfLock->fl_flags & FL_POSIX)
913                 posix_lock_file_wait(file, pfLock);
914         FreeXid(xid);
915         return rc;
916 }
917
918 /*
919  * Set the timeout on write requests past EOF. For some servers (Windows)
920  * these calls can be very long.
921  *
922  * If we're writing >10M past the EOF we give a 180s timeout. Anything less
923  * than that gets a 45s timeout. Writes not past EOF get 15s timeouts.
924  * The 10M cutoff is totally arbitrary. A better scheme for this would be
925  * welcome if someone wants to suggest one.
926  *
927  * We may be able to do a better job with this if there were some way to
928  * declare that a file should be sparse.
929  */
930 static int
931 cifs_write_timeout(struct cifsInodeInfo *cifsi, loff_t offset)
932 {
933         if (offset <= cifsi->server_eof)
934                 return CIFS_STD_OP;
935         else if (offset > (cifsi->server_eof + (10 * 1024 * 1024)))
936                 return CIFS_VLONG_OP;
937         else
938                 return CIFS_LONG_OP;
939 }
940
941 /* update the file size (if needed) after a write */
942 static void
943 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
944                       unsigned int bytes_written)
945 {
946         loff_t end_of_write = offset + bytes_written;
947
948         if (end_of_write > cifsi->server_eof)
949                 cifsi->server_eof = end_of_write;
950 }
951
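/*
 * Write data from a user-space buffer to the server at *poffset,
 * reopening an invalidated handle and retrying on -EAGAIN.  Updates
 * the cached server EOF and i_size, and returns the number of bytes
 * written (or an error if nothing was written).
 */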
952 ssize_t cifs_user_write(struct file *file, const char __user *write_data,
953         size_t write_size, loff_t *poffset)
954 {
955         int rc = 0;
956         unsigned int bytes_written = 0;
957         unsigned int total_written;
958         struct cifs_sb_info *cifs_sb;
959         struct cifsTconInfo *pTcon;
960         int xid, long_op;
961         struct cifsFileInfo *open_file;
962         struct cifsInodeInfo *cifsi = CIFS_I(file->f_path.dentry->d_inode);
963
964         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
965
966         /* cFYI(1, " write %d bytes to offset %lld of %s", write_size,
967            *poffset, file->f_path.dentry->d_name.name); */
968
969         if (file->private_data == NULL)
970                 return -EBADF;
971
972         open_file = file->private_data;
973         pTcon = open_file->tcon;
974
975         rc = generic_write_checks(file, poffset, &write_size, 0);
976         if (rc)
977                 return rc;
978
979         xid = GetXid();
980
981         long_op = cifs_write_timeout(cifsi, *poffset);
982         for (total_written = 0; write_size > total_written;
983              total_written += bytes_written) {
984                 rc = -EAGAIN;
985                 while (rc == -EAGAIN) {
986                         if (file->private_data == NULL) {
987                                 /* file has been closed on us */
988                                 FreeXid(xid);
989                         /* if we have gotten here we have written some data
990                            and blocked, and the file has been freed on us while
991                            we blocked so return what we managed to write */
992                                 return total_written;
993                         }
994                         if (open_file->closePend) {
995                                 FreeXid(xid);
996                                 if (total_written)
997                                         return total_written;
998                                 else
999                                         return -EBADF;
1000                         }
1001                         if (open_file->invalidHandle) {
1002                                 /* we could deadlock if we called
1003                                    filemap_fdatawait from here so tell
1004                                    reopen_file not to flush data to server
1005                                    now */
1006                                 rc = cifs_reopen_file(file, false);
1007                                 if (rc != 0)
1008                                         break;
1009                         }
1010
1011                         rc = CIFSSMBWrite(xid, pTcon,
1012                                 open_file->netfid,
1013                                 min_t(const int, cifs_sb->wsize,
1014                                       write_size - total_written),
1015                                 *poffset, &bytes_written,
1016                                 NULL, write_data + total_written, long_op);
1017                 }
1018                 if (rc || (bytes_written == 0)) {
1019                         if (total_written)
1020                                 break;
1021                         else {
1022                                 FreeXid(xid);
1023                                 return rc;
1024                         }
1025                 } else {
1026                         cifs_update_eof(cifsi, *poffset, bytes_written);
1027                         *poffset += bytes_written;
1028                 }
1029                 long_op = CIFS_STD_OP; /* subsequent writes fast -
1030                                     15 seconds is plenty */
1031         }
1032
1033         cifs_stats_bytes_written(pTcon, total_written);
1034
1035         /* since the write may have blocked check these pointers again */
1036         if ((file->f_path.dentry) && (file->f_path.dentry->d_inode)) {
1037                 struct inode *inode = file->f_path.dentry->d_inode;
1038 /* Do not update local mtime - server will set its actual value on write
1039  *              inode->i_ctime = inode->i_mtime =
1040  *                      current_fs_time(inode->i_sb);*/
1041                 if (total_written > 0) {
1042                         spin_lock(&inode->i_lock);
1043                         if (*poffset > file->f_path.dentry->d_inode->i_size)
1044                                 i_size_write(file->f_path.dentry->d_inode,
1045                                         *poffset);
1046                         spin_unlock(&inode->i_lock);
1047                 }
1048                 mark_inode_dirty_sync(file->f_path.dentry->d_inode);
1049         }
1050         FreeXid(xid);
1051         return total_written;
1052 }
1053
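/*
 * Kernel-buffer counterpart of cifs_user_write: loops sending SMB
 * writes, using the iovec-based CIFSSMBWrite2 when the experimental
 * code is enabled or signing is not in use.
 */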
1054 static ssize_t cifs_write(struct file *file, const char *write_data,
1055                           size_t write_size, loff_t *poffset)
1056 {
1057         int rc = 0;
1058         unsigned int bytes_written = 0;
1059         unsigned int total_written;
1060         struct cifs_sb_info *cifs_sb;
1061         struct cifsTconInfo *pTcon;
1062         int xid, long_op;
1063         struct cifsFileInfo *open_file;
1064         struct cifsInodeInfo *cifsi = CIFS_I(file->f_path.dentry->d_inode);
1065
1066         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1067
1068         cFYI(1, "write %zd bytes to offset %lld of %s", write_size,
1069            *poffset, file->f_path.dentry->d_name.name);
1070
1071         if (file->private_data == NULL)
1072                 return -EBADF;
1073         open_file = file->private_data;
1074         pTcon = open_file->tcon;
1075
1076         xid = GetXid();
1077
1078         long_op = cifs_write_timeout(cifsi, *poffset);
1079         for (total_written = 0; write_size > total_written;
1080              total_written += bytes_written) {
1081                 rc = -EAGAIN;
1082                 while (rc == -EAGAIN) {
1083                         if (file->private_data == NULL) {
1084                                 /* file has been closed on us */
1085                                 FreeXid(xid);
1086                         /* if we have gotten here we have written some data
1087                            and blocked, and the file has been freed on us
1088                            while we blocked so return what we managed to
1089                            write */
1090                                 return total_written;
1091                         }
1092                         if (open_file->closePend) {
1093                                 FreeXid(xid);
1094                                 if (total_written)
1095                                         return total_written;
1096                                 else
1097                                         return -EBADF;
1098                         }
1099                         if (open_file->invalidHandle) {
1100                                 /* we could deadlock if we called
1101                                    filemap_fdatawait from here so tell
1102                                    reopen_file not to flush data to
1103                                    server now */
1104                                 rc = cifs_reopen_file(file, false);
1105                                 if (rc != 0)
1106                                         break;
1107                         }
1108                         if (experimEnabled || (pTcon->ses->server &&
1109                                 ((pTcon->ses->server->secMode &
1110                                 (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED))
1111                                 == 0))) {
1112                                 struct kvec iov[2];
1113                                 unsigned int len;
1114
1115                                 len = min((size_t)cifs_sb->wsize,
1116                                           write_size - total_written);
1117                                 /* iov[0] is reserved for smb header */
1118                                 iov[1].iov_base = (char *)write_data +
1119                                                   total_written;
1120                                 iov[1].iov_len = len;
1121                                 rc = CIFSSMBWrite2(xid, pTcon,
1122                                                 open_file->netfid, len,
1123                                                 *poffset, &bytes_written,
1124                                                 iov, 1, long_op);
1125                         } else
1126                                 rc = CIFSSMBWrite(xid, pTcon,
1127                                          open_file->netfid,
1128                                          min_t(const int, cifs_sb->wsize,
1129                                                write_size - total_written),
1130                                          *poffset, &bytes_written,
1131                                          write_data + total_written,
1132                                          NULL, long_op);
1133                 }
1134                 if (rc || (bytes_written == 0)) {
1135                         if (total_written)
1136                                 break;
1137                         else {
1138                                 FreeXid(xid);
1139                                 return rc;
1140                         }
1141                 } else {
1142                         cifs_update_eof(cifsi, *poffset, bytes_written);
1143                         *poffset += bytes_written;
1144                 }
1145                 long_op = CIFS_STD_OP; /* subsequent writes fast -
1146                                     15 seconds is plenty */
1147         }
1148
1149         cifs_stats_bytes_written(pTcon, total_written);
1150
1151         /* since the write may have blocked check these pointers again */
1152         if ((file->f_path.dentry) && (file->f_path.dentry->d_inode)) {
1153 /*BB We could make this contingent on superblock ATIME flag too */
1154 /*              file->f_path.dentry->d_inode->i_ctime =
1155                 file->f_path.dentry->d_inode->i_mtime = CURRENT_TIME;*/
1156                 if (total_written > 0) {
1157                         spin_lock(&file->f_path.dentry->d_inode->i_lock);
1158                         if (*poffset > file->f_path.dentry->d_inode->i_size)
1159                                 i_size_write(file->f_path.dentry->d_inode,
1160                                              *poffset);
1161                         spin_unlock(&file->f_path.dentry->d_inode->i_lock);
1162                 }
1163                 mark_inode_dirty_sync(file->f_path.dentry->d_inode);
1164         }
1165         FreeXid(xid);
1166         return total_written;
1167 }
1168
1169 #ifdef CONFIG_CIFS_EXPERIMENTAL
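/*
 * Find an open handle on this inode that can be used for reading:
 * skip close-pending and invalidated handles and return the first
 * readable one with its reference count bumped, or NULL.
 */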
1170 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode)
1171 {
1172         struct cifsFileInfo *open_file = NULL;
1173
1174         read_lock(&GlobalSMBSeslock);
1175         /* we could simply get the first_list_entry since write-only entries
1176            are always at the end of the list but since the first entry might
1177            have a close pending, we go through the whole list */
1178         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1179                 if (open_file->closePend)
1180                         continue;
1181                 if (open_file->pfile && ((open_file->pfile->f_flags & O_RDWR) ||
1182                     (open_file->pfile->f_flags & O_RDONLY))) {
1183                         if (!open_file->invalidHandle) {
1184                                 /* found a good file */
1185                                 /* lock it so it will not be closed on us */
1186                                 cifsFileInfo_get(open_file);
1187                                 read_unlock(&GlobalSMBSeslock);
1188                                 return open_file;
1189                         } /* else might as well continue, and look for
1190                              another, or simply have the caller reopen it
1191                              again rather than trying to fix this handle */
1192                 } else /* write only file */
1193                         break; /* write only files are last so must be done */
1194         }
1195         read_unlock(&GlobalSMBSeslock);
1196         return NULL;
1197 }
1198 #endif
1199
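/*
 * Find an open handle on this inode that can be used for writing,
 * preferring one owned by the current task.  Invalidated handles are
 * reopened if possible; the returned cifsFileInfo has its reference
 * count bumped, or NULL is returned if no writable handle is usable.
 */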
1200 struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode)
1201 {
1202         struct cifsFileInfo *open_file;
1203         bool any_available = false;
1204         int rc;
1205
1206         /* Having a null inode here (because mapping->host was set to zero by
1207         the VFS or MM) should not happen, but we had reports of an oops (due to
1208         it being zero) during stress test cases, so we need to check for it */
1209
1210         if (cifs_inode == NULL) {
1211                 cERROR(1, "Null inode passed to cifs_writeable_file");
1212                 dump_stack();
1213                 return NULL;
1214         }
1215
1216         read_lock(&GlobalSMBSeslock);
1217 refind_writable:
1218         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1219                 if (open_file->closePend ||
1220                     (!any_available && open_file->pid != current->tgid))
1221                         continue;
1222
1223                 if (open_file->pfile &&
1224                     ((open_file->pfile->f_flags & O_RDWR) ||
1225                      (open_file->pfile->f_flags & O_WRONLY))) {
1226                         cifsFileInfo_get(open_file);
1227
1228                         if (!open_file->invalidHandle) {
1229                                 /* found a good writable file */
1230                                 read_unlock(&GlobalSMBSeslock);
1231                                 return open_file;
1232                         }
1233
1234                         read_unlock(&GlobalSMBSeslock);
1235                         /* Had to unlock since following call can block */
1236                         rc = cifs_reopen_file(open_file->pfile, false);
1237                         if (!rc) {
1238                                 if (!open_file->closePend)
1239                                         return open_file;
1240                                 else { /* start over in case this was deleted */
1241                                        /* since the list could be modified */
1242                                         read_lock(&GlobalSMBSeslock);
1243                                         cifsFileInfo_put(open_file);
1244                                         goto refind_writable;
1245                                 }
1246                         }
1247
1248                         /* if the reopen fails, try another handle if
1249                         possible (we cannot do this if closePend is set,
1250                         since the list could have been modified, in which
1251                         case we have to start over at the beginning of the
1252                         list).  Note that it would be bad to hold up
1253                         writepages here (rather than in the caller) with
1254                         continuous retries */
1255                         cFYI(1, "wp failed on reopen file");
1256                         read_lock(&GlobalSMBSeslock);
1257                         /* can not use this handle, no write
1258                            pending on this one after all */
1259                         cifsFileInfo_put(open_file);
1260
1261                         if (open_file->closePend) /* list could have changed */
1262                                 goto refind_writable;
1263                         /* else we simply continue to the next entry. Thus
1264                            we do not loop on reopen errors.  If we
1265                            cannot reopen the file, for example if we
1266                            reconnected to a server with another client
1267                            racing to delete or lock the file, we would make
1268                            no progress if we restarted at the beginning
1269                            of the loop here. */
1270                 }
1271         }
1272         /* couldn't find a usable FH with the same pid, try any available */
1273         if (!any_available) {
1274                 any_available = true;
1275                 goto refind_writable;
1276         }
1277         read_unlock(&GlobalSMBSeslock);
1278         return NULL;
1279 }
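
/*
 * A minimal sketch (hypothetical helper, illustration only) of the
 * calling pattern the lookup above expects: the returned cifsFileInfo
 * is refcounted, so every successful find_writable_file() must be
 * paired with cifsFileInfo_put(), as cifs_partialpagewrite() does
 * further down in this file.
 */
static inline int example_write_behind(struct inode *inode, char *data,
                                       unsigned int len, loff_t *poffset)
{
        struct cifsFileInfo *open_file;
        int bytes_written;

        open_file = find_writable_file(CIFS_I(inode));
        if (open_file == NULL)
                return -EIO;    /* no handle cached open for write */

        bytes_written = cifs_write(open_file->pfile, data, len, poffset);
        cifsFileInfo_put(open_file);    /* drop the reference taken above */

        return bytes_written < 0 ? bytes_written : 0;
}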
1280
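/*
 * Write the byte range [from, to) of a pagecache page back to the
 * server, using whichever writable handle is still cached for the
 * inode.  Called from cifs_writepage() for the single-page writeback
 * path.
 */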
1281 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1282 {
1283         struct address_space *mapping = page->mapping;
1284         loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
1285         char *write_data;
1286         int rc = -EFAULT;
1287         int bytes_written = 0;
1288         struct cifs_sb_info *cifs_sb;
1289         struct inode *inode;
1290         struct cifsFileInfo *open_file;
1291
1292         if (!mapping || !mapping->host)
1293                 return -EFAULT;
1294
1295         inode = page->mapping->host;
1296         cifs_sb = CIFS_SB(inode->i_sb);
1297
1298         offset += (loff_t)from;
1299         write_data = kmap(page);
1300         write_data += from;
1301
1302         if ((to > PAGE_CACHE_SIZE) || (from > to)) {
1303                 kunmap(page);
1304                 return -EIO;
1305         }
1306
1307         /* racing with truncate? */
1308         if (offset > mapping->host->i_size) {
1309                 kunmap(page);
1310                 return 0; /* don't care */
1311         }
1312
1313         /* check to make sure that we are not extending the file */
1314         if (mapping->host->i_size - offset < (loff_t)to)
1315                 to = (unsigned)(mapping->host->i_size - offset);
1316
1317         open_file = find_writable_file(CIFS_I(mapping->host));
1318         if (open_file) {
1319                 bytes_written = cifs_write(open_file->pfile, write_data,
1320                                            to-from, &offset);
1321                 cifsFileInfo_put(open_file);
1322                 /* Does mm or vfs already set times? */
1323                 inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb);
1324                 if ((bytes_written > 0) && (offset))
1325                         rc = 0;
1326                 else if (bytes_written < 0)
1327                         rc = bytes_written;
1328         } else {
1329                 cFYI(1, "No writeable filehandles for inode");
1330                 rc = -EIO;
1331         }
1332
1333         kunmap(page);
1334         return rc;
1335 }
1336
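/*
 * Write back dirty pages for this mapping in batches: contiguous dirty
 * pages found in the pagevec are kmapped into iov[1..n] (iov[0] is
 * reserved for the SMB header) and each batch is sent with a single
 * CIFSSMBWrite2() call once adding another page would exceed the
 * negotiated wsize.
 */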
1337 static int cifs_writepages(struct address_space *mapping,
1338                            struct writeback_control *wbc)
1339 {
1340         struct backing_dev_info *bdi = mapping->backing_dev_info;
1341         unsigned int bytes_to_write;
1342         unsigned int bytes_written;
1343         struct cifs_sb_info *cifs_sb;
1344         int done = 0;
1345         pgoff_t end;
1346         pgoff_t index;
1347         int range_whole = 0;
1348         struct kvec *iov;
1349         int len;
1350         int n_iov = 0;
1351         pgoff_t next;
1352         int nr_pages;
1353         __u64 offset = 0;
1354         struct cifsFileInfo *open_file;
1355         struct cifsTconInfo *tcon;
1356         struct cifsInodeInfo *cifsi = CIFS_I(mapping->host);
1357         struct page *page;
1358         struct pagevec pvec;
1359         int rc = 0;
1360         int scanned = 0;
1361         int xid, long_op;
1362
1363         /*
1364          * BB: Is this meaningful for a non-block-device file system?
1365          * If it is, we should test it again after we do I/O
1366          */
1367         if (wbc->nonblocking && bdi_write_congested(bdi)) {
1368                 wbc->encountered_congestion = 1;
1369                 return 0;
1370         }
1371
1372         cifs_sb = CIFS_SB(mapping->host->i_sb);
1373
1374         /*
1375          * If wsize is smaller than the page cache size, default to writing
1376          * one page at a time via cifs_writepage
1377          */
1378         if (cifs_sb->wsize < PAGE_CACHE_SIZE)
1379                 return generic_writepages(mapping, wbc);
1380
1381         iov = kmalloc(32 * sizeof(struct kvec), GFP_KERNEL);
1382         if (iov == NULL)
1383                 return generic_writepages(mapping, wbc);
1384
1385         /*
1386          * if there's no open file, then this is likely to fail too,
1387          * but it'll at least handle the return. Maybe it should be
1388          * a BUG() instead?
1389          */
1390         open_file = find_writable_file(CIFS_I(mapping->host));
1391         if (!open_file) {
1392                 kfree(iov);
1393                 return generic_writepages(mapping, wbc);
1394         }
1395
1396         tcon = open_file->tcon;
1397         if (!experimEnabled && tcon->ses->server->secMode &
1398                         (SECMODE_SIGN_REQUIRED | SECMODE_SIGN_ENABLED)) {
1399                 cifsFileInfo_put(open_file);
1400                 return generic_writepages(mapping, wbc);
1401         }
1402         cifsFileInfo_put(open_file);
1403
1404         xid = GetXid();
1405
1406         pagevec_init(&pvec, 0);
1407         if (wbc->range_cyclic) {
1408                 index = mapping->writeback_index; /* Start from prev offset */
1409                 end = -1;
1410         } else {
1411                 index = wbc->range_start >> PAGE_CACHE_SHIFT;
1412                 end = wbc->range_end >> PAGE_CACHE_SHIFT;
1413                 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
1414                         range_whole = 1;
1415                 scanned = 1;
1416         }
1417 retry:
1418         while (!done && (index <= end) &&
1419                (nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
1420                         PAGECACHE_TAG_DIRTY,
1421                         min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1))) {
1422                 int first;
1423                 unsigned int i;
1424
1425                 first = -1;
1426                 next = 0;
1427                 n_iov = 0;
1428                 bytes_to_write = 0;
1429
1430                 for (i = 0; i < nr_pages; i++) {
1431                         page = pvec.pages[i];
1432                         /*
1433                          * At this point we hold neither mapping->tree_lock nor
1434                          * lock on the page itself: the page may be truncated or
1435                          * invalidated (changing page->mapping to NULL), or even
1436                          * swizzled back from swapper_space to tmpfs file
1437                          * mapping
1438                          */
1439
1440                         if (first < 0)
1441                                 lock_page(page);
1442                         else if (!trylock_page(page))
1443                                 break;
1444
1445                         if (unlikely(page->mapping != mapping)) {
1446                                 unlock_page(page);
1447                                 break;
1448                         }
1449
1450                         if (!wbc->range_cyclic && page->index > end) {
1451                                 done = 1;
1452                                 unlock_page(page);
1453                                 break;
1454                         }
1455
1456                         if (next && (page->index != next)) {
1457                                 /* Not next consecutive page */
1458                                 unlock_page(page);
1459                                 break;
1460                         }
1461
1462                         if (wbc->sync_mode != WB_SYNC_NONE)
1463                                 wait_on_page_writeback(page);
1464
1465                         if (PageWriteback(page) ||
1466                                         !clear_page_dirty_for_io(page)) {
1467                                 unlock_page(page);
1468                                 break;
1469                         }
1470
1471                         /*
1472                          * This actually clears the dirty bit in the radix tree.
1473                          * See cifs_writepage() for more commentary.
1474                          */
1475                         set_page_writeback(page);
1476
1477                         if (page_offset(page) >= mapping->host->i_size) {
1478                                 done = 1;
1479                                 unlock_page(page);
1480                                 end_page_writeback(page);
1481                                 break;
1482                         }
1483
1484                         /*
1485                          * BB can we get rid of this?  pages are held by pvec
1486                          */
1487                         page_cache_get(page);
1488
1489                         len = min(mapping->host->i_size - page_offset(page),
1490                                   (loff_t)PAGE_CACHE_SIZE);
1491
1492                         /* reserve iov[0] for the smb header */
1493                         n_iov++;
1494                         iov[n_iov].iov_base = kmap(page);
1495                         iov[n_iov].iov_len = len;
1496                         bytes_to_write += len;
1497
1498                         if (first < 0) {
1499                                 first = i;
1500                                 offset = page_offset(page);
1501                         }
1502                         next = page->index + 1;
1503                         if (bytes_to_write + PAGE_CACHE_SIZE > cifs_sb->wsize)
1504                                 break;
1505                 }
1506                 if (n_iov) {
1507                         open_file = find_writable_file(CIFS_I(mapping->host));
1508                         if (!open_file) {
1509                                 cERROR(1, "No writable handles for inode");
1510                                 rc = -EBADF;
1511                         } else {
1512                                 long_op = cifs_write_timeout(cifsi, offset);
1513                                 rc = CIFSSMBWrite2(xid, tcon, open_file->netfid,
1514                                                    bytes_to_write, offset,
1515                                                    &bytes_written, iov, n_iov,
1516                                                    long_op);
1517                                 cifsFileInfo_put(open_file);
1518                                 cifs_update_eof(cifsi, offset, bytes_written);
1519                         }
1520
1521                         if (rc || bytes_written < bytes_to_write) {
1522                                 cERROR(1, "Write2 ret %d, wrote %d",
1523                                           rc, bytes_written);
1524                                 /* BB what if continued retry is
1525                                    requested via mount flags? */
1526                                 if (rc == -ENOSPC)
1527                                         set_bit(AS_ENOSPC, &mapping->flags);
1528                                 else
1529                                         set_bit(AS_EIO, &mapping->flags);
1530                         } else {
1531                                 cifs_stats_bytes_written(tcon, bytes_written);
1532                         }
1533
1534                         for (i = 0; i < n_iov; i++) {
1535                                 page = pvec.pages[first + i];
1536                                 /* Should we also set page error on
1537                                 success rc but too little data written? */
1538                                 /* BB investigate retry logic on temporary
1539                                 server crash cases and how recovery works
1540                                 when page marked as error */
1541                                 if (rc)
1542                                         SetPageError(page);
1543                                 kunmap(page);
1544                                 unlock_page(page);
1545                                 end_page_writeback(page);
1546                                 page_cache_release(page);
1547                         }
1548                         if ((wbc->nr_to_write -= n_iov) <= 0)
1549                                 done = 1;
1550                         index = next;
1551                 } else
1552                         /* Need to re-find the pages we skipped */
1553                         index = pvec.pages[0]->index + 1;
1554
1555                 pagevec_release(&pvec);
1556         }
1557         if (!scanned && !done) {
1558                 /*
1559                  * We hit the last page and there is more work to be done: wrap
1560                  * back to the start of the file
1561                  */
1562                 scanned = 1;
1563                 index = 0;
1564                 goto retry;
1565         }
1566         if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
1567                 mapping->writeback_index = index;
1568
1569         FreeXid(xid);
1570         kfree(iov);
1571         return rc;
1572 }
1573
1574 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
1575 {
1576         int rc = -EFAULT;
1577         int xid;
1578
1579         xid = GetXid();
1580 /* BB add check for wbc flags */
1581         page_cache_get(page);
1582         if (!PageUptodate(page))
1583                 cFYI(1, "ppw - page not up to date");
1584
1585         /*
1586          * Set the "writeback" flag, and clear "dirty" in the radix tree.
1587          *
1588          * A writepage() implementation always needs to do either this,
1589          * or re-dirty the page with "redirty_page_for_writepage()" in
1590          * the case of a failure.
1591          *
1592          * Just unlocking the page will cause the radix tree tag-bits
1593          * to fail to update with the state of the page correctly.
1594          */
1595         set_page_writeback(page);
1596         rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE);
1597         SetPageUptodate(page); /* BB add check for error and Clearuptodate? */
1598         unlock_page(page);
1599         end_page_writeback(page);
1600         page_cache_release(page);
1601         FreeXid(xid);
1602         return rc;
1603 }
1604
1605 static int cifs_write_end(struct file *file, struct address_space *mapping,
1606                         loff_t pos, unsigned len, unsigned copied,
1607                         struct page *page, void *fsdata)
1608 {
1609         int rc;
1610         struct inode *inode = mapping->host;
1611
1612         cFYI(1, "write_end for page %p from pos %lld with %d bytes",
1613                  page, pos, copied);
1614
1615         if (PageChecked(page)) {
1616                 if (copied == len)
1617                         SetPageUptodate(page);
1618                 ClearPageChecked(page);
1619         } else if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
1620                 SetPageUptodate(page);
1621
1622         if (!PageUptodate(page)) {
1623                 char *page_data;
1624                 unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
1625                 int xid;
1626
1627                 xid = GetXid();
1628                 /* this is probably better than directly calling
1629                    cifs_partialpagewrite since in this function the file
1630                    handle is known, which we might as well leverage */
1631                 /* BB check if anything else missing out of ppw
1632                    such as updating last write time */
1633                 page_data = kmap(page);
1634                 rc = cifs_write(file, page_data + offset, copied, &pos);
1635                 /* if (rc < 0) should we set writebehind rc? */
1636                 kunmap(page);
1637
1638                 FreeXid(xid);
1639         } else {
1640                 rc = copied;
1641                 pos += copied;
1642                 set_page_dirty(page);
1643         }
1644
1645         if (rc > 0) {
1646                 spin_lock(&inode->i_lock);
1647                 if (pos > inode->i_size)
1648                         i_size_write(inode, pos);
1649                 spin_unlock(&inode->i_lock);
1650         }
1651
1652         unlock_page(page);
1653         page_cache_release(page);
1654
1655         return rc;
1656 }
1657
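/*
 * Both cifs_fsync() and cifs_flush() below also report errors from
 * earlier write-behind: a failed background write stores its return
 * code in CIFS_I(inode)->write_behind_rc, which is picked up and
 * cleared here so the error is not silently lost.
 */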
1658 int cifs_fsync(struct file *file, int datasync)
1659 {
1660         int xid;
1661         int rc = 0;
1662         struct cifsTconInfo *tcon;
1663         struct cifsFileInfo *smbfile = file->private_data;
1664         struct inode *inode = file->f_path.dentry->d_inode;
1665
1666         xid = GetXid();
1667
1668         cFYI(1, "Sync file - name: %s datasync: 0x%x",
1669                 file->f_path.dentry->d_name.name, datasync);
1670
1671         rc = filemap_write_and_wait(inode->i_mapping);
1672         if (rc == 0) {
1673                 rc = CIFS_I(inode)->write_behind_rc;
1674                 CIFS_I(inode)->write_behind_rc = 0;
1675                 tcon = smbfile->tcon;
1676                 if (!rc && tcon && smbfile &&
1677                    !(CIFS_SB(inode->i_sb)->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC))
1678                         rc = CIFSSMBFlush(xid, tcon, smbfile->netfid);
1679         }
1680
1681         FreeXid(xid);
1682         return rc;
1683 }
1684
1685 /* static void cifs_sync_page(struct page *page)
1686 {
1687         struct address_space *mapping;
1688         struct inode *inode;
1689         unsigned long index = page->index;
1690         unsigned int rpages = 0;
1691         int rc = 0;
1692
1693         cFYI(1, "sync page %p", page);
1694         mapping = page->mapping;
1695         if (!mapping)
1696                 return 0;
1697         inode = mapping->host;
1698         if (!inode)
1699                 return; */
1700
1701 /*      fill in rpages then
1702         result = cifs_pagein_inode(inode, index, rpages); */ /* BB finish */
1703
1704 /*      cFYI(1, "rpages is %d for sync page of Index %ld", rpages, index);
1705
1706 #if 0
1707         if (rc < 0)
1708                 return rc;
1709         return 0;
1710 #endif
1711 } */
1712
1713 /*
1714  * As file closes, flush all cached write data for this inode checking
1715  * for write behind errors.
1716  */
1717 int cifs_flush(struct file *file, fl_owner_t id)
1718 {
1719         struct inode *inode = file->f_path.dentry->d_inode;
1720         int rc = 0;
1721
1722         /* Rather than do the steps manually:
1723            lock the inode for writing
1724            loop through pages looking for write behind data (dirty pages)
1725            coalesce into contiguous 16K (or smaller) chunks to write to server
1726            send to server (prefer in parallel)
1727            deal with writebehind errors
1728            unlock inode for writing
1729            filemap_fdatawrite appears easier for the time being */
1730
1731         rc = filemap_fdatawrite(inode->i_mapping);
1732         /* reset wb rc if we were able to write out dirty pages */
1733         if (!rc) {
1734                 rc = CIFS_I(inode)->write_behind_rc;
1735                 CIFS_I(inode)->write_behind_rc = 0;
1736         }
1737
1738         cFYI(1, "Flush inode %p file %p rc %d", inode, file, rc);
1739
1740         return rc;
1741 }
1742
1743 ssize_t cifs_user_read(struct file *file, char __user *read_data,
1744         size_t read_size, loff_t *poffset)
1745 {
1746         int rc = -EACCES;
1747         unsigned int bytes_read = 0;
1748         unsigned int total_read = 0;
1749         unsigned int current_read_size;
1750         struct cifs_sb_info *cifs_sb;
1751         struct cifsTconInfo *pTcon;
1752         int xid;
1753         struct cifsFileInfo *open_file;
1754         char *smb_read_data;
1755         char __user *current_offset;
1756         struct smb_com_read_rsp *pSMBr;
1757
1758         xid = GetXid();
1759         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1760
1761         if (file->private_data == NULL) {
1762                 rc = -EBADF;
1763                 FreeXid(xid);
1764                 return rc;
1765         }
1766         open_file = file->private_data;
1767         pTcon = open_file->tcon;
1768
1769         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
1770                 cFYI(1, "attempting read on write only file instance");
1771
1772         for (total_read = 0, current_offset = read_data;
1773              read_size > total_read;
1774              total_read += bytes_read, current_offset += bytes_read) {
1775                 current_read_size = min_t(const int, read_size - total_read,
1776                                           cifs_sb->rsize);
1777                 rc = -EAGAIN;
1778                 smb_read_data = NULL;
1779                 while (rc == -EAGAIN) {
1780                         int buf_type = CIFS_NO_BUFFER;
1781                         if ((open_file->invalidHandle) &&
1782                             (!open_file->closePend)) {
1783                                 rc = cifs_reopen_file(file, true);
1784                                 if (rc != 0)
1785                                         break;
1786                         }
1787                         rc = CIFSSMBRead(xid, pTcon,
1788                                          open_file->netfid,
1789                                          current_read_size, *poffset,
1790                                          &bytes_read, &smb_read_data,
1791                                          &buf_type);
1792                         pSMBr = (struct smb_com_read_rsp *)smb_read_data;
1793                         if (smb_read_data) {
1794                                 if (copy_to_user(current_offset,
1795                                                 smb_read_data +
1796                                                 4 /* RFC1001 length field */ +
1797                                                 le16_to_cpu(pSMBr->DataOffset),
1798                                                 bytes_read))
1799                                         rc = -EFAULT;
1800
1801                                 if (buf_type == CIFS_SMALL_BUFFER)
1802                                         cifs_small_buf_release(smb_read_data);
1803                                 else if (buf_type == CIFS_LARGE_BUFFER)
1804                                         cifs_buf_release(smb_read_data);
1805                                 smb_read_data = NULL;
1806                         }
1807                 }
1808                 if (rc || (bytes_read == 0)) {
1809                         if (total_read) {
1810                                 break;
1811                         } else {
1812                                 FreeXid(xid);
1813                                 return rc;
1814                         }
1815                 } else {
1816                         cifs_stats_bytes_read(pTcon, bytes_read);
1817                         *poffset += bytes_read;
1818                 }
1819         }
1820         FreeXid(xid);
1821         return total_read;
1822 }
1823
1824
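/*
 * Unlike cifs_user_read() above, which lets CIFSSMBRead() return the
 * raw SMB response buffer and then copies the payload out with
 * copy_to_user(), cifs_read() passes the caller's kernel buffer in so
 * the data is read directly into it; it is used by the readpage paths
 * below.
 */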
1825 static ssize_t cifs_read(struct file *file, char *read_data, size_t read_size,
1826         loff_t *poffset)
1827 {
1828         int rc = -EACCES;
1829         unsigned int bytes_read = 0;
1830         unsigned int total_read;
1831         unsigned int current_read_size;
1832         struct cifs_sb_info *cifs_sb;
1833         struct cifsTconInfo *pTcon;
1834         int xid;
1835         char *current_offset;
1836         struct cifsFileInfo *open_file;
1837         int buf_type = CIFS_NO_BUFFER;
1838
1839         xid = GetXid();
1840         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1841
1842         if (file->private_data == NULL) {
1843                 rc = -EBADF;
1844                 FreeXid(xid);
1845                 return rc;
1846         }
1847         open_file = file->private_data;
1848         pTcon = open_file->tcon;
1849
1850         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
1851                 cFYI(1, "attempting read on write only file instance");
1852
1853         for (total_read = 0, current_offset = read_data;
1854              read_size > total_read;
1855              total_read += bytes_read, current_offset += bytes_read) {
1856                 current_read_size = min_t(const int, read_size - total_read,
1857                                           cifs_sb->rsize);
1858                 /* For Windows ME and 9x we do not want to request more
1859                 than was negotiated, since the server will refuse the read */
1860                 if ((pTcon->ses) &&
1861                         !(pTcon->ses->capabilities & CAP_LARGE_FILES)) {
1862                         current_read_size = min_t(const int, current_read_size,
1863                                         pTcon->ses->server->maxBuf - 128);
1864                 }
1865                 rc = -EAGAIN;
1866                 while (rc == -EAGAIN) {
1867                         if ((open_file->invalidHandle) &&
1868                             (!open_file->closePend)) {
1869                                 rc = cifs_reopen_file(file, true);
1870                                 if (rc != 0)
1871                                         break;
1872                         }
1873                         rc = CIFSSMBRead(xid, pTcon,
1874                                          open_file->netfid,
1875                                          current_read_size, *poffset,
1876                                          &bytes_read, &current_offset,
1877                                          &buf_type);
1878                 }
1879                 if (rc || (bytes_read == 0)) {
1880                         if (total_read) {
1881                                 break;
1882                         } else {
1883                                 FreeXid(xid);
1884                                 return rc;
1885                         }
1886                 } else {
1887                         cifs_stats_bytes_read(pTcon, bytes_read);
1888                         *poffset += bytes_read;
1889                 }
1890         }
1891         FreeXid(xid);
1892         return total_read;
1893 }
1894
1895 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
1896 {
1897         int rc, xid;
1898
1899         xid = GetXid();
1900         rc = cifs_revalidate_file(file);
1901         if (rc) {
1902                 cFYI(1, "Validation prior to mmap failed, error=%d", rc);
1903                 FreeXid(xid);
1904                 return rc;
1905         }
1906         rc = generic_file_mmap(file, vma);
1907         FreeXid(xid);
1908         return rc;
1909 }
1910
1911
1912 static void cifs_copy_cache_pages(struct address_space *mapping,
1913         struct list_head *pages, int bytes_read, char *data)
1914 {
1915         struct page *page;
1916         char *target;
1917
1918         while (bytes_read > 0) {
1919                 if (list_empty(pages))
1920                         break;
1921
1922                 page = list_entry(pages->prev, struct page, lru);
1923                 list_del(&page->lru);
1924
1925                 if (add_to_page_cache_lru(page, mapping, page->index,
1926                                       GFP_KERNEL)) {
1927                         page_cache_release(page);
1928                         cFYI(1, "Add page cache failed");
1929                         data += PAGE_CACHE_SIZE;
1930                         bytes_read -= PAGE_CACHE_SIZE;
1931                         continue;
1932                 }
1933                 page_cache_release(page);
1934
1935                 target = kmap_atomic(page, KM_USER0);
1936
1937                 if (PAGE_CACHE_SIZE > bytes_read) {
1938                         memcpy(target, data, bytes_read);
1939                         /* zero the tail end of this partial page */
1940                         memset(target + bytes_read, 0,
1941                                PAGE_CACHE_SIZE - bytes_read);
1942                         bytes_read = 0;
1943                 } else {
1944                         memcpy(target, data, PAGE_CACHE_SIZE);
1945                         bytes_read -= PAGE_CACHE_SIZE;
1946                 }
1947                 kunmap_atomic(target, KM_USER0);
1948
1949                 flush_dcache_page(page);
1950                 SetPageUptodate(page);
1951                 unlock_page(page);
1952                 data += PAGE_CACHE_SIZE;
1953
1954                 /* add page to FS-Cache */
1955                 cifs_readpage_to_fscache(mapping->host, page);
1956         }
1957         return;
1958 }
1959
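/*
 * ->readpages(): first satisfy as much as possible from FS-Cache, then
 * walk the remaining readahead list, grouping contiguous pages into
 * requests of at most rsize bytes per CIFSSMBRead() call and copying
 * the returned data into the page cache via cifs_copy_cache_pages()
 * above.
 */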
1960 static int cifs_readpages(struct file *file, struct address_space *mapping,
1961         struct list_head *page_list, unsigned num_pages)
1962 {
1963         int rc = -EACCES;
1964         int xid;
1965         loff_t offset;
1966         struct page *page;
1967         struct cifs_sb_info *cifs_sb;
1968         struct cifsTconInfo *pTcon;
1969         unsigned int bytes_read = 0;
1970         unsigned int read_size, i;
1971         char *smb_read_data = NULL;
1972         struct smb_com_read_rsp *pSMBr;
1973         struct cifsFileInfo *open_file;
1974         int buf_type = CIFS_NO_BUFFER;
1975
1976         xid = GetXid();
1977         if (file->private_data == NULL) {
1978                 rc = -EBADF;
1979                 FreeXid(xid);
1980                 return rc;
1981         }
1982         open_file = file->private_data;
1983         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1984         pTcon = open_file->tcon;
1985
1986         /*
1987          * Reads as many pages as possible from fscache. Returns -ENOBUFS
1988          * immediately if the cookie is negative
1989          */
1990         rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
1991                                          &num_pages);
1992         if (rc == 0)
1993                 goto read_complete;
1994
1995         cFYI(DBG2, "rpages: num pages %d", num_pages);
1996         for (i = 0; i < num_pages; ) {
1997                 unsigned contig_pages;
1998                 struct page *tmp_page;
1999                 unsigned long expected_index;
2000
2001                 if (list_empty(page_list))
2002                         break;
2003
2004                 page = list_entry(page_list->prev, struct page, lru);
2005                 offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
2006
2007                 /* count adjacent pages that we will read into */
2008                 contig_pages = 0;
2009                 expected_index =
2010                         list_entry(page_list->prev, struct page, lru)->index;
2011                 list_for_each_entry_reverse(tmp_page, page_list, lru) {
2012                         if (tmp_page->index == expected_index) {
2013                                 contig_pages++;
2014                                 expected_index++;
2015                         } else
2016                                 break;
2017                 }
2018                 if (contig_pages + i >  num_pages)
2019                         contig_pages = num_pages - i;
2020
2021                 /* for reads over a certain size could initiate async
2022                    read ahead */
2023
2024                 read_size = contig_pages * PAGE_CACHE_SIZE;
2025                 /* Read size needs to be in multiples of one page */
2026                 read_size = min_t(const unsigned int, read_size,
2027                                   cifs_sb->rsize & PAGE_CACHE_MASK);
2028                 cFYI(DBG2, "rpages: read size 0x%x  contiguous pages %d",
2029                                 read_size, contig_pages);
2030                 rc = -EAGAIN;
2031                 while (rc == -EAGAIN) {
2032                         if ((open_file->invalidHandle) &&
2033                             (!open_file->closePend)) {
2034                                 rc = cifs_reopen_file(file, true);
2035                                 if (rc != 0)
2036                                         break;
2037                         }
2038
2039                         rc = CIFSSMBRead(xid, pTcon,
2040                                          open_file->netfid,
2041                                          read_size, offset,
2042                                          &bytes_read, &smb_read_data,
2043                                          &buf_type);
2044                         /* BB more RC checks ? */
2045                         if (rc == -EAGAIN) {
2046                                 if (smb_read_data) {
2047                                         if (buf_type == CIFS_SMALL_BUFFER)
2048                                                 cifs_small_buf_release(smb_read_data);
2049                                         else if (buf_type == CIFS_LARGE_BUFFER)
2050                                                 cifs_buf_release(smb_read_data);
2051                                         smb_read_data = NULL;
2052                                 }
2053                         }
2054                 }
2055                 if ((rc < 0) || (smb_read_data == NULL)) {
2056                         cFYI(1, "Read error in readpages: %d", rc);
2057                         break;
2058                 } else if (bytes_read > 0) {
2059                         task_io_account_read(bytes_read);
2060                         pSMBr = (struct smb_com_read_rsp *)smb_read_data;
2061                         cifs_copy_cache_pages(mapping, page_list, bytes_read,
2062                                 smb_read_data + 4 /* RFC1001 hdr */ +
2063                                 le16_to_cpu(pSMBr->DataOffset));
2064
2065                         i +=  bytes_read >> PAGE_CACHE_SHIFT;
2066                         cifs_stats_bytes_read(pTcon, bytes_read);
2067                         if ((bytes_read & PAGE_CACHE_MASK) != bytes_read) {
2068                                 i++; /* account for partial page */
2069
2070                                 /* server copy of file can have smaller size
2071                                    than client */
2072                                 /* BB do we need to verify this common case ?
2073                                    this case is ok - if we are at server EOF
2074                                    we will hit it on next read */
2075
2076                                 /* break; */
2077                         }
2078                 } else {
2079                         cFYI(1, "No bytes read (%d) at offset %lld . "
2080                                 "Cleaning remaining pages from readahead list",
2081                                 bytes_read, offset);
2082                         /* BB turn off caching and do new lookup on
2083                            file size at server? */
2084                         break;
2085                 }
2086                 if (smb_read_data) {
2087                         if (buf_type == CIFS_SMALL_BUFFER)
2088                                 cifs_small_buf_release(smb_read_data);
2089                         else if (buf_type == CIFS_LARGE_BUFFER)
2090                                 cifs_buf_release(smb_read_data);
2091                         smb_read_data = NULL;
2092                 }
2093                 bytes_read = 0;
2094         }
2095
2096 /* need to free smb_read_data buf before exit */
2097         if (smb_read_data) {
2098                 if (buf_type == CIFS_SMALL_BUFFER)
2099                         cifs_small_buf_release(smb_read_data);
2100                 else if (buf_type == CIFS_LARGE_BUFFER)
2101                         cifs_buf_release(smb_read_data);
2102                 smb_read_data = NULL;
2103         }
2104
2105 read_complete:
2106         FreeXid(xid);
2107         return rc;
2108 }
2109
2110 static int cifs_readpage_worker(struct file *file, struct page *page,
2111         loff_t *poffset)
2112 {
2113         char *read_data;
2114         int rc;
2115
2116         /* Is the page cached? */
2117         rc = cifs_readpage_from_fscache(file->f_path.dentry->d_inode, page);
2118         if (rc == 0)
2119                 goto read_complete;
2120
2121         page_cache_get(page);
2122         read_data = kmap(page);
2123         /* for reads over a certain size could initiate async read ahead */
2124
2125         rc = cifs_read(file, read_data, PAGE_CACHE_SIZE, poffset);
2126
2127         if (rc < 0)
2128                 goto io_error;
2129         else
2130                 cFYI(1, "Bytes read %d", rc);
2131
2132         file->f_path.dentry->d_inode->i_atime =
2133                 current_fs_time(file->f_path.dentry->d_inode->i_sb);
2134
2135         if (PAGE_CACHE_SIZE > rc)
2136                 memset(read_data + rc, 0, PAGE_CACHE_SIZE - rc);
2137
2138         flush_dcache_page(page);
2139         SetPageUptodate(page);
2140
2141         /* send this page to the cache */
2142         cifs_readpage_to_fscache(file->f_path.dentry->d_inode, page);
2143
2144         rc = 0;
2145
2146 io_error:
2147         kunmap(page);
2148         page_cache_release(page);
2149
2150 read_complete:
2151         return rc;
2152 }
2153
2154 static int cifs_readpage(struct file *file, struct page *page)
2155 {
2156         loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
2157         int rc = -EACCES;
2158         int xid;
2159
2160         xid = GetXid();
2161
2162         if (file->private_data == NULL) {
2163                 rc = -EBADF;
2164                 FreeXid(xid);
2165                 return rc;
2166         }
2167
2168         cFYI(1, "readpage %p at offset %d 0x%x\n",
2169                  page, (int)offset, (int)offset);
2170
2171         rc = cifs_readpage_worker(file, page, &offset);
2172
2173         unlock_page(page);
2174
2175         FreeXid(xid);
2176         return rc;
2177 }
2178
2179 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
2180 {
2181         struct cifsFileInfo *open_file;
2182
2183         read_lock(&GlobalSMBSeslock);
2184         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
2185                 if (open_file->closePend)
2186                         continue;
2187                 if (open_file->pfile &&
2188                     ((open_file->pfile->f_flags & O_RDWR) ||
2189                      (open_file->pfile->f_flags & O_WRONLY))) {
2190                         read_unlock(&GlobalSMBSeslock);
2191                         return 1;
2192                 }
2193         }
2194         read_unlock(&GlobalSMBSeslock);
2195         return 0;
2196 }
2197
2198 /* We do not want to update the file size from the server for inodes
2199    open for write, to avoid races with writepage extending the file.
2200    In the future we could consider refreshing the inode only on
2201    increases in the file size, but this is tricky to do without
2202    racing with writebehind page caching in the current Linux kernel
2203    design */
2204 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
2205 {
2206         if (!cifsInode)
2207                 return true;
2208
2209         if (is_inode_writable(cifsInode)) {
2210                 /* This inode is open for write at least once */
2211                 struct cifs_sb_info *cifs_sb;
2212
2213                 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
2214                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
2215                         /* since no page cache to corrupt on directio
2216                         we can change size safely */
2217                         return true;
2218                 }
2219
2220                 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
2221                         return true;
2222
2223                 return false;
2224         } else
2225                 return true;
2226 }
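
/*
 * Illustrative only (hypothetical helper): the kind of check a caller
 * revalidating attributes might make before trusting a server-reported
 * end of file, so that i_size is not changed under an inode that is
 * open for write and cached locally.
 */
static inline void example_update_server_eof(struct inode *inode,
                                             __u64 server_eof)
{
        if (is_size_safe_to_change(CIFS_I(inode), server_eof)) {
                spin_lock(&inode->i_lock);
                i_size_write(inode, server_eof);
                spin_unlock(&inode->i_lock);
        }
}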
2227
2228 static int cifs_write_begin(struct file *file, struct address_space *mapping,
2229                         loff_t pos, unsigned len, unsigned flags,
2230                         struct page **pagep, void **fsdata)
2231 {
2232         pgoff_t index = pos >> PAGE_CACHE_SHIFT;
2233         loff_t offset = pos & (PAGE_CACHE_SIZE - 1);
2234         loff_t page_start = pos & PAGE_MASK;
2235         loff_t i_size;
2236         struct page *page;
2237         int rc = 0;
2238
2239         cFYI(1, "write_begin from %lld len %d", (long long)pos, len);
2240
2241         page = grab_cache_page_write_begin(mapping, index, flags);
2242         if (!page) {
2243                 rc = -ENOMEM;
2244                 goto out;
2245         }
2246
2247         if (PageUptodate(page))
2248                 goto out;
2249
2250         /*
2251          * If we write a full page it will be up to date, no need to read from
2252          * the server. If the write is short, we'll end up doing a sync write
2253          * instead.
2254          */
2255         if (len == PAGE_CACHE_SIZE)
2256                 goto out;
2257
2258         /*
2259          * optimize away the read when we have an oplock, and we're not
2260          * expecting to use any of the data we'd be reading in. That
2261          * is, when the page lies beyond the EOF, or straddles the EOF
2262          * and the write will cover all of the existing data.
2263          */
2264         if (CIFS_I(mapping->host)->clientCanCacheRead) {
2265                 i_size = i_size_read(mapping->host);
2266                 if (page_start >= i_size ||
2267                     (offset == 0 && (pos + len) >= i_size)) {
2268                         zero_user_segments(page, 0, offset,
2269                                            offset + len,
2270                                            PAGE_CACHE_SIZE);
2271                         /*
2272                          * PageChecked means that the parts of the page
2273                          * to which we're not writing are considered up
2274                          * to date. Once the data is copied to the
2275                          * page, it can be set uptodate.
2276                          */
2277                         SetPageChecked(page);
2278                         goto out;
2279                 }
2280         }
2281
2282         if ((file->f_flags & O_ACCMODE) != O_WRONLY) {
2283                 /*
2284                  * might as well read a page, it is fast enough. If we get
2285                  * an error, we don't need to return it. cifs_write_end will
2286                  * do a sync write instead since PG_uptodate isn't set.
2287                  */
2288                 cifs_readpage_worker(file, page, &page_start);
2289         } else {
2290                 /* we could try using another file handle if there is one -
2291                    but how would we lock it to prevent close of that handle
2292                    racing with this read? In any case
2293                    this will be written out by write_end so is fine */
2294         }
2295 out:
2296         *pagep = page;
2297         return rc;
2298 }
2299
2300 static int cifs_release_page(struct page *page, gfp_t gfp)
2301 {
2302         if (PagePrivate(page))
2303                 return 0;
2304
2305         return cifs_fscache_release_page(page, gfp);
2306 }
2307
2308 static void cifs_invalidate_page(struct page *page, unsigned long offset)
2309 {
2310         struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
2311
2312         if (offset == 0)
2313                 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
2314 }
2315
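/*
 * Work item run when the server breaks an oplock: flush dirty pages,
 * drop cached data if read caching was lost, acknowledge the break
 * with a LOCKING_ANDX oplock release, and finally drop the references
 * taken by cifs_oplock_break_get() below.
 */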
2316 void cifs_oplock_break(struct work_struct *work)
2317 {
2318         struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
2319                                                   oplock_break);
2320         struct inode *inode = cfile->pInode;
2321         struct cifsInodeInfo *cinode = CIFS_I(inode);
2322         int rc, waitrc = 0;
2323
2324         if (inode && S_ISREG(inode->i_mode)) {
2325                 if (cinode->clientCanCacheRead)
2326                         break_lease(inode, O_RDONLY);
2327                 else
2328                         break_lease(inode, O_WRONLY);
2329                 rc = filemap_fdatawrite(inode->i_mapping);
2330                 if (cinode->clientCanCacheRead == 0) {
2331                         waitrc = filemap_fdatawait(inode->i_mapping);
2332                         invalidate_remote_inode(inode);
2333                 }
2334                 if (!rc)
2335                         rc = waitrc;
2336                 if (rc)
2337                         cinode->write_behind_rc = rc;
2338                 cFYI(1, "Oplock flush inode %p rc %d", inode, rc);
2339         }
2340
2341         /*
2342          * Releasing a stale oplock after a recent reconnect of the SMB
2343          * session using a now-incorrect file handle is not a data integrity
2344          * issue, but do not bother sending an oplock release if the session
2345          * is still disconnected, since the server has already released it.
2346          */
2347         if (!cfile->closePend && !cfile->oplock_break_cancelled) {
2348                 rc = CIFSSMBLock(0, cfile->tcon, cfile->netfid, 0, 0, 0, 0,
2349                                  LOCKING_ANDX_OPLOCK_RELEASE, false);
2350                 cFYI(1, "Oplock release rc = %d", rc);
2351         }
2352
2353         /*
2354          * We might have kicked in before is_valid_oplock_break()
2355          * finished grabbing a reference for us.  Make sure it's done by
2356          * waiting for GlobalSMBSeslock.
2357          */
2358         write_lock(&GlobalSMBSeslock);
2359         write_unlock(&GlobalSMBSeslock);
2360
2361         cifs_oplock_break_put(cfile);
2362 }
2363
2364 void cifs_oplock_break_get(struct cifsFileInfo *cfile)
2365 {
2366         mntget(cfile->mnt);
2367         cifsFileInfo_get(cfile);
2368 }
2369
2370 void cifs_oplock_break_put(struct cifsFileInfo *cfile)
2371 {
2372         mntput(cfile->mnt);
2373         cifsFileInfo_put(cfile);
2374 }
2375
2376 const struct address_space_operations cifs_addr_ops = {
2377         .readpage = cifs_readpage,
2378         .readpages = cifs_readpages,
2379         .writepage = cifs_writepage,
2380         .writepages = cifs_writepages,
2381         .write_begin = cifs_write_begin,
2382         .write_end = cifs_write_end,
2383         .set_page_dirty = __set_page_dirty_nobuffers,
2384         .releasepage = cifs_release_page,
2385         .invalidatepage = cifs_invalidate_page,
2386         /* .sync_page = cifs_sync_page, */
2387         /* .direct_IO = */
2388 };
2389
2390 /*
2391  * cifs_readpages requires the server to support a buffer large enough to
2392  * contain the header plus one complete page of data.  Otherwise, we need
2393  * to leave cifs_readpages out of the address space operations.
2394  */
2395 const struct address_space_operations cifs_addr_ops_smallbuf = {
2396         .readpage = cifs_readpage,
2397         .writepage = cifs_writepage,
2398         .writepages = cifs_writepages,
2399         .write_begin = cifs_write_begin,
2400         .write_end = cifs_write_end,
2401         .set_page_dirty = __set_page_dirty_nobuffers,
2402         .releasepage = cifs_release_page,
2403         .invalidatepage = cifs_invalidate_page,
2404         /* .sync_page = cifs_sync_page, */
2405         /* .direct_IO = */
2406 };
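
/*
 * A rough sketch, not the actual selection logic (which lives outside
 * this file), of how a caller that knows the negotiated server buffer
 * size might pick between the two operation tables above; the helper
 * name and the MAX_CIFS_HDR_SIZE headroom constant are illustrative
 * assumptions here.
 */
static inline const struct address_space_operations *
example_select_aops(unsigned int server_maxbuf)
{
        /* big enough for an SMB header plus one full page of data? */
        if (server_maxbuf < PAGE_CACHE_SIZE + MAX_CIFS_HDR_SIZE)
                return &cifs_addr_ops_smallbuf;
        return &cifs_addr_ops;
}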