/*
 *   fs/cifs/file.c
 *
 *   vfs operations that deal with files
 *
 *   Copyright (C) International Business Machines  Corp., 2002,2010
 *   Author(s): Steve French (sfrench@us.ibm.com)
 *              Jeremy Allison (jra@samba.org)
 *
 *   This library is free software; you can redistribute it and/or modify
 *   it under the terms of the GNU Lesser General Public License as published
 *   by the Free Software Foundation; either version 2.1 of the License, or
 *   (at your option) any later version.
 *
 *   This library is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU Lesser General Public License for more details.
 *
 *   You should have received a copy of the GNU Lesser General Public License
 *   along with this library; if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
#include <linux/fs.h>
#include <linux/backing-dev.h>
#include <linux/stat.h>
#include <linux/fcntl.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/writeback.h>
#include <linux/task_io_accounting_ops.h>
#include <linux/delay.h>
#include <linux/mount.h>
#include <linux/slab.h>
#include <linux/swap.h>
#include <asm/div64.h>
#include "cifsfs.h"
#include "cifspdu.h"
#include "cifsglob.h"
#include "cifsproto.h"
#include "cifs_unicode.h"
#include "cifs_debug.h"
#include "cifs_fs_sb.h"
#include "fscache.h"

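/*
 * Map the POSIX open flags in @flags to the NT access bits requested from
 * the server: GENERIC_READ/GENERIC_WRITE for the common access modes, or
 * an explicit attribute/data bit set for anything else.
 */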
static inline int cifs_convert_flags(unsigned int flags)
{
        if ((flags & O_ACCMODE) == O_RDONLY)
                return GENERIC_READ;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                return GENERIC_WRITE;
        else if ((flags & O_ACCMODE) == O_RDWR) {
                /* GENERIC_ALL is too much permission to request; it can
                   cause unnecessary access-denied errors on create */
                /* return GENERIC_ALL; */
                return (GENERIC_READ | GENERIC_WRITE);
        }

        return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
                FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
                FILE_READ_DATA);
}

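/*
 * Map VFS open flags to the SMB_O_* flags used by the CIFS POSIX open
 * protocol extension. O_EXCL without O_CREAT is ignored (and logged),
 * and O_DSYNC is conservatively widened to SMB_O_SYNC.
 */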
static u32 cifs_posix_convert_flags(unsigned int flags)
{
        u32 posix_flags = 0;

        if ((flags & O_ACCMODE) == O_RDONLY)
                posix_flags = SMB_O_RDONLY;
        else if ((flags & O_ACCMODE) == O_WRONLY)
                posix_flags = SMB_O_WRONLY;
        else if ((flags & O_ACCMODE) == O_RDWR)
                posix_flags = SMB_O_RDWR;

        if (flags & O_CREAT) {
                posix_flags |= SMB_O_CREAT;
                if (flags & O_EXCL)
                        posix_flags |= SMB_O_EXCL;
        } else if (flags & O_EXCL)
                cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
                         current->comm, current->tgid);

        if (flags & O_TRUNC)
                posix_flags |= SMB_O_TRUNC;
        /* be safe and imply O_SYNC for O_DSYNC */
        if (flags & O_DSYNC)
                posix_flags |= SMB_O_SYNC;
        if (flags & O_DIRECTORY)
                posix_flags |= SMB_O_DIRECTORY;
        if (flags & O_NOFOLLOW)
                posix_flags |= SMB_O_NOFOLLOW;
        if (flags & O_DIRECT)
                posix_flags |= SMB_O_DIRECT;

        return posix_flags;
}

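/*
 * Translate the O_CREAT/O_EXCL/O_TRUNC combination into the CIFS create
 * disposition used on the non-POSIX open path (see the mapping table in
 * cifs_nt_open below).
 */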
static inline int cifs_get_disposition(unsigned int flags)
{
        if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
                return FILE_CREATE;
        else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
                return FILE_OVERWRITE_IF;
        else if ((flags & O_CREAT) == O_CREAT)
                return FILE_OPEN_IF;
        else if ((flags & O_TRUNC) == O_TRUNC)
                return FILE_OVERWRITE;
        else
                return FILE_OPEN;
}

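/*
 * Open a file via the CIFS POSIX protocol extensions. On success the
 * netfid and oplock are returned through @pnetfid and @poplock, and if
 * the caller passed @pinode the inode is created or refreshed from the
 * FILE_UNIX_BASIC_INFO data the server sent back.
 */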
int cifs_posix_open(char *full_path, struct inode **pinode,
                        struct super_block *sb, int mode, unsigned int f_flags,
                        __u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
        int rc;
        FILE_UNIX_BASIC_INFO *presp_data;
        __u32 posix_flags = 0;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifs_fattr fattr;
        struct tcon_link *tlink;
        struct cifs_tcon *tcon;

        cifs_dbg(FYI, "posix open %s\n", full_path);

        presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
        if (presp_data == NULL)
                return -ENOMEM;

        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                rc = PTR_ERR(tlink);
                goto posix_open_ret;
        }

        tcon = tlink_tcon(tlink);
        mode &= ~current_umask();

        posix_flags = cifs_posix_convert_flags(f_flags);
        rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
                             poplock, full_path, cifs_sb->local_nls,
                             cifs_sb->mnt_cifs_flags &
                                        CIFS_MOUNT_MAP_SPECIAL_CHR);
        cifs_put_tlink(tlink);

        if (rc)
                goto posix_open_ret;

        if (presp_data->Type == cpu_to_le32(-1))
                goto posix_open_ret; /* open ok, caller does qpathinfo */

        if (!pinode)
                goto posix_open_ret; /* caller does not need info */

        cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

        /* get new inode and set it up */
        if (*pinode == NULL) {
                cifs_fill_uniqueid(sb, &fattr);
                *pinode = cifs_iget(sb, &fattr);
                if (!*pinode) {
                        rc = -ENOMEM;
                        goto posix_open_ret;
                }
        } else {
                cifs_fattr_to_inode(*pinode, &fattr);
        }

posix_open_ret:
        kfree(presp_data);
        return rc;
}

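/*
 * Open a file the traditional (non-POSIX) way: convert the open flags to
 * an NT desired access and create disposition, issue the open through the
 * server's ->open op, and refresh the inode metadata from the result.
 */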
static int
cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
             struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
             struct cifs_fid *fid, unsigned int xid)
{
        int rc;
        int desired_access;
        int disposition;
        int create_options = CREATE_NOT_DIR;
        FILE_ALL_INFO *buf;
        struct TCP_Server_Info *server = tcon->ses->server;

        if (!server->ops->open)
                return -ENOSYS;

        desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *      POSIX Flag            CIFS Disposition
 *      ----------            ----------------
 *      O_CREAT               FILE_OPEN_IF
 *      O_CREAT | O_EXCL      FILE_CREATE
 *      O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *      O_TRUNC               FILE_OVERWRITE
 *      none of the above     FILE_OPEN
 *
 *      Note that there is no direct match for the disposition
 *      FILE_SUPERSEDE (ie create whether or not the file exists).
 *      O_CREAT | O_TRUNC is similar but truncates the existing file
 *      rather than creating a new one as FILE_SUPERSEDE does (which
 *      uses the attributes / metadata passed in on the open call).
 *
 *      O_SYNC is a reasonable match to the CIFS writethrough flag,
 *      and the read/write flags match reasonably. O_LARGEFILE is
 *      irrelevant because largefile support is always used by this
 *      client. The flags O_APPEND, O_DIRECT, O_DIRECTORY, O_FASYNC,
 *      O_NOFOLLOW and O_NONBLOCK need further investigation.
 *********************************************************************/

        disposition = cifs_get_disposition(f_flags);

        /* BB pass O_SYNC flag through on file attributes .. BB */

        buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
        if (!buf)
                return -ENOMEM;

        if (backup_cred(cifs_sb))
                create_options |= CREATE_OPEN_BACKUP_INTENT;

        rc = server->ops->open(xid, tcon, full_path, disposition,
                               desired_access, create_options, fid, oplock, buf,
                               cifs_sb);

        if (rc)
                goto out;

        if (tcon->unix_ext)
                rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
                                              xid);
        else
                rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
                                         xid, &fid->netfid);

out:
        kfree(buf);
        return rc;
}

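/*
 * Return true if any open file instance on this inode currently holds
 * cached mandatory byte-range locks.
 */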
static bool
cifs_has_mand_locks(struct cifsInodeInfo *cinode)
{
        struct cifs_fid_locks *cur;
        bool has_locks = false;

        down_read(&cinode->lock_sem);
        list_for_each_entry(cur, &cinode->llist, llist) {
                if (!list_empty(&cur->locks)) {
                        has_locks = true;
                        break;
                }
        }
        up_read(&cinode->lock_sem);
        return has_locks;
}

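/*
 * Allocate the per-open cifsFileInfo for @file, attach its lock list to
 * the inode, take references on the dentry and tlink, and register the
 * handle on the tcon and inode open-file lists.
 */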
struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
                  struct tcon_link *tlink, __u32 oplock)
{
        struct dentry *dentry = file->f_path.dentry;
        struct inode *inode = dentry->d_inode;
        struct cifsInodeInfo *cinode = CIFS_I(inode);
        struct cifsFileInfo *cfile;
        struct cifs_fid_locks *fdlocks;
        struct cifs_tcon *tcon = tlink_tcon(tlink);
        struct TCP_Server_Info *server = tcon->ses->server;

        cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
        if (cfile == NULL)
                return cfile;

        fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
        if (!fdlocks) {
                kfree(cfile);
                return NULL;
        }

        INIT_LIST_HEAD(&fdlocks->locks);
        fdlocks->cfile = cfile;
        cfile->llist = fdlocks;
        down_write(&cinode->lock_sem);
        list_add(&fdlocks->llist, &cinode->llist);
        up_write(&cinode->lock_sem);

        cfile->count = 1;
        cfile->pid = current->tgid;
        cfile->uid = current_fsuid();
        cfile->dentry = dget(dentry);
        cfile->f_flags = file->f_flags;
        cfile->invalidHandle = false;
        cfile->tlink = cifs_get_tlink(tlink);
        INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
        mutex_init(&cfile->fh_mutex);

        cifs_sb_active(inode->i_sb);

        /*
         * If the server returned a read oplock and we have mandatory brlocks,
         * set oplock level to None.
         */
        if (oplock == server->vals->oplock_read &&
                                                cifs_has_mand_locks(cinode)) {
                cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
                oplock = 0;
        }

        spin_lock(&cifs_file_list_lock);
        if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
                oplock = fid->pending_open->oplock;
        list_del(&fid->pending_open->olist);

        server->ops->set_fid(cfile, fid, oplock);

        list_add(&cfile->tlist, &tcon->openFileList);
        /* if readable file instance, put it first in the list */
        if (file->f_mode & FMODE_READ)
                list_add(&cfile->flist, &cinode->openFileList);
        else
                list_add_tail(&cfile->flist, &cinode->openFileList);
        spin_unlock(&cifs_file_list_lock);

        file->private_data = cfile;
        return cfile;
}

struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
        spin_lock(&cifs_file_list_lock);
        cifsFileInfo_get_locked(cifs_file);
        spin_unlock(&cifs_file_list_lock);
        return cifs_file;
}

/*
 * Release a reference on the file private data. This may involve closing
 * the filehandle out on the server. Must be called without holding
 * cifs_file_list_lock.
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
        struct inode *inode = cifs_file->dentry->d_inode;
        struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        struct cifsInodeInfo *cifsi = CIFS_I(inode);
        struct super_block *sb = inode->i_sb;
        struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
        struct cifsLockInfo *li, *tmp;
        struct cifs_fid fid;
        struct cifs_pending_open open;

        spin_lock(&cifs_file_list_lock);
        if (--cifs_file->count > 0) {
                spin_unlock(&cifs_file_list_lock);
                return;
        }

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        /* store open in pending opens to make sure we don't miss lease break */
        cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);

        /* remove it from the lists */
        list_del(&cifs_file->flist);
        list_del(&cifs_file->tlist);

        if (list_empty(&cifsi->openFileList)) {
                cifs_dbg(FYI, "closing last open instance for inode %p\n",
                         cifs_file->dentry->d_inode);
                /*
                 * In strict cache mode we need to invalidate the mapping on
                 * the last close because it may cause an error when we open
                 * this file again and get at least a level II oplock.
                 */
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
                        CIFS_I(inode)->invalid_mapping = true;
                cifs_set_oplock_level(cifsi, 0);
        }
        spin_unlock(&cifs_file_list_lock);

        cancel_work_sync(&cifs_file->oplock_break);

        if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
                struct TCP_Server_Info *server = tcon->ses->server;
                unsigned int xid;

                xid = get_xid();
                if (server->ops->close)
                        server->ops->close(xid, tcon, &cifs_file->fid);
                _free_xid(xid);
        }

        cifs_del_pending_open(&open);

        /*
         * Delete any outstanding lock records. We'll lose them when the file
         * is closed anyway.
         */
        down_write(&cifsi->lock_sem);
        list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
                list_del(&li->llist);
                cifs_del_lock_waiters(li);
                kfree(li);
        }
        list_del(&cifs_file->llist->llist);
        kfree(cifs_file->llist);
        up_write(&cifsi->lock_sem);

        cifs_put_tlink(cifs_file->tlink);
        dput(cifs_file->dentry);
        cifs_sb_deactive(sb);
        kfree(cifs_file);
}

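/*
 * VFS ->open for regular files: try a POSIX open when the server supports
 * the unix extensions, otherwise fall back to the NT open path, then set
 * up the cifsFileInfo and fscache cookie for the new handle.
 */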
int cifs_open(struct inode *inode, struct file *file)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct TCP_Server_Info *server;
        struct cifs_tcon *tcon;
        struct tcon_link *tlink;
        struct cifsFileInfo *cfile = NULL;
        char *full_path = NULL;
        bool posix_open_ok = false;
        struct cifs_fid fid;
        struct cifs_pending_open open;

        xid = get_xid();

        cifs_sb = CIFS_SB(inode->i_sb);
        tlink = cifs_sb_tlink(cifs_sb);
        if (IS_ERR(tlink)) {
                free_xid(xid);
                return PTR_ERR(tlink);
        }
        tcon = tlink_tcon(tlink);
        server = tcon->ses->server;

        full_path = build_path_from_dentry(file->f_path.dentry);
        if (full_path == NULL) {
                rc = -ENOMEM;
                goto out;
        }

        cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
                 inode, file->f_flags, full_path);

        if (server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (!tcon->broken_posix_open && tcon->unix_ext &&
            cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /* cannot refresh inode info since size could be stale */
                rc = cifs_posix_open(full_path, &inode, inode->i_sb,
                                cifs_sb->mnt_file_mode /* ignored */,
                                file->f_flags, &oplock, &fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix open succeeded\n");
                        posix_open_ok = true;
                } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
                        if (tcon->ses->serverNOS)
                                cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
                                         tcon->ses->serverName,
                                         tcon->ses->serverNOS);
                        tcon->broken_posix_open = true;
                } else if ((rc != -EIO) && (rc != -EREMOTE) &&
                         (rc != -EOPNOTSUPP)) /* path not found or net err */
                        goto out;
                /*
                 * Else fall through to retry the open the old way on network
                 * i/o or DFS errors.
                 */
        }

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        cifs_add_pending_open(&fid, tlink, &open);

        if (!posix_open_ok) {
                if (server->ops->get_lease_key)
                        server->ops->get_lease_key(inode, &fid);

                rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
                                  file->f_flags, &oplock, &fid, xid);
                if (rc) {
                        cifs_del_pending_open(&open);
                        goto out;
                }
        }

        cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
        if (cfile == NULL) {
                if (server->ops->close)
                        server->ops->close(xid, tcon, &fid);
                cifs_del_pending_open(&open);
                rc = -ENOMEM;
                goto out;
        }

        cifs_fscache_set_inode_cookie(inode, file);

        if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
                /*
                 * Time to set mode which we can not set earlier due to
                 * problems creating new read-only files.
                 */
                struct cifs_unix_set_info_args args = {
                        .mode   = inode->i_mode,
                        .uid    = INVALID_UID, /* no change */
                        .gid    = INVALID_GID, /* no change */
                        .ctime  = NO_CHANGE_64,
                        .atime  = NO_CHANGE_64,
                        .mtime  = NO_CHANGE_64,
                        .device = 0,
                };
                CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
                                       cfile->pid);
        }

out:
        kfree(full_path);
        free_xid(xid);
        cifs_put_tlink(tlink);
        return rc;
}

static int cifs_push_posix_locks(struct cifsFileInfo *cfile);

/*
 * Try to reacquire byte range locks that were released when the session
 * to the server was lost.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = 0;

        /* we are going to update can_cache_brlcks here - need write access */
        down_write(&cinode->lock_sem);
        if (cinode->can_cache_brlcks) {
                /* can cache locks - no need to push them */
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                rc = cifs_push_posix_locks(cfile);
        else
                rc = tcon->ses->server->ops->push_mand_locks(cfile);

        up_write(&cinode->lock_sem);
        return rc;
}

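/*
 * Re-establish a server handle for @cfile after a session reconnect. If
 * @can_flush is set, write back dirty pages and refresh the inode before
 * reusing the handle; cached byte-range locks are then re-sent.
 */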
static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
        int rc = -EACCES;
        unsigned int xid;
        __u32 oplock;
        struct cifs_sb_info *cifs_sb;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        struct cifsInodeInfo *cinode;
        struct inode *inode;
        char *full_path = NULL;
        int desired_access;
        int disposition = FILE_OPEN;
        int create_options = CREATE_NOT_DIR;
        struct cifs_fid fid;

        xid = get_xid();
        mutex_lock(&cfile->fh_mutex);
        if (!cfile->invalidHandle) {
                mutex_unlock(&cfile->fh_mutex);
                rc = 0;
                free_xid(xid);
                return rc;
        }

        inode = cfile->dentry->d_inode;
        cifs_sb = CIFS_SB(inode->i_sb);
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        /*
         * Can not grab the rename sem here because various ops, including
         * those that already have the rename sem, can end up causing
         * writepage to get called, and if the server was down that means we
         * end up here, and we can never tell if the caller already has the
         * rename_sem.
         */
        full_path = build_path_from_dentry(cfile->dentry);
        if (full_path == NULL) {
                rc = -ENOMEM;
                mutex_unlock(&cfile->fh_mutex);
                free_xid(xid);
                return rc;
        }

        cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
                 inode, cfile->f_flags, full_path);

        if (tcon->ses->server->oplocks)
                oplock = REQ_OPLOCK;
        else
                oplock = 0;

        if (tcon->unix_ext && cap_unix(tcon->ses) &&
            (CIFS_UNIX_POSIX_PATH_OPS_CAP &
                                le64_to_cpu(tcon->fsUnixInfo.Capability))) {
                /*
                 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
                 * original open. Must mask them off for a reopen.
                 */
                unsigned int oflags = cfile->f_flags &
                                                ~(O_CREAT | O_EXCL | O_TRUNC);

                rc = cifs_posix_open(full_path, NULL, inode->i_sb,
                                     cifs_sb->mnt_file_mode /* ignored */,
                                     oflags, &oplock, &fid.netfid, xid);
                if (rc == 0) {
                        cifs_dbg(FYI, "posix reopen succeeded\n");
                        goto reopen_success;
                }
                /*
                 * Fall through to retry the open the old way on errors;
                 * especially in the reconnect path it is important to
                 * retry hard.
                 */
        }

        desired_access = cifs_convert_flags(cfile->f_flags);

        if (backup_cred(cifs_sb))
                create_options |= CREATE_OPEN_BACKUP_INTENT;

        if (server->ops->get_lease_key)
                server->ops->get_lease_key(inode, &fid);

        /*
         * Can not refresh the inode by passing in a file_info buf to be
         * returned by CIFSSMBOpen and then calling get_inode_info with the
         * returned buf, since the file might have write-behind data that
         * needs to be flushed and the server's version of the file size can
         * be stale. If we knew for sure that the inode was not dirty locally
         * we could do this.
         */
        rc = server->ops->open(xid, tcon, full_path, disposition,
                               desired_access, create_options, &fid, &oplock,
                               NULL, cifs_sb);
        if (rc) {
                mutex_unlock(&cfile->fh_mutex);
                cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
                cifs_dbg(FYI, "oplock: %d\n", oplock);
                goto reopen_error_exit;
        }

reopen_success:
        cfile->invalidHandle = false;
        mutex_unlock(&cfile->fh_mutex);
        cinode = CIFS_I(inode);

        if (can_flush) {
                rc = filemap_write_and_wait(inode->i_mapping);
                mapping_set_error(inode->i_mapping, rc);

                if (tcon->unix_ext)
                        rc = cifs_get_inode_info_unix(&inode, full_path,
                                                      inode->i_sb, xid);
                else
                        rc = cifs_get_inode_info(&inode, full_path, NULL,
                                                 inode->i_sb, xid, NULL);
        }
        /*
         * Else we are writing out data to the server already and could
         * deadlock if we tried to flush data, and since we do not know if
         * we have data that would invalidate the current end of file on the
         * server we can not go to the server to get the new inode info.
         */

        server->ops->set_fid(cfile, &fid, oplock);
        cifs_relock_file(cfile);

reopen_error_exit:
        kfree(full_path);
        free_xid(xid);
        return rc;
}

int cifs_close(struct inode *inode, struct file *file)
{
        if (file->private_data != NULL) {
                cifsFileInfo_put(file->private_data);
                file->private_data = NULL;
        }

        /* return code from the ->release op is always ignored */
        return 0;
}

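/*
 * VFS ->release for directories: close any still-active search handle on
 * the server and free the buffered search results.
 */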
int cifs_closedir(struct inode *inode, struct file *file)
{
        int rc = 0;
        unsigned int xid;
        struct cifsFileInfo *cfile = file->private_data;
        struct cifs_tcon *tcon;
        struct TCP_Server_Info *server;
        char *buf;

        cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

        if (cfile == NULL)
                return rc;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);
        server = tcon->ses->server;

        cifs_dbg(FYI, "Freeing private data in close dir\n");
        spin_lock(&cifs_file_list_lock);
        if (!cfile->srch_inf.endOfSearch && !cfile->invalidHandle) {
                cfile->invalidHandle = true;
                spin_unlock(&cifs_file_list_lock);
                if (server->ops->close_dir)
                        rc = server->ops->close_dir(xid, tcon, &cfile->fid);
                else
                        rc = -ENOSYS;
                cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
                /* not much we can do if it fails anyway, ignore rc */
                rc = 0;
        } else
                spin_unlock(&cifs_file_list_lock);

        buf = cfile->srch_inf.ntwrk_buf_start;
        if (buf) {
                cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
                cfile->srch_inf.ntwrk_buf_start = NULL;
                if (cfile->srch_inf.smallBuf)
                        cifs_small_buf_release(buf);
                else
                        cifs_buf_release(buf);
        }

        cifs_put_tlink(cfile->tlink);
        kfree(file->private_data);
        file->private_data = NULL;
        /* BB can we lock the filestruct while this is going on? */
        free_xid(xid);
        return rc;
}

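/*
 * Allocate and initialize a cifsLockInfo record for a byte-range lock
 * owned by the current thread group.
 */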
static struct cifsLockInfo *
cifs_lock_init(__u64 offset, __u64 length, __u8 type)
{
        struct cifsLockInfo *lock =
                kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
        if (!lock)
                return lock;
        lock->offset = offset;
        lock->length = length;
        lock->type = type;
        lock->pid = current->tgid;
        INIT_LIST_HEAD(&lock->blist);
        init_waitqueue_head(&lock->block_q);
        return lock;
}

void
cifs_del_lock_waiters(struct cifsLockInfo *lock)
{
        struct cifsLockInfo *li, *tmp;
        list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
                list_del_init(&li->blist);
                wake_up(&li->block_q);
        }
}

#define CIFS_LOCK_OP    0
#define CIFS_READ_OP    1
#define CIFS_WRITE_OP   2

/* @rw_check : 0 - no op, 1 - read, 2 - write */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
                            __u64 length, __u8 type, struct cifsFileInfo *cfile,
                            struct cifsLockInfo **conf_lock, int rw_check)
{
        struct cifsLockInfo *li;
        struct cifsFileInfo *cur_cfile = fdlocks->cfile;
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

        list_for_each_entry(li, &fdlocks->locks, llist) {
                if (offset + length <= li->offset ||
                    offset >= li->offset + li->length)
                        continue;
                if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
                    server->ops->compare_fids(cfile, cur_cfile)) {
                        /* shared lock prevents write op through the same fid */
                        if (!(li->type & server->vals->shared_lock_type) ||
                            rw_check != CIFS_WRITE_OP)
                                continue;
                }
                if ((type & server->vals->shared_lock_type) &&
                    ((server->ops->compare_fids(cfile, cur_cfile) &&
                     current->tgid == li->pid) || type == li->type))
                        continue;
                if (conf_lock)
                        *conf_lock = li;
                return true;
        }
        return false;
}

bool
cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
                        __u8 type, struct cifsLockInfo **conf_lock,
                        int rw_check)
{
        bool rc = false;
        struct cifs_fid_locks *cur;
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);

        list_for_each_entry(cur, &cinode->llist, llist) {
                rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
                                                 cfile, conf_lock, rw_check);
                if (rc)
                        break;
        }

        return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (mandatory style). If such a lock exists, update the flock structure
 * with its properties. Otherwise, set the flock type to F_UNLCK if we can
 * cache brlocks, or leave it the same if we can't. Returns 0 if we don't
 * need to request to the server or 1 otherwise.
 */
static int
cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
               __u8 type, struct file_lock *flock)
{
        int rc = 0;
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
        struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
        bool exist;

        down_read(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, offset, length, type,
                                        &conf_lock, CIFS_LOCK_OP);
        if (exist) {
                flock->fl_start = conf_lock->offset;
                flock->fl_end = conf_lock->offset + conf_lock->length - 1;
                flock->fl_pid = conf_lock->pid;
                if (conf_lock->type & server->vals->shared_lock_type)
                        flock->fl_type = F_RDLCK;
                else
                        flock->fl_type = F_WRLCK;
        } else if (!cinode->can_cache_brlcks)
                rc = 1;
        else
                flock->fl_type = F_UNLCK;

        up_read(&cinode->lock_sem);
        return rc;
}

static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
        down_write(&cinode->lock_sem);
        list_add_tail(&lock->llist, &cfile->llist->locks);
        up_write(&cinode->lock_sem);
}

/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if no locks prevent us but we need to request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
                 bool wait)
{
        struct cifsLockInfo *conf_lock;
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
        bool exist;
        int rc = 0;

try_again:
        exist = false;
        down_write(&cinode->lock_sem);

        exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
                                        lock->type, &conf_lock, CIFS_LOCK_OP);
        if (!exist && cinode->can_cache_brlcks) {
                list_add_tail(&lock->llist, &cfile->llist->locks);
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (!exist)
                rc = 1;
        else if (!wait)
                rc = -EACCES;
        else {
                list_add_tail(&lock->blist, &conf_lock->blist);
                up_write(&cinode->lock_sem);
                rc = wait_event_interruptible(lock->block_q,
                                        (lock->blist.prev == &lock->blist) &&
                                        (lock->blist.next == &lock->blist));
                if (!rc)
                        goto try_again;
                down_write(&cinode->lock_sem);
                list_del_init(&lock->blist);
        }

        up_write(&cinode->lock_sem);
        return rc;
}

/*
 * Check if there is another lock that prevents us from setting the lock
 * (posix style). If such a lock exists, update the flock structure with
 * its properties. Otherwise, set the flock type to F_UNLCK if we can
 * cache brlocks, or leave it the same if we can't. Returns 0 if we don't
 * need to request to the server or 1 otherwise.
 */
static int
cifs_posix_lock_test(struct file *file, struct file_lock *flock)
{
        int rc = 0;
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        unsigned char saved_type = flock->fl_type;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return 1;

        down_read(&cinode->lock_sem);
        posix_test_lock(file, flock);

        if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
                flock->fl_type = saved_type;
                rc = 1;
        }

        up_read(&cinode->lock_sem);
        return rc;
}

/*
 * Set the byte-range lock (posix style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if we need to request to the server;
 * 3) <0, if an error occurs while setting the lock.
 */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
        struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
        int rc = 1;

        if ((flock->fl_flags & FL_POSIX) == 0)
                return rc;

try_again:
        down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                up_write(&cinode->lock_sem);
                return rc;
        }

        rc = posix_lock_file(file, flock, NULL);
        up_write(&cinode->lock_sem);
        if (rc == FILE_LOCK_DEFERRED) {
                rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
                if (!rc)
                        goto try_again;
                posix_unblock_lock(flock);
        }
        return rc;
}

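/*
 * Send all cached mandatory byte-range locks for @cfile to the server,
 * batching as many LOCKING_ANDX_RANGE entries per request as the server's
 * maximum buffer size allows.
 */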
int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
        unsigned int xid;
        int rc = 0, stored_rc;
        struct cifsLockInfo *li, *tmp;
        struct cifs_tcon *tcon;
        unsigned int num, max_num, max_buf;
        LOCKING_ANDX_RANGE *buf, *cur;
        int types[] = {LOCKING_ANDX_LARGE_FILES,
                       LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
        int i;

        xid = get_xid();
        tcon = tlink_tcon(cfile->tlink);

        /*
         * Accessing maxBuf is racy with cifs_reconnect - need to store value
         * and check it for zero before using.
         */
        max_buf = tcon->ses->server->maxBuf;
        if (!max_buf) {
                free_xid(xid);
                return -EINVAL;
        }

        max_num = (max_buf - sizeof(struct smb_hdr)) /
                                                sizeof(LOCKING_ANDX_RANGE);
        buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
        if (!buf) {
                free_xid(xid);
                return -ENOMEM;
        }

        for (i = 0; i < 2; i++) {
                cur = buf;
                num = 0;
                list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
                        if (li->type != types[i])
                                continue;
                        cur->Pid = cpu_to_le16(li->pid);
                        cur->LengthLow = cpu_to_le32((u32)li->length);
                        cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
                        cur->OffsetLow = cpu_to_le32((u32)li->offset);
                        cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
                        if (++num == max_num) {
                                stored_rc = cifs_lockv(xid, tcon,
                                                       cfile->fid.netfid,
                                                       (__u8)li->type, 0, num,
                                                       buf);
                                if (stored_rc)
                                        rc = stored_rc;
                                cur = buf;
                                num = 0;
                        } else
                                cur++;
                }

                if (num) {
                        stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
                                               (__u8)types[i], 0, num, buf);
                        if (stored_rc)
                                rc = stored_rc;
                }
        }

        kfree(buf);
        free_xid(xid);
        return rc;
}

/* copied from fs/locks.c with a name change */
#define cifs_for_each_lock(inode, lockp) \
        for (lockp = &inode->i_flock; *lockp != NULL; \
             lockp = &(*lockp)->fl_next)

struct lock_to_push {
        struct list_head llist;
        __u64 offset;
        __u64 length;
        __u32 pid;
        __u16 netfid;
        __u8 type;
};

static int
cifs_push_posix_locks(struct cifsFileInfo *cfile)
{
        struct inode *inode = cfile->dentry->d_inode;
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct file_lock *flock, **before;
        unsigned int count = 0, i = 0;
        int rc = 0, xid, type;
        struct list_head locks_to_send, *el;
        struct lock_to_push *lck, *tmp;
        __u64 length;

        xid = get_xid();

        spin_lock(&inode->i_lock);
        cifs_for_each_lock(inode, before) {
                if ((*before)->fl_flags & FL_POSIX)
                        count++;
        }
        spin_unlock(&inode->i_lock);

        INIT_LIST_HEAD(&locks_to_send);

        /*
         * Allocating count locks is enough because no FL_POSIX locks can be
         * added to the list while we are holding cinode->lock_sem that
         * protects locking operations of this inode.
         */
        for (; i < count; i++) {
                lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
                if (!lck) {
                        rc = -ENOMEM;
                        goto err_out;
                }
                list_add_tail(&lck->llist, &locks_to_send);
        }

        el = locks_to_send.next;
        spin_lock(&inode->i_lock);
        cifs_for_each_lock(inode, before) {
                flock = *before;
                if ((flock->fl_flags & FL_POSIX) == 0)
                        continue;
                if (el == &locks_to_send) {
                        /*
                         * The list ended. We don't have enough allocated
                         * structures - something is really wrong.
                         */
                        cifs_dbg(VFS, "Can't push all brlocks!\n");
                        break;
                }
                length = 1 + flock->fl_end - flock->fl_start;
                if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
                        type = CIFS_RDLCK;
                else
                        type = CIFS_WRLCK;
                lck = list_entry(el, struct lock_to_push, llist);
                lck->pid = flock->fl_pid;
                lck->netfid = cfile->fid.netfid;
                lck->length = length;
                lck->type = type;
                lck->offset = flock->fl_start;
                el = el->next;
        }
        spin_unlock(&inode->i_lock);

        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
                int stored_rc;

                stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
                                             lck->offset, lck->length, NULL,
                                             lck->type, 0);
                if (stored_rc)
                        rc = stored_rc;
                list_del(&lck->llist);
                kfree(lck);
        }

out:
        free_xid(xid);
        return rc;
err_out:
        list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
                list_del(&lck->llist);
                kfree(lck);
        }
        goto out;
}

static int
cifs_push_locks(struct cifsFileInfo *cfile)
{
        struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
        struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        int rc = 0;

        /* we are going to update can_cache_brlcks here - need write access */
        down_write(&cinode->lock_sem);
        if (!cinode->can_cache_brlcks) {
                up_write(&cinode->lock_sem);
                return rc;
        }

        if (cap_unix(tcon->ses) &&
            (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
            ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
                rc = cifs_push_posix_locks(cfile);
        else
                rc = tcon->ses->server->ops->push_mand_locks(cfile);

        cinode->can_cache_brlcks = false;
        up_write(&cinode->lock_sem);
        return rc;
}

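/*
 * Decode the fields of @flock: report the lock flags for debugging, set
 * *wait_flag for blocking requests, and translate the POSIX lock type
 * into the server's lock-type bits plus the lock/unlock indicators.
 */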
static void
cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
                bool *wait_flag, struct TCP_Server_Info *server)
{
        if (flock->fl_flags & FL_POSIX)
                cifs_dbg(FYI, "Posix\n");
        if (flock->fl_flags & FL_FLOCK)
                cifs_dbg(FYI, "Flock\n");
        if (flock->fl_flags & FL_SLEEP) {
                cifs_dbg(FYI, "Blocking lock\n");
                *wait_flag = true;
        }
        if (flock->fl_flags & FL_ACCESS)
                cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
        if (flock->fl_flags & FL_LEASE)
                cifs_dbg(FYI, "Lease on file - not implemented yet\n");
        if (flock->fl_flags &
            (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
               FL_ACCESS | FL_LEASE | FL_CLOSE)))
                cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);

        *type = server->vals->large_lock_type;
        if (flock->fl_type == F_WRLCK) {
                cifs_dbg(FYI, "F_WRLCK\n");
                *type |= server->vals->exclusive_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_UNLCK) {
                cifs_dbg(FYI, "F_UNLCK\n");
                *type |= server->vals->unlock_lock_type;
                *unlock = 1;
                /* Check if unlock includes more than one lock range */
        } else if (flock->fl_type == F_RDLCK) {
                cifs_dbg(FYI, "F_RDLCK\n");
                *type |= server->vals->shared_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_EXLCK) {
                cifs_dbg(FYI, "F_EXLCK\n");
                *type |= server->vals->exclusive_lock_type;
                *lock = 1;
        } else if (flock->fl_type == F_SHLCK) {
                cifs_dbg(FYI, "F_SHLCK\n");
                *type |= server->vals->shared_lock_type;
                *lock = 1;
        } else
                cifs_dbg(FYI, "Unknown type of lock\n");
}

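/*
 * Handle F_GETLK: test for conflicts against locally cached locks first,
 * then (for mandatory locks) probe the server by trying to set and
 * immediately unlock the range.
 */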
static int
cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
           bool wait_flag, bool posix_lck, unsigned int xid)
{
        int rc = 0;
        __u64 length = 1 + flock->fl_end - flock->fl_start;
        struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
        struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
        struct TCP_Server_Info *server = tcon->ses->server;
        __u16 netfid = cfile->fid.netfid;

        if (posix_lck) {
                int posix_lock_type;

                rc = cifs_posix_lock_test(file, flock);
                if (!rc)
                        return rc;

                if (type & server->vals->shared_lock_type)
                        posix_lock_type = CIFS_RDLCK;
                else
                        posix_lock_type = CIFS_WRLCK;
                rc = CIFSSMBPosixLock(xid, tcon, netfid, current->tgid,
                                      flock->fl_start, length, flock,
                                      posix_lock_type, wait_flag);
                return rc;
        }

        rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
        if (!rc)
                return rc;

        /* BB we could chain these into one lock request BB */
        rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
                                    1, 0, false);
        if (rc == 0) {
                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                                            type, 0, 1, false);
                flock->fl_type = F_UNLCK;
                if (rc != 0)
                        cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
                                 rc);
                return 0;
        }

        if (type & server->vals->shared_lock_type) {
                flock->fl_type = F_WRLCK;
                return 0;
        }

        type &= ~server->vals->exclusive_lock_type;

        rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                                    type | server->vals->shared_lock_type,
                                    1, 0, false);
        if (rc == 0) {
                rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
                        type | server->vals->shared_lock_type, 0, 1, false);
                flock->fl_type = F_RDLCK;
                if (rc != 0)
                        cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
                                 rc);
        } else
                flock->fl_type = F_WRLCK;

        return 0;
}

void
cifs_move_llist(struct list_head *source, struct list_head *dest)
{
        struct list_head *li, *tmp;
        list_for_each_safe(li, tmp, source)
                list_move(li, dest);
}

void
cifs_free_llist(struct list_head *llist)
{
        struct cifsLockInfo *li, *tmp;
        list_for_each_entry_safe(li, tmp, llist, llist) {
                cifs_del_lock_waiters(li);
                list_del(&li->llist);
                kfree(li);
        }
}

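/*
 * Remove all locks of @cfile that fall inside the unlock range in @flock:
 * cached locks are simply dropped, otherwise unlock requests are batched
 * and sent to the server, restoring the list on failure.
 */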
1342 int
1343 cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
1344                   unsigned int xid)
1345 {
1346         int rc = 0, stored_rc;
1347         int types[] = {LOCKING_ANDX_LARGE_FILES,
1348                        LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES};
1349         unsigned int i;
1350         unsigned int max_num, num, max_buf;
1351         LOCKING_ANDX_RANGE *buf, *cur;
1352         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1353         struct cifsInodeInfo *cinode = CIFS_I(cfile->dentry->d_inode);
1354         struct cifsLockInfo *li, *tmp;
1355         __u64 length = 1 + flock->fl_end - flock->fl_start;
1356         struct list_head tmp_llist;
1357
1358         INIT_LIST_HEAD(&tmp_llist);
1359
1360         /*
1361          * Accessing maxBuf is racy with cifs_reconnect - need to store value
1362          * and check it for zero before using.
1363          */
1364         max_buf = tcon->ses->server->maxBuf;
1365         if (!max_buf)
1366                 return -EINVAL;
1367
1368         max_num = (max_buf - sizeof(struct smb_hdr)) /
1369                                                 sizeof(LOCKING_ANDX_RANGE);
1370         buf = kzalloc(max_num * sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
1371         if (!buf)
1372                 return -ENOMEM;
1373
1374         down_write(&cinode->lock_sem);
1375         for (i = 0; i < 2; i++) {
1376                 cur = buf;
1377                 num = 0;
1378                 list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
1379                         if (flock->fl_start > li->offset ||
1380                             (flock->fl_start + length) <
1381                             (li->offset + li->length))
1382                                 continue;
1383                         if (current->tgid != li->pid)
1384                                 continue;
1385                         if (types[i] != li->type)
1386                                 continue;
1387                         if (cinode->can_cache_brlcks) {
1388                                 /*
1389                                  * We can cache brlock requests - simply remove
1390                                  * a lock from the file's list.
1391                                  */
1392                                 list_del(&li->llist);
1393                                 cifs_del_lock_waiters(li);
1394                                 kfree(li);
1395                                 continue;
1396                         }
1397                         cur->Pid = cpu_to_le16(li->pid);
1398                         cur->LengthLow = cpu_to_le32((u32)li->length);
1399                         cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
1400                         cur->OffsetLow = cpu_to_le32((u32)li->offset);
1401                         cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
1402                         /*
1403                          * We need to save a lock here to let us add it again to
1404                          * the file's list if the unlock range request fails on
1405                          * the server.
1406                          */
1407                         list_move(&li->llist, &tmp_llist);
1408                         if (++num == max_num) {
1409                                 stored_rc = cifs_lockv(xid, tcon,
1410                                                        cfile->fid.netfid,
1411                                                        li->type, num, 0, buf);
1412                                 if (stored_rc) {
1413                                         /*
1414                                          * We failed on the unlock range
1415                                          * request - add all locks from the tmp
1416                                          * list to the head of the file's list.
1417                                          */
1418                                         cifs_move_llist(&tmp_llist,
1419                                                         &cfile->llist->locks);
1420                                         rc = stored_rc;
1421                                 } else
1422                                         /*
1423                          * The unlock range request succeeded -
1424                                          * free the tmp list.
1425                                          */
1426                                         cifs_free_llist(&tmp_llist);
1427                                 cur = buf;
1428                                 num = 0;
1429                         } else
1430                                 cur++;
1431                 }
1432                 if (num) {
1433                         stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
1434                                                types[i], num, 0, buf);
1435                         if (stored_rc) {
1436                                 cifs_move_llist(&tmp_llist,
1437                                                 &cfile->llist->locks);
1438                                 rc = stored_rc;
1439                         } else
1440                                 cifs_free_llist(&tmp_llist);
1441                 }
1442         }
1443
1444         up_write(&cinode->lock_sem);
1445         kfree(buf);
1446         return rc;
1447 }
1448
1449 static int
1450 cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
1451            bool wait_flag, bool posix_lck, int lock, int unlock,
1452            unsigned int xid)
1453 {
1454         int rc = 0;
1455         __u64 length = 1 + flock->fl_end - flock->fl_start;
1456         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
1457         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1458         struct TCP_Server_Info *server = tcon->ses->server;
1459         struct inode *inode = cfile->dentry->d_inode;
1460
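        /*
         * Two paths from here: with POSIX extensions negotiated the lock
         * request goes out via CIFSSMBPosixLock; otherwise it is emulated
         * below with Windows mandatory byte-range locks.
         */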
1461         if (posix_lck) {
1462                 int posix_lock_type;
1463
1464                 rc = cifs_posix_lock_set(file, flock);
1465                 if (rc <= 0)
1466                         return rc;
1467
1468                 if (type & server->vals->shared_lock_type)
1469                         posix_lock_type = CIFS_RDLCK;
1470                 else
1471                         posix_lock_type = CIFS_WRLCK;
1472
1473                 if (unlock == 1)
1474                         posix_lock_type = CIFS_UNLCK;
1475
1476                 rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
1477                                       current->tgid, flock->fl_start, length,
1478                                       NULL, posix_lock_type, wait_flag);
1479                 goto out;
1480         }
1481
1482         if (lock) {
1483                 struct cifsLockInfo *lock;
1484
1485                 lock = cifs_lock_init(flock->fl_start, length, type);
1486                 if (!lock)
1487                         return -ENOMEM;
1488
1489                 rc = cifs_lock_add_if(cfile, lock, wait_flag);
1490                 if (rc < 0) {
1491                         kfree(lock);
1492                         return rc;
1493                 }
1494                 if (!rc)
1495                         goto out;
1496
1497                 /*
1498                  * A Windows 7 server can delay breaking a lease from read to
1499                  * None if we set a byte-range lock on a file - break it
1500                  * explicitly before sending the lock to the server to be sure
1501                  * the next read won't conflict with non-overlapping locks due
1502                  * to readahead (page reading).
1503                  */
1504                 if (!CIFS_I(inode)->clientCanCacheAll &&
1505                                         CIFS_I(inode)->clientCanCacheRead) {
1506                         cifs_invalidate_mapping(inode);
1507                         cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
1508                                  inode);
1509                         CIFS_I(inode)->clientCanCacheRead = false;
1510                 }
1511
1512                 rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
1513                                             type, 1, 0, wait_flag);
1514                 if (rc) {
1515                         kfree(lock);
1516                         return rc;
1517                 }
1518
1519                 cifs_lock_add(cfile, lock);
1520         } else if (unlock)
1521                 rc = server->ops->mand_unlock_range(cfile, flock, xid);
1522
1523 out:
1524         if (flock->fl_flags & FL_POSIX)
1525                 posix_lock_file_wait(file, flock);
1526         return rc;
1527 }
1528
1529 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1530 {
1531         int rc, xid;
1532         int lock = 0, unlock = 0;
1533         bool wait_flag = false;
1534         bool posix_lck = false;
1535         struct cifs_sb_info *cifs_sb;
1536         struct cifs_tcon *tcon;
1537         struct cifsInodeInfo *cinode;
1538         struct cifsFileInfo *cfile;
1539         __u16 netfid;
1540         __u32 type;
1541
1542         rc = -EACCES;
1543         xid = get_xid();
1544
1545         cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
1546                  cmd, flock->fl_flags, flock->fl_type,
1547                  flock->fl_start, flock->fl_end);
1548
1549         cfile = (struct cifsFileInfo *)file->private_data;
1550         tcon = tlink_tcon(cfile->tlink);
1551
1552         cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1553                         tcon->ses->server);
1554
1555         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
1556         netfid = cfile->fid.netfid;
1557         cinode = CIFS_I(file_inode(file));
1558
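        /*
         * POSIX byte-range locks are only usable when the server advertises
         * the Unix FCNTL capability and the mount has not set
         * CIFS_MOUNT_NOPOSIXBRL.
         */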
1559         if (cap_unix(tcon->ses) &&
1560             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1561             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1562                 posix_lck = true;
1563         /*
1564          * BB add code here to normalize offset and length to account for
1565          * negative length, which we cannot accept over the wire.
1566          */
1567         if (IS_GETLK(cmd)) {
1568                 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1569                 free_xid(xid);
1570                 return rc;
1571         }
1572
1573         if (!lock && !unlock) {
1574                 /*
1575                  * if this is neither a lock nor an unlock request, we do not
1576                  * know what it is, so there is nothing to do
1577                  */
1578                 free_xid(xid);
1579                 return -EOPNOTSUPP;
1580         }
1581
1582         rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1583                         xid);
1584         free_xid(xid);
1585         return rc;
1586 }
1587
1588 /*
1589  * update the file size (if needed) after a write. Should be called with
1590  * the inode->i_lock held
1591  */
1592 void
1593 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1594                       unsigned int bytes_written)
1595 {
1596         loff_t end_of_write = offset + bytes_written;
1597
1598         if (end_of_write > cifsi->server_eof)
1599                 cifsi->server_eof = end_of_write;
1600 }
1601
1602 static ssize_t
1603 cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
1604            size_t write_size, loff_t *offset)
1605 {
1606         int rc = 0;
1607         unsigned int bytes_written = 0;
1608         unsigned int total_written;
1609         struct cifs_sb_info *cifs_sb;
1610         struct cifs_tcon *tcon;
1611         struct TCP_Server_Info *server;
1612         unsigned int xid;
1613         struct dentry *dentry = open_file->dentry;
1614         struct cifsInodeInfo *cifsi = CIFS_I(dentry->d_inode);
1615         struct cifs_io_parms io_parms;
1616
1617         cifs_sb = CIFS_SB(dentry->d_sb);
1618
1619         cifs_dbg(FYI, "write %zd bytes to offset %lld of %s\n",
1620                  write_size, *offset, dentry->d_name.name);
1621
1622         tcon = tlink_tcon(open_file->tlink);
1623         server = tcon->ses->server;
1624
1625         if (!server->ops->sync_write)
1626                 return -ENOSYS;
1627
1628         xid = get_xid();
1629
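        /*
         * Write at most wsize bytes per iteration; the inner loop reopens
         * an invalidated handle (e.g. after a reconnect) and resends the
         * same chunk for as long as the send returns -EAGAIN.
         */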
1630         for (total_written = 0; write_size > total_written;
1631              total_written += bytes_written) {
1632                 rc = -EAGAIN;
1633                 while (rc == -EAGAIN) {
1634                         struct kvec iov[2];
1635                         unsigned int len;
1636
1637                         if (open_file->invalidHandle) {
1638                                 /* we could deadlock if we called
1639                                    filemap_fdatawait from here, so tell
1640                                    reopen_file not to flush data to the
1641                                    server now */
1642                                 rc = cifs_reopen_file(open_file, false);
1643                                 if (rc != 0)
1644                                         break;
1645                         }
1646
1647                         len = min((size_t)cifs_sb->wsize,
1648                                   write_size - total_written);
1649                         /* iov[0] is reserved for smb header */
1650                         iov[1].iov_base = (char *)write_data + total_written;
1651                         iov[1].iov_len = len;
1652                         io_parms.pid = pid;
1653                         io_parms.tcon = tcon;
1654                         io_parms.offset = *offset;
1655                         io_parms.length = len;
1656                         rc = server->ops->sync_write(xid, open_file, &io_parms,
1657                                                      &bytes_written, iov, 1);
1658                 }
1659                 if (rc || (bytes_written == 0)) {
1660                         if (total_written)
1661                                 break;
1662                         else {
1663                                 free_xid(xid);
1664                                 return rc;
1665                         }
1666                 } else {
1667                         spin_lock(&dentry->d_inode->i_lock);
1668                         cifs_update_eof(cifsi, *offset, bytes_written);
1669                         spin_unlock(&dentry->d_inode->i_lock);
1670                         *offset += bytes_written;
1671                 }
1672         }
1673
1674         cifs_stats_bytes_written(tcon, total_written);
1675
1676         if (total_written > 0) {
1677                 spin_lock(&dentry->d_inode->i_lock);
1678                 if (*offset > dentry->d_inode->i_size)
1679                         i_size_write(dentry->d_inode, *offset);
1680                 spin_unlock(&dentry->d_inode->i_lock);
1681         }
1682         mark_inode_dirty_sync(dentry->d_inode);
1683         free_xid(xid);
1684         return total_written;
1685 }
1686
1687 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1688                                         bool fsuid_only)
1689 {
1690         struct cifsFileInfo *open_file = NULL;
1691         struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1692
1693         /* only filter by fsuid on multiuser mounts */
1694         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1695                 fsuid_only = false;
1696
1697         spin_lock(&cifs_file_list_lock);
1698         /* we could simply take the first list entry, since write-only
1699            entries are always at the end of the list; but the first entry
1700            might have a close pending, so we walk the whole list */
1701         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1702                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1703                         continue;
1704                 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1705                         if (!open_file->invalidHandle) {
1706                                 /* found a good file */
1707                                 /* lock it so it will not be closed on us */
1708                                 cifsFileInfo_get_locked(open_file);
1709                                 spin_unlock(&cifs_file_list_lock);
1710                                 return open_file;
1711                         } /* else might as well continue, and look for
1712                              another, or simply have the caller reopen it
1713                              again rather than trying to fix this handle */
1714                 } else /* write only file */
1715                         break; /* write only files are last so must be done */
1716         }
1717         spin_unlock(&cifs_file_list_lock);
1718         return NULL;
1719 }
1720
1721 struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
1722                                         bool fsuid_only)
1723 {
1724         struct cifsFileInfo *open_file, *inv_file = NULL;
1725         struct cifs_sb_info *cifs_sb;
1726         bool any_available = false;
1727         int rc;
1728         unsigned int refind = 0;
1729
1730         /* Having a null inode here (because mapping->host was set to zero by
1731         the VFS or MM) should not happen, but we had reports of an oops (due
1732         to it being zero) during stress test cases, so we need to check for it */
1733
1734         if (cifs_inode == NULL) {
1735                 cifs_dbg(VFS, "Null inode passed to find_writable_file\n");
1736                 dump_stack();
1737                 return NULL;
1738         }
1739
1740         cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1741
1742         /* only filter by fsuid on multiuser mounts */
1743         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1744                 fsuid_only = false;
1745
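        /*
         * Search strategy: first prefer handles opened by the current task
         * (tgid), then fall back to any writable handle. An invalidated
         * handle is remembered and reopened as a last resort, retrying at
         * most MAX_REOPEN_ATT times.
         */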
1746         spin_lock(&cifs_file_list_lock);
1747 refind_writable:
1748         if (refind > MAX_REOPEN_ATT) {
1749                 spin_unlock(&cifs_file_list_lock);
1750                 return NULL;
1751         }
1752         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1753                 if (!any_available && open_file->pid != current->tgid)
1754                         continue;
1755                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1756                         continue;
1757                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
1758                         if (!open_file->invalidHandle) {
1759                                 /* found a good writable file */
1760                                 cifsFileInfo_get_locked(open_file);
1761                                 spin_unlock(&cifs_file_list_lock);
1762                                 return open_file;
1763                         } else {
1764                                 if (!inv_file)
1765                                         inv_file = open_file;
1766                         }
1767                 }
1768         }
1769         /* couldn't find a usable FH with the same pid, try any available */
1770         if (!any_available) {
1771                 any_available = true;
1772                 goto refind_writable;
1773         }
1774
1775         if (inv_file) {
1776                 any_available = false;
1777                 cifsFileInfo_get_locked(inv_file);
1778         }
1779
1780         spin_unlock(&cifs_file_list_lock);
1781
1782         if (inv_file) {
1783                 rc = cifs_reopen_file(inv_file, false);
1784                 if (!rc)
1785                         return inv_file;
1786                 else {
1787                         spin_lock(&cifs_file_list_lock);
1788                         list_move_tail(&inv_file->flist,
1789                                         &cifs_inode->openFileList);
1790                         spin_unlock(&cifs_file_list_lock);
1791                         cifsFileInfo_put(inv_file);
1792                         spin_lock(&cifs_file_list_lock);
1793                         ++refind;
1794                         goto refind_writable;
1795                 }
1796         }
1797
1798         return NULL;
1799 }
1800
1801 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1802 {
1803         struct address_space *mapping = page->mapping;
1804         loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
1805         char *write_data;
1806         int rc = -EFAULT;
1807         int bytes_written = 0;
1808         struct inode *inode;
1809         struct cifsFileInfo *open_file;
1810
1811         if (!mapping || !mapping->host)
1812                 return -EFAULT;
1813
1814         inode = page->mapping->host;
1815
1816         offset += (loff_t)from;
1817         write_data = kmap(page);
1818         write_data += from;
1819
1820         if ((to > PAGE_CACHE_SIZE) || (from > to)) {
1821                 kunmap(page);
1822                 return -EIO;
1823         }
1824
1825         /* racing with truncate? */
1826         if (offset > mapping->host->i_size) {
1827                 kunmap(page);
1828                 return 0; /* don't care */
1829         }
1830
1831         /* check to make sure that we are not extending the file */
1832         if (mapping->host->i_size - offset < (loff_t)to)
1833                 to = (unsigned)(mapping->host->i_size - offset);
1834
1835         open_file = find_writable_file(CIFS_I(mapping->host), false);
1836         if (open_file) {
1837                 bytes_written = cifs_write(open_file, open_file->pid,
1838                                            write_data, to - from, &offset);
1839                 cifsFileInfo_put(open_file);
1840                 /* Does mm or vfs already set times? */
1841                 inode->i_atime = inode->i_mtime = current_fs_time(inode->i_sb);
1842                 if ((bytes_written > 0) && (offset))
1843                         rc = 0;
1844                 else if (bytes_written < 0)
1845                         rc = bytes_written;
1846         } else {
1847                 cifs_dbg(FYI, "No writeable filehandles for inode\n");
1848                 rc = -EIO;
1849         }
1850
1851         kunmap(page);
1852         return rc;
1853 }
1854
1855 static int cifs_writepages(struct address_space *mapping,
1856                            struct writeback_control *wbc)
1857 {
1858         struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
1859         bool done = false, scanned = false, range_whole = false;
1860         pgoff_t end, index;
1861         struct cifs_writedata *wdata;
1862         struct TCP_Server_Info *server;
1863         struct page *page;
1864         int rc = 0;
1865
1866         /*
1867          * If wsize is smaller than the page cache size, default to writing
1868          * one page at a time via cifs_writepage
1869          */
1870         if (cifs_sb->wsize < PAGE_CACHE_SIZE)
1871                 return generic_writepages(mapping, wbc);
1872
1873         if (wbc->range_cyclic) {
1874                 index = mapping->writeback_index; /* Start from prev offset */
1875                 end = -1;
1876         } else {
1877                 index = wbc->range_start >> PAGE_CACHE_SHIFT;
1878                 end = wbc->range_end >> PAGE_CACHE_SHIFT;
1879                 if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
1880                         range_whole = true;
1881                 scanned = true;
1882         }
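        /*
         * Gather batches of contiguous dirty pages (up to wsize worth) and
         * issue one asynchronous write per batch rather than writing page
         * by page.
         */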
1883 retry:
1884         while (!done && index <= end) {
1885                 unsigned int i, nr_pages, found_pages;
1886                 pgoff_t next = 0, tofind;
1887                 struct page **pages;
1888
1889                 tofind = min((cifs_sb->wsize / PAGE_CACHE_SIZE) - 1,
1890                                 end - index) + 1;
1891
1892                 wdata = cifs_writedata_alloc((unsigned int)tofind,
1893                                              cifs_writev_complete);
1894                 if (!wdata) {
1895                         rc = -ENOMEM;
1896                         break;
1897                 }
1898
1899                 /*
1900                  * find_get_pages_tag seems to return a max of 256 on each
1901                  * iteration, so we must call it several times in order to
1902                  * fill the array, or else the wsize is effectively limited to
1903                  * 256 * PAGE_CACHE_SIZE.
1904                  */
1905                 found_pages = 0;
1906                 pages = wdata->pages;
1907                 do {
1908                         nr_pages = find_get_pages_tag(mapping, &index,
1909                                                         PAGECACHE_TAG_DIRTY,
1910                                                         tofind, pages);
1911                         found_pages += nr_pages;
1912                         tofind -= nr_pages;
1913                         pages += nr_pages;
1914                 } while (nr_pages && tofind && index <= end);
1915
1916                 if (found_pages == 0) {
1917                         kref_put(&wdata->refcount, cifs_writedata_release);
1918                         break;
1919                 }
1920
1921                 nr_pages = 0;
1922                 for (i = 0; i < found_pages; i++) {
1923                         page = wdata->pages[i];
1924                         /*
1925                          * At this point we hold neither mapping->tree_lock nor
1926                          * lock on the page itself: the page may be truncated or
1927                          * invalidated (changing page->mapping to NULL), or even
1928                          * swizzled back from swapper_space to tmpfs file
1929                          * mapping
1930                          */
1931
1932                         if (nr_pages == 0)
1933                                 lock_page(page);
1934                         else if (!trylock_page(page))
1935                                 break;
1936
1937                         if (unlikely(page->mapping != mapping)) {
1938                                 unlock_page(page);
1939                                 break;
1940                         }
1941
1942                         if (!wbc->range_cyclic && page->index > end) {
1943                                 done = true;
1944                                 unlock_page(page);
1945                                 break;
1946                         }
1947
1948                         if (next && (page->index != next)) {
1949                                 /* Not next consecutive page */
1950                                 unlock_page(page);
1951                                 break;
1952                         }
1953
1954                         if (wbc->sync_mode != WB_SYNC_NONE)
1955                                 wait_on_page_writeback(page);
1956
1957                         if (PageWriteback(page) ||
1958                                         !clear_page_dirty_for_io(page)) {
1959                                 unlock_page(page);
1960                                 break;
1961                         }
1962
1963                         /*
1964                          * This actually clears the dirty bit in the radix tree.
1965                          * See cifs_writepage() for more commentary.
1966                          */
1967                         set_page_writeback(page);
1968
1969                         if (page_offset(page) >= i_size_read(mapping->host)) {
1970                                 done = true;
1971                                 unlock_page(page);
1972                                 end_page_writeback(page);
1973                                 break;
1974                         }
1975
1976                         wdata->pages[i] = page;
1977                         next = page->index + 1;
1978                         ++nr_pages;
1979                 }
1980
1981                 /* reset index to refind any pages skipped */
1982                 if (nr_pages == 0)
1983                         index = wdata->pages[0]->index + 1;
1984
1985                 /* put any pages we aren't going to use */
1986                 for (i = nr_pages; i < found_pages; i++) {
1987                         page_cache_release(wdata->pages[i]);
1988                         wdata->pages[i] = NULL;
1989                 }
1990
1991                 /* nothing to write? */
1992                 if (nr_pages == 0) {
1993                         kref_put(&wdata->refcount, cifs_writedata_release);
1994                         continue;
1995                 }
1996
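                /*
                 * Describe the batch: full pages except possibly the last
                 * one, whose length (tailsz) is trimmed so that the write
                 * does not extend past the current inode size.
                 */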
1997                 wdata->sync_mode = wbc->sync_mode;
1998                 wdata->nr_pages = nr_pages;
1999                 wdata->offset = page_offset(wdata->pages[0]);
2000                 wdata->pagesz = PAGE_CACHE_SIZE;
2001                 wdata->tailsz =
2002                         min(i_size_read(mapping->host) -
2003                             page_offset(wdata->pages[nr_pages - 1]),
2004                             (loff_t)PAGE_CACHE_SIZE);
2005                 wdata->bytes = ((nr_pages - 1) * PAGE_CACHE_SIZE) +
2006                                         wdata->tailsz;
2007
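                /*
                 * The chosen handle may have gone stale; for WB_SYNC_ALL,
                 * keep retrying the send with a freshly looked-up writable
                 * handle for as long as it returns -EAGAIN.
                 */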
2008                 do {
2009                         if (wdata->cfile != NULL)
2010                                 cifsFileInfo_put(wdata->cfile);
2011                         wdata->cfile = find_writable_file(CIFS_I(mapping->host),
2012                                                           false);
2013                         if (!wdata->cfile) {
2014                                 cifs_dbg(VFS, "No writable handles for inode\n");
2015                                 rc = -EBADF;
2016                                 break;
2017                         }
2018                         wdata->pid = wdata->cfile->pid;
2019                         server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2020                         rc = server->ops->async_writev(wdata);
2021                 } while (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN);
2022
2023                 for (i = 0; i < nr_pages; ++i)
2024                         unlock_page(wdata->pages[i]);
2025
2026                 /* send failure -- clean up the mess */
2027                 if (rc != 0) {
2028                         for (i = 0; i < nr_pages; ++i) {
2029                                 if (rc == -EAGAIN)
2030                                         redirty_page_for_writepage(wbc,
2031                                                            wdata->pages[i]);
2032                                 else
2033                                         SetPageError(wdata->pages[i]);
2034                                 end_page_writeback(wdata->pages[i]);
2035                                 page_cache_release(wdata->pages[i]);
2036                         }
2037                         if (rc != -EAGAIN)
2038                                 mapping_set_error(mapping, rc);
2039                 }
2040                 kref_put(&wdata->refcount, cifs_writedata_release);
2041
2042                 wbc->nr_to_write -= nr_pages;
2043                 if (wbc->nr_to_write <= 0)
2044                         done = true;
2045
2046                 index = next;
2047         }
2048
2049         if (!scanned && !done) {
2050                 /*
2051                  * We hit the last page and there is more work to be done: wrap
2052                  * back to the start of the file
2053                  */
2054                 scanned = true;
2055                 index = 0;
2056                 goto retry;
2057         }
2058
2059         if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
2060                 mapping->writeback_index = index;
2061
2062         return rc;
2063 }
2064
2065 static int
2066 cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
2067 {
2068         int rc;
2069         unsigned int xid;
2070
2071         xid = get_xid();
2072 /* BB add check for wbc flags */
2073         page_cache_get(page);
2074         if (!PageUptodate(page))
2075                 cifs_dbg(FYI, "ppw - page not up to date\n");
2076
2077         /*
2078          * Set the "writeback" flag, and clear "dirty" in the radix tree.
2079          *
2080          * A writepage() implementation always needs to do either this,
2081          * or re-dirty the page with "redirty_page_for_writepage()" in
2082          * the case of a failure.
2083          *
2084          * Just unlocking the page will cause the radix tree tag-bits
2085          * to fail to update with the state of the page correctly.
2086          */
2087         set_page_writeback(page);
2088 retry_write:
2089         rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE);
2090         if (rc == -EAGAIN && wbc->sync_mode == WB_SYNC_ALL)
2091                 goto retry_write;
2092         else if (rc == -EAGAIN)
2093                 redirty_page_for_writepage(wbc, page);
2094         else if (rc != 0)
2095                 SetPageError(page);
2096         else
2097                 SetPageUptodate(page);
2098         end_page_writeback(page);
2099         page_cache_release(page);
2100         free_xid(xid);
2101         return rc;
2102 }
2103
2104 static int cifs_writepage(struct page *page, struct writeback_control *wbc)
2105 {
2106         int rc = cifs_writepage_locked(page, wbc);
2107         unlock_page(page);
2108         return rc;
2109 }
2110
2111 static int cifs_write_end(struct file *file, struct address_space *mapping,
2112                         loff_t pos, unsigned len, unsigned copied,
2113                         struct page *page, void *fsdata)
2114 {
2115         int rc;
2116         struct inode *inode = mapping->host;
2117         struct cifsFileInfo *cfile = file->private_data;
2118         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
2119         __u32 pid;
2120
2121         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2122                 pid = cfile->pid;
2123         else
2124                 pid = current->tgid;
2125
2126         cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
2127                  page, pos, copied);
2128
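        /*
         * PageChecked appears to mark a page that write_begin deliberately
         * left not up to date; if the copy covered the whole request we can
         * now mark the page up to date.
         */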
2129         if (PageChecked(page)) {
2130                 if (copied == len)
2131                         SetPageUptodate(page);
2132                 ClearPageChecked(page);
2133         } else if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
2134                 SetPageUptodate(page);
2135
2136         if (!PageUptodate(page)) {
2137                 char *page_data;
2138                 unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
2139                 unsigned int xid;
2140
2141                 xid = get_xid();
2142                 /* this is probably better than directly calling
2143                    partialpage_write, since in this function the file handle
2144                    is known and we might as well leverage it */
2145                 /* BB check if anything else is missing out of ppw,
2146                    such as updating the last write time */
2147                 page_data = kmap(page);
2148                 rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
2149                 /* if (rc < 0) should we set writebehind rc? */
2150                 kunmap(page);
2151
2152                 free_xid(xid);
2153         } else {
2154                 rc = copied;
2155                 pos += copied;
2156                 set_page_dirty(page);
2157         }
2158
2159         if (rc > 0) {
2160                 spin_lock(&inode->i_lock);
2161                 if (pos > inode->i_size)
2162                         i_size_write(inode, pos);
2163                 spin_unlock(&inode->i_lock);
2164         }
2165
2166         unlock_page(page);
2167         page_cache_release(page);
2168
2169         return rc;
2170 }
2171
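/*
 * Strict-cache fsync: flush dirty pages, invalidate the cached mapping when
 * we no longer hold a read oplock, then ask the server to flush the handle.
 */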
2172 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2173                       int datasync)
2174 {
2175         unsigned int xid;
2176         int rc = 0;
2177         struct cifs_tcon *tcon;
2178         struct TCP_Server_Info *server;
2179         struct cifsFileInfo *smbfile = file->private_data;
2180         struct inode *inode = file_inode(file);
2181         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2182
2183         rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2184         if (rc)
2185                 return rc;
2186         mutex_lock(&inode->i_mutex);
2187
2188         xid = get_xid();
2189
2190         cifs_dbg(FYI, "Sync file - name: %s datasync: 0x%x\n",
2191                  file->f_path.dentry->d_name.name, datasync);
2192
2193         if (!CIFS_I(inode)->clientCanCacheRead) {
2194                 rc = cifs_invalidate_mapping(inode);
2195                 if (rc) {
2196                         cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2197                         rc = 0; /* don't care about it in fsync */
2198                 }
2199         }
2200
2201         tcon = tlink_tcon(smbfile->tlink);
2202         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2203                 server = tcon->ses->server;
2204                 if (server->ops->flush)
2205                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2206                 else
2207                         rc = -ENOSYS;
2208         }
2209
2210         free_xid(xid);
2211         mutex_unlock(&inode->i_mutex);
2212         return rc;
2213 }
2214
2215 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2216 {
2217         unsigned int xid;
2218         int rc = 0;
2219         struct cifs_tcon *tcon;
2220         struct TCP_Server_Info *server;
2221         struct cifsFileInfo *smbfile = file->private_data;
2222         struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2223         struct inode *inode = file->f_mapping->host;
2224
2225         rc = filemap_write_and_wait_range(inode->i_mapping, start, end);
2226         if (rc)
2227                 return rc;
2228         mutex_lock(&inode->i_mutex);
2229
2230         xid = get_xid();
2231
2232         cifs_dbg(FYI, "Sync file - name: %s datasync: 0x%x\n",
2233                  file->f_path.dentry->d_name.name, datasync);
2234
2235         tcon = tlink_tcon(smbfile->tlink);
2236         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2237                 server = tcon->ses->server;
2238                 if (server->ops->flush)
2239                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2240                 else
2241                         rc = -ENOSYS;
2242         }
2243
2244         free_xid(xid);
2245         mutex_unlock(&inode->i_mutex);
2246         return rc;
2247 }
2248
2249 /*
2250  * As file closes, flush all cached write data for this inode checking
2251  * for write behind errors.
2252  */
2253 int cifs_flush(struct file *file, fl_owner_t id)
2254 {
2255         struct inode *inode = file_inode(file);
2256         int rc = 0;
2257
2258         if (file->f_mode & FMODE_WRITE)
2259                 rc = filemap_write_and_wait(inode->i_mapping);
2260
2261         cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2262
2263         return rc;
2264 }
2265
2266 static int
2267 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2268 {
2269         int rc = 0;
2270         unsigned long i;
2271
2272         for (i = 0; i < num_pages; i++) {
2273                 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2274                 if (!pages[i]) {
2275                         /*
2276                          * save the number of pages we have already allocated
2277                          * and return with an ENOMEM error
2278                          */
2279                         num_pages = i;
2280                         rc = -ENOMEM;
2281                         break;
2282                 }
2283         }
2284
2285         if (rc) {
2286                 for (i = 0; i < num_pages; i++)
2287                         put_page(pages[i]);
2288         }
2289         return rc;
2290 }
2291
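/*
 * Work out how many pages the next chunk needs: at most wsize bytes, rounded
 * up to whole pages; the chunk length itself is returned via *cur_len.
 */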
2292 static inline
2293 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2294 {
2295         size_t num_pages;
2296         size_t clen;
2297
2298         clen = min_t(const size_t, len, wsize);
2299         num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2300
2301         if (cur_len)
2302                 *cur_len = clen;
2303
2304         return num_pages;
2305 }
2306
2307 static void
2308 cifs_uncached_writev_complete(struct work_struct *work)
2309 {
2310         int i;
2311         struct cifs_writedata *wdata = container_of(work,
2312                                         struct cifs_writedata, work);
2313         struct inode *inode = wdata->cfile->dentry->d_inode;
2314         struct cifsInodeInfo *cifsi = CIFS_I(inode);
2315
2316         spin_lock(&inode->i_lock);
2317         cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
2318         if (cifsi->server_eof > inode->i_size)
2319                 i_size_write(inode, cifsi->server_eof);
2320         spin_unlock(&inode->i_lock);
2321
2322         complete(&wdata->done);
2323
2324         if (wdata->result != -EAGAIN) {
2325                 for (i = 0; i < wdata->nr_pages; i++)
2326                         put_page(wdata->pages[i]);
2327         }
2328
2329         kref_put(&wdata->refcount, cifs_writedata_release);
2330 }
2331
2332 /* attempt to send the write to the server, retrying on -EAGAIN errors */
2333 static int
2334 cifs_uncached_retry_writev(struct cifs_writedata *wdata)
2335 {
2336         int rc;
2337         struct TCP_Server_Info *server;
2338
2339         server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2340
2341         do {
2342                 if (wdata->cfile->invalidHandle) {
2343                         rc = cifs_reopen_file(wdata->cfile, false);
2344                         if (rc != 0)
2345                                 continue;
2346                 }
2347                 rc = server->ops->async_writev(wdata);
2348         } while (rc == -EAGAIN);
2349
2350         return rc;
2351 }
2352
2353 static ssize_t
2354 cifs_iovec_write(struct file *file, const struct iovec *iov,
2355                  unsigned long nr_segs, loff_t *poffset)
2356 {
2357         unsigned long nr_pages, i;
2358         size_t copied, len, cur_len;
2359         ssize_t total_written = 0;
2360         loff_t offset;
2361         struct iov_iter it;
2362         struct cifsFileInfo *open_file;
2363         struct cifs_tcon *tcon;
2364         struct cifs_sb_info *cifs_sb;
2365         struct cifs_writedata *wdata, *tmp;
2366         struct list_head wdata_list;
2367         int rc;
2368         pid_t pid;
2369
2370         len = iov_length(iov, nr_segs);
2371         if (!len)
2372                 return 0;
2373
2374         rc = generic_write_checks(file, poffset, &len, 0);
2375         if (rc)
2376                 return rc;
2377
2378         INIT_LIST_HEAD(&wdata_list);
2379         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2380         open_file = file->private_data;
2381         tcon = tlink_tcon(open_file->tlink);
2382
2383         if (!tcon->ses->server->ops->async_writev)
2384                 return -ENOSYS;
2385
2386         offset = *poffset;
2387
2388         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2389                 pid = open_file->pid;
2390         else
2391                 pid = current->tgid;
2392
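        /*
         * Uncached write: copy the user data into freshly allocated pages,
         * at most wsize bytes at a time, send each chunk as an asynchronous
         * write and keep it on wdata_list until the replies are collected
         * below.
         */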
2393         iov_iter_init(&it, iov, nr_segs, len, 0);
2394         do {
2395                 size_t save_len;
2396
2397                 nr_pages = get_numpages(cifs_sb->wsize, len, &cur_len);
2398                 wdata = cifs_writedata_alloc(nr_pages,
2399                                              cifs_uncached_writev_complete);
2400                 if (!wdata) {
2401                         rc = -ENOMEM;
2402                         break;
2403                 }
2404
2405                 rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
2406                 if (rc) {
2407                         kfree(wdata);
2408                         break;
2409                 }
2410
2411                 save_len = cur_len;
2412                 for (i = 0; i < nr_pages; i++) {
2413                         copied = min_t(const size_t, cur_len, PAGE_SIZE);
2414                         copied = iov_iter_copy_from_user(wdata->pages[i], &it,
2415                                                          0, copied);
2416                         cur_len -= copied;
2417                         iov_iter_advance(&it, copied);
2418                 }
2419                 cur_len = save_len - cur_len;
2420
2421                 wdata->sync_mode = WB_SYNC_ALL;
2422                 wdata->nr_pages = nr_pages;
2423                 wdata->offset = (__u64)offset;
2424                 wdata->cfile = cifsFileInfo_get(open_file);
2425                 wdata->pid = pid;
2426                 wdata->bytes = cur_len;
2427                 wdata->pagesz = PAGE_SIZE;
2428                 wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
2429                 rc = cifs_uncached_retry_writev(wdata);
2430                 if (rc) {
2431                         kref_put(&wdata->refcount, cifs_writedata_release);
2432                         break;
2433                 }
2434
2435                 list_add_tail(&wdata->list, &wdata_list);
2436                 offset += cur_len;
2437                 len -= cur_len;
2438         } while (len > 0);
2439
2440         /*
2441          * If at least one write was successfully sent, then discard any rc
2442          * value from the later writes. If those writes succeed, then
2443          * we'll end up returning whatever was written. If one fails, then
2444          * we'll get a new rc value from it.
2445          */
2446         if (!list_empty(&wdata_list))
2447                 rc = 0;
2448
2449         /*
2450          * Wait for and collect replies for any successful sends in order of
2451          * increasing offset. Once an error is hit or we get a fatal signal
2452          * while waiting, then return without waiting for any more replies.
2453          */
2454 restart_loop:
2455         list_for_each_entry_safe(wdata, tmp, &wdata_list, list) {
2456                 if (!rc) {
2457                         /* FIXME: freezable too? */
2458                         rc = wait_for_completion_killable(&wdata->done);
2459                         if (rc)
2460                                 rc = -EINTR;
2461                         else if (wdata->result)
2462                                 rc = wdata->result;
2463                         else
2464                                 total_written += wdata->bytes;
2465
2466                         /* resend call if it's a retryable error */
2467                         if (rc == -EAGAIN) {
2468                                 rc = cifs_uncached_retry_writev(wdata);
2469                                 goto restart_loop;
2470                         }
2471                 }
2472                 list_del_init(&wdata->list);
2473                 kref_put(&wdata->refcount, cifs_writedata_release);
2474         }
2475
2476         if (total_written > 0)
2477                 *poffset += total_written;
2478
2479         cifs_stats_bytes_written(tcon, total_written);
2480         return total_written ? total_written : (ssize_t)rc;
2481 }
2482
2483 ssize_t cifs_user_writev(struct kiocb *iocb, const struct iovec *iov,
2484                                 unsigned long nr_segs, loff_t pos)
2485 {
2486         ssize_t written;
2487         struct inode *inode;
2488
2489         inode = file_inode(iocb->ki_filp);
2490
2491         /*
2492          * BB - optimize the case when signing is disabled. We can drop this
2493          * extra memory-to-memory copying and use iovec buffers for
2494          * constructing the write request.
2495          */
2496
2497         written = cifs_iovec_write(iocb->ki_filp, iov, nr_segs, &pos);
2498         if (written > 0) {
2499                 CIFS_I(inode)->invalid_mapping = true;
2500                 iocb->ki_pos = pos;
2501         }
2502
2503         return written;
2504 }
2505
2506 static ssize_t
2507 cifs_writev(struct kiocb *iocb, const struct iovec *iov,
2508             unsigned long nr_segs, loff_t pos)
2509 {
2510         struct file *file = iocb->ki_filp;
2511         struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
2512         struct inode *inode = file->f_mapping->host;
2513         struct cifsInodeInfo *cinode = CIFS_I(inode);
2514         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
2515         ssize_t rc = -EACCES;
2516
2517         BUG_ON(iocb->ki_pos != pos);
2518
2519         /*
2520          * We need to hold the sem to be sure nobody modifies lock list
2521          * with a brlock that prevents writing.
2522          */
2523         down_read(&cinode->lock_sem);
2524         if (!cifs_find_lock_conflict(cfile, pos, iov_length(iov, nr_segs),
2525                                      server->vals->exclusive_lock_type, NULL,
2526                                      CIFS_WRITE_OP)) {
2527                 mutex_lock(&inode->i_mutex);
2528                 rc = __generic_file_aio_write(iocb, iov, nr_segs,
2529                                                &iocb->ki_pos);
2530                 mutex_unlock(&inode->i_mutex);
2531         }
2532
2533         if (rc > 0 || rc == -EIOCBQUEUED) {
2534                 ssize_t err;
2535
2536                 err = generic_write_sync(file, pos, rc);
2537                 if (err < 0 && rc > 0)
2538                         rc = err;
2539         }
2540
2541         up_read(&cinode->lock_sem);
2542         return rc;
2543 }
2544
2545 ssize_t
2546 cifs_strict_writev(struct kiocb *iocb, const struct iovec *iov,
2547                    unsigned long nr_segs, loff_t pos)
2548 {
2549         struct inode *inode = file_inode(iocb->ki_filp);
2550         struct cifsInodeInfo *cinode = CIFS_I(inode);
2551         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2552         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
2553                                                 iocb->ki_filp->private_data;
2554         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2555         ssize_t written;
2556
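        /*
         * With a full (exclusive) oplock the write can go through the page
         * cache; POSIX-capable mounts may take the generic path directly,
         * while cifs_writev() first checks for conflicting mandatory
         * brlocks.
         */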
2557         if (cinode->clientCanCacheAll) {
2558                 if (cap_unix(tcon->ses) &&
2559                 (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
2560                     && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2561                         return generic_file_aio_write(iocb, iov, nr_segs, pos);
2562                 return cifs_writev(iocb, iov, nr_segs, pos);
2563         }
2564         /*
2565          * For non-oplocked files in strict cache mode we need to write the data
2566          * to the server exactly from the pos to pos+len-1 rather than flush all
2567          * affected pages because it may cause an error with mandatory locks on
2568          * these pages but not on the region from pos to pos+len-1.
2569          */
2570         written = cifs_user_writev(iocb, iov, nr_segs, pos);
2571         if (written > 0 && cinode->clientCanCacheRead) {
2572                 /*
2573                  * Windows 7 server can delay breaking level2 oplock if a write
2574                  * request comes - break it on the client to prevent reading
2575                  * stale data.
2576                  */
2577                 cifs_invalidate_mapping(inode);
2578                 cifs_dbg(FYI, "Set no oplock for inode=%p after a write operation\n",
2579                          inode);
2580                 cinode->clientCanCacheRead = false;
2581         }
2582         return written;
2583 }
2584
2585 static struct cifs_readdata *
2586 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
2587 {
2588         struct cifs_readdata *rdata;
2589
2590         rdata = kzalloc(sizeof(*rdata) + (sizeof(struct page *) * nr_pages),
2591                         GFP_KERNEL);
2592         if (rdata != NULL) {
2593                 kref_init(&rdata->refcount);
2594                 INIT_LIST_HEAD(&rdata->list);
2595                 init_completion(&rdata->done);
2596                 INIT_WORK(&rdata->work, complete);
2597         }
2598
2599         return rdata;
2600 }
2601
2602 void
2603 cifs_readdata_release(struct kref *refcount)
2604 {
2605         struct cifs_readdata *rdata = container_of(refcount,
2606                                         struct cifs_readdata, refcount);
2607
2608         if (rdata->cfile)
2609                 cifsFileInfo_put(rdata->cfile);
2610
2611         kfree(rdata);
2612 }
2613
2614 static int
2615 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
2616 {
2617         int rc = 0;
2618         struct page *page;
2619         unsigned int i;
2620
2621         for (i = 0; i < nr_pages; i++) {
2622                 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2623                 if (!page) {
2624                         rc = -ENOMEM;
2625                         break;
2626                 }
2627                 rdata->pages[i] = page;
2628         }
2629
2630         if (rc) {
2631                 for (i = 0; i < nr_pages; i++) {
2632                         put_page(rdata->pages[i]);
2633                         rdata->pages[i] = NULL;
2634                 }
2635         }
2636         return rc;
2637 }
2638
2639 static void
2640 cifs_uncached_readdata_release(struct kref *refcount)
2641 {
2642         struct cifs_readdata *rdata = container_of(refcount,
2643                                         struct cifs_readdata, refcount);
2644         unsigned int i;
2645
2646         for (i = 0; i < rdata->nr_pages; i++) {
2647                 put_page(rdata->pages[i]);
2648                 rdata->pages[i] = NULL;
2649         }
2650         cifs_readdata_release(refcount);
2651 }
2652
2653 static int
2654 cifs_retry_async_readv(struct cifs_readdata *rdata)
2655 {
2656         int rc;
2657         struct TCP_Server_Info *server;
2658
2659         server = tlink_tcon(rdata->cfile->tlink)->ses->server;
2660
2661         do {
2662                 if (rdata->cfile->invalidHandle) {
2663                         rc = cifs_reopen_file(rdata->cfile, true);
2664                         if (rc != 0)
2665                                 continue;
2666                 }
2667                 rc = server->ops->async_readv(rdata);
2668         } while (rc == -EAGAIN);
2669
2670         return rc;
2671 }
2672
2673 /**
2674  * cifs_readdata_to_iov - copy data from pages in response to an iovec
2675  * @rdata:      the readdata response with list of pages holding data
2676  * @iov:        vector in which we should copy the data
2677  * @nr_segs:    number of segments in vector
2678  * @offset:     offset into file of the first iovec
2679  * @copied:     used to return the amount of data copied to the iov
2680  *
2681  * This function copies data from a list of pages in a readdata response into
2682  * an array of iovecs. It will first calculate where the data should go
2683  * based on the info in the readdata and then copy the data into that spot.
2684  */
2685 static ssize_t
2686 cifs_readdata_to_iov(struct cifs_readdata *rdata, const struct iovec *iov,
2687                         unsigned long nr_segs, loff_t offset, ssize_t *copied)
2688 {
2689         int rc = 0;
2690         struct iov_iter ii;
2691         size_t pos = rdata->offset - offset;
2692         ssize_t remaining = rdata->bytes;
2693         unsigned char *pdata;
2694         unsigned int i;
2695
2696         /* set up iov_iter and advance to the correct offset */
2697         iov_iter_init(&ii, iov, nr_segs, iov_length(iov, nr_segs), 0);
2698         iov_iter_advance(&ii, pos);
2699
2700         *copied = 0;
2701         for (i = 0; i < rdata->nr_pages; i++) {
2702                 ssize_t copy;
2703                 struct page *page = rdata->pages[i];
2704
2705                 /* copy a whole page or whatever's left */
2706                 copy = min_t(ssize_t, remaining, PAGE_SIZE);
2707
2708                 /* ...but limit it to whatever space is left in the iov */
2709                 copy = min_t(ssize_t, copy, iov_iter_count(&ii));
2710
2711                 /* go while there's data to be copied and no errors */
2712                 if (copy && !rc) {
2713                         pdata = kmap(page);
2714                         rc = memcpy_toiovecend(ii.iov, pdata, ii.iov_offset,
2715                                                 (int)copy);
2716                         kunmap(page);
2717                         if (!rc) {
2718                                 *copied += copy;
2719                                 remaining -= copy;
2720                                 iov_iter_advance(&ii, copy);
2721                         }
2722                 }
2723         }
2724
2725         return rc;
2726 }
2727
2728 static void
2729 cifs_uncached_readv_complete(struct work_struct *work)
2730 {
2731         struct cifs_readdata *rdata = container_of(work,
2732                                                 struct cifs_readdata, work);
2733
2734         complete(&rdata->done);
2735         kref_put(&rdata->refcount, cifs_uncached_readdata_release);
2736 }
2737
2738 static int
2739 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
2740                         struct cifs_readdata *rdata, unsigned int len)
2741 {
2742         int total_read = 0, result = 0;
2743         unsigned int i;
2744         unsigned int nr_pages = rdata->nr_pages;
2745         struct kvec iov;
2746
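        /*
         * The server may return less data than requested: fill whole pages
         * while data remains, zero-fill the tail of a final partial page,
         * and release any pages beyond the received length.
         */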
2747         rdata->tailsz = PAGE_SIZE;
2748         for (i = 0; i < nr_pages; i++) {
2749                 struct page *page = rdata->pages[i];
2750
2751                 if (len >= PAGE_SIZE) {
2752                         /* enough data to fill the page */
2753                         iov.iov_base = kmap(page);
2754                         iov.iov_len = PAGE_SIZE;
2755                         cifs_dbg(FYI, "%u: iov_base=%p iov_len=%zu\n",
2756                                  i, iov.iov_base, iov.iov_len);
2757                         len -= PAGE_SIZE;
2758                 } else if (len > 0) {
2759                         /* enough for partial page, fill and zero the rest */
2760                         iov.iov_base = kmap(page);
2761                         iov.iov_len = len;
2762                         cifs_dbg(FYI, "%u: iov_base=%p iov_len=%zu\n",
2763                                  i, iov.iov_base, iov.iov_len);
2764                         memset(iov.iov_base + len, '\0', PAGE_SIZE - len);
2765                         rdata->tailsz = len;
2766                         len = 0;
2767                 } else {
2768                         /* no need to hold page hostage */
2769                         rdata->pages[i] = NULL;
2770                         rdata->nr_pages--;
2771                         put_page(page);
2772                         continue;
2773                 }
2774
2775                 result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
2776                 kunmap(page);
2777                 if (result < 0)
2778                         break;
2779
2780                 total_read += result;
2781         }
2782
2783         return total_read > 0 ? total_read : result;
2784 }
2785
2786 static ssize_t
2787 cifs_iovec_read(struct file *file, const struct iovec *iov,
2788                  unsigned long nr_segs, loff_t *poffset)
2789 {
2790         ssize_t rc;
2791         size_t len, cur_len;
2792         ssize_t total_read = 0;
2793         loff_t offset = *poffset;
2794         unsigned int npages;
2795         struct cifs_sb_info *cifs_sb;
2796         struct cifs_tcon *tcon;
2797         struct cifsFileInfo *open_file;
2798         struct cifs_readdata *rdata, *tmp;
2799         struct list_head rdata_list;
2800         pid_t pid;
2801
2802         if (!nr_segs)
2803                 return 0;
2804
2805         len = iov_length(iov, nr_segs);
2806         if (!len)
2807                 return 0;
2808
2809         INIT_LIST_HEAD(&rdata_list);
2810         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2811         open_file = file->private_data;
2812         tcon = tlink_tcon(open_file->tlink);
2813
2814         if (!tcon->ses->server->ops->async_readv)
2815                 return -ENOSYS;
2816
2817         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2818                 pid = open_file->pid;
2819         else
2820                 pid = current->tgid;
2821
2822         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
2823                 cifs_dbg(FYI, "attempting read on write only file instance\n");
2824
2825         do {
                cur_len = min_t(size_t, len - total_read, cifs_sb->rsize);
2827                 npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);
2828
2829                 /* allocate a readdata struct */
2830                 rdata = cifs_readdata_alloc(npages,
2831                                             cifs_uncached_readv_complete);
                if (!rdata) {
                        /*
                         * Don't take the error path here: rdata is NULL,
                         * so the kref_put there would oops. Just stop
                         * submitting new requests.
                         */
                        rc = -ENOMEM;
                        break;
                }
2836
2837                 rc = cifs_read_allocate_pages(rdata, npages);
2838                 if (rc)
2839                         goto error;
2840
2841                 rdata->cfile = cifsFileInfo_get(open_file);
2842                 rdata->nr_pages = npages;
2843                 rdata->offset = offset;
2844                 rdata->bytes = cur_len;
2845                 rdata->pid = pid;
2846                 rdata->pagesz = PAGE_SIZE;
2847                 rdata->read_into_pages = cifs_uncached_read_into_pages;
2848
2849                 rc = cifs_retry_async_readv(rdata);
2850 error:
2851                 if (rc) {
2852                         kref_put(&rdata->refcount,
2853                                  cifs_uncached_readdata_release);
2854                         break;
2855                 }
2856
2857                 list_add_tail(&rdata->list, &rdata_list);
2858                 offset += cur_len;
2859                 len -= cur_len;
2860         } while (len > 0);
2861
        /* if at least one read request was sent successfully, reset rc */
2863         if (!list_empty(&rdata_list))
2864                 rc = 0;
2865
2866         /* the loop below should proceed in the order of increasing offsets */
2867 restart_loop:
2868         list_for_each_entry_safe(rdata, tmp, &rdata_list, list) {
2869                 if (!rc) {
2870                         ssize_t copied;
2871
2872                         /* FIXME: freezable sleep too? */
2873                         rc = wait_for_completion_killable(&rdata->done);
2874                         if (rc)
2875                                 rc = -EINTR;
2876                         else if (rdata->result)
2877                                 rc = rdata->result;
2878                         else {
2879                                 rc = cifs_readdata_to_iov(rdata, iov,
2880                                                         nr_segs, *poffset,
2881                                                         &copied);
2882                                 total_read += copied;
2883                         }
2884
2885                         /* resend call if it's a retryable error */
2886                         if (rc == -EAGAIN) {
2887                                 rc = cifs_retry_async_readv(rdata);
2888                                 goto restart_loop;
2889                         }
2890                 }
2891                 list_del_init(&rdata->list);
2892                 kref_put(&rdata->refcount, cifs_uncached_readdata_release);
2893         }
2894
2895         cifs_stats_bytes_read(tcon, total_read);
2896         *poffset += total_read;
2897
2898         /* mask nodata case */
2899         if (rc == -ENODATA)
2900                 rc = 0;
2901
2902         return total_read ? total_read : rc;
2903 }
2904
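/* aio entry point for uncached reads; advances ki_pos by the amount read */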
2905 ssize_t cifs_user_readv(struct kiocb *iocb, const struct iovec *iov,
2906                                unsigned long nr_segs, loff_t pos)
2907 {
2908         ssize_t read;
2909
2910         read = cifs_iovec_read(iocb->ki_filp, iov, nr_segs, &pos);
2911         if (read > 0)
2912                 iocb->ki_pos = pos;
2913
2914         return read;
2915 }
2916
2917 ssize_t
2918 cifs_strict_readv(struct kiocb *iocb, const struct iovec *iov,
2919                   unsigned long nr_segs, loff_t pos)
2920 {
2921         struct inode *inode = file_inode(iocb->ki_filp);
2922         struct cifsInodeInfo *cinode = CIFS_I(inode);
2923         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2924         struct cifsFileInfo *cfile = (struct cifsFileInfo *)
2925                                                 iocb->ki_filp->private_data;
2926         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
2927         int rc = -EACCES;
2928
        /*
         * In strict cache mode we need to read from the server all the time
         * if we don't have a level II oplock, because the server can delay
         * the mtime change, so we can't decide whether to invalidate the
         * inode. Reading pages can also fail if there are mandatory locks
         * on pages affected by this read but not on the region from pos to
         * pos+len-1.
         */
2937         if (!cinode->clientCanCacheRead)
2938                 return cifs_user_readv(iocb, iov, nr_segs, pos);
2939
2940         if (cap_unix(tcon->ses) &&
2941             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
2942             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
2943                 return generic_file_aio_read(iocb, iov, nr_segs, pos);
2944
2945         /*
2946          * We need to hold the sem to be sure nobody modifies lock list
2947          * with a brlock that prevents reading.
2948          */
2949         down_read(&cinode->lock_sem);
2950         if (!cifs_find_lock_conflict(cfile, pos, iov_length(iov, nr_segs),
2951                                      tcon->ses->server->vals->shared_lock_type,
2952                                      NULL, CIFS_READ_OP))
2953                 rc = generic_file_aio_read(iocb, iov, nr_segs, pos);
2954         up_read(&cinode->lock_sem);
2955         return rc;
2956 }
2957
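/*
 * Synchronous read helper for the page cache paths: issue sync_read calls
 * of up to rsize bytes each until read_size bytes have been read,
 * reopening the file handle and retrying whenever a call returns -EAGAIN.
 */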
2958 static ssize_t
2959 cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
2960 {
2961         int rc = -EACCES;
2962         unsigned int bytes_read = 0;
2963         unsigned int total_read;
2964         unsigned int current_read_size;
2965         unsigned int rsize;
2966         struct cifs_sb_info *cifs_sb;
2967         struct cifs_tcon *tcon;
2968         struct TCP_Server_Info *server;
2969         unsigned int xid;
2970         char *cur_offset;
2971         struct cifsFileInfo *open_file;
2972         struct cifs_io_parms io_parms;
2973         int buf_type = CIFS_NO_BUFFER;
2974         __u32 pid;
2975
2976         xid = get_xid();
2977         cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
2978
2979         /* FIXME: set up handlers for larger reads and/or convert to async */
2980         rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);
2981
2982         if (file->private_data == NULL) {
2983                 rc = -EBADF;
2984                 free_xid(xid);
2985                 return rc;
2986         }
2987         open_file = file->private_data;
2988         tcon = tlink_tcon(open_file->tlink);
2989         server = tcon->ses->server;
2990
2991         if (!server->ops->sync_read) {
2992                 free_xid(xid);
2993                 return -ENOSYS;
2994         }
2995
2996         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
2997                 pid = open_file->pid;
2998         else
2999                 pid = current->tgid;
3000
3001         if ((file->f_flags & O_ACCMODE) == O_WRONLY)
3002                 cifs_dbg(FYI, "attempting read on write only file instance\n");
3003
3004         for (total_read = 0, cur_offset = read_data; read_size > total_read;
3005              total_read += bytes_read, cur_offset += bytes_read) {
3006                 current_read_size = min_t(uint, read_size - total_read, rsize);
                /*
                 * For Windows ME and 9x do not request more than the
                 * negotiated buffer size, since such servers will refuse
                 * the read otherwise.
                 */
3011                 if ((tcon->ses) && !(tcon->ses->capabilities &
3012                                 tcon->ses->server->vals->cap_large_files)) {
3013                         current_read_size = min_t(uint, current_read_size,
3014                                         CIFSMaxBufSize);
3015                 }
3016                 rc = -EAGAIN;
3017                 while (rc == -EAGAIN) {
3018                         if (open_file->invalidHandle) {
3019                                 rc = cifs_reopen_file(open_file, true);
3020                                 if (rc != 0)
3021                                         break;
3022                         }
3023                         io_parms.pid = pid;
3024                         io_parms.tcon = tcon;
3025                         io_parms.offset = *offset;
3026                         io_parms.length = current_read_size;
3027                         rc = server->ops->sync_read(xid, open_file, &io_parms,
3028                                                     &bytes_read, &cur_offset,
3029                                                     &buf_type);
3030                 }
3031                 if (rc || (bytes_read == 0)) {
3032                         if (total_read) {
3033                                 break;
3034                         } else {
3035                                 free_xid(xid);
3036                                 return rc;
3037                         }
                } else {
                        /* account only the bytes read in this iteration */
                        cifs_stats_bytes_read(tcon, bytes_read);
                        *offset += bytes_read;
                }
3042         }
3043         free_xid(xid);
3044         return total_read;
3045 }
3046
3047 /*
3048  * If the page is mmap'ed into a process' page tables, then we need to make
3049  * sure that it doesn't change while being written back.
3050  */
3051 static int
3052 cifs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
3053 {
3054         struct page *page = vmf->page;
3055
3056         lock_page(page);
3057         return VM_FAULT_LOCKED;
3058 }
3059
static const struct vm_operations_struct cifs_file_vm_ops = {
3061         .fault = filemap_fault,
3062         .page_mkwrite = cifs_page_mkwrite,
3063         .remap_pages = generic_file_remap_pages,
3064 };
3065
3066 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
3067 {
3068         int rc, xid;
3069         struct inode *inode = file_inode(file);
3070
3071         xid = get_xid();
3072
        if (!CIFS_I(inode)->clientCanCacheRead) {
                rc = cifs_invalidate_mapping(inode);
                if (rc) {
                        /* don't leak the xid on this early return */
                        free_xid(xid);
                        return rc;
                }
        }
3078
3079         rc = generic_file_mmap(file, vma);
3080         if (rc == 0)
3081                 vma->vm_ops = &cifs_file_vm_ops;
3082         free_xid(xid);
3083         return rc;
3084 }
3085
3086 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
3087 {
3088         int rc, xid;
3089
3090         xid = get_xid();
3091         rc = cifs_revalidate_file(file);
3092         if (rc) {
3093                 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
3094                          rc);
3095                 free_xid(xid);
3096                 return rc;
3097         }
3098         rc = generic_file_mmap(file, vma);
3099         if (rc == 0)
3100                 vma->vm_ops = &cifs_file_vm_ops;
3101         free_xid(xid);
3102         return rc;
3103 }
3104
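/*
 * Completion work for a readpages request: put each page on the LRU,
 * mark it uptodate and hand it to fscache if the read succeeded, then
 * unlock it and drop the page and rdata references.
 */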
3105 static void
3106 cifs_readv_complete(struct work_struct *work)
3107 {
3108         unsigned int i;
3109         struct cifs_readdata *rdata = container_of(work,
3110                                                 struct cifs_readdata, work);
3111
3112         for (i = 0; i < rdata->nr_pages; i++) {
3113                 struct page *page = rdata->pages[i];
3114
3115                 lru_cache_add_file(page);
3116
3117                 if (rdata->result == 0) {
3118                         flush_dcache_page(page);
3119                         SetPageUptodate(page);
3120                 }
3121
3122                 unlock_page(page);
3123
3124                 if (rdata->result == 0)
3125                         cifs_readpage_to_fscache(rdata->mapping->host, page);
3126
3127                 page_cache_release(page);
3128                 rdata->pages[i] = NULL;
3129         }
3130         kref_put(&rdata->refcount, cifs_readdata_release);
3131 }
3132
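/*
 * Like cifs_uncached_read_into_pages, but for pagecache pages. Pages
 * beyond the server's EOF are zero-filled and marked uptodate rather than
 * read, so the VFS does not keep retrying them while writeback catches
 * the file size up.
 */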
3133 static int
3134 cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
3135                         struct cifs_readdata *rdata, unsigned int len)
3136 {
3137         int total_read = 0, result = 0;
3138         unsigned int i;
3139         u64 eof;
3140         pgoff_t eof_index;
3141         unsigned int nr_pages = rdata->nr_pages;
3142         struct kvec iov;
3143
3144         /* determine the eof that the server (probably) has */
3145         eof = CIFS_I(rdata->mapping->host)->server_eof;
3146         eof_index = eof ? (eof - 1) >> PAGE_CACHE_SHIFT : 0;
3147         cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
3148
3149         rdata->tailsz = PAGE_CACHE_SIZE;
3150         for (i = 0; i < nr_pages; i++) {
3151                 struct page *page = rdata->pages[i];
3152
3153                 if (len >= PAGE_CACHE_SIZE) {
3154                         /* enough data to fill the page */
3155                         iov.iov_base = kmap(page);
3156                         iov.iov_len = PAGE_CACHE_SIZE;
3157                         cifs_dbg(FYI, "%u: idx=%lu iov_base=%p iov_len=%zu\n",
3158                                  i, page->index, iov.iov_base, iov.iov_len);
3159                         len -= PAGE_CACHE_SIZE;
3160                 } else if (len > 0) {
3161                         /* enough for partial page, fill and zero the rest */
3162                         iov.iov_base = kmap(page);
3163                         iov.iov_len = len;
3164                         cifs_dbg(FYI, "%u: idx=%lu iov_base=%p iov_len=%zu\n",
3165                                  i, page->index, iov.iov_base, iov.iov_len);
3166                         memset(iov.iov_base + len,
3167                                 '\0', PAGE_CACHE_SIZE - len);
3168                         rdata->tailsz = len;
3169                         len = 0;
3170                 } else if (page->index > eof_index) {
3171                         /*
3172                          * The VFS will not try to do readahead past the
3173                          * i_size, but it's possible that we have outstanding
3174                          * writes with gaps in the middle and the i_size hasn't
3175                          * caught up yet. Populate those with zeroed out pages
3176                          * to prevent the VFS from repeatedly attempting to
3177                          * fill them until the writes are flushed.
3178                          */
3179                         zero_user(page, 0, PAGE_CACHE_SIZE);
3180                         lru_cache_add_file(page);
3181                         flush_dcache_page(page);
3182                         SetPageUptodate(page);
3183                         unlock_page(page);
3184                         page_cache_release(page);
3185                         rdata->pages[i] = NULL;
3186                         rdata->nr_pages--;
3187                         continue;
3188                 } else {
3189                         /* no need to hold page hostage */
3190                         lru_cache_add_file(page);
3191                         unlock_page(page);
3192                         page_cache_release(page);
3193                         rdata->pages[i] = NULL;
3194                         rdata->nr_pages--;
3195                         continue;
3196                 }
3197
3198                 result = cifs_readv_from_socket(server, &iov, 1, iov.iov_len);
3199                 kunmap(page);
3200                 if (result < 0)
3201                         break;
3202
3203                 total_read += result;
3204         }
3205
3206         return total_read > 0 ? total_read : result;
3207 }
3208
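/*
 * ->readpages for cifs: let fscache satisfy what it can, then batch runs
 * of contiguous pages from page_list into rsize-sized async read requests.
 */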
3209 static int cifs_readpages(struct file *file, struct address_space *mapping,
3210         struct list_head *page_list, unsigned num_pages)
3211 {
3212         int rc;
3213         struct list_head tmplist;
3214         struct cifsFileInfo *open_file = file->private_data;
3215         struct cifs_sb_info *cifs_sb = CIFS_SB(file->f_path.dentry->d_sb);
3216         unsigned int rsize = cifs_sb->rsize;
3217         pid_t pid;
3218
        /*
         * Give up immediately if rsize is too small to read an entire page.
         * The VFS will fall back to readpage. We should never reach this
         * point, however, since we set ra_pages to 0 when rsize is smaller
         * than a cache page.
         */
3225         if (unlikely(rsize < PAGE_CACHE_SIZE))
3226                 return 0;
3227
        /*
         * Read as many pages as possible from fscache. This returns -ENOBUFS
         * immediately if the inode's cache cookie is negative.
         */
3232         rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
3233                                          &num_pages);
3234         if (rc == 0)
3235                 return rc;
3236
3237         if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
3238                 pid = open_file->pid;
3239         else
3240                 pid = current->tgid;
3241
3242         rc = 0;
3243         INIT_LIST_HEAD(&tmplist);
3244
3245         cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
3246                  __func__, file, mapping, num_pages);
3247
        /*
         * Start with the page at the end of the list and move it to the
         * private list. Do the same with any following pages until we hit
         * the rsize limit, hit an index discontinuity, or run out of
         * pages. Issue the async read and then start the loop again until
         * the list is empty.
         *
         * Note that list order is important. The page_list is in the order
         * of declining indexes. When we put the pages in rdata->pages, we
         * want them in increasing order.
         */
3259         while (!list_empty(page_list)) {
3260                 unsigned int i;
3261                 unsigned int bytes = PAGE_CACHE_SIZE;
3262                 unsigned int expected_index;
3263                 unsigned int nr_pages = 1;
3264                 loff_t offset;
3265                 struct page *page, *tpage;
3266                 struct cifs_readdata *rdata;
3267
3268                 page = list_entry(page_list->prev, struct page, lru);
3269
3270                 /*
3271                  * Lock the page and put it in the cache. Since no one else
3272                  * should have access to this page, we're safe to simply set
3273                  * PG_locked without checking it first.
3274                  */
3275                 __set_page_locked(page);
3276                 rc = add_to_page_cache_locked(page, mapping,
3277                                               page->index, GFP_KERNEL);
3278
3279                 /* give up if we can't stick it in the cache */
3280                 if (rc) {
3281                         __clear_page_locked(page);
3282                         break;
3283                 }
3284
3285                 /* move first page to the tmplist */
3286                 offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3287                 list_move_tail(&page->lru, &tmplist);
3288
3289                 /* now try and add more pages onto the request */
3290                 expected_index = page->index + 1;
3291                 list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
                        /* discontinuity? */
3293                         if (page->index != expected_index)
3294                                 break;
3295
3296                         /* would this page push the read over the rsize? */
3297                         if (bytes + PAGE_CACHE_SIZE > rsize)
3298                                 break;
3299
3300                         __set_page_locked(page);
3301                         if (add_to_page_cache_locked(page, mapping,
3302                                                 page->index, GFP_KERNEL)) {
3303                                 __clear_page_locked(page);
3304                                 break;
3305                         }
3306                         list_move_tail(&page->lru, &tmplist);
3307                         bytes += PAGE_CACHE_SIZE;
3308                         expected_index++;
3309                         nr_pages++;
3310                 }
3311
3312                 rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
3313                 if (!rdata) {
3314                         /* best to give up if we're out of mem */
3315                         list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3316                                 list_del(&page->lru);
3317                                 lru_cache_add_file(page);
3318                                 unlock_page(page);
3319                                 page_cache_release(page);
3320                         }
3321                         rc = -ENOMEM;
3322                         break;
3323                 }
3324
3325                 rdata->cfile = cifsFileInfo_get(open_file);
3326                 rdata->mapping = mapping;
3327                 rdata->offset = offset;
3328                 rdata->bytes = bytes;
3329                 rdata->pid = pid;
3330                 rdata->pagesz = PAGE_CACHE_SIZE;
3331                 rdata->read_into_pages = cifs_readpages_read_into_pages;
3332
3333                 list_for_each_entry_safe(page, tpage, &tmplist, lru) {
3334                         list_del(&page->lru);
3335                         rdata->pages[rdata->nr_pages++] = page;
3336                 }
3337
3338                 rc = cifs_retry_async_readv(rdata);
3339                 if (rc != 0) {
3340                         for (i = 0; i < rdata->nr_pages; i++) {
3341                                 page = rdata->pages[i];
3342                                 lru_cache_add_file(page);
3343                                 unlock_page(page);
3344                                 page_cache_release(page);
3345                         }
3346                         kref_put(&rdata->refcount, cifs_readdata_release);
3347                         break;
3348                 }
3349
3350                 kref_put(&rdata->refcount, cifs_readdata_release);
3351         }
3352
3353         return rc;
3354 }
3355
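/*
 * Read a single page, trying fscache first and falling back to a
 * synchronous read from the server. On success the rest of the page is
 * zeroed, the page is marked uptodate, and a copy is pushed to fscache.
 */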
3356 static int cifs_readpage_worker(struct file *file, struct page *page,
3357         loff_t *poffset)
3358 {
3359         char *read_data;
3360         int rc;
3361
3362         /* Is the page cached? */
3363         rc = cifs_readpage_from_fscache(file_inode(file), page);
3364         if (rc == 0)
3365                 goto read_complete;
3366
3367         page_cache_get(page);
3368         read_data = kmap(page);
        /* for reads over a certain size we could initiate async read-ahead */
3370
3371         rc = cifs_read(file, read_data, PAGE_CACHE_SIZE, poffset);
3372
3373         if (rc < 0)
3374                 goto io_error;
3375         else
3376                 cifs_dbg(FYI, "Bytes read %d\n", rc);
3377
3378         file_inode(file)->i_atime =
3379                 current_fs_time(file_inode(file)->i_sb);
3380
3381         if (PAGE_CACHE_SIZE > rc)
3382                 memset(read_data + rc, 0, PAGE_CACHE_SIZE - rc);
3383
3384         flush_dcache_page(page);
3385         SetPageUptodate(page);
3386
3387         /* send this page to the cache */
3388         cifs_readpage_to_fscache(file_inode(file), page);
3389
3390         rc = 0;
3391
3392 io_error:
3393         kunmap(page);
3394         page_cache_release(page);
3395
3396 read_complete:
3397         return rc;
3398 }
3399
3400 static int cifs_readpage(struct file *file, struct page *page)
3401 {
3402         loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
3403         int rc = -EACCES;
3404         unsigned int xid;
3405
3406         xid = get_xid();
3407
3408         if (file->private_data == NULL) {
3409                 rc = -EBADF;
3410                 free_xid(xid);
3411                 return rc;
3412         }
3413
        /* print the full offset; casting to int would truncate large files */
        cifs_dbg(FYI, "readpage %p at offset %lld 0x%llx\n",
                 page, (long long)offset, (unsigned long long)offset);
3416
3417         rc = cifs_readpage_worker(file, page, &offset);
3418
3419         unlock_page(page);
3420
3421         free_xid(xid);
3422         return rc;
3423 }
3424
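/* Return 1 if any open file on the inode was opened with write access. */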
3425 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
3426 {
3427         struct cifsFileInfo *open_file;
3428
3429         spin_lock(&cifs_file_list_lock);
3430         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
3431                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
3432                         spin_unlock(&cifs_file_list_lock);
3433                         return 1;
3434                 }
3435         }
3436         spin_unlock(&cifs_file_list_lock);
3437         return 0;
3438 }
3439
/*
 * We do not want to update the file size from the server for inodes open
 * for write, to avoid races with writepage extending the file. In the
 * future we could consider allowing the inode to be refreshed only on
 * increases in the file size, but this is tricky to do without racing
 * with writebehind page caching in the current Linux kernel design.
 */
3446 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
3447 {
3448         if (!cifsInode)
3449                 return true;
3450
3451         if (is_inode_writable(cifsInode)) {
3452                 /* This inode is open for write at least once */
3453                 struct cifs_sb_info *cifs_sb;
3454
3455                 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
                if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
                        /*
                         * With directio there is no page cache to corrupt,
                         * so we can change the size safely.
                         */
                        return true;
                }
3461
3462                 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
3463                         return true;
3464
3465                 return false;
3466         } else
3467                 return true;
3468 }
3469
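/*
 * ->write_begin: grab (and if necessary fill) the pagecache page that the
 * coming write will touch. The read from the server is skipped when the
 * write covers the whole page or, with a read oplock, when none of the
 * existing data in the page will be used.
 */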
3470 static int cifs_write_begin(struct file *file, struct address_space *mapping,
3471                         loff_t pos, unsigned len, unsigned flags,
3472                         struct page **pagep, void **fsdata)
3473 {
3474         pgoff_t index = pos >> PAGE_CACHE_SHIFT;
3475         loff_t offset = pos & (PAGE_CACHE_SIZE - 1);
3476         loff_t page_start = pos & PAGE_MASK;
3477         loff_t i_size;
3478         struct page *page;
3479         int rc = 0;
3480
3481         cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);
3482
3483         page = grab_cache_page_write_begin(mapping, index, flags);
3484         if (!page) {
3485                 rc = -ENOMEM;
3486                 goto out;
3487         }
3488
3489         if (PageUptodate(page))
3490                 goto out;
3491
3492         /*
3493          * If we write a full page it will be up to date, no need to read from
3494          * the server. If the write is short, we'll end up doing a sync write
3495          * instead.
3496          */
3497         if (len == PAGE_CACHE_SIZE)
3498                 goto out;
3499
3500         /*
3501          * optimize away the read when we have an oplock, and we're not
3502          * expecting to use any of the data we'd be reading in. That
3503          * is, when the page lies beyond the EOF, or straddles the EOF
3504          * and the write will cover all of the existing data.
3505          */
3506         if (CIFS_I(mapping->host)->clientCanCacheRead) {
3507                 i_size = i_size_read(mapping->host);
3508                 if (page_start >= i_size ||
3509                     (offset == 0 && (pos + len) >= i_size)) {
3510                         zero_user_segments(page, 0, offset,
3511                                            offset + len,
3512                                            PAGE_CACHE_SIZE);
3513                         /*
3514                          * PageChecked means that the parts of the page
3515                          * to which we're not writing are considered up
3516                          * to date. Once the data is copied to the
3517                          * page, it can be set uptodate.
3518                          */
3519                         SetPageChecked(page);
3520                         goto out;
3521                 }
3522         }
3523
3524         if ((file->f_flags & O_ACCMODE) != O_WRONLY) {
3525                 /*
3526                  * might as well read a page, it is fast enough. If we get
3527                  * an error, we don't need to return it. cifs_write_end will
3528                  * do a sync write instead since PG_uptodate isn't set.
3529                  */
3530                 cifs_readpage_worker(file, page, &page_start);
        } else {
                /*
                 * We could try using another file handle if there is one,
                 * but how would we lock it to prevent a close of that
                 * handle from racing with this read? In any case, the page
                 * will be written out by write_end, so this is fine.
                 */
        }
3537 out:
3538         *pagep = page;
3539         return rc;
3540 }
3541
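/*
 * ->releasepage: a page with private data cannot be released; otherwise
 * let fscache decide whether to give up its reference on the page.
 */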
3542 static int cifs_release_page(struct page *page, gfp_t gfp)
3543 {
3544         if (PagePrivate(page))
3545                 return 0;
3546
3547         return cifs_fscache_release_page(page, gfp);
3548 }
3549
3550 static void cifs_invalidate_page(struct page *page, unsigned int offset,
3551                                  unsigned int length)
3552 {
3553         struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
3554
3555         if (offset == 0 && length == PAGE_CACHE_SIZE)
3556                 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
3557 }
3558
3559 static int cifs_launder_page(struct page *page)
3560 {
3561         int rc = 0;
3562         loff_t range_start = page_offset(page);
3563         loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
3564         struct writeback_control wbc = {
3565                 .sync_mode = WB_SYNC_ALL,
3566                 .nr_to_write = 0,
3567                 .range_start = range_start,
3568                 .range_end = range_end,
3569         };
3570
3571         cifs_dbg(FYI, "Launder page: %p\n", page);
3572
3573         if (clear_page_dirty_for_io(page))
3574                 rc = cifs_writepage_locked(page, &wbc);
3575
3576         cifs_fscache_invalidate_page(page, page->mapping->host);
3577         return rc;
3578 }
3579
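/*
 * Work handler for an oplock break from the server: drop read caching if
 * mandatory locks make it unsafe, flush (and if necessary invalidate)
 * cached data, push cached byte-range locks to the server, and finally
 * acknowledge the break unless it was cancelled.
 */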
3580 void cifs_oplock_break(struct work_struct *work)
3581 {
3582         struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
3583                                                   oplock_break);
3584         struct inode *inode = cfile->dentry->d_inode;
3585         struct cifsInodeInfo *cinode = CIFS_I(inode);
3586         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
3587         int rc = 0;
3588
3589         if (!cinode->clientCanCacheAll && cinode->clientCanCacheRead &&
3590                                                 cifs_has_mand_locks(cinode)) {
3591                 cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
3592                          inode);
3593                 cinode->clientCanCacheRead = false;
3594         }
3595
3596         if (inode && S_ISREG(inode->i_mode)) {
3597                 if (cinode->clientCanCacheRead)
3598                         break_lease(inode, O_RDONLY);
3599                 else
3600                         break_lease(inode, O_WRONLY);
3601                 rc = filemap_fdatawrite(inode->i_mapping);
                if (!cinode->clientCanCacheRead) {
3603                         rc = filemap_fdatawait(inode->i_mapping);
3604                         mapping_set_error(inode->i_mapping, rc);
3605                         cifs_invalidate_mapping(inode);
3606                 }
3607                 cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
3608         }
3609
3610         rc = cifs_push_locks(cfile);
3611         if (rc)
3612                 cifs_dbg(VFS, "Push locks rc = %d\n", rc);
3613
        /*
         * Releasing a stale oplock after a recent reconnect of the SMB
         * session (using a now-incorrect file handle) is not a data
         * integrity issue, but do not bother sending an oplock release if
         * the session to the server is still disconnected, since the
         * server has already released the oplock.
         */
3620         if (!cfile->oplock_break_cancelled) {
3621                 rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
3622                                                              cinode);
3623                 cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
3624         }
3625 }
3626
3627 const struct address_space_operations cifs_addr_ops = {
3628         .readpage = cifs_readpage,
3629         .readpages = cifs_readpages,
3630         .writepage = cifs_writepage,
3631         .writepages = cifs_writepages,
3632         .write_begin = cifs_write_begin,
3633         .write_end = cifs_write_end,
3634         .set_page_dirty = __set_page_dirty_nobuffers,
3635         .releasepage = cifs_release_page,
3636         .invalidatepage = cifs_invalidate_page,
3637         .launder_page = cifs_launder_page,
3638 };
3639
3640 /*
3641  * cifs_readpages requires the server to support a buffer large enough to
3642  * contain the header plus one complete page of data.  Otherwise, we need
3643  * to leave cifs_readpages out of the address space operations.
3644  */
3645 const struct address_space_operations cifs_addr_ops_smallbuf = {
3646         .readpage = cifs_readpage,
3647         .writepage = cifs_writepage,
3648         .writepages = cifs_writepages,
3649         .write_begin = cifs_write_begin,
3650         .write_end = cifs_write_end,
3651         .set_page_dirty = __set_page_dirty_nobuffers,
3652         .releasepage = cifs_release_page,
3653         .invalidatepage = cifs_invalidate_page,
3654         .launder_page = cifs_launder_page,
3655 };