GNU Linux-libre 4.14.266-gnu1
[releases.git] / fs / cifs / file.c
1 /*
2  *   fs/cifs/file.c
3  *
4  *   vfs operations that deal with files
5  *
6  *   Copyright (C) International Business Machines  Corp., 2002,2010
7  *   Author(s): Steve French (sfrench@us.ibm.com)
8  *              Jeremy Allison (jra@samba.org)
9  *
10  *   This library is free software; you can redistribute it and/or modify
11  *   it under the terms of the GNU Lesser General Public License as published
12  *   by the Free Software Foundation; either version 2.1 of the License, or
13  *   (at your option) any later version.
14  *
15  *   This library is distributed in the hope that it will be useful,
16  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
17  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
18  *   the GNU Lesser General Public License for more details.
19  *
20  *   You should have received a copy of the GNU Lesser General Public License
21  *   along with this library; if not, write to the Free Software
22  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
23  */
24 #include <linux/fs.h>
25 #include <linux/backing-dev.h>
26 #include <linux/stat.h>
27 #include <linux/fcntl.h>
28 #include <linux/pagemap.h>
29 #include <linux/pagevec.h>
30 #include <linux/writeback.h>
31 #include <linux/task_io_accounting_ops.h>
32 #include <linux/delay.h>
33 #include <linux/mount.h>
34 #include <linux/slab.h>
35 #include <linux/swap.h>
36 #include <asm/div64.h>
37 #include "cifsfs.h"
38 #include "cifspdu.h"
39 #include "cifsglob.h"
40 #include "cifsproto.h"
41 #include "cifs_unicode.h"
42 #include "cifs_debug.h"
43 #include "cifs_fs_sb.h"
44 #include "fscache.h"
45
46
47 static inline int cifs_convert_flags(unsigned int flags)
48 {
49         if ((flags & O_ACCMODE) == O_RDONLY)
50                 return GENERIC_READ;
51         else if ((flags & O_ACCMODE) == O_WRONLY)
52                 return GENERIC_WRITE;
53         else if ((flags & O_ACCMODE) == O_RDWR) {
54                 /* GENERIC_ALL is too much permission to request
55                    can cause unnecessary access denied on create */
56                 /* return GENERIC_ALL; */
57                 return (GENERIC_READ | GENERIC_WRITE);
58         }
59
60         return (READ_CONTROL | FILE_WRITE_ATTRIBUTES | FILE_READ_ATTRIBUTES |
61                 FILE_WRITE_EA | FILE_APPEND_DATA | FILE_WRITE_DATA |
62                 FILE_READ_DATA);
63 }
64
65 static u32 cifs_posix_convert_flags(unsigned int flags)
66 {
67         u32 posix_flags = 0;
68
69         if ((flags & O_ACCMODE) == O_RDONLY)
70                 posix_flags = SMB_O_RDONLY;
71         else if ((flags & O_ACCMODE) == O_WRONLY)
72                 posix_flags = SMB_O_WRONLY;
73         else if ((flags & O_ACCMODE) == O_RDWR)
74                 posix_flags = SMB_O_RDWR;
75
76         if (flags & O_CREAT) {
77                 posix_flags |= SMB_O_CREAT;
78                 if (flags & O_EXCL)
79                         posix_flags |= SMB_O_EXCL;
80         } else if (flags & O_EXCL)
81                 cifs_dbg(FYI, "Application %s pid %d has incorrectly set O_EXCL flag but not O_CREAT on file open. Ignoring O_EXCL\n",
82                          current->comm, current->tgid);
83
84         if (flags & O_TRUNC)
85                 posix_flags |= SMB_O_TRUNC;
86         /* be safe and imply O_SYNC for O_DSYNC */
87         if (flags & O_DSYNC)
88                 posix_flags |= SMB_O_SYNC;
89         if (flags & O_DIRECTORY)
90                 posix_flags |= SMB_O_DIRECTORY;
91         if (flags & O_NOFOLLOW)
92                 posix_flags |= SMB_O_NOFOLLOW;
93         if (flags & O_DIRECT)
94                 posix_flags |= SMB_O_DIRECT;
95
96         return posix_flags;
97 }
98
99 static inline int cifs_get_disposition(unsigned int flags)
100 {
101         if ((flags & (O_CREAT | O_EXCL)) == (O_CREAT | O_EXCL))
102                 return FILE_CREATE;
103         else if ((flags & (O_CREAT | O_TRUNC)) == (O_CREAT | O_TRUNC))
104                 return FILE_OVERWRITE_IF;
105         else if ((flags & O_CREAT) == O_CREAT)
106                 return FILE_OPEN_IF;
107         else if ((flags & O_TRUNC) == O_TRUNC)
108                 return FILE_OVERWRITE;
109         else
110                 return FILE_OPEN;
111 }
112
/*
 * Open a file using the SMB POSIX extensions (CIFSPOSIXCreate) and,
 * optionally, instantiate or refresh the corresponding inode.
 *
 * @full_path: share-relative path of the file
 * @pinode:    if non-NULL, *pinode is created (when NULL on entry) or
 *             revalidated from the open response
 * @sb:        superblock of the mount
 * @mode:      create mode; the caller's umask is applied below
 * @f_flags:   POSIX open flags, translated via cifs_posix_convert_flags()
 * @poplock:   out: oplock granted by the server
 * @pnetfid:   out: server file handle
 * @xid:       transaction id for this operation
 *
 * Returns 0 on success or a negative errno.
 */
int cifs_posix_open(char *full_path, struct inode **pinode,
			struct super_block *sb, int mode, unsigned int f_flags,
			__u32 *poplock, __u16 *pnetfid, unsigned int xid)
{
	int rc;
	FILE_UNIX_BASIC_INFO *presp_data;
	__u32 posix_flags = 0;
	struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
	struct cifs_fattr fattr;
	struct tcon_link *tlink;
	struct cifs_tcon *tcon;

	cifs_dbg(FYI, "posix open %s\n", full_path);

	presp_data = kzalloc(sizeof(FILE_UNIX_BASIC_INFO), GFP_KERNEL);
	if (presp_data == NULL)
		return -ENOMEM;

	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		rc = PTR_ERR(tlink);
		goto posix_open_ret;
	}

	tcon = tlink_tcon(tlink);
	/* apply the process umask to the requested create mode */
	mode &= ~current_umask();

	posix_flags = cifs_posix_convert_flags(f_flags);
	rc = CIFSPOSIXCreate(xid, tcon, posix_flags, mode, pnetfid, presp_data,
			     poplock, full_path, cifs_sb->local_nls,
			     cifs_remap(cifs_sb));
	cifs_put_tlink(tlink);

	if (rc)
		goto posix_open_ret;

	/* Type of -1 means the server sent no usable file info back */
	if (presp_data->Type == cpu_to_le32(-1))
		goto posix_open_ret; /* open ok, caller does qpathinfo */

	if (!pinode)
		goto posix_open_ret; /* caller does not need info */

	cifs_unix_basic_to_fattr(&fattr, presp_data, cifs_sb);

	/* get new inode and set it up */
	if (*pinode == NULL) {
		cifs_fill_uniqueid(sb, &fattr);
		*pinode = cifs_iget(sb, &fattr);
		if (!*pinode) {
			rc = -ENOMEM;
			goto posix_open_ret;
		}
	} else {
		/* existing inode: revalidate mapping and refresh attributes */
		cifs_revalidate_mapping(*pinode);
		cifs_fattr_to_inode(*pinode, &fattr);
	}

posix_open_ret:
	kfree(presp_data);
	return rc;
}
174
/*
 * Open @full_path via the NT/SMB open path (server->ops->open), used
 * when the POSIX extensions are unavailable or have failed, and then
 * refresh the inode information from the server.
 *
 * On a post-open failure the just-opened handle is closed again and
 * -ESTALE is rewritten to -EOPENSTALE so the caller can retry.
 *
 * Returns 0 on success or a negative errno.
 */
static int
cifs_nt_open(char *full_path, struct inode *inode, struct cifs_sb_info *cifs_sb,
	     struct cifs_tcon *tcon, unsigned int f_flags, __u32 *oplock,
	     struct cifs_fid *fid, unsigned int xid)
{
	int rc;
	int desired_access;
	int disposition;
	int create_options = CREATE_NOT_DIR;
	FILE_ALL_INFO *buf;
	struct TCP_Server_Info *server = tcon->ses->server;
	struct cifs_open_parms oparms;

	if (!server->ops->open)
		return -ENOSYS;

	desired_access = cifs_convert_flags(f_flags);

/*********************************************************************
 *  open flag mapping table:
 *
 *	POSIX Flag            CIFS Disposition
 *	----------            ----------------
 *	O_CREAT               FILE_OPEN_IF
 *	O_CREAT | O_EXCL      FILE_CREATE
 *	O_CREAT | O_TRUNC     FILE_OVERWRITE_IF
 *	O_TRUNC               FILE_OVERWRITE
 *	none of the above     FILE_OPEN
 *
 *	Note that there is not a direct match between disposition
 *	FILE_SUPERSEDE (ie create whether or not file exists although
 *	O_CREAT | O_TRUNC is similar but truncates the existing
 *	file rather than creating a new file as FILE_SUPERSEDE does
 *	(which uses the attributes / metadata passed in on open call)
 *?
 *?  O_SYNC is a reasonable match to CIFS writethrough flag
 *?  and the read write flags match reasonably.  O_LARGEFILE
 *?  is irrelevant because largefile support is always used
 *?  by this client. Flags O_APPEND, O_DIRECT, O_DIRECTORY,
 *	 O_FASYNC, O_NOFOLLOW, O_NONBLOCK need further investigation
 *********************************************************************/

	disposition = cifs_get_disposition(f_flags);

	/* BB pass O_SYNC flag through on file attributes .. BB */

	buf = kmalloc(sizeof(FILE_ALL_INFO), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	if (backup_cred(cifs_sb))
		create_options |= CREATE_OPEN_BACKUP_INTENT;

	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
	if (f_flags & O_SYNC)
		create_options |= CREATE_WRITE_THROUGH;

	if (f_flags & O_DIRECT)
		create_options |= CREATE_NO_BUFFER;

	oparms.tcon = tcon;
	oparms.cifs_sb = cifs_sb;
	oparms.desired_access = desired_access;
	oparms.create_options = create_options;
	oparms.disposition = disposition;
	oparms.path = full_path;
	oparms.fid = fid;
	oparms.reconnect = false;

	rc = server->ops->open(xid, &oparms, oplock, buf);

	if (rc)
		goto out;

	/* refresh inode attributes from the server's open response */
	if (tcon->unix_ext)
		rc = cifs_get_inode_info_unix(&inode, full_path, inode->i_sb,
					      xid);
	else
		rc = cifs_get_inode_info(&inode, full_path, buf, inode->i_sb,
					 xid, fid);

	if (rc) {
		/* don't leak the server handle if inode refresh failed */
		server->ops->close(xid, tcon, fid);
		if (rc == -ESTALE)
			rc = -EOPENSTALE;
	}

out:
	kfree(buf);
	return rc;
}
266
267 static bool
268 cifs_has_mand_locks(struct cifsInodeInfo *cinode)
269 {
270         struct cifs_fid_locks *cur;
271         bool has_locks = false;
272
273         down_read(&cinode->lock_sem);
274         list_for_each_entry(cur, &cinode->llist, llist) {
275                 if (!list_empty(&cur->locks)) {
276                         has_locks = true;
277                         break;
278                 }
279         }
280         up_read(&cinode->lock_sem);
281         return has_locks;
282 }
283
/*
 * Acquire @sem for write by polling down_write_trylock() with a short
 * sleep between attempts rather than blocking in down_write().
 * NOTE(review): presumably this sidesteps a lock-ordering issue with
 * lock_sem on some caller's path -- confirm against the callers before
 * converting back to a plain down_write().
 */
void
cifs_down_write(struct rw_semaphore *sem)
{
	while (!down_write_trylock(sem))
		msleep(10);
}
290
/*
 * Allocate and initialize a cifsFileInfo for a newly opened handle and
 * link it onto the tcon's and inode's open-file lists.  Consumes the
 * pending open recorded in @fid and hands the (possibly adjusted)
 * oplock level to the server ops via set_fid().
 *
 * Returns the new cifsFileInfo (also stored in file->private_data),
 * or NULL on allocation failure.
 */
struct cifsFileInfo *
cifs_new_fileinfo(struct cifs_fid *fid, struct file *file,
		  struct tcon_link *tlink, __u32 oplock)
{
	struct dentry *dentry = file_dentry(file);
	struct inode *inode = d_inode(dentry);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifsFileInfo *cfile;
	struct cifs_fid_locks *fdlocks;
	struct cifs_tcon *tcon = tlink_tcon(tlink);
	struct TCP_Server_Info *server = tcon->ses->server;

	cfile = kzalloc(sizeof(struct cifsFileInfo), GFP_KERNEL);
	if (cfile == NULL)
		return cfile;

	fdlocks = kzalloc(sizeof(struct cifs_fid_locks), GFP_KERNEL);
	if (!fdlocks) {
		kfree(cfile);
		return NULL;
	}

	INIT_LIST_HEAD(&fdlocks->locks);
	fdlocks->cfile = cfile;
	cfile->llist = fdlocks;

	cfile->count = 1;
	cfile->pid = current->tgid;
	cfile->uid = current_fsuid();
	cfile->dentry = dget(dentry);
	cfile->f_flags = file->f_flags;
	cfile->invalidHandle = false;
	cfile->tlink = cifs_get_tlink(tlink);
	INIT_WORK(&cfile->oplock_break, cifs_oplock_break);
	mutex_init(&cfile->fh_mutex);
	spin_lock_init(&cfile->file_info_lock);

	/* pin the superblock while this handle exists */
	cifs_sb_active(inode->i_sb);

	/*
	 * If the server returned a read oplock and we have mandatory brlocks,
	 * set oplock level to None.
	 */
	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
		oplock = 0;
	}

	cifs_down_write(&cinode->lock_sem);
	list_add(&fdlocks->llist, &cinode->llist);
	up_write(&cinode->lock_sem);

	spin_lock(&tcon->open_file_lock);
	/* a lease break while the open was pending may have changed the oplock */
	if (fid->pending_open->oplock != CIFS_OPLOCK_NO_CHANGE && oplock)
		oplock = fid->pending_open->oplock;
	list_del(&fid->pending_open->olist);

	/* set_fid() may turn purge_cache back on; checked below */
	fid->purge_cache = false;
	server->ops->set_fid(cfile, fid, oplock);

	list_add(&cfile->tlist, &tcon->openFileList);

	/* if readable file instance put first in list*/
	if (file->f_mode & FMODE_READ)
		list_add(&cfile->flist, &cinode->openFileList);
	else
		list_add_tail(&cfile->flist, &cinode->openFileList);
	spin_unlock(&tcon->open_file_lock);

	if (fid->purge_cache)
		cifs_zap_mapping(inode);

	file->private_data = cfile;
	return cfile;
}
366
/*
 * Take an additional reference on @cifs_file under file_info_lock.
 * Returns the same pointer for caller convenience.
 */
struct cifsFileInfo *
cifsFileInfo_get(struct cifsFileInfo *cifs_file)
{
	spin_lock(&cifs_file->file_info_lock);
	cifsFileInfo_get_locked(cifs_file);
	spin_unlock(&cifs_file->file_info_lock);
	return cifs_file;
}
375
/**
 * cifsFileInfo_put - release a reference of file priv data
 * @cifs_file: the file info to drop a reference on
 *
 * Always potentially wait for oplock handler. See _cifsFileInfo_put().
 */
void cifsFileInfo_put(struct cifsFileInfo *cifs_file)
{
	_cifsFileInfo_put(cifs_file, true);
}
385
386 /**
387  * _cifsFileInfo_put - release a reference of file priv data
388  *
389  * This may involve closing the filehandle @cifs_file out on the
390  * server. Must be called without holding tcon->open_file_lock and
391  * cifs_file->file_info_lock.
392  *
393  * If @wait_for_oplock_handler is true and we are releasing the last
394  * reference, wait for any running oplock break handler of the file
395  * and cancel any pending one. If calling this function from the
396  * oplock break handler, you need to pass false.
397  *
398  */
399 void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, bool wait_oplock_handler)
400 {
401         struct inode *inode = d_inode(cifs_file->dentry);
402         struct cifs_tcon *tcon = tlink_tcon(cifs_file->tlink);
403         struct TCP_Server_Info *server = tcon->ses->server;
404         struct cifsInodeInfo *cifsi = CIFS_I(inode);
405         struct super_block *sb = inode->i_sb;
406         struct cifs_sb_info *cifs_sb = CIFS_SB(sb);
407         struct cifsLockInfo *li, *tmp;
408         struct cifs_fid fid;
409         struct cifs_pending_open open;
410         bool oplock_break_cancelled;
411
412         spin_lock(&tcon->open_file_lock);
413
414         spin_lock(&cifs_file->file_info_lock);
415         if (--cifs_file->count > 0) {
416                 spin_unlock(&cifs_file->file_info_lock);
417                 spin_unlock(&tcon->open_file_lock);
418                 return;
419         }
420         spin_unlock(&cifs_file->file_info_lock);
421
422         if (server->ops->get_lease_key)
423                 server->ops->get_lease_key(inode, &fid);
424
425         /* store open in pending opens to make sure we don't miss lease break */
426         cifs_add_pending_open_locked(&fid, cifs_file->tlink, &open);
427
428         /* remove it from the lists */
429         list_del(&cifs_file->flist);
430         list_del(&cifs_file->tlist);
431
432         if (list_empty(&cifsi->openFileList)) {
433                 cifs_dbg(FYI, "closing last open instance for inode %p\n",
434                          d_inode(cifs_file->dentry));
435                 /*
436                  * In strict cache mode we need invalidate mapping on the last
437                  * close  because it may cause a error when we open this file
438                  * again and get at least level II oplock.
439                  */
440                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO)
441                         set_bit(CIFS_INO_INVALID_MAPPING, &cifsi->flags);
442                 cifs_set_oplock_level(cifsi, 0);
443         }
444
445         spin_unlock(&tcon->open_file_lock);
446
447         oplock_break_cancelled = wait_oplock_handler ?
448                 cancel_work_sync(&cifs_file->oplock_break) : false;
449
450         if (!tcon->need_reconnect && !cifs_file->invalidHandle) {
451                 struct TCP_Server_Info *server = tcon->ses->server;
452                 unsigned int xid;
453
454                 xid = get_xid();
455                 if (server->ops->close)
456                         server->ops->close(xid, tcon, &cifs_file->fid);
457                 _free_xid(xid);
458         }
459
460         if (oplock_break_cancelled)
461                 cifs_done_oplock_break(cifsi);
462
463         cifs_del_pending_open(&open);
464
465         /*
466          * Delete any outstanding lock records. We'll lose them when the file
467          * is closed anyway.
468          */
469         cifs_down_write(&cifsi->lock_sem);
470         list_for_each_entry_safe(li, tmp, &cifs_file->llist->locks, llist) {
471                 list_del(&li->llist);
472                 cifs_del_lock_waiters(li);
473                 kfree(li);
474         }
475         list_del(&cifs_file->llist->llist);
476         kfree(cifs_file->llist);
477         up_write(&cifsi->lock_sem);
478
479         cifs_put_tlink(cifs_file->tlink);
480         dput(cifs_file->dentry);
481         cifs_sb_deactive(sb);
482         kfree(cifs_file);
483 }
484
/*
 * VFS ->open() for regular files on a cifs mount.
 *
 * Tries an SMB POSIX-extensions open first when the tcon advertises
 * support, falling back to the NT-style open (cifs_nt_open()) on
 * protocol errors.  On success a cifsFileInfo is created and attached
 * to file->private_data.
 *
 * Returns 0 on success or a negative errno.
 */
int cifs_open(struct inode *inode, struct file *file)

{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct TCP_Server_Info *server;
	struct cifs_tcon *tcon;
	struct tcon_link *tlink;
	struct cifsFileInfo *cfile = NULL;
	char *full_path = NULL;
	bool posix_open_ok = false;
	struct cifs_fid fid;
	struct cifs_pending_open open;

	xid = get_xid();

	cifs_sb = CIFS_SB(inode->i_sb);
	tlink = cifs_sb_tlink(cifs_sb);
	if (IS_ERR(tlink)) {
		free_xid(xid);
		return PTR_ERR(tlink);
	}
	tcon = tlink_tcon(tlink);
	server = tcon->ses->server;

	full_path = build_path_from_dentry(file_dentry(file));
	if (full_path == NULL) {
		rc = -ENOMEM;
		goto out;
	}

	cifs_dbg(FYI, "inode = 0x%p file flags are 0x%x for %s\n",
		 inode, file->f_flags, full_path);

	/* O_DIRECT in strict cache mode switches to the direct file ops */
	if (file->f_flags & O_DIRECT &&
	    cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) {
		if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_BRL)
			file->f_op = &cifs_file_direct_nobrl_ops;
		else
			file->f_op = &cifs_file_direct_ops;
	}

	if (server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

	if (!tcon->broken_posix_open && tcon->unix_ext &&
	    cap_unix(tcon->ses) && (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/* can not refresh inode info since size could be stale */
		rc = cifs_posix_open(full_path, &inode, inode->i_sb,
				cifs_sb->mnt_file_mode /* ignored */,
				file->f_flags, &oplock, &fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix open succeeded\n");
			posix_open_ok = true;
		} else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
			/* server rejects posix open: remember and stop trying */
			if (tcon->ses->serverNOS)
				cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
					 tcon->ses->serverName,
					 tcon->ses->serverNOS);
			tcon->broken_posix_open = true;
		} else if ((rc != -EIO) && (rc != -EREMOTE) &&
			 (rc != -EOPNOTSUPP)) /* path not found or net err */
			goto out;
		/*
		 * Else fallthrough to retry open the old way on network i/o
		 * or DFS errors.
		 */
	}

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &fid);

	/* record the open so a concurrent lease break is not lost */
	cifs_add_pending_open(&fid, tlink, &open);

	if (!posix_open_ok) {
		if (server->ops->get_lease_key)
			server->ops->get_lease_key(inode, &fid);

		rc = cifs_nt_open(full_path, inode, cifs_sb, tcon,
				  file->f_flags, &oplock, &fid, xid);
		if (rc) {
			cifs_del_pending_open(&open);
			goto out;
		}
	}

	cfile = cifs_new_fileinfo(&fid, file, tlink, oplock);
	if (cfile == NULL) {
		/* undo the server open and the pending-open record */
		if (server->ops->close)
			server->ops->close(xid, tcon, &fid);
		cifs_del_pending_open(&open);
		rc = -ENOMEM;
		goto out;
	}

	cifs_fscache_set_inode_cookie(inode, file);

	if ((oplock & CIFS_CREATE_ACTION) && !posix_open_ok && tcon->unix_ext) {
		/*
		 * Time to set mode which we can not set earlier due to
		 * problems creating new read-only files.
		 */
		struct cifs_unix_set_info_args args = {
			.mode	= inode->i_mode,
			.uid	= INVALID_UID, /* no change */
			.gid	= INVALID_GID, /* no change */
			.ctime	= NO_CHANGE_64,
			.atime	= NO_CHANGE_64,
			.mtime	= NO_CHANGE_64,
			.device	= 0,
		};
		CIFSSMBUnixSetFileInfo(xid, tcon, &args, fid.netfid,
				       cfile->pid);
	}

out:
	kfree(full_path);
	free_xid(xid);
	cifs_put_tlink(tlink);
	return rc;
}
611
612 static int cifs_push_posix_locks(struct cifsFileInfo *cfile);
613
/*
 * Try to reacquire byte range locks that were released when session
 * to server was lost.
 *
 * Called with the file's handle reopened (see cifs_reopen_file()).
 * Pushes cached POSIX or mandatory locks back to the server depending
 * on the tcon's capabilities and mount flags.
 */
static int
cifs_relock_file(struct cifsFileInfo *cfile)
{
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = 0;

	/* nested annotation: lock_sem may already be held one level up */
	down_read_nested(&cinode->lock_sem, SINGLE_DEPTH_NESTING);
	if (cinode->can_cache_brlcks) {
		/* can cache locks - no need to relock */
		up_read(&cinode->lock_sem);
		return rc;
	}

	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		rc = cifs_push_posix_locks(cfile);
	else
		rc = tcon->ses->server->ops->push_mand_locks(cfile);

	up_read(&cinode->lock_sem);
	return rc;
}
643
/*
 * Reopen a file whose server handle has been invalidated (for example
 * after a session reconnect).
 *
 * @cfile:     the open file to reopen on the server
 * @can_flush: if true, write back dirty pages and refresh the inode
 *             info after a successful reopen; writeback-path callers
 *             pass false (see the deadlock note below)
 *
 * Returns 0 on success (or if the handle was already valid) or a
 * negative errno.
 */
static int
cifs_reopen_file(struct cifsFileInfo *cfile, bool can_flush)
{
	int rc = -EACCES;
	unsigned int xid;
	__u32 oplock;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	struct cifsInodeInfo *cinode;
	struct inode *inode;
	char *full_path = NULL;
	int desired_access;
	int disposition = FILE_OPEN;
	int create_options = CREATE_NOT_DIR;
	struct cifs_open_parms oparms;

	xid = get_xid();
	mutex_lock(&cfile->fh_mutex);
	if (!cfile->invalidHandle) {
		/* someone else already reopened it while we waited */
		mutex_unlock(&cfile->fh_mutex);
		rc = 0;
		free_xid(xid);
		return rc;
	}

	inode = d_inode(cfile->dentry);
	cifs_sb = CIFS_SB(inode->i_sb);
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	/*
	 * Can not grab rename sem here because various ops, including those
	 * that already have the rename sem can end up causing writepage to get
	 * called and if the server was down that means we end up here, and we
	 * can never tell if the caller already has the rename_sem.
	 */
	full_path = build_path_from_dentry(cfile->dentry);
	if (full_path == NULL) {
		rc = -ENOMEM;
		mutex_unlock(&cfile->fh_mutex);
		free_xid(xid);
		return rc;
	}

	cifs_dbg(FYI, "inode = 0x%p file flags 0x%x for %s\n",
		 inode, cfile->f_flags, full_path);

	if (tcon->ses->server->oplocks)
		oplock = REQ_OPLOCK;
	else
		oplock = 0;

	if (tcon->unix_ext && cap_unix(tcon->ses) &&
	    (CIFS_UNIX_POSIX_PATH_OPS_CAP &
				le64_to_cpu(tcon->fsUnixInfo.Capability))) {
		/*
		 * O_CREAT, O_EXCL and O_TRUNC already had their effect on the
		 * original open. Must mask them off for a reopen.
		 */
		unsigned int oflags = cfile->f_flags &
						~(O_CREAT | O_EXCL | O_TRUNC);

		rc = cifs_posix_open(full_path, NULL, inode->i_sb,
				     cifs_sb->mnt_file_mode /* ignored */,
				     oflags, &oplock, &cfile->fid.netfid, xid);
		if (rc == 0) {
			cifs_dbg(FYI, "posix reopen succeeded\n");
			oparms.reconnect = true;
			goto reopen_success;
		}
		/*
		 * fallthrough to retry open the old way on errors, especially
		 * in the reconnect path it is important to retry hard
		 */
	}

	desired_access = cifs_convert_flags(cfile->f_flags);

	if (backup_cred(cifs_sb))
		create_options |= CREATE_OPEN_BACKUP_INTENT;

	/* O_SYNC also has bit for O_DSYNC so following check picks up either */
	if (cfile->f_flags & O_SYNC)
		create_options |= CREATE_WRITE_THROUGH;

	if (cfile->f_flags & O_DIRECT)
		create_options |= CREATE_NO_BUFFER;

	if (server->ops->get_lease_key)
		server->ops->get_lease_key(inode, &cfile->fid);

	oparms.tcon = tcon;
	oparms.cifs_sb = cifs_sb;
	oparms.desired_access = desired_access;
	oparms.create_options = create_options;
	oparms.disposition = disposition;
	oparms.path = full_path;
	oparms.fid = &cfile->fid;
	oparms.reconnect = true;

	/*
	 * Can not refresh inode by passing in file_info buf to be returned by
	 * ops->open and then calling get_inode_info with returned buf since
	 * file might have write behind data that needs to be flushed and server
	 * version of file size can be stale. If we knew for sure that inode was
	 * not dirty locally we could do this.
	 */
	rc = server->ops->open(xid, &oparms, &oplock, NULL);
	if (rc == -ENOENT && oparms.reconnect == false) {
		/* durable handle timeout is expired - open the file again */
		rc = server->ops->open(xid, &oparms, &oplock, NULL);
		/* indicate that we need to relock the file */
		oparms.reconnect = true;
	}

	if (rc) {
		mutex_unlock(&cfile->fh_mutex);
		cifs_dbg(FYI, "cifs_reopen returned 0x%x\n", rc);
		cifs_dbg(FYI, "oplock: %d\n", oplock);
		goto reopen_error_exit;
	}

reopen_success:
	cfile->invalidHandle = false;
	mutex_unlock(&cfile->fh_mutex);
	cinode = CIFS_I(inode);

	if (can_flush) {
		rc = filemap_write_and_wait(inode->i_mapping);
		mapping_set_error(inode->i_mapping, rc);

		if (tcon->unix_ext)
			rc = cifs_get_inode_info_unix(&inode, full_path,
						      inode->i_sb, xid);
		else
			rc = cifs_get_inode_info(&inode, full_path, NULL,
						 inode->i_sb, xid, NULL);
	}
	/*
	 * Else we are writing out data to server already and could deadlock if
	 * we tried to flush data, and since we do not know if we have data that
	 * would invalidate the current end of file on the server we can not go
	 * to the server to get the new inode info.
	 */

	/*
	 * If the server returned a read oplock and we have mandatory brlocks,
	 * set oplock level to None.
	 */
	if (server->ops->is_read_op(oplock) && cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock val from read to None due to mand locks\n");
		oplock = 0;
	}

	server->ops->set_fid(cfile, &cfile->fid, oplock);
	if (oparms.reconnect)
		cifs_relock_file(cfile);

reopen_error_exit:
	kfree(full_path);
	free_xid(xid);
	return rc;
}
808
809 int cifs_close(struct inode *inode, struct file *file)
810 {
811         if (file->private_data != NULL) {
812                 cifsFileInfo_put(file->private_data);
813                 file->private_data = NULL;
814         }
815
816         /* return code from the ->release op is always ignored */
817         return 0;
818 }
819
820 void
821 cifs_reopen_persistent_handles(struct cifs_tcon *tcon)
822 {
823         struct cifsFileInfo *open_file;
824         struct list_head *tmp;
825         struct list_head *tmp1;
826         struct list_head tmp_list;
827
828         if (!tcon->use_persistent || !tcon->need_reopen_files)
829                 return;
830
831         tcon->need_reopen_files = false;
832
833         cifs_dbg(FYI, "Reopen persistent handles");
834         INIT_LIST_HEAD(&tmp_list);
835
836         /* list all files open on tree connection, reopen resilient handles  */
837         spin_lock(&tcon->open_file_lock);
838         list_for_each(tmp, &tcon->openFileList) {
839                 open_file = list_entry(tmp, struct cifsFileInfo, tlist);
840                 if (!open_file->invalidHandle)
841                         continue;
842                 cifsFileInfo_get(open_file);
843                 list_add_tail(&open_file->rlist, &tmp_list);
844         }
845         spin_unlock(&tcon->open_file_lock);
846
847         list_for_each_safe(tmp, tmp1, &tmp_list) {
848                 open_file = list_entry(tmp, struct cifsFileInfo, rlist);
849                 if (cifs_reopen_file(open_file, false /* do not flush */))
850                         tcon->need_reopen_files = true;
851                 list_del_init(&open_file->rlist);
852                 cifsFileInfo_put(open_file);
853         }
854 }
855
/*
 * ->release handler for directories: close the server-side search handle
 * if it is still open, free any buffered search response, and release the
 * per-file private data. Errors from the server close are logged and
 * discarded; the VFS ignores ->release return codes anyway.
 */
int cifs_closedir(struct inode *inode, struct file *file)
{
	int rc = 0;
	unsigned int xid;
	struct cifsFileInfo *cfile = file->private_data;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	char *buf;

	cifs_dbg(FYI, "Closedir inode = 0x%p\n", inode);

	if (cfile == NULL)
		return rc;

	xid = get_xid();
	tcon = tlink_tcon(cfile->tlink);
	server = tcon->ses->server;

	cifs_dbg(FYI, "Freeing private data in close dir\n");
	spin_lock(&cfile->file_info_lock);
	if (server->ops->dir_needs_close(cfile)) {
		cfile->invalidHandle = true;
		/* drop the spinlock before issuing the network request */
		spin_unlock(&cfile->file_info_lock);
		if (server->ops->close_dir)
			rc = server->ops->close_dir(xid, tcon, &cfile->fid);
		else
			rc = -ENOSYS;
		cifs_dbg(FYI, "Closing uncompleted readdir with rc %d\n", rc);
		/* not much we can do if it fails anyway, ignore rc */
		rc = 0;
	} else
		spin_unlock(&cfile->file_info_lock);

	/* free any SMB response buffer still held by the search state */
	buf = cfile->srch_inf.ntwrk_buf_start;
	if (buf) {
		cifs_dbg(FYI, "closedir free smb buf in srch struct\n");
		cfile->srch_inf.ntwrk_buf_start = NULL;
		if (cfile->srch_inf.smallBuf)
			cifs_small_buf_release(buf);
		else
			cifs_buf_release(buf);
	}

	cifs_put_tlink(cfile->tlink);
	kfree(file->private_data);
	file->private_data = NULL;
	/* BB can we lock the filestruct while this is going on? */
	free_xid(xid);
	return rc;
}
906
907 static struct cifsLockInfo *
908 cifs_lock_init(__u64 offset, __u64 length, __u8 type)
909 {
910         struct cifsLockInfo *lock =
911                 kmalloc(sizeof(struct cifsLockInfo), GFP_KERNEL);
912         if (!lock)
913                 return lock;
914         lock->offset = offset;
915         lock->length = length;
916         lock->type = type;
917         lock->pid = current->tgid;
918         INIT_LIST_HEAD(&lock->blist);
919         init_waitqueue_head(&lock->block_q);
920         return lock;
921 }
922
923 void
924 cifs_del_lock_waiters(struct cifsLockInfo *lock)
925 {
926         struct cifsLockInfo *li, *tmp;
927         list_for_each_entry_safe(li, tmp, &lock->blist, blist) {
928                 list_del_init(&li->blist);
929                 wake_up(&li->block_q);
930         }
931 }
932
#define CIFS_LOCK_OP	0
#define CIFS_READ_OP	1
#define CIFS_WRITE_OP	2

/*
 * Check whether the requested range (offset/length/type) on @cfile
 * conflicts with any lock cached on the given per-fid list.
 *
 * @rw_check: CIFS_LOCK_OP when setting/testing a brlock, CIFS_READ_OP or
 * CIFS_WRITE_OP when checking whether a read/write may proceed.
 *
 * Returns true and stores the conflicting lock in *conf_lock (if non-NULL)
 * on conflict; false when no overlapping lock prevents the operation.
 */
static bool
cifs_find_fid_lock_conflict(struct cifs_fid_locks *fdlocks, __u64 offset,
			    __u64 length, __u8 type, struct cifsFileInfo *cfile,
			    struct cifsLockInfo **conf_lock, int rw_check)
{
	struct cifsLockInfo *li;
	struct cifsFileInfo *cur_cfile = fdlocks->cfile;
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;

	list_for_each_entry(li, &fdlocks->locks, llist) {
		/* ignore locks that do not overlap the requested range */
		if (offset + length <= li->offset ||
		    offset >= li->offset + li->length)
			continue;
		/*
		 * For read/write checks, a lock held through the same fid by
		 * the same thread group does not conflict ...
		 */
		if (rw_check != CIFS_LOCK_OP && current->tgid == li->pid &&
		    server->ops->compare_fids(cfile, cur_cfile)) {
			/* shared lock prevents write op through the same fid */
			if (!(li->type & server->vals->shared_lock_type) ||
			    rw_check != CIFS_WRITE_OP)
				continue;
		}
		/* a shared request coexists with our own lock or a same-type lock */
		if ((type & server->vals->shared_lock_type) &&
		    ((server->ops->compare_fids(cfile, cur_cfile) &&
		     current->tgid == li->pid) || type == li->type))
			continue;
		if (conf_lock)
			*conf_lock = li;
		return true;
	}
	return false;
}
968
969 bool
970 cifs_find_lock_conflict(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
971                         __u8 type, struct cifsLockInfo **conf_lock,
972                         int rw_check)
973 {
974         bool rc = false;
975         struct cifs_fid_locks *cur;
976         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
977
978         list_for_each_entry(cur, &cinode->llist, llist) {
979                 rc = cifs_find_fid_lock_conflict(cur, offset, length, type,
980                                                  cfile, conf_lock, rw_check);
981                 if (rc)
982                         break;
983         }
984
985         return rc;
986 }
987
988 /*
989  * Check if there is another lock that prevents us to set the lock (mandatory
990  * style). If such a lock exists, update the flock structure with its
991  * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
992  * or leave it the same if we can't. Returns 0 if we don't need to request to
993  * the server or 1 otherwise.
994  */
995 static int
996 cifs_lock_test(struct cifsFileInfo *cfile, __u64 offset, __u64 length,
997                __u8 type, struct file_lock *flock)
998 {
999         int rc = 0;
1000         struct cifsLockInfo *conf_lock;
1001         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1002         struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
1003         bool exist;
1004
1005         down_read(&cinode->lock_sem);
1006
1007         exist = cifs_find_lock_conflict(cfile, offset, length, type,
1008                                         &conf_lock, CIFS_LOCK_OP);
1009         if (exist) {
1010                 flock->fl_start = conf_lock->offset;
1011                 flock->fl_end = conf_lock->offset + conf_lock->length - 1;
1012                 flock->fl_pid = conf_lock->pid;
1013                 if (conf_lock->type & server->vals->shared_lock_type)
1014                         flock->fl_type = F_RDLCK;
1015                 else
1016                         flock->fl_type = F_WRLCK;
1017         } else if (!cinode->can_cache_brlcks)
1018                 rc = 1;
1019         else
1020                 flock->fl_type = F_UNLCK;
1021
1022         up_read(&cinode->lock_sem);
1023         return rc;
1024 }
1025
/*
 * Append a lock to this file descriptor's cached lock list, taking
 * lock_sem for write to serialize against conflict scans and pushes.
 */
static void
cifs_lock_add(struct cifsFileInfo *cfile, struct cifsLockInfo *lock)
{
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	cifs_down_write(&cinode->lock_sem);
	list_add_tail(&lock->llist, &cfile->llist->locks);
	up_write(&cinode->lock_sem);
}
1034
/*
 * Set the byte-range lock (mandatory style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if no locks prevent us but we need to request to the server;
 * 3) -EACCES, if there is a lock that prevents us and wait is false.
 */
static int
cifs_lock_add_if(struct cifsFileInfo *cfile, struct cifsLockInfo *lock,
		 bool wait)
{
	struct cifsLockInfo *conf_lock;
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	bool exist;
	int rc = 0;

try_again:
	exist = false;
	cifs_down_write(&cinode->lock_sem);

	exist = cifs_find_lock_conflict(cfile, lock->offset, lock->length,
					lock->type, &conf_lock, CIFS_LOCK_OP);
	if (!exist && cinode->can_cache_brlcks) {
		/* no conflict and brlocks are cached - keep the lock local */
		list_add_tail(&lock->llist, &cfile->llist->locks);
		up_write(&cinode->lock_sem);
		return rc;
	}

	if (!exist)
		rc = 1;
	else if (!wait)
		rc = -EACCES;
	else {
		/*
		 * Queue ourselves on the conflicting lock's waiter list and
		 * sleep until cifs_del_lock_waiters() detaches us (our blist
		 * becomes empty, i.e. points back to itself), then retry.
		 */
		list_add_tail(&lock->blist, &conf_lock->blist);
		up_write(&cinode->lock_sem);
		rc = wait_event_interruptible(lock->block_q,
					(lock->blist.prev == &lock->blist) &&
					(lock->blist.next == &lock->blist));
		if (!rc)
			goto try_again;
		/* interrupted - unhook ourselves from the waiter list */
		cifs_down_write(&cinode->lock_sem);
		list_del_init(&lock->blist);
	}

	up_write(&cinode->lock_sem);
	return rc;
}
1081
1082 /*
1083  * Check if there is another lock that prevents us to set the lock (posix
1084  * style). If such a lock exists, update the flock structure with its
1085  * properties. Otherwise, set the flock type to F_UNLCK if we can cache brlocks
1086  * or leave it the same if we can't. Returns 0 if we don't need to request to
1087  * the server or 1 otherwise.
1088  */
1089 static int
1090 cifs_posix_lock_test(struct file *file, struct file_lock *flock)
1091 {
1092         int rc = 0;
1093         struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
1094         unsigned char saved_type = flock->fl_type;
1095
1096         if ((flock->fl_flags & FL_POSIX) == 0)
1097                 return 1;
1098
1099         down_read(&cinode->lock_sem);
1100         posix_test_lock(file, flock);
1101
1102         if (flock->fl_type == F_UNLCK && !cinode->can_cache_brlcks) {
1103                 flock->fl_type = saved_type;
1104                 rc = 1;
1105         }
1106
1107         up_read(&cinode->lock_sem);
1108         return rc;
1109 }
1110
/*
 * Set the byte-range lock (posix style). Returns:
 * 1) 0, if we set the lock and don't need to request to the server;
 * 2) 1, if we need to request to the server;
 * 3) <0, if the error occurs while setting the lock.
 */
static int
cifs_posix_lock_set(struct file *file, struct file_lock *flock)
{
	struct cifsInodeInfo *cinode = CIFS_I(file_inode(file));
	int rc = 1;

	if ((flock->fl_flags & FL_POSIX) == 0)
		return rc;

try_again:
	cifs_down_write(&cinode->lock_sem);
	if (!cinode->can_cache_brlcks) {
		/* caching disabled - caller must send the lock to the server */
		up_write(&cinode->lock_sem);
		return rc;
	}

	rc = posix_lock_file(file, flock, NULL);
	up_write(&cinode->lock_sem);
	if (rc == FILE_LOCK_DEFERRED) {
		/* blocked by another lock - wait for release, then retry */
		rc = wait_event_interruptible(flock->fl_wait, !flock->fl_next);
		if (!rc)
			goto try_again;
		posix_unblock_lock(flock);
	}
	return rc;
}
1143
/*
 * Push all locally cached mandatory byte-range locks of @cfile to the
 * server using LOCKING_ANDX requests, batching as many ranges per request
 * as the server's buffer size allows. Exclusive locks are sent in the
 * first pass, shared locks in the second. Returns 0 on success or the
 * last non-zero rc reported by cifs_lockv().
 */
int
cifs_push_mandatory_locks(struct cifsFileInfo *cfile)
{
	unsigned int xid;
	int rc = 0, stored_rc;
	struct cifsLockInfo *li, *tmp;
	struct cifs_tcon *tcon;
	unsigned int num, max_num, max_buf;
	LOCKING_ANDX_RANGE *buf, *cur;
	static const int types[] = {
		LOCKING_ANDX_LARGE_FILES,
		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
	};
	int i;

	xid = get_xid();
	tcon = tlink_tcon(cfile->tlink);

	/*
	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
	 * and check it before using.
	 */
	max_buf = tcon->ses->server->maxBuf;
	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE))) {
		free_xid(xid);
		return -EINVAL;
	}

	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
		     PAGE_SIZE);
	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
			PAGE_SIZE);
	/* how many lock ranges fit into one request buffer */
	max_num = (max_buf - sizeof(struct smb_hdr)) /
						sizeof(LOCKING_ANDX_RANGE);
	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
	if (!buf) {
		free_xid(xid);
		return -ENOMEM;
	}

	/* one pass per lock type: exclusive first, then shared */
	for (i = 0; i < 2; i++) {
		cur = buf;
		num = 0;
		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
			if (li->type != types[i])
				continue;
			cur->Pid = cpu_to_le16(li->pid);
			cur->LengthLow = cpu_to_le32((u32)li->length);
			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
			cur->OffsetLow = cpu_to_le32((u32)li->offset);
			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
			if (++num == max_num) {
				/* buffer full - flush this batch now */
				stored_rc = cifs_lockv(xid, tcon,
						       cfile->fid.netfid,
						       (__u8)li->type, 0, num,
						       buf);
				if (stored_rc)
					rc = stored_rc;
				cur = buf;
				num = 0;
			} else
				cur++;
		}

		/* send the final partial batch, if any */
		if (num) {
			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
					       (__u8)types[i], 0, num, buf);
			if (stored_rc)
				rc = stored_rc;
		}
	}

	kfree(buf);
	free_xid(xid);
	return rc;
}
1220
1221 static __u32
1222 hash_lockowner(fl_owner_t owner)
1223 {
1224         return cifs_lock_secret ^ hash32_ptr((const void *)owner);
1225 }
1226
/*
 * A POSIX lock copied out of the VFS lock list, staged for sending to the
 * server by cifs_push_posix_locks().
 */
struct lock_to_push {
	struct list_head llist;	/* entry in the locks_to_send staging list */
	__u64 offset;		/* start of the locked range */
	__u64 length;		/* length of the locked range */
	__u32 pid;		/* hashed lock owner, sent as the pid */
	__u16 netfid;		/* SMB file handle the lock applies to */
	__u8 type;		/* CIFS_RDLCK or CIFS_WRLCK */
};
1235
/*
 * Replay all POSIX byte-range locks cached in the VFS onto the server.
 * Runs in two phases: count the locks under flc_lock and preallocate the
 * staging entries (allocation must not happen under the spinlock), then
 * fill the entries and send them one by one with CIFSSMBPosixLock().
 * Returns 0 on success or the last error from allocation/sending.
 */
static int
cifs_push_posix_locks(struct cifsFileInfo *cfile)
{
	struct inode *inode = d_inode(cfile->dentry);
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct file_lock *flock;
	struct file_lock_context *flctx = inode->i_flctx;
	unsigned int count = 0, i;
	int rc = 0, xid, type;
	struct list_head locks_to_send, *el;
	struct lock_to_push *lck, *tmp;
	__u64 length;

	xid = get_xid();

	/* no lock context means no locks to push */
	if (!flctx)
		goto out;

	spin_lock(&flctx->flc_lock);
	list_for_each(el, &flctx->flc_posix) {
		count++;
	}
	spin_unlock(&flctx->flc_lock);

	INIT_LIST_HEAD(&locks_to_send);

	/*
	 * Allocating count locks is enough because no FL_POSIX locks can be
	 * added to the list while we are holding cinode->lock_sem that
	 * protects locking operations of this inode.
	 */
	for (i = 0; i < count; i++) {
		lck = kmalloc(sizeof(struct lock_to_push), GFP_KERNEL);
		if (!lck) {
			rc = -ENOMEM;
			goto err_out;
		}
		list_add_tail(&lck->llist, &locks_to_send);
	}

	el = locks_to_send.next;
	spin_lock(&flctx->flc_lock);
	list_for_each_entry(flock, &flctx->flc_posix, fl_list) {
		if (el == &locks_to_send) {
			/*
			 * The list ended. We don't have enough allocated
			 * structures - something is really wrong.
			 */
			cifs_dbg(VFS, "Can't push all brlocks!\n");
			break;
		}
		length = 1 + flock->fl_end - flock->fl_start;
		if (flock->fl_type == F_RDLCK || flock->fl_type == F_SHLCK)
			type = CIFS_RDLCK;
		else
			type = CIFS_WRLCK;
		lck = list_entry(el, struct lock_to_push, llist);
		lck->pid = hash_lockowner(flock->fl_owner);
		lck->netfid = cfile->fid.netfid;
		lck->length = length;
		lck->type = type;
		lck->offset = flock->fl_start;
	}
	spin_unlock(&flctx->flc_lock);

	/* now that the spinlock is dropped, send each staged lock */
	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
		int stored_rc;

		stored_rc = CIFSSMBPosixLock(xid, tcon, lck->netfid, lck->pid,
					     lck->offset, lck->length, NULL,
					     lck->type, 0);
		if (stored_rc)
			rc = stored_rc;
		list_del(&lck->llist);
		kfree(lck);
	}

out:
	free_xid(xid);
	return rc;
err_out:
	/* allocation failed part-way - free what we staged so far */
	list_for_each_entry_safe(lck, tmp, &locks_to_send, llist) {
		list_del(&lck->llist);
		kfree(lck);
	}
	goto out;
}
1323
1324 static int
1325 cifs_push_locks(struct cifsFileInfo *cfile)
1326 {
1327         struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
1328         struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
1329         struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
1330         int rc = 0;
1331
1332         /* we are going to update can_cache_brlcks here - need a write access */
1333         cifs_down_write(&cinode->lock_sem);
1334         if (!cinode->can_cache_brlcks) {
1335                 up_write(&cinode->lock_sem);
1336                 return rc;
1337         }
1338
1339         if (cap_unix(tcon->ses) &&
1340             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1341             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1342                 rc = cifs_push_posix_locks(cfile);
1343         else
1344                 rc = tcon->ses->server->ops->push_mand_locks(cfile);
1345
1346         cinode->can_cache_brlcks = false;
1347         up_write(&cinode->lock_sem);
1348         return rc;
1349 }
1350
/*
 * Decode a VFS file_lock into CIFS terms: log the interesting fl_flags,
 * compute the server-specific lock *type, and set *lock/*unlock and
 * *wait_flag according to flock->fl_type and FL_SLEEP.
 */
static void
cifs_read_flock(struct file_lock *flock, __u32 *type, int *lock, int *unlock,
		bool *wait_flag, struct TCP_Server_Info *server)
{
	if (flock->fl_flags & FL_POSIX)
		cifs_dbg(FYI, "Posix\n");
	if (flock->fl_flags & FL_FLOCK)
		cifs_dbg(FYI, "Flock\n");
	if (flock->fl_flags & FL_SLEEP) {
		cifs_dbg(FYI, "Blocking lock\n");
		*wait_flag = true;
	}
	if (flock->fl_flags & FL_ACCESS)
		cifs_dbg(FYI, "Process suspended by mandatory locking - not implemented yet\n");
	if (flock->fl_flags & FL_LEASE)
		cifs_dbg(FYI, "Lease on file - not implemented yet\n");
	if (flock->fl_flags &
	    (~(FL_POSIX | FL_FLOCK | FL_SLEEP |
	       FL_ACCESS | FL_LEASE | FL_CLOSE)))
		cifs_dbg(FYI, "Unknown lock flags 0x%x\n", flock->fl_flags);

	*type = server->vals->large_lock_type;
	if (flock->fl_type == F_WRLCK) {
		cifs_dbg(FYI, "F_WRLCK\n");
		*type |= server->vals->exclusive_lock_type;
		*lock = 1;
	} else if (flock->fl_type == F_UNLCK) {
		cifs_dbg(FYI, "F_UNLCK\n");
		*type |= server->vals->unlock_lock_type;
		*unlock = 1;
		/* Check if unlock includes more than one lock range */
	} else if (flock->fl_type == F_RDLCK) {
		cifs_dbg(FYI, "F_RDLCK\n");
		*type |= server->vals->shared_lock_type;
		*lock = 1;
	} else if (flock->fl_type == F_EXLCK) {
		cifs_dbg(FYI, "F_EXLCK\n");
		*type |= server->vals->exclusive_lock_type;
		*lock = 1;
	} else if (flock->fl_type == F_SHLCK) {
		cifs_dbg(FYI, "F_SHLCK\n");
		*type |= server->vals->shared_lock_type;
		*lock = 1;
	} else
		cifs_dbg(FYI, "Unknown type of lock\n");
}
1397
/*
 * Handle F_GETLK: determine whether the requested range could be locked.
 * POSIX-capable mounts are answered locally or via CIFSSMBPosixLock.
 * For mandatory locks the probe strategy is: try to take the lock on the
 * server and, if that succeeds, immediately unlock it and report F_UNLCK;
 * otherwise report a conflicting F_WRLCK/F_RDLCK as appropriate.
 */
static int
cifs_getlk(struct file *file, struct file_lock *flock, __u32 type,
	   bool wait_flag, bool posix_lck, unsigned int xid)
{
	int rc = 0;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	__u16 netfid = cfile->fid.netfid;

	if (posix_lck) {
		int posix_lock_type;

		/* rc == 0 means the local answer is authoritative */
		rc = cifs_posix_lock_test(file, flock);
		if (!rc)
			return rc;

		if (type & server->vals->shared_lock_type)
			posix_lock_type = CIFS_RDLCK;
		else
			posix_lock_type = CIFS_WRLCK;
		rc = CIFSSMBPosixLock(xid, tcon, netfid,
				      hash_lockowner(flock->fl_owner),
				      flock->fl_start, length, flock,
				      posix_lock_type, wait_flag);
		return rc;
	}

	rc = cifs_lock_test(cfile, flock->fl_start, length, type, flock);
	if (!rc)
		return rc;

	/* BB we could chain these into one lock request BB */
	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length, type,
				    1, 0, false);
	if (rc == 0) {
		/* lock succeeded - undo the probe and report the range free */
		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
					    type, 0, 1, false);
		flock->fl_type = F_UNLCK;
		if (rc != 0)
			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
				 rc);
		return 0;
	}

	/* a shared-lock probe failed - an exclusive lock must be in the way */
	if (type & server->vals->shared_lock_type) {
		flock->fl_type = F_WRLCK;
		return 0;
	}

	type &= ~server->vals->exclusive_lock_type;

	/* exclusive probe failed - retry shared to tell F_RDLCK from F_WRLCK */
	rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
				    type | server->vals->shared_lock_type,
				    1, 0, false);
	if (rc == 0) {
		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
			type | server->vals->shared_lock_type, 0, 1, false);
		flock->fl_type = F_RDLCK;
		if (rc != 0)
			cifs_dbg(VFS, "Error unlocking previously locked range %d during test of lock\n",
				 rc);
	} else
		flock->fl_type = F_WRLCK;

	return 0;
}
1466
1467 void
1468 cifs_move_llist(struct list_head *source, struct list_head *dest)
1469 {
1470         struct list_head *li, *tmp;
1471         list_for_each_safe(li, tmp, source)
1472                 list_move(li, dest);
1473 }
1474
1475 void
1476 cifs_free_llist(struct list_head *llist)
1477 {
1478         struct cifsLockInfo *li, *tmp;
1479         list_for_each_entry_safe(li, tmp, llist, llist) {
1480                 cifs_del_lock_waiters(li);
1481                 list_del(&li->llist);
1482                 kfree(li);
1483         }
1484 }
1485
/*
 * Unlock a byte range: remove every cached lock of the current thread
 * group fully contained in [fl_start, fl_start+length) and, unless brlocks
 * are cached locally, send batched LOCKING_ANDX unlock requests to the
 * server. Locks are parked on tmp_llist while a request is in flight so
 * they can be restored to the file's list if the server rejects it.
 */
int
cifs_unlock_range(struct cifsFileInfo *cfile, struct file_lock *flock,
		  unsigned int xid)
{
	int rc = 0, stored_rc;
	static const int types[] = {
		LOCKING_ANDX_LARGE_FILES,
		LOCKING_ANDX_SHARED_LOCK | LOCKING_ANDX_LARGE_FILES
	};
	unsigned int i;
	unsigned int max_num, num, max_buf;
	LOCKING_ANDX_RANGE *buf, *cur;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct cifsInodeInfo *cinode = CIFS_I(d_inode(cfile->dentry));
	struct cifsLockInfo *li, *tmp;
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct list_head tmp_llist;

	INIT_LIST_HEAD(&tmp_llist);

	/*
	 * Accessing maxBuf is racy with cifs_reconnect - need to store value
	 * and check it before using.
	 */
	max_buf = tcon->ses->server->maxBuf;
	if (max_buf < (sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE)))
		return -EINVAL;

	BUILD_BUG_ON(sizeof(struct smb_hdr) + sizeof(LOCKING_ANDX_RANGE) >
		     PAGE_SIZE);
	max_buf = min_t(unsigned int, max_buf - sizeof(struct smb_hdr),
			PAGE_SIZE);
	/* how many unlock ranges fit into one request buffer */
	max_num = (max_buf - sizeof(struct smb_hdr)) /
						sizeof(LOCKING_ANDX_RANGE);
	buf = kcalloc(max_num, sizeof(LOCKING_ANDX_RANGE), GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	cifs_down_write(&cinode->lock_sem);
	/* one pass per lock type: exclusive first, then shared */
	for (i = 0; i < 2; i++) {
		cur = buf;
		num = 0;
		list_for_each_entry_safe(li, tmp, &cfile->llist->locks, llist) {
			/* only locks fully inside the unlock range qualify */
			if (flock->fl_start > li->offset ||
			    (flock->fl_start + length) <
			    (li->offset + li->length))
				continue;
			if (current->tgid != li->pid)
				continue;
			if (types[i] != li->type)
				continue;
			if (cinode->can_cache_brlcks) {
				/*
				 * We can cache brlock requests - simply remove
				 * a lock from the file's list.
				 */
				list_del(&li->llist);
				cifs_del_lock_waiters(li);
				kfree(li);
				continue;
			}
			cur->Pid = cpu_to_le16(li->pid);
			cur->LengthLow = cpu_to_le32((u32)li->length);
			cur->LengthHigh = cpu_to_le32((u32)(li->length>>32));
			cur->OffsetLow = cpu_to_le32((u32)li->offset);
			cur->OffsetHigh = cpu_to_le32((u32)(li->offset>>32));
			/*
			 * We need to save a lock here to let us add it again to
			 * the file's list if the unlock range request fails on
			 * the server.
			 */
			list_move(&li->llist, &tmp_llist);
			if (++num == max_num) {
				stored_rc = cifs_lockv(xid, tcon,
						       cfile->fid.netfid,
						       li->type, num, 0, buf);
				if (stored_rc) {
					/*
					 * We failed on the unlock range
					 * request - add all locks from the tmp
					 * list to the head of the file's list.
					 */
					cifs_move_llist(&tmp_llist,
							&cfile->llist->locks);
					rc = stored_rc;
				} else
					/*
					 * The unlock range request succeed -
					 * free the tmp list.
					 */
					cifs_free_llist(&tmp_llist);
				cur = buf;
				num = 0;
			} else
				cur++;
		}
		/* flush the final partial batch, if any */
		if (num) {
			stored_rc = cifs_lockv(xid, tcon, cfile->fid.netfid,
					       types[i], num, 0, buf);
			if (stored_rc) {
				cifs_move_llist(&tmp_llist,
						&cfile->llist->locks);
				rc = stored_rc;
			} else
				cifs_free_llist(&tmp_llist);
		}
	}

	up_write(&cinode->lock_sem);
	kfree(buf);
	return rc;
}
1598
/*
 * Apply a lock or unlock request for @file to the server and to the local
 * lock state.  When @posix_lck is set the request goes out as a POSIX
 * (unix extensions) lock via CIFSSMBPosixLock(); otherwise a mandatory
 * byte-range lock is sent through the server's ops.  For FL_POSIX requests
 * the VFS-level lock record is updated at the end (even after a failed
 * unlock at close time, since both client and server drop locks on close).
 */
static int
cifs_setlk(struct file *file, struct file_lock *flock, __u32 type,
	   bool wait_flag, bool posix_lck, int lock, int unlock,
	   unsigned int xid)
{
	int rc = 0;
	/* fl_end is inclusive, hence the +1 */
	__u64 length = 1 + flock->fl_end - flock->fl_start;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	struct inode *inode = d_inode(cfile->dentry);

	if (posix_lck) {
		int posix_lock_type;

		/* try to satisfy the request from the local lock state first */
		rc = cifs_posix_lock_set(file, flock);
		if (!rc || rc < 0)
			return rc;

		if (type & server->vals->shared_lock_type)
			posix_lock_type = CIFS_RDLCK;
		else
			posix_lock_type = CIFS_WRLCK;

		if (unlock == 1)
			posix_lock_type = CIFS_UNLCK;

		rc = CIFSSMBPosixLock(xid, tcon, cfile->fid.netfid,
				      hash_lockowner(flock->fl_owner),
				      flock->fl_start, length,
				      NULL, posix_lock_type, wait_flag);
		goto out;
	}

	if (lock) {
		struct cifsLockInfo *lock;

		lock = cifs_lock_init(flock->fl_start, length, type);
		if (!lock)
			return -ENOMEM;

		/*
		 * 0 means the lock was handled entirely locally (no server
		 * round trip needed); a positive value means we must send
		 * the request to the server below.
		 */
		rc = cifs_lock_add_if(cfile, lock, wait_flag);
		if (rc < 0) {
			kfree(lock);
			return rc;
		}
		if (!rc)
			goto out;

		/*
		 * Windows 7 server can delay breaking lease from read to None
		 * if we set a byte-range lock on a file - break it explicitly
		 * before sending the lock to the server to be sure the next
		 * read won't conflict with non-overlapted locks due to
		 * pagereading.
		 */
		if (!CIFS_CACHE_WRITE(CIFS_I(inode)) &&
					CIFS_CACHE_READ(CIFS_I(inode))) {
			cifs_zap_mapping(inode);
			cifs_dbg(FYI, "Set no oplock for inode=%p due to mand locks\n",
				 inode);
			CIFS_I(inode)->oplock = 0;
		}

		rc = server->ops->mand_lock(xid, cfile, flock->fl_start, length,
					    type, 1, 0, wait_flag);
		if (rc) {
			kfree(lock);
			return rc;
		}

		/* the server granted the lock - record it locally */
		cifs_lock_add(cfile, lock);
	} else if (unlock)
		rc = server->ops->mand_unlock_range(cfile, flock, xid);

out:
	if (flock->fl_flags & FL_POSIX) {
		/*
		 * If this is a request to remove all locks because we
		 * are closing the file, it doesn't matter if the
		 * unlocking failed as both cifs.ko and the SMB server
		 * remove the lock on file close
		 */
		if (rc) {
			cifs_dbg(VFS, "%s failed rc=%d\n", __func__, rc);
			if (!(flock->fl_flags & FL_CLOSE))
				return rc;
		}
		rc = locks_lock_file_wait(file, flock);
	}
	return rc;
}
1691
1692 int cifs_lock(struct file *file, int cmd, struct file_lock *flock)
1693 {
1694         int rc, xid;
1695         int lock = 0, unlock = 0;
1696         bool wait_flag = false;
1697         bool posix_lck = false;
1698         struct cifs_sb_info *cifs_sb;
1699         struct cifs_tcon *tcon;
1700         struct cifsInodeInfo *cinode;
1701         struct cifsFileInfo *cfile;
1702         __u16 netfid;
1703         __u32 type;
1704
1705         rc = -EACCES;
1706         xid = get_xid();
1707
1708         cifs_dbg(FYI, "Lock parm: 0x%x flockflags: 0x%x flocktype: 0x%x start: %lld end: %lld\n",
1709                  cmd, flock->fl_flags, flock->fl_type,
1710                  flock->fl_start, flock->fl_end);
1711
1712         cfile = (struct cifsFileInfo *)file->private_data;
1713         tcon = tlink_tcon(cfile->tlink);
1714
1715         cifs_read_flock(flock, &type, &lock, &unlock, &wait_flag,
1716                         tcon->ses->server);
1717
1718         cifs_sb = CIFS_FILE_SB(file);
1719         netfid = cfile->fid.netfid;
1720         cinode = CIFS_I(file_inode(file));
1721
1722         if (cap_unix(tcon->ses) &&
1723             (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
1724             ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
1725                 posix_lck = true;
1726         /*
1727          * BB add code here to normalize offset and length to account for
1728          * negative length which we can not accept over the wire.
1729          */
1730         if (IS_GETLK(cmd)) {
1731                 rc = cifs_getlk(file, flock, type, wait_flag, posix_lck, xid);
1732                 free_xid(xid);
1733                 return rc;
1734         }
1735
1736         if (!lock && !unlock) {
1737                 /*
1738                  * if no lock or unlock then nothing to do since we do not
1739                  * know what it is
1740                  */
1741                 free_xid(xid);
1742                 return -EOPNOTSUPP;
1743         }
1744
1745         rc = cifs_setlk(file, flock, type, wait_flag, posix_lck, lock, unlock,
1746                         xid);
1747         free_xid(xid);
1748         return rc;
1749 }
1750
1751 /*
1752  * update the file size (if needed) after a write. Should be called with
1753  * the inode->i_lock held
1754  */
1755 void
1756 cifs_update_eof(struct cifsInodeInfo *cifsi, loff_t offset,
1757                       unsigned int bytes_written)
1758 {
1759         loff_t end_of_write = offset + bytes_written;
1760
1761         if (end_of_write > cifsi->server_eof)
1762                 cifsi->server_eof = end_of_write;
1763 }
1764
/*
 * Write @write_size bytes from @write_data through @open_file starting at
 * *@offset, issuing as many synchronous SMB writes as needed (each chunk
 * capped by the server's wp_retry_size).  Stale handles are reopened and
 * -EAGAIN is retried transparently.  On success *@offset is advanced, the
 * cached server EOF and i_size are updated, and the total bytes written is
 * returned; an error before any data was written returns the error itself.
 */
static ssize_t
cifs_write(struct cifsFileInfo *open_file, __u32 pid, const char *write_data,
	   size_t write_size, loff_t *offset)
{
	int rc = 0;
	unsigned int bytes_written = 0;
	unsigned int total_written;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	unsigned int xid;
	struct dentry *dentry = open_file->dentry;
	struct cifsInodeInfo *cifsi = CIFS_I(d_inode(dentry));
	struct cifs_io_parms io_parms;

	cifs_sb = CIFS_SB(dentry->d_sb);

	cifs_dbg(FYI, "write %zd bytes to offset %lld of %pd\n",
		 write_size, *offset, dentry);

	tcon = tlink_tcon(open_file->tlink);
	server = tcon->ses->server;

	if (!server->ops->sync_write)
		return -ENOSYS;

	xid = get_xid();

	/* loop until everything requested has been sent to the server */
	for (total_written = 0; write_size > total_written;
	     total_written += bytes_written) {
		rc = -EAGAIN;
		while (rc == -EAGAIN) {
			struct kvec iov[2];
			unsigned int len;

			if (open_file->invalidHandle) {
				/* we could deadlock if we called
				   filemap_fdatawait from here so tell
				   reopen_file not to flush data to
				   server now */
				rc = cifs_reopen_file(open_file, false);
				if (rc != 0)
					break;
			}

			/* each iteration writes at most one server chunk */
			len = min(server->ops->wp_retry_size(d_inode(dentry)),
				  (unsigned int)write_size - total_written);
			/* iov[0] is reserved for smb header */
			iov[1].iov_base = (char *)write_data + total_written;
			iov[1].iov_len = len;
			io_parms.pid = pid;
			io_parms.tcon = tcon;
			io_parms.offset = *offset;
			io_parms.length = len;
			rc = server->ops->sync_write(xid, &open_file->fid,
					&io_parms, &bytes_written, iov, 1);
		}
		if (rc || (bytes_written == 0)) {
			/* partial progress: report what we did write */
			if (total_written)
				break;
			else {
				free_xid(xid);
				return rc;
			}
		} else {
			spin_lock(&d_inode(dentry)->i_lock);
			cifs_update_eof(cifsi, *offset, bytes_written);
			spin_unlock(&d_inode(dentry)->i_lock);
			*offset += bytes_written;
		}
	}

	cifs_stats_bytes_written(tcon, total_written);

	if (total_written > 0) {
		spin_lock(&d_inode(dentry)->i_lock);
		if (*offset > d_inode(dentry)->i_size)
			i_size_write(d_inode(dentry), *offset);
		spin_unlock(&d_inode(dentry)->i_lock);
	}
	mark_inode_dirty_sync(d_inode(dentry));
	free_xid(xid);
	return total_written;
}
1849
1850 struct cifsFileInfo *find_readable_file(struct cifsInodeInfo *cifs_inode,
1851                                         bool fsuid_only)
1852 {
1853         struct cifsFileInfo *open_file = NULL;
1854         struct cifs_sb_info *cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
1855         struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb);
1856
1857         /* only filter by fsuid on multiuser mounts */
1858         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
1859                 fsuid_only = false;
1860
1861         spin_lock(&tcon->open_file_lock);
1862         /* we could simply get the first_list_entry since write-only entries
1863            are always at the end of the list but since the first entry might
1864            have a close pending, we go through the whole list */
1865         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
1866                 if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
1867                         continue;
1868                 if (OPEN_FMODE(open_file->f_flags) & FMODE_READ) {
1869                         if (!open_file->invalidHandle) {
1870                                 /* found a good file */
1871                                 /* lock it so it will not be closed on us */
1872                                 cifsFileInfo_get(open_file);
1873                                 spin_unlock(&tcon->open_file_lock);
1874                                 return open_file;
1875                         } /* else might as well continue, and look for
1876                              another, or simply have the caller reopen it
1877                              again rather than trying to fix this handle */
1878                 } else /* write only file */
1879                         break; /* write only files are last so must be done */
1880         }
1881         spin_unlock(&tcon->open_file_lock);
1882         return NULL;
1883 }
1884
/*
 * Return a referenced open handle on @cifs_inode that allows writing, or
 * NULL.  Handles opened by the current task (same tgid) are preferred;
 * failing that any available handle is used.  If only stale (invalid)
 * handles exist, one is reopened - retried up to MAX_REOPEN_ATT times.
 * On multiuser mounts @fsuid_only restricts the search to the current
 * fsuid's handles.
 */
struct cifsFileInfo *find_writable_file(struct cifsInodeInfo *cifs_inode,
					bool fsuid_only)
{
	struct cifsFileInfo *open_file, *inv_file = NULL;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	bool any_available = false;
	int rc;
	unsigned int refind = 0;

	/* Having a null inode here (because mapping->host was set to zero by
	the VFS or MM) should not happen but we had reports of on oops (due to
	it being zero) during stress testcases so we need to check for it */

	if (cifs_inode == NULL) {
		cifs_dbg(VFS, "Null inode passed to cifs_writeable_file\n");
		dump_stack();
		return NULL;
	}

	cifs_sb = CIFS_SB(cifs_inode->vfs_inode.i_sb);
	tcon = cifs_sb_master_tcon(cifs_sb);

	/* only filter by fsuid on multiuser mounts */
	if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER))
		fsuid_only = false;

	spin_lock(&tcon->open_file_lock);
refind_writable:
	/* give up after too many reopen attempts of stale handles */
	if (refind > MAX_REOPEN_ATT) {
		spin_unlock(&tcon->open_file_lock);
		return NULL;
	}
	list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
		/* first pass only considers the current task's handles */
		if (!any_available && open_file->pid != current->tgid)
			continue;
		if (fsuid_only && !uid_eq(open_file->uid, current_fsuid()))
			continue;
		if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
			if (!open_file->invalidHandle) {
				/* found a good writable file */
				cifsFileInfo_get(open_file);
				spin_unlock(&tcon->open_file_lock);
				return open_file;
			} else {
				/* remember one stale handle as a fallback */
				if (!inv_file)
					inv_file = open_file;
			}
		}
	}
	/* couldn't find useable FH with same pid, try any available */
	if (!any_available) {
		any_available = true;
		goto refind_writable;
	}

	/* pin the stale handle before dropping the lock to reopen it */
	if (inv_file) {
		any_available = false;
		cifsFileInfo_get(inv_file);
	}

	spin_unlock(&tcon->open_file_lock);

	if (inv_file) {
		rc = cifs_reopen_file(inv_file, false);
		if (!rc)
			return inv_file;
		else {
			/* reopen failed: move it to the list tail so the
			   rescan below tries other entries first */
			spin_lock(&tcon->open_file_lock);
			list_move_tail(&inv_file->flist,
					&cifs_inode->openFileList);
			spin_unlock(&tcon->open_file_lock);
			cifsFileInfo_put(inv_file);
			++refind;
			inv_file = NULL;
			spin_lock(&tcon->open_file_lock);
			goto refind_writable;
		}
	}

	return NULL;
}
1967
1968 static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
1969 {
1970         struct address_space *mapping = page->mapping;
1971         loff_t offset = (loff_t)page->index << PAGE_SHIFT;
1972         char *write_data;
1973         int rc = -EFAULT;
1974         int bytes_written = 0;
1975         struct inode *inode;
1976         struct cifsFileInfo *open_file;
1977
1978         if (!mapping || !mapping->host)
1979                 return -EFAULT;
1980
1981         inode = page->mapping->host;
1982
1983         offset += (loff_t)from;
1984         write_data = kmap(page);
1985         write_data += from;
1986
1987         if ((to > PAGE_SIZE) || (from > to)) {
1988                 kunmap(page);
1989                 return -EIO;
1990         }
1991
1992         /* racing with truncate? */
1993         if (offset > mapping->host->i_size) {
1994                 kunmap(page);
1995                 return 0; /* don't care */
1996         }
1997
1998         /* check to make sure that we are not extending the file */
1999         if (mapping->host->i_size - offset < (loff_t)to)
2000                 to = (unsigned)(mapping->host->i_size - offset);
2001
2002         open_file = find_writable_file(CIFS_I(mapping->host), false);
2003         if (open_file) {
2004                 bytes_written = cifs_write(open_file, open_file->pid,
2005                                            write_data, to - from, &offset);
2006                 cifsFileInfo_put(open_file);
2007                 /* Does mm or vfs already set times? */
2008                 inode->i_atime = inode->i_mtime = current_time(inode);
2009                 if ((bytes_written > 0) && (offset))
2010                         rc = 0;
2011                 else if (bytes_written < 0)
2012                         rc = bytes_written;
2013         } else {
2014                 cifs_dbg(FYI, "No writeable filehandles for inode\n");
2015                 rc = -EIO;
2016         }
2017
2018         kunmap(page);
2019         return rc;
2020 }
2021
2022 static struct cifs_writedata *
2023 wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
2024                           pgoff_t end, pgoff_t *index,
2025                           unsigned int *found_pages)
2026 {
2027         unsigned int nr_pages;
2028         struct page **pages;
2029         struct cifs_writedata *wdata;
2030
2031         wdata = cifs_writedata_alloc((unsigned int)tofind,
2032                                      cifs_writev_complete);
2033         if (!wdata)
2034                 return NULL;
2035
2036         /*
2037          * find_get_pages_tag seems to return a max of 256 on each
2038          * iteration, so we must call it several times in order to
2039          * fill the array or the wsize is effectively limited to
2040          * 256 * PAGE_SIZE.
2041          */
2042         *found_pages = 0;
2043         pages = wdata->pages;
2044         do {
2045                 nr_pages = find_get_pages_tag(mapping, index,
2046                                               PAGECACHE_TAG_DIRTY, tofind,
2047                                               pages);
2048                 *found_pages += nr_pages;
2049                 tofind -= nr_pages;
2050                 pages += nr_pages;
2051         } while (nr_pages && tofind && *index <= end);
2052
2053         return wdata;
2054 }
2055
/*
 * Lock and prepare the pages collected in @wdata for writeback.  A page
 * is accepted only while it is still dirty, still belongs to @mapping,
 * lies within the writeback range, and is consecutive with the previous
 * one; scanning stops at the first page failing any check.  Accepted
 * pages are left locked with their writeback bit set.  Returns the count
 * of accepted pages; the rest are released.  *@next, *@index and *@done
 * are updated for the caller's outer scan loop.
 */
static unsigned int
wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
		    struct address_space *mapping,
		    struct writeback_control *wbc,
		    pgoff_t end, pgoff_t *index, pgoff_t *next, bool *done)
{
	unsigned int nr_pages = 0, i;
	struct page *page;

	for (i = 0; i < found_pages; i++) {
		page = wdata->pages[i];
		/*
		 * At this point we hold neither mapping->tree_lock nor
		 * lock on the page itself: the page may be truncated or
		 * invalidated (changing page->mapping to NULL), or even
		 * swizzled back from swapper_space to tmpfs file
		 * mapping
		 */

		/* block for the first page, but never mid-batch */
		if (nr_pages == 0)
			lock_page(page);
		else if (!trylock_page(page))
			break;

		if (unlikely(page->mapping != mapping)) {
			unlock_page(page);
			break;
		}

		if (!wbc->range_cyclic && page->index > end) {
			*done = true;
			unlock_page(page);
			break;
		}

		if (*next && (page->index != *next)) {
			/* Not next consecutive page */
			unlock_page(page);
			break;
		}

		if (wbc->sync_mode != WB_SYNC_NONE)
			wait_on_page_writeback(page);

		if (PageWriteback(page) ||
				!clear_page_dirty_for_io(page)) {
			unlock_page(page);
			break;
		}

		/*
		 * This actually clears the dirty bit in the radix tree.
		 * See cifs_writepage() for more commentary.
		 */
		set_page_writeback(page);
		if (page_offset(page) >= i_size_read(mapping->host)) {
			/* fully past EOF - nothing to write for this page */
			*done = true;
			unlock_page(page);
			end_page_writeback(page);
			break;
		}

		wdata->pages[i] = page;
		*next = page->index + 1;
		++nr_pages;
	}

	/* reset index to refind any pages skipped */
	if (nr_pages == 0)
		*index = wdata->pages[0]->index + 1;

	/* put any pages we aren't going to use */
	for (i = nr_pages; i < found_pages; i++) {
		put_page(wdata->pages[i]);
		wdata->pages[i] = NULL;
	}

	return nr_pages;
}
2135
2136 static int
2137 wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
2138                  struct address_space *mapping, struct writeback_control *wbc)
2139 {
2140         int rc = 0;
2141         struct TCP_Server_Info *server;
2142         unsigned int i;
2143
2144         wdata->sync_mode = wbc->sync_mode;
2145         wdata->nr_pages = nr_pages;
2146         wdata->offset = page_offset(wdata->pages[0]);
2147         wdata->pagesz = PAGE_SIZE;
2148         wdata->tailsz = min(i_size_read(mapping->host) -
2149                         page_offset(wdata->pages[nr_pages - 1]),
2150                         (loff_t)PAGE_SIZE);
2151         wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
2152
2153         if (wdata->cfile != NULL)
2154                 cifsFileInfo_put(wdata->cfile);
2155         wdata->cfile = find_writable_file(CIFS_I(mapping->host), false);
2156         if (!wdata->cfile) {
2157                 cifs_dbg(VFS, "No writable handles for inode\n");
2158                 rc = -EBADF;
2159         } else {
2160                 wdata->pid = wdata->cfile->pid;
2161                 server = tlink_tcon(wdata->cfile->tlink)->ses->server;
2162                 rc = server->ops->async_writev(wdata, cifs_writedata_release);
2163         }
2164
2165         for (i = 0; i < nr_pages; ++i)
2166                 unlock_page(wdata->pages[i]);
2167
2168         return rc;
2169 }
2170
/*
 * ->writepages() for cifs: write dirty pages of @mapping back to the
 * server in wsize-sized batches of consecutive pages via async writes.
 * Falls back to generic_writepages() (one page at a time) when wsize is
 * smaller than a page.  Scans cyclically from the previous writeback
 * index when requested, wrapping once to the start of the file; batches
 * that fail with -EAGAIN are retried for WB_SYNC_ALL.
 */
static int cifs_writepages(struct address_space *mapping,
			   struct writeback_control *wbc)
{
	struct cifs_sb_info *cifs_sb = CIFS_SB(mapping->host->i_sb);
	struct TCP_Server_Info *server;
	bool done = false, scanned = false, range_whole = false;
	pgoff_t end, index;
	struct cifs_writedata *wdata;
	int rc = 0;

	/*
	 * If wsize is smaller than the page cache size, default to writing
	 * one page at a time via cifs_writepage
	 */
	if (cifs_sb->wsize < PAGE_SIZE)
		return generic_writepages(mapping, wbc);

	if (wbc->range_cyclic) {
		index = mapping->writeback_index; /* Start from prev offset */
		end = -1;
	} else {
		index = wbc->range_start >> PAGE_SHIFT;
		end = wbc->range_end >> PAGE_SHIFT;
		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
			range_whole = true;
		scanned = true;
	}
	server = cifs_sb_master_tcon(cifs_sb)->ses->server;
retry:
	while (!done && index <= end) {
		unsigned int i, nr_pages, found_pages, wsize, credits;
		pgoff_t next = 0, tofind, saved_index = index;

		/* reserve credits for (at most) one wsize-sized request */
		rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
						   &wsize, &credits);
		if (rc)
			break;

		/* cap the scan at one full wsize worth of pages */
		tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;

		wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
						  &found_pages);
		if (!wdata) {
			rc = -ENOMEM;
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		if (found_pages == 0) {
			kref_put(&wdata->refcount, cifs_writedata_release);
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		nr_pages = wdata_prepare_pages(wdata, found_pages, mapping, wbc,
					       end, &index, &next, &done);

		/* nothing to write? */
		if (nr_pages == 0) {
			kref_put(&wdata->refcount, cifs_writedata_release);
			add_credits_and_wake_if(server, credits, 0);
			continue;
		}

		/* credits are returned below via add_credits_and_wake_if()
		   if the send fails */
		wdata->credits = credits;

		rc = wdata_send_pages(wdata, nr_pages, mapping, wbc);

		/* send failure -- clean up the mess */
		if (rc != 0) {
			add_credits_and_wake_if(server, wdata->credits, 0);
			for (i = 0; i < nr_pages; ++i) {
				if (rc == -EAGAIN)
					redirty_page_for_writepage(wbc,
							   wdata->pages[i]);
				else
					SetPageError(wdata->pages[i]);
				end_page_writeback(wdata->pages[i]);
				put_page(wdata->pages[i]);
			}
			if (rc != -EAGAIN)
				mapping_set_error(mapping, rc);
		}
		kref_put(&wdata->refcount, cifs_writedata_release);

		/* data integrity sync: retry this batch from where it began */
		if (wbc->sync_mode == WB_SYNC_ALL && rc == -EAGAIN) {
			index = saved_index;
			continue;
		}

		wbc->nr_to_write -= nr_pages;
		if (wbc->nr_to_write <= 0)
			done = true;

		index = next;
	}

	if (!scanned && !done) {
		/*
		 * We hit the last page and there is more work to be done: wrap
		 * back to the start of the file
		 */
		scanned = true;
		index = 0;
		goto retry;
	}

	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
		mapping->writeback_index = index;

	return rc;
}
2283
/*
 * Write one (already locked) page back to the server synchronously via
 * cifs_partialpagewrite().  The writeback bit is set here and cleared
 * before returning; the page lock itself is the caller's to release.
 * -EAGAIN is retried indefinitely for WB_SYNC_ALL, otherwise the page is
 * simply redirtied for a later pass.
 */
static int
cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
{
	int rc;
	unsigned int xid;

	xid = get_xid();
/* BB add check for wbc flags */
	get_page(page);
	if (!PageUptodate(page))
		cifs_dbg(FYI, "ppw - page not up to date\n");

	/*
	 * Set the "writeback" flag, and clear "dirty" in the radix tree.
	 *
	 * A writepage() implementation always needs to do either this,
	 * or re-dirty the page with "redirty_page_for_writepage()" in
	 * the case of a failure.
	 *
	 * Just unlocking the page will cause the radix tree tag-bits
	 * to fail to update with the state of the page correctly.
	 */
	set_page_writeback(page);
retry_write:
	rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
	if (rc == -EAGAIN) {
		if (wbc->sync_mode == WB_SYNC_ALL)
			goto retry_write;
		redirty_page_for_writepage(wbc, page);
	} else if (rc != 0) {
		SetPageError(page);
		mapping_set_error(page->mapping, rc);
	} else {
		SetPageUptodate(page);
	}
	end_page_writeback(page);
	put_page(page);
	free_xid(xid);
	return rc;
}
2324
/* ->writepage(): write the locked page, then release its lock. */
static int cifs_writepage(struct page *page, struct writeback_control *wbc)
{
	int rc;

	rc = cifs_writepage_locked(page, wbc);
	unlock_page(page);
	return rc;
}
2331
/*
 * ->write_end() for cifs.  If the page ended up uptodate we just dirty it
 * and let writeback push it later; otherwise the copied range is written
 * synchronously through the handle we already have (cheaper than having
 * cifs_partialpagewrite() hunt for a writable handle).  Extends i_size if
 * the write grew the file, and drops the page lock and the reference
 * taken at write_begin time.
 */
static int cifs_write_end(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *page, void *fsdata)
{
	int rc;
	struct inode *inode = mapping->host;
	struct cifsFileInfo *cfile = file->private_data;
	struct cifs_sb_info *cifs_sb = CIFS_SB(cfile->dentry->d_sb);
	__u32 pid;

	/* forward the original opener's pid if the mount asks for it */
	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = cfile->pid;
	else
		pid = current->tgid;

	cifs_dbg(FYI, "write_end for page %p from pos %lld with %d bytes\n",
		 page, pos, copied);

	/* PageChecked marks a page write_begin left unread; it is only
	   uptodate now if the whole requested range was copied in */
	if (PageChecked(page)) {
		if (copied == len)
			SetPageUptodate(page);
		ClearPageChecked(page);
	} else if (!PageUptodate(page) && copied == PAGE_SIZE)
		SetPageUptodate(page);

	if (!PageUptodate(page)) {
		char *page_data;
		unsigned offset = pos & (PAGE_SIZE - 1);
		unsigned int xid;

		xid = get_xid();
		/* this is probably better than directly calling
		   partialpage_write since in this function the file handle is
		   known which we might as well leverage */
		/* BB check if anything else missing out of ppw
		   such as updating last write time */
		page_data = kmap(page);
		rc = cifs_write(cfile, pid, page_data + offset, copied, &pos);
		/* if (rc < 0) should we set writebehind rc? */
		kunmap(page);

		free_xid(xid);
	} else {
		rc = copied;
		pos += copied;
		set_page_dirty(page);
	}

	if (rc > 0) {
		spin_lock(&inode->i_lock);
		if (pos > inode->i_size)
			i_size_write(inode, pos);
		spin_unlock(&inode->i_lock);
	}

	unlock_page(page);
	put_page(page);

	return rc;
}
2392
2393 int cifs_strict_fsync(struct file *file, loff_t start, loff_t end,
2394                       int datasync)
2395 {
2396         unsigned int xid;
2397         int rc = 0;
2398         struct cifs_tcon *tcon;
2399         struct TCP_Server_Info *server;
2400         struct cifsFileInfo *smbfile = file->private_data;
2401         struct inode *inode = file_inode(file);
2402         struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
2403
2404         rc = file_write_and_wait_range(file, start, end);
2405         if (rc)
2406                 return rc;
2407         inode_lock(inode);
2408
2409         xid = get_xid();
2410
2411         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2412                  file, datasync);
2413
2414         if (!CIFS_CACHE_READ(CIFS_I(inode))) {
2415                 rc = cifs_zap_mapping(inode);
2416                 if (rc) {
2417                         cifs_dbg(FYI, "rc: %d during invalidate phase\n", rc);
2418                         rc = 0; /* don't care about it in fsync */
2419                 }
2420         }
2421
2422         tcon = tlink_tcon(smbfile->tlink);
2423         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2424                 server = tcon->ses->server;
2425                 if (server->ops->flush)
2426                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2427                 else
2428                         rc = -ENOSYS;
2429         }
2430
2431         free_xid(xid);
2432         inode_unlock(inode);
2433         return rc;
2434 }
2435
2436 int cifs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
2437 {
2438         unsigned int xid;
2439         int rc = 0;
2440         struct cifs_tcon *tcon;
2441         struct TCP_Server_Info *server;
2442         struct cifsFileInfo *smbfile = file->private_data;
2443         struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
2444         struct inode *inode = file->f_mapping->host;
2445
2446         rc = file_write_and_wait_range(file, start, end);
2447         if (rc)
2448                 return rc;
2449         inode_lock(inode);
2450
2451         xid = get_xid();
2452
2453         cifs_dbg(FYI, "Sync file - name: %pD datasync: 0x%x\n",
2454                  file, datasync);
2455
2456         tcon = tlink_tcon(smbfile->tlink);
2457         if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOSSYNC)) {
2458                 server = tcon->ses->server;
2459                 if (server->ops->flush)
2460                         rc = server->ops->flush(xid, tcon, &smbfile->fid);
2461                 else
2462                         rc = -ENOSYS;
2463         }
2464
2465         free_xid(xid);
2466         inode_unlock(inode);
2467         return rc;
2468 }
2469
2470 /*
2471  * As file closes, flush all cached write data for this inode checking
2472  * for write behind errors.
2473  */
2474 int cifs_flush(struct file *file, fl_owner_t id)
2475 {
2476         struct inode *inode = file_inode(file);
2477         int rc = 0;
2478
2479         if (file->f_mode & FMODE_WRITE)
2480                 rc = filemap_write_and_wait(inode->i_mapping);
2481
2482         cifs_dbg(FYI, "Flush inode %p file %p rc %d\n", inode, file, rc);
2483
2484         return rc;
2485 }
2486
2487 static int
2488 cifs_write_allocate_pages(struct page **pages, unsigned long num_pages)
2489 {
2490         int rc = 0;
2491         unsigned long i;
2492
2493         for (i = 0; i < num_pages; i++) {
2494                 pages[i] = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
2495                 if (!pages[i]) {
2496                         /*
2497                          * save number of pages we have already allocated and
2498                          * return with ENOMEM error
2499                          */
2500                         num_pages = i;
2501                         rc = -ENOMEM;
2502                         break;
2503                 }
2504         }
2505
2506         if (rc) {
2507                 for (i = 0; i < num_pages; i++)
2508                         put_page(pages[i]);
2509         }
2510         return rc;
2511 }
2512
2513 static inline
2514 size_t get_numpages(const size_t wsize, const size_t len, size_t *cur_len)
2515 {
2516         size_t num_pages;
2517         size_t clen;
2518
2519         clen = min_t(const size_t, len, wsize);
2520         num_pages = DIV_ROUND_UP(clen, PAGE_SIZE);
2521
2522         if (cur_len)
2523                 *cur_len = clen;
2524
2525         return num_pages;
2526 }
2527
/*
 * Final kref release for an uncached write request: drop our reference
 * on the owning aio context, release the data pages, then free the
 * wdata itself (and its cfile reference) via cifs_writedata_release.
 */
static void
cifs_uncached_writedata_release(struct kref *refcount)
{
	int i;
	struct cifs_writedata *wdata = container_of(refcount,
					struct cifs_writedata, refcount);

	kref_put(&wdata->ctx->refcount, cifs_aio_ctx_release);
	for (i = 0; i < wdata->nr_pages; i++)
		put_page(wdata->pages[i]);
	/* frees wdata — must be last */
	cifs_writedata_release(refcount);
}
2540
2541 static void collect_uncached_write_data(struct cifs_aio_ctx *ctx);
2542
/*
 * Work-queue completion for an uncached async write: update the cached
 * server EOF / i_size, signal waiters, and kick result collection.
 */
static void
cifs_uncached_writev_complete(struct work_struct *work)
{
	struct cifs_writedata *wdata = container_of(work,
					struct cifs_writedata, work);
	struct inode *inode = d_inode(wdata->cfile->dentry);
	struct cifsInodeInfo *cifsi = CIFS_I(inode);

	/* i_lock protects both server_eof bookkeeping and i_size */
	spin_lock(&inode->i_lock);
	cifs_update_eof(cifsi, wdata->offset, wdata->bytes);
	if (cifsi->server_eof > inode->i_size)
		i_size_write(inode, cifsi->server_eof);
	spin_unlock(&inode->i_lock);

	/* mark done before collecting so collect sees this completion */
	complete(&wdata->done);
	collect_uncached_write_data(wdata->ctx);
	/* the below call can possibly free the last ref to aio ctx */
	kref_put(&wdata->refcount, cifs_uncached_writedata_release);
}
2562
2563 static int
2564 wdata_fill_from_iovec(struct cifs_writedata *wdata, struct iov_iter *from,
2565                       size_t *len, unsigned long *num_pages)
2566 {
2567         size_t save_len, copied, bytes, cur_len = *len;
2568         unsigned long i, nr_pages = *num_pages;
2569
2570         save_len = cur_len;
2571         for (i = 0; i < nr_pages; i++) {
2572                 bytes = min_t(const size_t, cur_len, PAGE_SIZE);
2573                 copied = copy_page_from_iter(wdata->pages[i], 0, bytes, from);
2574                 cur_len -= copied;
2575                 /*
2576                  * If we didn't copy as much as we expected, then that
2577                  * may mean we trod into an unmapped area. Stop copying
2578                  * at that point. On the next pass through the big
2579                  * loop, we'll likely end up getting a zero-length
2580                  * write and bailing out of it.
2581                  */
2582                 if (copied < bytes)
2583                         break;
2584         }
2585         cur_len = save_len - cur_len;
2586         *len = cur_len;
2587
2588         /*
2589          * If we have no data to send, then that probably means that
2590          * the copy above failed altogether. That's most likely because
2591          * the address in the iovec was bogus. Return -EFAULT and let
2592          * the caller free anything we allocated and bail out.
2593          */
2594         if (!cur_len)
2595                 return -EFAULT;
2596
2597         /*
2598          * i + 1 now represents the number of pages we actually used in
2599          * the copy phase above.
2600          */
2601         *num_pages = i + 1;
2602         return 0;
2603 }
2604
/*
 * Split an uncached write of @len bytes at @offset into wsize-sized
 * async write requests. Each successfully-sent request is appended to
 * @wdata_list; its completion handler feeds results back into @ctx.
 * Returns 0 or the first send error (requests already on the list keep
 * running regardless).
 */
static int
cifs_write_from_iter(loff_t offset, size_t len, struct iov_iter *from,
		     struct cifsFileInfo *open_file,
		     struct cifs_sb_info *cifs_sb, struct list_head *wdata_list,
		     struct cifs_aio_ctx *ctx)
{
	int rc = 0;
	size_t cur_len;
	unsigned long nr_pages, num_pages, i;
	struct cifs_writedata *wdata;
	/* snapshots used to rewind the iterator on -EAGAIN resends */
	struct iov_iter saved_from = *from;
	loff_t saved_offset = offset;
	pid_t pid;
	struct TCP_Server_Info *server;

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	server = tlink_tcon(open_file->tlink)->ses->server;

	do {
		unsigned int wsize, credits;

		/* block until the server grants credits for one write */
		rc = server->ops->wait_mtu_credits(server, cifs_sb->wsize,
						   &wsize, &credits);
		if (rc)
			break;

		nr_pages = get_numpages(wsize, len, &cur_len);
		wdata = cifs_writedata_alloc(nr_pages,
					     cifs_uncached_writev_complete);
		if (!wdata) {
			rc = -ENOMEM;
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		rc = cifs_write_allocate_pages(wdata->pages, nr_pages);
		if (rc) {
			kfree(wdata);
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		num_pages = nr_pages;
		rc = wdata_fill_from_iovec(wdata, from, &cur_len, &num_pages);
		if (rc) {
			for (i = 0; i < nr_pages; i++)
				put_page(wdata->pages[i]);
			kfree(wdata);
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		/*
		 * Bring nr_pages down to the number of pages we actually used,
		 * and free any pages that we didn't use.
		 */
		for ( ; nr_pages > num_pages; nr_pages--)
			put_page(wdata->pages[nr_pages - 1]);

		wdata->sync_mode = WB_SYNC_ALL;
		wdata->nr_pages = nr_pages;
		wdata->offset = (__u64)offset;
		wdata->cfile = cifsFileInfo_get(open_file);
		wdata->pid = pid;
		wdata->bytes = cur_len;
		wdata->pagesz = PAGE_SIZE;
		wdata->tailsz = cur_len - ((nr_pages - 1) * PAGE_SIZE);
		wdata->credits = credits;
		wdata->ctx = ctx;
		kref_get(&ctx->refcount);

		/* reopen a stale handle before sending, if needed */
		if (!wdata->cfile->invalidHandle ||
		    !(rc = cifs_reopen_file(wdata->cfile, false)))
			rc = server->ops->async_writev(wdata,
					cifs_uncached_writedata_release);
		if (rc) {
			add_credits_and_wake_if(server, wdata->credits, 0);
			kref_put(&wdata->refcount,
				 cifs_uncached_writedata_release);
			if (rc == -EAGAIN) {
				/* rewind the iterator and retry this chunk */
				*from = saved_from;
				iov_iter_advance(from, offset - saved_offset);
				continue;
			}
			break;
		}

		list_add_tail(&wdata->list, wdata_list);
		offset += cur_len;
		len -= cur_len;
	} while (len > 0);

	return rc;
}
2703
/*
 * Gather results of the outstanding uncached writes on @ctx->list.
 * Called both from the submitter and from each write's completion
 * handler; aio_mutex serializes them. If some request has not finished
 * yet we simply return — a later completion will call back in here and
 * resume the collection. Once the list is drained the final result is
 * published in ctx->rc and the waiter (or aio completion) is notified.
 */
static void collect_uncached_write_data(struct cifs_aio_ctx *ctx)
{
	struct cifs_writedata *wdata, *tmp;
	struct cifs_tcon *tcon;
	struct cifs_sb_info *cifs_sb;
	struct dentry *dentry = ctx->cfile->dentry;
	unsigned int i;
	int rc;

	tcon = tlink_tcon(ctx->cfile->tlink);
	cifs_sb = CIFS_SB(dentry->d_sb);

	mutex_lock(&ctx->aio_mutex);

	/* an empty list means another caller already finished the job */
	if (list_empty(&ctx->list)) {
		mutex_unlock(&ctx->aio_mutex);
		return;
	}

	rc = ctx->rc;
	/*
	 * Wait for and collect replies for any successful sends in order of
	 * increasing offset. Once an error is hit, then return without waiting
	 * for any more replies.
	 */
restart_loop:
	list_for_each_entry_safe(wdata, tmp, &ctx->list, list) {
		if (!rc) {
			/* not done yet: the completion will re-enter us */
			if (!try_wait_for_completion(&wdata->done)) {
				mutex_unlock(&ctx->aio_mutex);
				return;
			}

			if (wdata->result)
				rc = wdata->result;
			else
				ctx->total_len += wdata->bytes;

			/* resend call if it's a retryable error */
			if (rc == -EAGAIN) {
				struct list_head tmp_list;
				struct iov_iter tmp_from = ctx->iter;

				INIT_LIST_HEAD(&tmp_list);
				list_del_init(&wdata->list);

				iov_iter_advance(&tmp_from,
						 wdata->offset - ctx->pos);

				rc = cifs_write_from_iter(wdata->offset,
						wdata->bytes, &tmp_from,
						ctx->cfile, cifs_sb, &tmp_list,
						ctx);

				/* queue the resent requests and rescan */
				list_splice(&tmp_list, &ctx->list);

				kref_put(&wdata->refcount,
					 cifs_uncached_writedata_release);
				goto restart_loop;
			}
		}
		list_del_init(&wdata->list);
		kref_put(&wdata->refcount, cifs_uncached_writedata_release);
	}

	/* release the pinned user pages backing the iterator */
	for (i = 0; i < ctx->npages; i++)
		put_page(ctx->bv[i].bv_page);

	cifs_stats_bytes_written(tcon, ctx->total_len);
	/* cached pages now disagree with the server; invalidate later */
	set_bit(CIFS_INO_INVALID_MAPPING, &CIFS_I(dentry->d_inode)->flags);

	ctx->rc = (rc == 0) ? ctx->total_len : rc;

	mutex_unlock(&ctx->aio_mutex);

	/* async callers get ki_complete; sync callers wait on ctx->done */
	if (ctx->iocb && ctx->iocb->ki_complete)
		ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
	else
		complete(&ctx->done);
}
2784
/*
 * Uncached (direct-style) write entry point. The user data is copied
 * into kernel pages and sent to the server asynchronously, bypassing
 * the pagecache. For async kiocbs the result is delivered through
 * ki_complete; sync callers block on the aio context's completion.
 */
ssize_t cifs_user_writev(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	ssize_t total_written = 0;
	struct cifsFileInfo *cfile;
	struct cifs_tcon *tcon;
	struct cifs_sb_info *cifs_sb;
	struct cifs_aio_ctx *ctx;
	/* snapshot: setup_aio_ctx_iter below consumes *from */
	struct iov_iter saved_from = *from;
	int rc;

	/*
	 * BB - optimize the way when signing is disabled. We can drop this
	 * extra memory-to-memory copying and use iovec buffers for constructing
	 * write request.
	 */

	rc = generic_write_checks(iocb, from);
	if (rc <= 0)
		return rc;

	cifs_sb = CIFS_FILE_SB(file);
	cfile = file->private_data;
	tcon = tlink_tcon(cfile->tlink);

	if (!tcon->ses->server->ops->async_writev)
		return -ENOSYS;

	ctx = cifs_aio_ctx_alloc();
	if (!ctx)
		return -ENOMEM;

	ctx->cfile = cifsFileInfo_get(cfile);

	/* only async kiocbs carry a completion callback */
	if (!is_sync_kiocb(iocb))
		ctx->iocb = iocb;

	ctx->pos = iocb->ki_pos;

	/* pin the user pages and build ctx->iter over them */
	rc = setup_aio_ctx_iter(ctx, from, WRITE);
	if (rc) {
		kref_put(&ctx->refcount, cifs_aio_ctx_release);
		return rc;
	}

	/* grab a lock here due to read response handlers can access ctx */
	mutex_lock(&ctx->aio_mutex);

	rc = cifs_write_from_iter(iocb->ki_pos, ctx->len, &saved_from,
				  cfile, cifs_sb, &ctx->list, ctx);

	/*
	 * If at least one write was successfully sent, then discard any rc
	 * value from the later writes. If the other write succeeds, then
	 * we'll end up returning whatever was written. If it fails, then
	 * we'll get a new rc value from that.
	 */
	if (!list_empty(&ctx->list))
		rc = 0;

	mutex_unlock(&ctx->aio_mutex);

	if (rc) {
		kref_put(&ctx->refcount, cifs_aio_ctx_release);
		return rc;
	}

	/* async path: completion handlers own the ctx from here on */
	if (!is_sync_kiocb(iocb)) {
		kref_put(&ctx->refcount, cifs_aio_ctx_release);
		return -EIOCBQUEUED;
	}

	rc = wait_for_completion_killable(&ctx->done);
	if (rc) {
		/* killed while waiting: report -EINTR but keep progress */
		mutex_lock(&ctx->aio_mutex);
		ctx->rc = rc = -EINTR;
		total_written = ctx->total_len;
		mutex_unlock(&ctx->aio_mutex);
	} else {
		rc = ctx->rc;
		total_written = ctx->total_len;
	}

	kref_put(&ctx->refcount, cifs_aio_ctx_release);

	if (unlikely(!total_written))
		return rc;

	iocb->ki_pos += total_written;
	return total_written;
}
2876
/*
 * Cached write used when we hold a write-caching oplock/lease but must
 * still honor mandatory byte-range locks. Fails with -EACCES if the
 * range conflicts with an exclusive brlock. Lock order: inode_lock,
 * then lock_sem (read) — must match the rest of the file.
 */
static ssize_t
cifs_writev(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)file->private_data;
	struct inode *inode = file->f_mapping->host;
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct TCP_Server_Info *server = tlink_tcon(cfile->tlink)->ses->server;
	ssize_t rc;

	inode_lock(inode);
	/*
	 * We need to hold the sem to be sure nobody modifies lock list
	 * with a brlock that prevents writing.
	 */
	down_read(&cinode->lock_sem);

	rc = generic_write_checks(iocb, from);
	if (rc <= 0)
		goto out;

	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(from),
				     server->vals->exclusive_lock_type, NULL,
				     CIFS_WRITE_OP))
		rc = __generic_file_write_iter(iocb, from);
	else
		rc = -EACCES;
out:
	up_read(&cinode->lock_sem);
	inode_unlock(inode);

	/* honor O_SYNC/O_DSYNC after the locks are dropped */
	if (rc > 0)
		rc = generic_write_sync(iocb, rc);
	return rc;
}
2912
/*
 * Write entry point for strict-cache mounts. Chooses between the
 * cached path (when we hold a write-caching lease) and the uncached
 * path, and invalidates read-cached data after an uncached write.
 */
ssize_t
cifs_strict_writev(struct kiocb *iocb, struct iov_iter *from)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
						iocb->ki_filp->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	ssize_t written;

	/* blocks while an oplock break is being handled */
	written = cifs_get_writer(cinode);
	if (written)
		return written;

	if (CIFS_CACHE_WRITE(cinode)) {
		/*
		 * With POSIX (advisory) locking semantics available we can
		 * use the plain generic path; otherwise go through
		 * cifs_writev which checks mandatory brlocks.
		 */
		if (cap_unix(tcon->ses) &&
		(CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability))
		  && ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0)) {
			written = generic_file_write_iter(iocb, from);
			goto out;
		}
		written = cifs_writev(iocb, from);
		goto out;
	}
	/*
	 * For non-oplocked files in strict cache mode we need to write the data
	 * to the server exactly from the pos to pos+len-1 rather than flush all
	 * affected pages because it may cause a error with mandatory locks on
	 * these pages but not on the region from pos to ppos+len-1.
	 */
	written = cifs_user_writev(iocb, from);
	if (CIFS_CACHE_READ(cinode)) {
		/*
		 * We have read level caching and we have just sent a write
		 * request to the server thus making data in the cache stale.
		 * Zap the cache and set oplock/lease level to NONE to avoid
		 * reading stale data from the cache. All subsequent read
		 * operations will read new data from the server.
		 */
		cifs_zap_mapping(inode);
		cifs_dbg(FYI, "Set Oplock/Lease to NONE for inode=%p after write\n",
			 inode);
		cinode->oplock = 0;
	}
out:
	cifs_put_writer(cinode);
	return written;
}
2962
2963 static struct cifs_readdata *
2964 cifs_readdata_alloc(unsigned int nr_pages, work_func_t complete)
2965 {
2966         struct cifs_readdata *rdata;
2967
2968         rdata = kzalloc(sizeof(*rdata) + (sizeof(struct page *) * nr_pages),
2969                         GFP_KERNEL);
2970         if (rdata != NULL) {
2971                 kref_init(&rdata->refcount);
2972                 INIT_LIST_HEAD(&rdata->list);
2973                 init_completion(&rdata->done);
2974                 INIT_WORK(&rdata->work, complete);
2975         }
2976
2977         return rdata;
2978 }
2979
2980 void
2981 cifs_readdata_release(struct kref *refcount)
2982 {
2983         struct cifs_readdata *rdata = container_of(refcount,
2984                                         struct cifs_readdata, refcount);
2985
2986         if (rdata->cfile)
2987                 cifsFileInfo_put(rdata->cfile);
2988
2989         kfree(rdata);
2990 }
2991
2992 static int
2993 cifs_read_allocate_pages(struct cifs_readdata *rdata, unsigned int nr_pages)
2994 {
2995         int rc = 0;
2996         struct page *page;
2997         unsigned int i;
2998
2999         for (i = 0; i < nr_pages; i++) {
3000                 page = alloc_page(GFP_KERNEL|__GFP_HIGHMEM);
3001                 if (!page) {
3002                         rc = -ENOMEM;
3003                         break;
3004                 }
3005                 rdata->pages[i] = page;
3006         }
3007
3008         if (rc) {
3009                 unsigned int nr_page_failed = i;
3010
3011                 for (i = 0; i < nr_page_failed; i++) {
3012                         put_page(rdata->pages[i]);
3013                         rdata->pages[i] = NULL;
3014                 }
3015         }
3016         return rc;
3017 }
3018
/*
 * Final kref release for an uncached read request: drop our reference
 * on the owning aio context, release the data pages, then free the
 * rdata itself via cifs_readdata_release.
 */
static void
cifs_uncached_readdata_release(struct kref *refcount)
{
	struct cifs_readdata *rdata = container_of(refcount,
					struct cifs_readdata, refcount);
	unsigned int i;

	kref_put(&rdata->ctx->refcount, cifs_aio_ctx_release);
	for (i = 0; i < rdata->nr_pages; i++) {
		put_page(rdata->pages[i]);
		rdata->pages[i] = NULL;
	}
	/* frees rdata — must be last */
	cifs_readdata_release(refcount);
}
3033
/**
 * cifs_readdata_to_iov - copy data from pages in response to an iovec
 * @rdata:      the readdata response with list of pages holding data
 * @iter:       destination for our data
 *
 * This function copies data from a list of pages in a readdata response into
 * an array of iovecs. It will first calculate where the data should go
 * based on the info in the readdata and then copy the data into that spot.
 *
 * Returns 0 if all of rdata->got_bytes was consumed, -EFAULT otherwise.
 */
static int
cifs_readdata_to_iov(struct cifs_readdata *rdata, struct iov_iter *iter)
{
	size_t remaining = rdata->got_bytes;
	unsigned int i;

	for (i = 0; i < rdata->nr_pages; i++) {
		struct page *page = rdata->pages[i];
		size_t copy = min_t(size_t, remaining, PAGE_SIZE);
		size_t written;

		/*
		 * Pipe-backed iterators take the kmap'd-address path.
		 * NOTE(review): copy_to_iter on an ITER_PIPE may allocate
		 * pipe buffers; presumably safe here despite kmap_atomic —
		 * confirm against the iov_iter pipe implementation.
		 */
		if (unlikely(iter->type & ITER_PIPE)) {
			void *addr = kmap_atomic(page);

			written = copy_to_iter(addr, copy, iter);
			kunmap_atomic(addr);
		} else
			written = copy_page_to_iter(page, 0, copy, iter);
		remaining -= written;
		/* short copy with room left in the iter: give up */
		if (written < copy && iov_iter_count(iter) > 0)
			break;
	}
	return remaining ? -EFAULT : 0;
}
3067
3068 static void collect_uncached_read_data(struct cifs_aio_ctx *ctx);
3069
/*
 * Work-queue completion for an uncached async read: signal waiters and
 * kick result collection on the owning aio context.
 */
static void
cifs_uncached_readv_complete(struct work_struct *work)
{
	struct cifs_readdata *rdata = container_of(work,
						struct cifs_readdata, work);

	/* mark done before collecting so collect sees this completion */
	complete(&rdata->done);
	collect_uncached_read_data(rdata->ctx);
	/* the below call can possibly free the last ref to aio ctx */
	kref_put(&rdata->refcount, cifs_uncached_readdata_release);
}
3081
3082 static int
3083 uncached_fill_pages(struct TCP_Server_Info *server,
3084                     struct cifs_readdata *rdata, struct iov_iter *iter,
3085                     unsigned int len)
3086 {
3087         int result = 0;
3088         unsigned int i;
3089         unsigned int nr_pages = rdata->nr_pages;
3090
3091         rdata->got_bytes = 0;
3092         rdata->tailsz = PAGE_SIZE;
3093         for (i = 0; i < nr_pages; i++) {
3094                 struct page *page = rdata->pages[i];
3095                 size_t n;
3096
3097                 if (len <= 0) {
3098                         /* no need to hold page hostage */
3099                         rdata->pages[i] = NULL;
3100                         rdata->nr_pages--;
3101                         put_page(page);
3102                         continue;
3103                 }
3104                 n = len;
3105                 if (len >= PAGE_SIZE) {
3106                         /* enough data to fill the page */
3107                         n = PAGE_SIZE;
3108                         len -= n;
3109                 } else {
3110                         zero_user(page, len, PAGE_SIZE - len);
3111                         rdata->tailsz = len;
3112                         len = 0;
3113                 }
3114                 if (iter)
3115                         result = copy_page_from_iter(page, 0, n, iter);
3116                 else
3117                         result = cifs_read_page_from_socket(server, page, n);
3118                 if (result < 0)
3119                         break;
3120
3121                 rdata->got_bytes += result;
3122         }
3123
3124         return rdata->got_bytes > 0 && result != -ECONNABORTED ?
3125                                                 rdata->got_bytes : result;
3126 }
3127
3128 static int
3129 cifs_uncached_read_into_pages(struct TCP_Server_Info *server,
3130                               struct cifs_readdata *rdata, unsigned int len)
3131 {
3132         return uncached_fill_pages(server, rdata, NULL, len);
3133 }
3134
3135 static int
3136 cifs_uncached_copy_into_pages(struct TCP_Server_Info *server,
3137                               struct cifs_readdata *rdata,
3138                               struct iov_iter *iter)
3139 {
3140         return uncached_fill_pages(server, rdata, iter, iter->count);
3141 }
3142
/*
 * Split an uncached read of @len bytes at @offset into rsize-sized
 * async read requests. Each successfully-sent request is appended to
 * @rdata_list; its completion handler feeds results back into @ctx.
 * Returns 0 or the first send error.
 */
static int
cifs_send_async_read(loff_t offset, size_t len, struct cifsFileInfo *open_file,
		     struct cifs_sb_info *cifs_sb, struct list_head *rdata_list,
		     struct cifs_aio_ctx *ctx)
{
	struct cifs_readdata *rdata;
	unsigned int npages, rsize, credits;
	size_t cur_len;
	int rc;
	pid_t pid;
	struct TCP_Server_Info *server;

	server = tlink_tcon(open_file->tlink)->ses->server;

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	do {
		/* block until the server grants credits for one read */
		rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
						   &rsize, &credits);
		if (rc)
			break;

		cur_len = min_t(const size_t, len, rsize);
		npages = DIV_ROUND_UP(cur_len, PAGE_SIZE);

		/* allocate a readdata struct */
		rdata = cifs_readdata_alloc(npages,
					    cifs_uncached_readv_complete);
		if (!rdata) {
			add_credits_and_wake_if(server, credits, 0);
			rc = -ENOMEM;
			break;
		}

		rc = cifs_read_allocate_pages(rdata, npages);
		if (rc)
			goto error;

		rdata->cfile = cifsFileInfo_get(open_file);
		rdata->nr_pages = npages;
		rdata->offset = offset;
		rdata->bytes = cur_len;
		rdata->pid = pid;
		rdata->pagesz = PAGE_SIZE;
		rdata->read_into_pages = cifs_uncached_read_into_pages;
		rdata->copy_into_pages = cifs_uncached_copy_into_pages;
		rdata->credits = credits;
		rdata->ctx = ctx;
		kref_get(&ctx->refcount);

		/* reopen a stale handle before sending, if needed */
		if (!rdata->cfile->invalidHandle ||
		    !(rc = cifs_reopen_file(rdata->cfile, true)))
			rc = server->ops->async_readv(rdata);
error:
		if (rc) {
			/* kref_put below also frees any pages allocated */
			add_credits_and_wake_if(server, rdata->credits, 0);
			kref_put(&rdata->refcount,
				 cifs_uncached_readdata_release);
			if (rc == -EAGAIN)
				continue;
			break;
		}

		list_add_tail(&rdata->list, rdata_list);
		offset += cur_len;
		len -= cur_len;
	} while (len > 0);

	return rc;
}
3216
/*
 * Collect the results of all uncached read requests attached to @ctx,
 * copy the received data into the caller's iov_iter and finish the aio
 * context.  May be invoked concurrently from the issuing path and from
 * request completion; ctx->aio_mutex serialises the callers and whoever
 * finds the list empty simply returns.
 */
static void
collect_uncached_read_data(struct cifs_aio_ctx *ctx)
{
	struct cifs_readdata *rdata, *tmp;
	struct iov_iter *to = &ctx->iter;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	unsigned int i;
	int rc;

	tcon = tlink_tcon(ctx->cfile->tlink);
	cifs_sb = CIFS_SB(ctx->cfile->dentry->d_sb);

	mutex_lock(&ctx->aio_mutex);

	/* another caller already drained the list and completed the ctx */
	if (list_empty(&ctx->list)) {
		mutex_unlock(&ctx->aio_mutex);
		return;
	}

	rc = ctx->rc;
	/* the loop below should proceed in the order of increasing offsets */
again:
	list_for_each_entry_safe(rdata, tmp, &ctx->list, list) {
		if (!rc) {
			/*
			 * A request is still in flight; give up for now --
			 * a later invocation will pick up where we left off.
			 */
			if (!try_wait_for_completion(&rdata->done)) {
				mutex_unlock(&ctx->aio_mutex);
				return;
			}

			if (rdata->result == -EAGAIN) {
				/* resend call if it's a retryable error */
				struct list_head tmp_list;
				unsigned int got_bytes = rdata->got_bytes;

				list_del_init(&rdata->list);
				INIT_LIST_HEAD(&tmp_list);

				/*
				 * Got a part of data and then reconnect has
				 * happened -- fill the buffer and continue
				 * reading.
				 */
				if (got_bytes && got_bytes < rdata->bytes) {
					rc = cifs_readdata_to_iov(rdata, to);
					if (rc) {
						kref_put(&rdata->refcount,
						cifs_uncached_readdata_release);
						continue;
					}
				}

				/* reissue only the part that was not received */
				rc = cifs_send_async_read(
						rdata->offset + got_bytes,
						rdata->bytes - got_bytes,
						rdata->cfile, cifs_sb,
						&tmp_list, ctx);

				list_splice(&tmp_list, &ctx->list);

				kref_put(&rdata->refcount,
					 cifs_uncached_readdata_release);
				/* list contents changed -- restart the walk */
				goto again;
			} else if (rdata->result)
				rc = rdata->result;
			else
				rc = cifs_readdata_to_iov(rdata, to);

			/* if there was a short read -- discard anything left */
			if (rdata->got_bytes && rdata->got_bytes < rdata->bytes)
				rc = -ENODATA;
		}
		list_del_init(&rdata->list);
		kref_put(&rdata->refcount, cifs_uncached_readdata_release);
	}

	/* drop the page references taken when the iter was set up */
	for (i = 0; i < ctx->npages; i++) {
		if (ctx->should_dirty)
			set_page_dirty(ctx->bv[i].bv_page);
		put_page(ctx->bv[i].bv_page);
	}

	ctx->total_len = ctx->len - iov_iter_count(to);

	cifs_stats_bytes_read(tcon, ctx->total_len);

	/* mask nodata case */
	if (rc == -ENODATA)
		rc = 0;

	ctx->rc = (rc == 0) ? (ssize_t)ctx->total_len : rc;

	mutex_unlock(&ctx->aio_mutex);

	/* async iocb: complete it; sync caller: wake up the waiter */
	if (ctx->iocb && ctx->iocb->ki_complete)
		ctx->iocb->ki_complete(ctx->iocb, ctx->rc, 0);
	else
		complete(&ctx->done);
}
3316
/*
 * Uncached read entry point: issue async read requests covering the whole
 * range described by @to, then either wait for completion (synchronous
 * kiocb) or return -EIOCBQUEUED and let the completion path finish the
 * iocb.  Returns the number of bytes read or a negative error.
 */
ssize_t cifs_user_readv(struct kiocb *iocb, struct iov_iter *to)
{
	struct file *file = iocb->ki_filp;
	ssize_t rc;
	size_t len;
	ssize_t total_read = 0;
	loff_t offset = iocb->ki_pos;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct cifsFileInfo *cfile;
	struct cifs_aio_ctx *ctx;

	len = iov_iter_count(to);
	if (!len)
		return 0;

	cifs_sb = CIFS_FILE_SB(file);
	cfile = file->private_data;
	tcon = tlink_tcon(cfile->tlink);

	/* this path depends entirely on the async read op */
	if (!tcon->ses->server->ops->async_readv)
		return -ENOSYS;

	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
		cifs_dbg(FYI, "attempting read on write only file instance\n");

	ctx = cifs_aio_ctx_alloc();
	if (!ctx)
		return -ENOMEM;

	ctx->cfile = cifsFileInfo_get(cfile);

	if (!is_sync_kiocb(iocb))
		ctx->iocb = iocb;

	/* user-backed pages must be redirtied once the data lands in them */
	if (to->type == ITER_IOVEC)
		ctx->should_dirty = true;

	rc = setup_aio_ctx_iter(ctx, to, READ);
	if (rc) {
		kref_put(&ctx->refcount, cifs_aio_ctx_release);
		return rc;
	}

	len = ctx->len;

	/* grab a lock here due to read response handlers can access ctx */
	mutex_lock(&ctx->aio_mutex);

	rc = cifs_send_async_read(offset, len, cfile, cifs_sb, &ctx->list, ctx);

	/* if at least one read request send succeeded, then reset rc */
	if (!list_empty(&ctx->list))
		rc = 0;

	mutex_unlock(&ctx->aio_mutex);

	if (rc) {
		kref_put(&ctx->refcount, cifs_aio_ctx_release);
		return rc;
	}

	/* async: completion handlers own the remaining ctx reference */
	if (!is_sync_kiocb(iocb)) {
		kref_put(&ctx->refcount, cifs_aio_ctx_release);
		return -EIOCBQUEUED;
	}

	rc = wait_for_completion_killable(&ctx->done);
	if (rc) {
		/* killed while waiting -- report whatever was read so far */
		mutex_lock(&ctx->aio_mutex);
		ctx->rc = rc = -EINTR;
		total_read = ctx->total_len;
		mutex_unlock(&ctx->aio_mutex);
	} else {
		rc = ctx->rc;
		total_read = ctx->total_len;
	}

	kref_put(&ctx->refcount, cifs_aio_ctx_release);

	if (total_read) {
		iocb->ki_pos += total_read;
		return total_read;
	}
	return rc;
}
3403
/*
 * Read entry point for strict cache mode: serve from the page cache only
 * when we hold a read oplock and no mandatory brlock conflicts with the
 * requested range; otherwise fall back to reading from the server.
 */
ssize_t
cifs_strict_readv(struct kiocb *iocb, struct iov_iter *to)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
	struct cifsFileInfo *cfile = (struct cifsFileInfo *)
						iocb->ki_filp->private_data;
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	int rc = -EACCES;

	/*
	 * In strict cache mode we need to read from the server all the time
	 * if we don't have level II oplock because the server can delay mtime
	 * change - so we can't make a decision about inode invalidating.
	 * And we can also fail with pagereading if there are mandatory locks
	 * on pages affected by this read but not on the region from pos to
	 * pos+len-1.
	 */
	if (!CIFS_CACHE_READ(cinode))
		return cifs_user_readv(iocb, to);

	/* POSIX locks don't mandate read blocking -- no conflict check needed */
	if (cap_unix(tcon->ses) &&
	    (CIFS_UNIX_FCNTL_CAP & le64_to_cpu(tcon->fsUnixInfo.Capability)) &&
	    ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NOPOSIXBRL) == 0))
		return generic_file_read_iter(iocb, to);

	/*
	 * We need to hold the sem to be sure nobody modifies lock list
	 * with a brlock that prevents reading.
	 */
	down_read(&cinode->lock_sem);
	if (!cifs_find_lock_conflict(cfile, iocb->ki_pos, iov_iter_count(to),
				     tcon->ses->server->vals->shared_lock_type,
				     NULL, CIFS_READ_OP))
		rc = generic_file_read_iter(iocb, to);
	up_read(&cinode->lock_sem);
	return rc;
}
3443
/*
 * Synchronous read helper: read up to @read_size bytes at *@offset into
 * @read_data using the server's sync_read op, issuing as many wire reads
 * as needed and retrying each one on -EAGAIN (reconnect).  Advances
 * *@offset by the amount read and returns the byte count or an error.
 */
static ssize_t
cifs_read(struct file *file, char *read_data, size_t read_size, loff_t *offset)
{
	int rc = -EACCES;
	unsigned int bytes_read = 0;
	unsigned int total_read;
	unsigned int current_read_size;
	unsigned int rsize;
	struct cifs_sb_info *cifs_sb;
	struct cifs_tcon *tcon;
	struct TCP_Server_Info *server;
	unsigned int xid;
	char *cur_offset;
	struct cifsFileInfo *open_file;
	struct cifs_io_parms io_parms;
	int buf_type = CIFS_NO_BUFFER;
	__u32 pid;

	xid = get_xid();
	cifs_sb = CIFS_FILE_SB(file);

	/* FIXME: set up handlers for larger reads and/or convert to async */
	rsize = min_t(unsigned int, cifs_sb->rsize, CIFSMaxBufSize);

	if (file->private_data == NULL) {
		rc = -EBADF;
		free_xid(xid);
		return rc;
	}
	open_file = file->private_data;
	tcon = tlink_tcon(open_file->tlink);
	server = tcon->ses->server;

	if (!server->ops->sync_read) {
		free_xid(xid);
		return -ENOSYS;
	}

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	if ((file->f_flags & O_ACCMODE) == O_WRONLY)
		cifs_dbg(FYI, "attempting read on write only file instance\n");

	for (total_read = 0, cur_offset = read_data; read_size > total_read;
	     total_read += bytes_read, cur_offset += bytes_read) {
		/* inner loop retries a single wire read on reconnect */
		do {
			current_read_size = min_t(uint, read_size - total_read,
						  rsize);
			/*
			 * For windows me and 9x we do not want to request more
			 * than it negotiated since it will refuse the read
			 * then.
			 */
			if (!(tcon->ses->capabilities &
				tcon->ses->server->vals->cap_large_files)) {
				current_read_size = min_t(uint,
					current_read_size, CIFSMaxBufSize);
			}
			if (open_file->invalidHandle) {
				rc = cifs_reopen_file(open_file, true);
				if (rc != 0)
					break;
			}
			io_parms.pid = pid;
			io_parms.tcon = tcon;
			io_parms.offset = *offset;
			io_parms.length = current_read_size;
			rc = server->ops->sync_read(xid, &open_file->fid, &io_parms,
						    &bytes_read, &cur_offset,
						    &buf_type);
		} while (rc == -EAGAIN);

		if (rc || (bytes_read == 0)) {
			/* partial success: report what we have, else error */
			if (total_read) {
				break;
			} else {
				free_xid(xid);
				return rc;
			}
		} else {
			cifs_stats_bytes_read(tcon, total_read);
			*offset += bytes_read;
		}
	}
	free_xid(xid);
	return total_read;
}
3534
3535 /*
3536  * If the page is mmap'ed into a process' page tables, then we need to make
3537  * sure that it doesn't change while being written back.
3538  */
3539 static int
3540 cifs_page_mkwrite(struct vm_fault *vmf)
3541 {
3542         struct page *page = vmf->page;
3543
3544         lock_page(page);
3545         return VM_FAULT_LOCKED;
3546 }
3547
/* VM operations shared by both cifs mmap variants below */
static const struct vm_operations_struct cifs_file_vm_ops = {
	.fault = filemap_fault,
	.map_pages = filemap_map_pages,
	.page_mkwrite = cifs_page_mkwrite,
};
3553
3554 int cifs_file_strict_mmap(struct file *file, struct vm_area_struct *vma)
3555 {
3556         int xid, rc = 0;
3557         struct inode *inode = file_inode(file);
3558
3559         xid = get_xid();
3560
3561         if (!CIFS_CACHE_READ(CIFS_I(inode)))
3562                 rc = cifs_zap_mapping(inode);
3563         if (!rc)
3564                 rc = generic_file_mmap(file, vma);
3565         if (!rc)
3566                 vma->vm_ops = &cifs_file_vm_ops;
3567
3568         free_xid(xid);
3569         return rc;
3570 }
3571
3572 int cifs_file_mmap(struct file *file, struct vm_area_struct *vma)
3573 {
3574         int rc, xid;
3575
3576         xid = get_xid();
3577
3578         rc = cifs_revalidate_file(file);
3579         if (rc)
3580                 cifs_dbg(FYI, "Validation prior to mmap failed, error=%d\n",
3581                          rc);
3582         if (!rc)
3583                 rc = generic_file_mmap(file, vma);
3584         if (!rc)
3585                 vma->vm_ops = &cifs_file_vm_ops;
3586
3587         free_xid(xid);
3588         return rc;
3589 }
3590
/*
 * Work-queue completion handler for readpages-style reads: mark each page
 * up to date if data arrived for it, push it to fscache, and release the
 * references the request held on the pages and on itself.
 */
static void
cifs_readv_complete(struct work_struct *work)
{
	unsigned int i, got_bytes;
	struct cifs_readdata *rdata = container_of(work,
						struct cifs_readdata, work);

	got_bytes = rdata->got_bytes;
	for (i = 0; i < rdata->nr_pages; i++) {
		struct page *page = rdata->pages[i];

		lru_cache_add_file(page);

		/* a page is valid on full success, or on a partial read
		 * interrupted by reconnect (-EAGAIN with data received) */
		if (rdata->result == 0 ||
		    (rdata->result == -EAGAIN && got_bytes)) {
			flush_dcache_page(page);
			SetPageUptodate(page);
		}

		unlock_page(page);

		if (rdata->result == 0 ||
		    (rdata->result == -EAGAIN && got_bytes))
			cifs_readpage_to_fscache(rdata->mapping->host, page);

		/* consume this page's share of the received byte count */
		got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);

		put_page(page);
		rdata->pages[i] = NULL;
	}
	kref_put(&rdata->refcount, cifs_readdata_release);
}
3623
/*
 * Fill the pages of a readpages request with up to @len bytes of response
 * data, taken either from @iter (already-received/decrypted data) or read
 * directly off the socket.  Pages past the data are zero-filled or dropped
 * from the request.  Returns bytes placed, or a negative error.
 */
static int
readpages_fill_pages(struct TCP_Server_Info *server,
		     struct cifs_readdata *rdata, struct iov_iter *iter,
		     unsigned int len)
{
	int result = 0;
	unsigned int i;
	u64 eof;
	pgoff_t eof_index;
	unsigned int nr_pages = rdata->nr_pages;

	/* determine the eof that the server (probably) has */
	eof = CIFS_I(rdata->mapping->host)->server_eof;
	eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
	cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);

	rdata->got_bytes = 0;
	rdata->tailsz = PAGE_SIZE;
	for (i = 0; i < nr_pages; i++) {
		struct page *page = rdata->pages[i];
		size_t n = PAGE_SIZE;

		if (len >= PAGE_SIZE) {
			/* full page of data available */
			len -= PAGE_SIZE;
		} else if (len > 0) {
			/* enough for partial page, fill and zero the rest */
			zero_user(page, len, PAGE_SIZE - len);
			n = rdata->tailsz = len;
			len = 0;
		} else if (page->index > eof_index) {
			/*
			 * The VFS will not try to do readahead past the
			 * i_size, but it's possible that we have outstanding
			 * writes with gaps in the middle and the i_size hasn't
			 * caught up yet. Populate those with zeroed out pages
			 * to prevent the VFS from repeatedly attempting to
			 * fill them until the writes are flushed.
			 */
			zero_user(page, 0, PAGE_SIZE);
			lru_cache_add_file(page);
			flush_dcache_page(page);
			SetPageUptodate(page);
			unlock_page(page);
			put_page(page);
			rdata->pages[i] = NULL;
			rdata->nr_pages--;
			continue;
		} else {
			/* no need to hold page hostage */
			lru_cache_add_file(page);
			unlock_page(page);
			put_page(page);
			rdata->pages[i] = NULL;
			rdata->nr_pages--;
			continue;
		}

		if (iter)
			result = copy_page_from_iter(page, 0, n, iter);
		else
			result = cifs_read_page_from_socket(server, page, n);
		if (result < 0)
			break;

		rdata->got_bytes += result;
	}

	/* report partial progress unless the connection died entirely */
	return rdata->got_bytes > 0 && result != -ECONNABORTED ?
						rdata->got_bytes : result;
}
3694
/* read_into_pages callback: pull the payload straight off the socket */
static int
cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
			       struct cifs_readdata *rdata, unsigned int len)
{
	return readpages_fill_pages(server, rdata, NULL, len);
}
3701
/* copy_into_pages callback: payload already buffered in @iter */
static int
cifs_readpages_copy_into_pages(struct TCP_Server_Info *server,
			       struct cifs_readdata *rdata,
			       struct iov_iter *iter)
{
	return readpages_fill_pages(server, rdata, iter, iter->count);
}
3709
/*
 * Peel a contiguous run of pages off the tail of @page_list into @tmplist,
 * inserting each into the page cache, until the run would exceed @rsize,
 * the indexes become discontiguous, or insertion fails.  Outputs the page
 * count, starting file offset and total byte length of the run.
 */
static int
readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
		    unsigned int rsize, struct list_head *tmplist,
		    unsigned int *nr_pages, loff_t *offset, unsigned int *bytes)
{
	struct page *page, *tpage;
	unsigned int expected_index;
	int rc;
	gfp_t gfp = readahead_gfp_mask(mapping);

	INIT_LIST_HEAD(tmplist);

	page = list_entry(page_list->prev, struct page, lru);

	/*
	 * Lock the page and put it in the cache. Since no one else
	 * should have access to this page, we're safe to simply set
	 * PG_locked without checking it first.
	 */
	__SetPageLocked(page);
	rc = add_to_page_cache_locked(page, mapping,
				      page->index, gfp);

	/* give up if we can't stick it in the cache */
	if (rc) {
		__ClearPageLocked(page);
		return rc;
	}

	/* move first page to the tmplist */
	*offset = (loff_t)page->index << PAGE_SHIFT;
	*bytes = PAGE_SIZE;
	*nr_pages = 1;
	list_move_tail(&page->lru, tmplist);

	/* now try and add more pages onto the request */
	expected_index = page->index + 1;
	list_for_each_entry_safe_reverse(page, tpage, page_list, lru) {
		/* discontinuity ? */
		if (page->index != expected_index)
			break;

		/* would this page push the read over the rsize? */
		if (*bytes + PAGE_SIZE > rsize)
			break;

		__SetPageLocked(page);
		rc = add_to_page_cache_locked(page, mapping, page->index, gfp);
		if (rc) {
			__ClearPageLocked(page);
			break;
		}
		list_move_tail(&page->lru, tmplist);
		(*bytes) += PAGE_SIZE;
		expected_index++;
		(*nr_pages)++;
	}
	return rc;
}
3769
/*
 * address_space readpages op: satisfy what we can from fscache, then batch
 * the remaining pages into rsize-bounded async read requests.  Pages are
 * added to the page cache here; the completion handler (cifs_readv_complete)
 * unlocks and releases them.
 */
static int cifs_readpages(struct file *file, struct address_space *mapping,
	struct list_head *page_list, unsigned num_pages)
{
	int rc;
	int err = 0;
	struct list_head tmplist;
	struct cifsFileInfo *open_file = file->private_data;
	struct cifs_sb_info *cifs_sb = CIFS_FILE_SB(file);
	struct TCP_Server_Info *server;
	pid_t pid;

	/*
	 * Reads as many pages as possible from fscache. Returns -ENOBUFS
	 * immediately if the cookie is negative
	 *
	 * After this point, every page in the list might have PG_fscache set,
	 * so we will need to clean that up off of every page we don't use.
	 */
	rc = cifs_readpages_from_fscache(mapping->host, mapping, page_list,
					 &num_pages);
	if (rc == 0)
		return rc;

	if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD)
		pid = open_file->pid;
	else
		pid = current->tgid;

	rc = 0;
	server = tlink_tcon(open_file->tlink)->ses->server;

	cifs_dbg(FYI, "%s: file=%p mapping=%p num_pages=%u\n",
		 __func__, file, mapping, num_pages);

	/*
	 * Start with the page at end of list and move it to private
	 * list. Do the same with any following pages until we hit
	 * the rsize limit, hit an index discontinuity, or run out of
	 * pages. Issue the async read and then start the loop again
	 * until the list is empty.
	 *
	 * Note that list order is important. The page_list is in
	 * the order of declining indexes. When we put the pages in
	 * the rdata->pages, then we want them in increasing order.
	 */
	while (!list_empty(page_list) && !err) {
		unsigned int i, nr_pages, bytes, rsize;
		loff_t offset;
		struct page *page, *tpage;
		struct cifs_readdata *rdata;
		unsigned credits;

		/* blocks until the server grants credits for a read */
		rc = server->ops->wait_mtu_credits(server, cifs_sb->rsize,
						   &rsize, &credits);
		if (rc)
			break;

		/*
		 * Give up immediately if rsize is too small to read an entire
		 * page. The VFS will fall back to readpage. We should never
		 * reach this point however since we set ra_pages to 0 when the
		 * rsize is smaller than a cache page.
		 */
		if (unlikely(rsize < PAGE_SIZE)) {
			add_credits_and_wake_if(server, credits, 0);
			return 0;
		}

		nr_pages = 0;
		err = readpages_get_pages(mapping, page_list, rsize, &tmplist,
					 &nr_pages, &offset, &bytes);
		if (!nr_pages) {
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		rdata = cifs_readdata_alloc(nr_pages, cifs_readv_complete);
		if (!rdata) {
			/* best to give up if we're out of mem */
			list_for_each_entry_safe(page, tpage, &tmplist, lru) {
				list_del(&page->lru);
				lru_cache_add_file(page);
				unlock_page(page);
				put_page(page);
			}
			rc = -ENOMEM;
			add_credits_and_wake_if(server, credits, 0);
			break;
		}

		rdata->cfile = cifsFileInfo_get(open_file);
		rdata->mapping = mapping;
		rdata->offset = offset;
		rdata->bytes = bytes;
		rdata->pid = pid;
		rdata->pagesz = PAGE_SIZE;
		rdata->read_into_pages = cifs_readpages_read_into_pages;
		rdata->copy_into_pages = cifs_readpages_copy_into_pages;
		rdata->credits = credits;

		list_for_each_entry_safe(page, tpage, &tmplist, lru) {
			list_del(&page->lru);
			rdata->pages[rdata->nr_pages++] = page;
		}

		/* reopen a stale handle before issuing the read */
		if (!rdata->cfile->invalidHandle ||
		    !(rc = cifs_reopen_file(rdata->cfile, true)))
			rc = server->ops->async_readv(rdata);
		if (rc) {
			add_credits_and_wake_if(server, rdata->credits, 0);
			for (i = 0; i < rdata->nr_pages; i++) {
				page = rdata->pages[i];
				lru_cache_add_file(page);
				unlock_page(page);
				put_page(page);
			}
			/* Fallback to the readpage in error/reconnect cases */
			kref_put(&rdata->refcount, cifs_readdata_release);
			break;
		}

		/* drop our reference; the completion path holds its own */
		kref_put(&rdata->refcount, cifs_readdata_release);
	}

	/* Any pages that have been shown to fscache but didn't get added to
	 * the pagecache must be uncached before they get returned to the
	 * allocator.
	 */
	cifs_fscache_readpages_cancel(mapping->host, page_list);
	return rc;
}
3901
/*
 * cifs_readpage_worker must be called with the page pinned
 *
 * Fill @page from fscache if possible, otherwise read it synchronously
 * from the server, zero-fill the tail and mark it up to date.  The page
 * is unlocked here unless fscache satisfied the read; returns 0 or a
 * negative error from cifs_read().
 */
static int cifs_readpage_worker(struct file *file, struct page *page,
	loff_t *poffset)
{
	char *read_data;
	int rc;

	/* Is the page cached? */
	rc = cifs_readpage_from_fscache(file_inode(file), page);
	if (rc == 0)
		goto read_complete;

	read_data = kmap(page);
	/* for reads over a certain size could initiate async read ahead */

	rc = cifs_read(file, read_data, PAGE_SIZE, poffset);

	if (rc < 0)
		goto io_error;
	else
		cifs_dbg(FYI, "Bytes read %d\n", rc);

	file_inode(file)->i_atime =
		current_time(file_inode(file));

	/* zero the part of the page past what was actually read */
	if (PAGE_SIZE > rc)
		memset(read_data + rc, 0, PAGE_SIZE - rc);

	flush_dcache_page(page);
	SetPageUptodate(page);

	/* send this page to the cache */
	cifs_readpage_to_fscache(file_inode(file), page);

	rc = 0;

io_error:
	kunmap(page);
	unlock_page(page);

read_complete:
	return rc;
}
3947
3948 static int cifs_readpage(struct file *file, struct page *page)
3949 {
3950         loff_t offset = (loff_t)page->index << PAGE_SHIFT;
3951         int rc = -EACCES;
3952         unsigned int xid;
3953
3954         xid = get_xid();
3955
3956         if (file->private_data == NULL) {
3957                 rc = -EBADF;
3958                 free_xid(xid);
3959                 return rc;
3960         }
3961
3962         cifs_dbg(FYI, "readpage %p at offset %d 0x%x\n",
3963                  page, (int)offset, (int)offset);
3964
3965         rc = cifs_readpage_worker(file, page, &offset);
3966
3967         free_xid(xid);
3968         return rc;
3969 }
3970
3971 static int is_inode_writable(struct cifsInodeInfo *cifs_inode)
3972 {
3973         struct cifsFileInfo *open_file;
3974         struct cifs_tcon *tcon =
3975                 cifs_sb_master_tcon(CIFS_SB(cifs_inode->vfs_inode.i_sb));
3976
3977         spin_lock(&tcon->open_file_lock);
3978         list_for_each_entry(open_file, &cifs_inode->openFileList, flist) {
3979                 if (OPEN_FMODE(open_file->f_flags) & FMODE_WRITE) {
3980                         spin_unlock(&tcon->open_file_lock);
3981                         return 1;
3982                 }
3983         }
3984         spin_unlock(&tcon->open_file_lock);
3985         return 0;
3986 }
3987
3988 /* We do not want to update the file size from server for inodes
3989    open for write - to avoid races with writepage extending
3990    the file - in the future we could consider allowing
3991    refreshing the inode only on increases in the file size
3992    but this is tricky to do without racing with writebehind
3993    page caching in the current Linux kernel design */
3994 bool is_size_safe_to_change(struct cifsInodeInfo *cifsInode, __u64 end_of_file)
3995 {
3996         if (!cifsInode)
3997                 return true;
3998
3999         if (is_inode_writable(cifsInode)) {
4000                 /* This inode is open for write at least once */
4001                 struct cifs_sb_info *cifs_sb;
4002
4003                 cifs_sb = CIFS_SB(cifsInode->vfs_inode.i_sb);
4004                 if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_DIRECT_IO) {
4005                         /* since no page cache to corrupt on directio
4006                         we can change size safely */
4007                         return true;
4008                 }
4009
4010                 if (i_size_read(&cifsInode->vfs_inode) < end_of_file)
4011                         return true;
4012
4013                 return false;
4014         } else
4015                 return true;
4016 }
4017
/*
 * ->write_begin for the cached I/O path.  Locates (or creates) and locks
 * the page cache page covering [pos, pos+len), and tries to avoid a read
 * from the server when the data about to be written makes it unnecessary.
 *
 * On success *pagep holds a locked, referenced page; cifs_write_end()
 * releases it.  Returns 0 or -ENOMEM.
 */
static int cifs_write_begin(struct file *file, struct address_space *mapping,
			loff_t pos, unsigned len, unsigned flags,
			struct page **pagep, void **fsdata)
{
	/* set after one read attempt so we never loop via "goto start" twice */
	int oncethru = 0;
	pgoff_t index = pos >> PAGE_SHIFT;
	loff_t offset = pos & (PAGE_SIZE - 1);
	loff_t page_start = pos & PAGE_MASK;
	loff_t i_size;
	struct page *page;
	int rc = 0;

	cifs_dbg(FYI, "write_begin from %lld len %d\n", (long long)pos, len);

start:
	page = grab_cache_page_write_begin(mapping, index, flags);
	if (!page) {
		rc = -ENOMEM;
		goto out;
	}

	if (PageUptodate(page))
		goto out;

	/*
	 * If we write a full page it will be up to date, no need to read from
	 * the server. If the write is short, we'll end up doing a sync write
	 * instead.
	 */
	if (len == PAGE_SIZE)
		goto out;

	/*
	 * optimize away the read when we have an oplock, and we're not
	 * expecting to use any of the data we'd be reading in. That
	 * is, when the page lies beyond the EOF, or straddles the EOF
	 * and the write will cover all of the existing data.
	 */
	if (CIFS_CACHE_READ(CIFS_I(mapping->host))) {
		i_size = i_size_read(mapping->host);
		if (page_start >= i_size ||
		    (offset == 0 && (pos + len) >= i_size)) {
			/* zero everything outside [offset, offset+len) */
			zero_user_segments(page, 0, offset,
					   offset + len,
					   PAGE_SIZE);
			/*
			 * PageChecked means that the parts of the page
			 * to which we're not writing are considered up
			 * to date. Once the data is copied to the
			 * page, it can be set uptodate.
			 */
			SetPageChecked(page);
			goto out;
		}
	}

	if ((file->f_flags & O_ACCMODE) != O_WRONLY && !oncethru) {
		/*
		 * might as well read a page, it is fast enough. If we get
		 * an error, we don't need to return it. cifs_write_end will
		 * do a sync write instead since PG_uptodate isn't set.
		 */
		cifs_readpage_worker(file, page, &page_start);
		/*
		 * The worker unlocked the page; drop our reference and
		 * re-grab it (at most once) so we return it locked.
		 */
		put_page(page);
		oncethru = 1;
		goto start;
	} else {
		/* we could try using another file handle if there is one -
		   but how would we lock it to prevent close of that handle
		   racing with this read? In any case
		   this will be written out by write_end so is fine */
	}
out:
	*pagep = page;
	return rc;
}
4094
4095 static int cifs_release_page(struct page *page, gfp_t gfp)
4096 {
4097         if (PagePrivate(page))
4098                 return 0;
4099
4100         return cifs_fscache_release_page(page, gfp);
4101 }
4102
4103 static void cifs_invalidate_page(struct page *page, unsigned int offset,
4104                                  unsigned int length)
4105 {
4106         struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
4107
4108         if (offset == 0 && length == PAGE_SIZE)
4109                 cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
4110 }
4111
4112 static int cifs_launder_page(struct page *page)
4113 {
4114         int rc = 0;
4115         loff_t range_start = page_offset(page);
4116         loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
4117         struct writeback_control wbc = {
4118                 .sync_mode = WB_SYNC_ALL,
4119                 .nr_to_write = 0,
4120                 .range_start = range_start,
4121                 .range_end = range_end,
4122         };
4123
4124         cifs_dbg(FYI, "Launder page: %p\n", page);
4125
4126         if (clear_page_dirty_for_io(page))
4127                 rc = cifs_writepage_locked(page, &wbc);
4128
4129         cifs_fscache_invalidate_page(page, page->mapping->host);
4130         return rc;
4131 }
4132
/*
 * Deferred work handler for a server-initiated oplock/lease break.
 * Waits for in-flight writers, downgrades the local caching level,
 * flushes (and possibly purges) cached data, pushes byte-range locks,
 * and finally acknowledges the break to the server.
 */
void cifs_oplock_break(struct work_struct *work)
{
	struct cifsFileInfo *cfile = container_of(work, struct cifsFileInfo,
						  oplock_break);
	struct inode *inode = d_inode(cfile->dentry);
	struct cifsInodeInfo *cinode = CIFS_I(inode);
	struct cifs_tcon *tcon = tlink_tcon(cfile->tlink);
	struct TCP_Server_Info *server = tcon->ses->server;
	int rc = 0;
	bool purge_cache = false;

	/* let any in-progress writers drain before downgrading the oplock */
	wait_on_bit(&cinode->flags, CIFS_INODE_PENDING_WRITERS,
			TASK_UNINTERRUPTIBLE);

	server->ops->downgrade_oplock(server, cinode, cfile->oplock_level,
				      cfile->oplock_epoch, &purge_cache);

	/*
	 * Read caching without write caching cannot coexist with mandatory
	 * byte-range locks, so drop the oplock entirely in that case.
	 */
	if (!CIFS_CACHE_WRITE(cinode) && CIFS_CACHE_READ(cinode) &&
						cifs_has_mand_locks(cinode)) {
		cifs_dbg(FYI, "Reset oplock to None for inode=%p due to mand locks\n",
			 inode);
		cinode->oplock = 0;
	}

	if (inode && S_ISREG(inode->i_mode)) {
		/* tell any local lease holders about the downgrade */
		if (CIFS_CACHE_READ(cinode))
			break_lease(inode, O_RDONLY);
		else
			break_lease(inode, O_WRONLY);
		rc = filemap_fdatawrite(inode->i_mapping);
		if (!CIFS_CACHE_READ(cinode) || purge_cache) {
			/* lost read caching: wait for writeback and drop
			   the now-untrustworthy page cache contents */
			rc = filemap_fdatawait(inode->i_mapping);
			mapping_set_error(inode->i_mapping, rc);
			cifs_zap_mapping(inode);
		}
		cifs_dbg(FYI, "Oplock flush inode %p rc %d\n", inode, rc);
		if (CIFS_CACHE_WRITE(cinode))
			goto oplock_break_ack;
	}

	rc = cifs_push_locks(cfile);
	if (rc)
		cifs_dbg(VFS, "Push locks rc = %d\n", rc);

oplock_break_ack:
	/*
	 * releasing stale oplock after recent reconnect of smb session using
	 * a now incorrect file handle is not a data integrity issue but do
	 * not bother sending an oplock release if session to server still is
	 * disconnected since oplock already released by the server
	 */
	if (!cfile->oplock_break_cancelled) {
		rc = tcon->ses->server->ops->oplock_response(tcon, &cfile->fid,
							     cinode);
		cifs_dbg(FYI, "Oplock release rc = %d\n", rc);
	}
	_cifsFileInfo_put(cfile, false /* do not wait for ourself */);
	cifs_done_oplock_break(cinode);
}
4192
/*
 * The presence of cifs_direct_io() in the address space ops vector
 * allows open() with the O_DIRECT flag, which would have failed otherwise.
 *
 * In the non-cached mode (mount with cache=none), we shunt off direct
 * read and write requests before they reach this layer, so this method
 * should never be called.
 *
 * Direct IO is not yet supported in the cached mode.
 */
static ssize_t
cifs_direct_io(struct kiocb *iocb, struct iov_iter *iter)
{
	/*
	 * FIXME
	 * Eventually need to support direct IO for non forcedirectio mounts.
	 * Until then, reject any direct I/O that reaches the cached path.
	 */
	return -EINVAL;
}
4211
4212
/*
 * Address space operations for the normal (large-buffer) case.
 * .direct_IO is a stub that exists only so open() with O_DIRECT
 * succeeds; see the comment above cifs_direct_io().
 */
const struct address_space_operations cifs_addr_ops = {
	.readpage = cifs_readpage,
	.readpages = cifs_readpages,
	.writepage = cifs_writepage,
	.writepages = cifs_writepages,
	.write_begin = cifs_write_begin,
	.write_end = cifs_write_end,
	.set_page_dirty = __set_page_dirty_nobuffers,
	.releasepage = cifs_release_page,
	.direct_IO = cifs_direct_io,
	.invalidatepage = cifs_invalidate_page,
	.launder_page = cifs_launder_page,
};
4226
4227 /*
4228  * cifs_readpages requires the server to support a buffer large enough to
4229  * contain the header plus one complete page of data.  Otherwise, we need
4230  * to leave cifs_readpages out of the address space operations.
4231  */
/* Same as cifs_addr_ops but without .readpages (and without the
 * .direct_IO stub), for servers whose buffers are too small to hold
 * the header plus a full page of data. */
const struct address_space_operations cifs_addr_ops_smallbuf = {
	.readpage = cifs_readpage,
	.writepage = cifs_writepage,
	.writepages = cifs_writepages,
	.write_begin = cifs_write_begin,
	.write_end = cifs_write_end,
	.set_page_dirty = __set_page_dirty_nobuffers,
	.releasepage = cifs_release_page,
	.invalidatepage = cifs_invalidate_page,
	.launder_page = cifs_launder_page,
};