Commit 04de788e authored by Linus Torvalds's avatar Linus Torvalds

Merge tag 'nfs-for-5.7-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs

Pull NFS client updates from Trond Myklebust:
 "Highlights include:

  Stable fixes:
   - Fix a page leak in nfs_destroy_unlinked_subrequests()

   - Fix use-after-free issues in nfs_pageio_add_request()

   - Fix new mount code constant_table array definitions

   - finish_automount() requires us to hold 2 refs to the mount record

  Features:
   - Improve the accuracy of telldir/seekdir by using 64-bit cookies
     when possible.

   - Allow one RDMA active connection and several zombie connections to
     prevent blocking if the remote server is unresponsive.

   - Limit the size of the NFS access cache by default

   - Reduce the number of references to credentials that are taken by
     NFS

   - pNFS files and flexfiles drivers now support per-layout segment
     COMMIT lists.

   - Enable partial-file layout segments in the pNFS/flexfiles driver.

   - Add support for CB_RECALL_ANY to the pNFS flexfiles layout type

   - pNFS/flexfiles Report NFS4ERR_DELAY and NFS4ERR_GRACE errors from
     the DS using the layouterror mechanism.

  Bugfixes and cleanups:
   - SUNRPC: Fix krb5p regressions

   - Don't specify NFS version in "UDP not supported" error

   - nfsroot: set tcp as the default transport protocol

   - pnfs: Return valid stateids in nfs_layout_find_inode_by_stateid()

   - alloc_nfs_open_context() must use the file cred when available

   - Fix locking when dereferencing the delegation cred

   - Fix memory leaks in O_DIRECT when nfs_get_lock_context() fails

   - Various clean ups of the NFS O_DIRECT commit code

   - Clean up RDMA connect/disconnect

   - Replace zero-length arrays with C99-style flexible arrays"

* tag 'nfs-for-5.7-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (86 commits)
  NFS: Clean up process of marking inode stale.
  SUNRPC: Don't start a timer on an already queued rpc task
  NFS/pnfs: Reference the layout cred in pnfs_prepare_layoutreturn()
  NFS/pnfs: Fix dereference of layout cred in pnfs_layoutcommit_inode()
  NFS: Beware when dereferencing the delegation cred
  NFS: Add a module parameter to set nfs_mountpoint_expiry_timeout
  NFS: finish_automount() requires us to hold 2 refs to the mount record
  NFS: Fix a few constant_table array definitions
  NFS: Try to join page groups before an O_DIRECT retransmission
  NFS: Refactor nfs_lock_and_join_requests()
  NFS: Reverse the submission order of requests in __nfs_pageio_add_request()
  NFS: Clean up nfs_lock_and_join_requests()
  NFS: Remove the redundant function nfs_pgio_has_mirroring()
  NFS: Fix memory leaks in nfs_pageio_stop_mirroring()
  NFS: Fix a request reference leak in nfs_direct_write_clear_reqs()
  NFS: Fix use-after-free issues in nfs_pageio_add_request()
  NFS: Fix races nfs_page_group_destroy() vs nfs_destroy_unlinked_subrequests()
  NFS: Fix a page leak in nfs_destroy_unlinked_subrequests()
  NFS: Remove unused FLUSH_SYNC support in nfs_initiate_pgio()
  pNFS/flexfiles: Specify the layout segment range in LAYOUTGET
  ...
parents f40f31ca 93ce4af7
......@@ -476,7 +476,7 @@ static void bl_free_layout_hdr(struct pnfs_layout_hdr *lo)
err = ext_tree_remove(bl, true, 0, LLONG_MAX);
WARN_ON(err);
kfree(bl);
kfree_rcu(bl, bl_layout.plh_rcu);
}
static struct pnfs_layout_hdr *__bl_alloc_layout_hdr(struct inode *inode,
......
......@@ -127,7 +127,9 @@ extern __be32 nfs4_callback_sequence(void *argp, void *resp,
#define RCA4_TYPE_MASK_OBJ_LAYOUT_MAX 9
#define RCA4_TYPE_MASK_OTHER_LAYOUT_MIN 12
#define RCA4_TYPE_MASK_OTHER_LAYOUT_MAX 15
#define RCA4_TYPE_MASK_ALL 0xf31f
#define PNFS_FF_RCA4_TYPE_MASK_READ 16
#define PNFS_FF_RCA4_TYPE_MASK_RW 17
#define RCA4_TYPE_MASK_ALL 0x3f31f
struct cb_recallanyargs {
uint32_t craa_objs_to_keep;
......
......@@ -121,31 +121,31 @@ __be32 nfs4_callback_recall(void *argp, void *resp,
*/
static struct inode *nfs_layout_find_inode_by_stateid(struct nfs_client *clp,
const nfs4_stateid *stateid)
__must_hold(RCU)
{
struct nfs_server *server;
struct inode *inode;
struct pnfs_layout_hdr *lo;
rcu_read_lock();
list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
list_for_each_entry(lo, &server->layouts, plh_layouts) {
list_for_each_entry_rcu(lo, &server->layouts, plh_layouts) {
if (!pnfs_layout_is_valid(lo))
continue;
if (stateid != NULL &&
!nfs4_stateid_match_other(stateid, &lo->plh_stateid))
continue;
if (!nfs_sb_active(server->super))
continue;
inode = igrab(lo->plh_inode);
if (!inode)
return ERR_PTR(-EAGAIN);
if (!nfs_sb_active(inode->i_sb)) {
rcu_read_unlock();
spin_unlock(&clp->cl_lock);
iput(inode);
spin_lock(&clp->cl_lock);
rcu_read_lock();
return ERR_PTR(-EAGAIN);
}
return inode;
rcu_read_unlock();
if (inode)
return inode;
nfs_sb_deactive(server->super);
return ERR_PTR(-EAGAIN);
}
}
rcu_read_unlock();
return ERR_PTR(-ENOENT);
}
......@@ -163,28 +163,25 @@ static struct inode *nfs_layout_find_inode_by_fh(struct nfs_client *clp,
struct inode *inode;
struct pnfs_layout_hdr *lo;
rcu_read_lock();
list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
list_for_each_entry(lo, &server->layouts, plh_layouts) {
list_for_each_entry_rcu(lo, &server->layouts, plh_layouts) {
nfsi = NFS_I(lo->plh_inode);
if (nfs_compare_fh(fh, &nfsi->fh))
continue;
if (nfsi->layout != lo)
continue;
if (!nfs_sb_active(server->super))
continue;
inode = igrab(lo->plh_inode);
if (!inode)
return ERR_PTR(-EAGAIN);
if (!nfs_sb_active(inode->i_sb)) {
rcu_read_unlock();
spin_unlock(&clp->cl_lock);
iput(inode);
spin_lock(&clp->cl_lock);
rcu_read_lock();
return ERR_PTR(-EAGAIN);
}
return inode;
rcu_read_unlock();
if (inode)
return inode;
nfs_sb_deactive(server->super);
return ERR_PTR(-EAGAIN);
}
}
rcu_read_unlock();
return ERR_PTR(-ENOENT);
}
......@@ -194,14 +191,9 @@ static struct inode *nfs_layout_find_inode(struct nfs_client *clp,
{
struct inode *inode;
spin_lock(&clp->cl_lock);
rcu_read_lock();
inode = nfs_layout_find_inode_by_stateid(clp, stateid);
if (inode == ERR_PTR(-ENOENT))
inode = nfs_layout_find_inode_by_fh(clp, fh);
rcu_read_unlock();
spin_unlock(&clp->cl_lock);
return inode;
}
......@@ -280,7 +272,7 @@ static u32 initiate_file_draining(struct nfs_client *clp,
goto unlock;
}
pnfs_set_layout_stateid(lo, &args->cbl_stateid, true);
pnfs_set_layout_stateid(lo, &args->cbl_stateid, NULL, true);
switch (pnfs_mark_matching_lsegs_return(lo, &free_me_list,
&args->cbl_range,
be32_to_cpu(args->cbl_stateid.seqid))) {
......@@ -605,6 +597,7 @@ __be32 nfs4_callback_recallany(void *argp, void *resp,
struct cb_recallanyargs *args = argp;
__be32 status;
fmode_t flags = 0;
bool schedule_manager = false;
status = cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION);
if (!cps->clp) /* set in cb_sequence */
......@@ -627,6 +620,18 @@ __be32 nfs4_callback_recallany(void *argp, void *resp,
if (args->craa_type_mask & BIT(RCA4_TYPE_MASK_FILE_LAYOUT))
pnfs_recall_all_layouts(cps->clp);
if (args->craa_type_mask & BIT(PNFS_FF_RCA4_TYPE_MASK_READ)) {
set_bit(NFS4CLNT_RECALL_ANY_LAYOUT_READ, &cps->clp->cl_state);
schedule_manager = true;
}
if (args->craa_type_mask & BIT(PNFS_FF_RCA4_TYPE_MASK_RW)) {
set_bit(NFS4CLNT_RECALL_ANY_LAYOUT_RW, &cps->clp->cl_state);
schedule_manager = true;
}
if (schedule_manager)
nfs4_schedule_state_manager(cps->clp);
out:
dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
return status;
......
......@@ -377,6 +377,18 @@ nfs_inode_detach_delegation(struct inode *inode)
return delegation;
}
static void
nfs_update_delegation_cred(struct nfs_delegation *delegation,
const struct cred *cred)
{
const struct cred *old;
if (cred_fscmp(delegation->cred, cred) != 0) {
old = xchg(&delegation->cred, get_cred(cred));
put_cred(old);
}
}
static void
nfs_update_inplace_delegation(struct nfs_delegation *delegation,
const struct nfs_delegation *update)
......@@ -385,8 +397,14 @@ nfs_update_inplace_delegation(struct nfs_delegation *delegation,
delegation->stateid.seqid = update->stateid.seqid;
smp_wmb();
delegation->type = update->type;
if (test_and_clear_bit(NFS_DELEGATION_REVOKED, &delegation->flags))
delegation->pagemod_limit = update->pagemod_limit;
if (test_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) {
delegation->change_attr = update->change_attr;
nfs_update_delegation_cred(delegation, update->cred);
/* smp_mb__before_atomic() is implicit due to xchg() */
clear_bit(NFS_DELEGATION_REVOKED, &delegation->flags);
atomic_long_inc(&nfs_active_delegations);
}
}
}
......@@ -545,21 +563,11 @@ static bool nfs_delegation_need_return(struct nfs_delegation *delegation)
return ret;
}
/**
* nfs_client_return_marked_delegations - return previously marked delegations
* @clp: nfs_client to process
*
* Note that this function is designed to be called by the state
* manager thread. For this reason, it cannot flush the dirty data,
* since that could deadlock in case of a state recovery error.
*
* Returns zero on success, or a negative errno value.
*/
int nfs_client_return_marked_delegations(struct nfs_client *clp)
static int nfs_server_return_marked_delegations(struct nfs_server *server,
void __always_unused *data)
{
struct nfs_delegation *delegation;
struct nfs_delegation *prev;
struct nfs_server *server;
struct inode *inode;
struct inode *place_holder = NULL;
struct nfs_delegation *place_holder_deleg = NULL;
......@@ -569,78 +577,79 @@ int nfs_client_return_marked_delegations(struct nfs_client *clp)
/*
* To avoid quadratic looping we hold a reference
* to an inode place_holder. Each time we restart, we
* list nfs_servers from the server of that inode, and
* delegation in the server from the delegations of that
* inode.
* list delegation in the server from the delegations
* of that inode.
* prev is an RCU-protected pointer to a delegation which
* wasn't marked for return and might be a good choice for
* the next place_holder.
*/
rcu_read_lock();
prev = NULL;
delegation = NULL;
rcu_read_lock();
if (place_holder)
server = NFS_SERVER(place_holder);
else
server = list_entry_rcu(clp->cl_superblocks.next,
struct nfs_server, client_link);
list_for_each_entry_from_rcu(server, &clp->cl_superblocks, client_link) {
delegation = NULL;
if (place_holder && server == NFS_SERVER(place_holder))
delegation = rcu_dereference(NFS_I(place_holder)->delegation);
if (!delegation || delegation != place_holder_deleg)
delegation = list_entry_rcu(server->delegations.next,
struct nfs_delegation, super_list);
list_for_each_entry_from_rcu(delegation, &server->delegations, super_list) {
struct inode *to_put = NULL;
if (!nfs_delegation_need_return(delegation)) {
delegation = rcu_dereference(NFS_I(place_holder)->delegation);
if (!delegation || delegation != place_holder_deleg)
delegation = list_entry_rcu(server->delegations.next,
struct nfs_delegation, super_list);
list_for_each_entry_from_rcu(delegation, &server->delegations, super_list) {
struct inode *to_put = NULL;
if (test_bit(NFS_DELEGATION_INODE_FREEING, &delegation->flags))
continue;
if (!nfs_delegation_need_return(delegation)) {
if (nfs4_is_valid_delegation(delegation, 0))
prev = delegation;
continue;
}
if (!nfs_sb_active(server->super))
break; /* continue in outer loop */
if (prev) {
struct inode *tmp;
tmp = nfs_delegation_grab_inode(prev);
if (tmp) {
to_put = place_holder;
place_holder = tmp;
place_holder_deleg = prev;
}
}
continue;
}
inode = nfs_delegation_grab_inode(delegation);
if (inode == NULL) {
rcu_read_unlock();
if (to_put)
iput(to_put);
nfs_sb_deactive(server->super);
goto restart;
if (prev) {
struct inode *tmp = nfs_delegation_grab_inode(prev);
if (tmp) {
to_put = place_holder;
place_holder = tmp;
place_holder_deleg = prev;
}
delegation = nfs_start_delegation_return_locked(NFS_I(inode));
}
inode = nfs_delegation_grab_inode(delegation);
if (inode == NULL) {
rcu_read_unlock();
iput(to_put);
goto restart;
}
delegation = nfs_start_delegation_return_locked(NFS_I(inode));
rcu_read_unlock();
if (to_put)
iput(to_put);
iput(to_put);
err = nfs_end_delegation_return(inode, delegation, 0);
iput(inode);
nfs_sb_deactive(server->super);
cond_resched();
if (!err)
goto restart;
set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state);
if (place_holder)
iput(place_holder);
return err;
}
err = nfs_end_delegation_return(inode, delegation, 0);
iput(inode);
cond_resched();
if (!err)
goto restart;
set_bit(NFS4CLNT_DELEGRETURN, &server->nfs_client->cl_state);
goto out;
}
rcu_read_unlock();
if (place_holder)
iput(place_holder);
return 0;
out:
iput(place_holder);
return err;
}
/**
* nfs_client_return_marked_delegations - return previously marked delegations
* @clp: nfs_client to process
*
* Note that this function is designed to be called by the state
* manager thread. For this reason, it cannot flush the dirty data,
* since that could deadlock in case of a state recovery error.
*
* Returns zero on success, or a negative errno value.
*/
int nfs_client_return_marked_delegations(struct nfs_client *clp)
{
return nfs_client_for_each_server(clp,
nfs_server_return_marked_delegations, NULL);
}
/**
......@@ -1083,53 +1092,51 @@ void nfs_delegation_mark_reclaim(struct nfs_client *clp)
rcu_read_unlock();
}
/**
* nfs_delegation_reap_unclaimed - reap unclaimed delegations after reboot recovery is done
* @clp: nfs_client to process
*
*/
void nfs_delegation_reap_unclaimed(struct nfs_client *clp)
static int nfs_server_reap_unclaimed_delegations(struct nfs_server *server,
void __always_unused *data)
{
struct nfs_delegation *delegation;
struct nfs_server *server;
struct inode *inode;
restart:
rcu_read_lock();
list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
list_for_each_entry_rcu(delegation, &server->delegations,
super_list) {
if (test_bit(NFS_DELEGATION_INODE_FREEING,
&delegation->flags) ||
test_bit(NFS_DELEGATION_RETURNING,
&delegation->flags) ||
test_bit(NFS_DELEGATION_NEED_RECLAIM,
&delegation->flags) == 0)
continue;
if (!nfs_sb_active(server->super))
break; /* continue in outer loop */
inode = nfs_delegation_grab_inode(delegation);
if (inode == NULL) {
rcu_read_unlock();
nfs_sb_deactive(server->super);
goto restart;
}
delegation = nfs_start_delegation_return_locked(NFS_I(inode));
rcu_read_unlock();
if (delegation != NULL) {
if (nfs_detach_delegation(NFS_I(inode), delegation,
server) != NULL)
nfs_free_delegation(delegation);
/* Match nfs_start_delegation_return_locked */
nfs_put_delegation(delegation);
}
iput(inode);
nfs_sb_deactive(server->super);
cond_resched();
goto restart;
restart_locked:
list_for_each_entry_rcu(delegation, &server->delegations, super_list) {
if (test_bit(NFS_DELEGATION_INODE_FREEING,
&delegation->flags) ||
test_bit(NFS_DELEGATION_RETURNING,
&delegation->flags) ||
test_bit(NFS_DELEGATION_NEED_RECLAIM,
&delegation->flags) == 0)
continue;
inode = nfs_delegation_grab_inode(delegation);
if (inode == NULL)
goto restart_locked;
delegation = nfs_start_delegation_return_locked(NFS_I(inode));
rcu_read_unlock();
if (delegation != NULL) {
if (nfs_detach_delegation(NFS_I(inode), delegation,
server) != NULL)
nfs_free_delegation(delegation);
/* Match nfs_start_delegation_return_locked */
nfs_put_delegation(delegation);
}
iput(inode);
cond_resched();
goto restart;
}
rcu_read_unlock();
return 0;
}
/**
* nfs_delegation_reap_unclaimed - reap unclaimed delegations after reboot recovery is done
* @clp: nfs_client to process
*
*/
void nfs_delegation_reap_unclaimed(struct nfs_client *clp)
{
nfs_client_for_each_server(clp, nfs_server_reap_unclaimed_delegations,
NULL);
}
static inline bool nfs4_server_rebooted(const struct nfs_client *clp)
......@@ -1215,62 +1222,61 @@ nfs_delegation_test_free_expired(struct inode *inode,
nfs_remove_bad_delegation(inode, stateid);
}
/**
* nfs_reap_expired_delegations - reap expired delegations
* @clp: nfs_client to process
*
* Iterates through all the delegations associated with this server and
* checks if they have may have been revoked. This function is usually
* expected to be called in cases where the server may have lost its
* lease.
*/
void nfs_reap_expired_delegations(struct nfs_client *clp)
static int nfs_server_reap_expired_delegations(struct nfs_server *server,
void __always_unused *data)
{
struct nfs_delegation *delegation;
struct nfs_server *server;
struct inode *inode;
const struct cred *cred;
nfs4_stateid stateid;
restart:
rcu_read_lock();
list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
list_for_each_entry_rcu(delegation, &server->delegations,
super_list) {
if (test_bit(NFS_DELEGATION_INODE_FREEING,
&delegation->flags) ||
test_bit(NFS_DELEGATION_RETURNING,
&delegation->flags) ||
test_bit(NFS_DELEGATION_TEST_EXPIRED,
&delegation->flags) == 0)
continue;
if (!nfs_sb_active(server->super))
break; /* continue in outer loop */
inode = nfs_delegation_grab_inode(delegation);
if (inode == NULL) {
rcu_read_unlock();
nfs_sb_deactive(server->super);
goto restart;
}
cred = get_cred_rcu(delegation->cred);
nfs4_stateid_copy(&stateid, &delegation->stateid);
clear_bit(NFS_DELEGATION_TEST_EXPIRED, &delegation->flags);
rcu_read_unlock();
nfs_delegation_test_free_expired(inode, &stateid, cred);
put_cred(cred);
if (nfs4_server_rebooted(clp)) {
nfs_inode_mark_test_expired_delegation(server,inode);
iput(inode);
nfs_sb_deactive(server->super);
return;
}
restart_locked:
list_for_each_entry_rcu(delegation, &server->delegations, super_list) {
if (test_bit(NFS_DELEGATION_INODE_FREEING,
&delegation->flags) ||
test_bit(NFS_DELEGATION_RETURNING,
&delegation->flags) ||
test_bit(NFS_DELEGATION_TEST_EXPIRED,
&delegation->flags) == 0)
continue;
inode = nfs_delegation_grab_inode(delegation);
if (inode == NULL)
goto restart_locked;
spin_lock(&delegation->lock);
cred = get_cred_rcu(delegation->cred);
nfs4_stateid_copy(&stateid, &delegation->stateid);
spin_unlock(&delegation->lock);
clear_bit(NFS_DELEGATION_TEST_EXPIRED, &delegation->flags);
rcu_read_unlock();
nfs_delegation_test_free_expired(inode, &stateid, cred);
put_cred(cred);
if (!nfs4_server_rebooted(server->nfs_client)) {
iput(inode);
nfs_sb_deactive(server->super);
cond_resched();
goto restart;
}
nfs_inode_mark_test_expired_delegation(server,inode);
iput(inode);
return -EAGAIN;
}
rcu_read_unlock();
return 0;
}
/**
* nfs_reap_expired_delegations - reap expired delegations
* @clp: nfs_client to process
*
* Iterates through all the delegations associated with this server and
* checks if they have may have been revoked. This function is usually
* expected to be called in cases where the server may have lost its
* lease.
*/
void nfs_reap_expired_delegations(struct nfs_client *clp)
{
nfs_client_for_each_server(clp, nfs_server_reap_expired_delegations,
NULL);
}
void nfs_inode_find_delegation_state_and_recover(struct inode *inode,
......@@ -1359,11 +1365,14 @@ bool nfs4_copy_delegation_stateid(struct inode *inode, fmode_t flags,
{
struct nfs_inode *nfsi = NFS_I(inode);
struct nfs_delegation *delegation;
bool ret;
bool ret = false;
flags &= FMODE_READ|FMODE_WRITE;
rcu_read_lock();
delegation = rcu_dereference(nfsi->delegation);
if (!delegation)
goto out;
spin_lock(&delegation->lock);
ret = nfs4_is_valid_delegation(delegation, flags);
if (ret) {
nfs4_stateid_copy(dst, &delegation->stateid);
......@@ -1371,6 +1380,8 @@ bool nfs4_copy_delegation_stateid(struct inode *inode, fmode_t flags,
if (cred)
*cred = get_cred(delegation->cred);
}
spin_unlock(&delegation->lock);
out:
rcu_read_unlock();
return ret;
}
......
......@@ -141,10 +141,9 @@ struct nfs_cache_array {
int size;
int eof_index;
u64 last_cookie;
struct nfs_cache_array_entry array[0];
struct nfs_cache_array_entry array[];
};
typedef int (*decode_dirent_t)(struct xdr_stream *, struct nfs_entry *, bool);
typedef struct {
struct file *file;
struct page *page;
......@@ -153,7 +152,7 @@ typedef struct {
u64 *dir_cookie;
u64 last_cookie;
loff_t current_index;
decode_dirent_t decode;
loff_t prev_index;
unsigned long dir_verifier;
unsigned long timestamp;
......@@ -240,6 +239,25 @@ int nfs_readdir_add_to_array(struct nfs_entry *entry, struct page *page)
return ret;
}
static inline
int is_32bit_api(void)
{
#ifdef CONFIG_COMPAT
return in_compat_syscall();
#else
return (BITS_PER_LONG == 32);
#endif
}
static
bool nfs_readdir_use_cookie(const struct file *filp)
{
if ((filp->f_mode & FMODE_32BITHASH) ||
(!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
return false;
return true;