Commit b2deee2d authored by Linus Torvalds

Merge tag 'ceph-for-4.11-rc1' of git://github.com/ceph/ceph-client

Pull ceph updates from Ilya Dryomov:
 "This time around we have:

   - support for the rbd data-pool feature, which enables rbd images on
     erasure-coded pools (myself). CEPH_PG_MAX_SIZE has been bumped to
     allow erasure-coded profiles with k+m up to 32.

   - a fix for a ceph_d_revalidate() performance regression introduced
     in 4.9, along with some cleanups in the area (Jeff Layton)

   - a set of fixes for unsafe ->d_parent accesses in CephFS (Jeff
     Layton)

   - buffered reads are now processed in rsize windows instead of rasize
     windows (Andreas Gerstmayr). The new default for the rsize mount
     option is 64M.

   - the ack vs commit distinction is gone, greatly simplifying ->fsync()
     and MOSDOpReply handling code (myself)

  ... also a few filesystem bug fixes from Zheng, a CRUSH sync-up (CRUSH
  computations are still serialized, though), and several minor fixes and
  cleanups all over"

* tag 'ceph-for-4.11-rc1' of git://github.com/ceph/ceph-client: (52 commits)
  libceph, rbd, ceph: WRITE | ONDISK -> WRITE
  libceph: get rid of ack vs commit
  ceph: remove special ack vs commit behavior
  ceph: tidy some white space in get_nonsnap_parent()
  crush: fix dprintk compilation
  crush: do is_out test only if we do not collide
  ceph: remove req from unsafe list when unregistering it
  rbd: constify device_type structure
  rbd: kill obj_request->object_name and rbd_segment_name_cache
  rbd: store and use obj_request->object_no
  rbd: RBD_V{1,2}_DATA_FORMAT macros
  rbd: factor out __rbd_osd_req_create()
  rbd: set offset and length outside of rbd_obj_request_create()
  rbd: support for data-pool feature
  rbd: introduce rbd_init_layout()
  rbd: use rbd_obj_bytes() more
  rbd: remove now unused rbd_obj_request_wait() and helpers
  rbd: switch rbd_obj_method_sync() to ceph_osdc_call()
  libceph: pass reply buffer length through ceph_osdc_call()
  rbd: do away with obj_request in rbd_obj_read_sync()
  ...
parents d4f4cf77 54ea0046
......@@ -98,11 +98,10 @@ Mount Options
size.
rsize=X
Specify the maximum read size in bytes. By default there is no
maximum.
Specify the maximum read size in bytes. Default: 64 MB.
rasize=X
Specify the maximum readahead.
Specify the maximum readahead. Default: 8 MB.
mount_timeout=X
Specify the timeout value for mount (in seconds), in the case
......
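
The hunk above documents the new defaults: rsize now defaults to 64 MB and rasize
to 8 MB, both overridable at mount time. As a rough illustration (not taken from
the patch), here is a minimal user-space sketch that passes explicit rsize/rasize
values through mount(2); the monitor address, mount point, and key are
placeholders, and in practice the mount.ceph helper normally builds this option
string:

#include <stdio.h>
#include <sys/mount.h>

int main(void)
{
    /* Placeholder monitor address, mount point and key; rsize/rasize are
     * byte values (64 MB and 8 MB here, matching the new defaults). */
    const char *src  = "192.168.0.1:6789:/";
    const char *opts = "name=admin,secret=<base64 key>,rsize=67108864,rasize=8388608";

    if (mount(src, "/mnt/ceph", "ceph", 0, opts) != 0) {
        perror("mount ceph");
        return 1;
    }
    return 0;
}
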
......@@ -25,8 +25,8 @@
*/
#define RBD_HEADER_PREFIX "rbd_header."
#define RBD_DATA_PREFIX "rbd_data."
#define RBD_ID_PREFIX "rbd_id."
#define RBD_V2_DATA_FORMAT "%s.%016llx"
#define RBD_LOCK_NAME "rbd_lock"
#define RBD_LOCK_TAG "internal"
......@@ -42,13 +42,14 @@ enum rbd_notify_op {
/*
* For format version 1, rbd image 'foo' consists of objects
* foo.rbd - image metadata
* rb.<idhi>.<idlo>.00000000
* rb.<idhi>.<idlo>.00000001
* rb.<idhi>.<idlo>.<extra>.000000000000
* rb.<idhi>.<idlo>.<extra>.000000000001
* ... - data
* There is no notion of a persistent image id in rbd format 1.
*/
#define RBD_SUFFIX ".rbd"
#define RBD_V1_DATA_FORMAT "%s.%012llx"
#define RBD_DIRECTORY "rbd_directory"
#define RBD_INFO "rbd_info"
......@@ -57,9 +58,6 @@ enum rbd_notify_op {
#define RBD_MIN_OBJ_ORDER 16
#define RBD_MAX_OBJ_ORDER 30
#define RBD_COMP_NONE 0
#define RBD_CRYPT_NONE 0
#define RBD_HEADER_TEXT "<<< Rados Block Device Image >>>\n"
#define RBD_HEADER_SIGNATURE "RBD"
#define RBD_HEADER_VERSION "001.005"
......
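
The RBD_V1_DATA_FORMAT ("%s.%012llx") and RBD_V2_DATA_FORMAT ("%s.%016llx")
macros introduced above capture how data object names are printed from an
object-name prefix plus an object number. A small stand-alone sketch, using
made-up prefixes of the usual rb.<id> / rbd_data.<image id> shape (the real
prefixes come from the image header):

#include <stdio.h>

#define RBD_V1_DATA_FORMAT "%s.%012llx"
#define RBD_V2_DATA_FORMAT "%s.%016llx"

int main(void)
{
    char name[128];
    unsigned long long objno = 5;   /* object number within the image */

    /* format 1: <block name prefix>.<12 hex digits> */
    snprintf(name, sizeof(name), RBD_V1_DATA_FORMAT, "rb.0.1014.74b0dc51", objno);
    printf("v1 data object: %s\n", name);

    /* format 2: rbd_data.<image id>.<16 hex digits> */
    snprintf(name, sizeof(name), RBD_V2_DATA_FORMAT, "rbd_data.101474b0dc51", objno);
    printf("v2 data object: %s\n", name);
    return 0;
}
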
......@@ -391,6 +391,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max)
nr_pages = i;
if (nr_pages > 0) {
len = nr_pages << PAGE_SHIFT;
osd_req_op_extent_update(req, 0, len);
break;
}
goto out_pages;
......@@ -771,7 +772,7 @@ static int ceph_writepages_start(struct address_space *mapping,
wbc->sync_mode == WB_SYNC_NONE ? "NONE" :
(wbc->sync_mode == WB_SYNC_ALL ? "ALL" : "HOLD"));
if (ACCESS_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
if (ci->i_wrbuffer_ref > 0) {
pr_warn_ratelimited(
"writepage_start %p %lld forced umount\n",
......@@ -1017,8 +1018,7 @@ static int ceph_writepages_start(struct address_space *mapping,
&ci->i_layout, vino,
offset, &len, 0, num_ops,
CEPH_OSD_OP_WRITE,
CEPH_OSD_FLAG_WRITE |
CEPH_OSD_FLAG_ONDISK,
CEPH_OSD_FLAG_WRITE,
snapc, truncate_seq,
truncate_size, false);
if (IS_ERR(req)) {
......@@ -1028,8 +1028,7 @@ static int ceph_writepages_start(struct address_space *mapping,
min(num_ops,
CEPH_OSD_SLAB_OPS),
CEPH_OSD_OP_WRITE,
CEPH_OSD_FLAG_WRITE |
CEPH_OSD_FLAG_ONDISK,
CEPH_OSD_FLAG_WRITE,
snapc, truncate_seq,
truncate_size, true);
BUG_ON(IS_ERR(req));
......@@ -1194,7 +1193,7 @@ static int ceph_update_writeable_page(struct file *file,
int r;
struct ceph_snap_context *snapc, *oldest;
if (ACCESS_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
if (READ_ONCE(fsc->mount_state) == CEPH_MOUNT_SHUTDOWN) {
dout(" page %p forced umount\n", page);
unlock_page(page);
return -EIO;
......@@ -1681,8 +1680,7 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page)
req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
ceph_vino(inode), 0, &len, 0, 1,
CEPH_OSD_OP_CREATE,
CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE,
CEPH_OSD_OP_CREATE, CEPH_OSD_FLAG_WRITE,
NULL, 0, 0, false);
if (IS_ERR(req)) {
err = PTR_ERR(req);
......@@ -1699,8 +1697,7 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page)
req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout,
ceph_vino(inode), 0, &len, 1, 3,
CEPH_OSD_OP_WRITE,
CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE,
CEPH_OSD_OP_WRITE, CEPH_OSD_FLAG_WRITE,
NULL, ci->i_truncate_seq,
ci->i_truncate_size, false);
if (IS_ERR(req)) {
......@@ -1873,7 +1870,7 @@ static int __ceph_pool_perm_get(struct ceph_inode_info *ci,
goto out_unlock;
}
wr_req->r_flags = CEPH_OSD_FLAG_WRITE | CEPH_OSD_FLAG_ACK;
wr_req->r_flags = CEPH_OSD_FLAG_WRITE;
osd_req_op_init(wr_req, 0, CEPH_OSD_OP_CREATE, CEPH_OSD_OP_FLAG_EXCL);
ceph_oloc_copy(&wr_req->r_base_oloc, &rd_req->r_base_oloc);
ceph_oid_copy(&wr_req->r_base_oid, &rd_req->r_base_oid);
......
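
Several hunks above (and more below) replace ACCESS_ONCE() with READ_ONCE() as
part of the tree-wide conversion. Roughly speaking, READ_ONCE() forces a single,
untorn load through a volatile-qualified access so the compiler cannot refetch
or elide it. The sketch below is a simplified user-space approximation, not the
kernel's actual definition (which lives in include/linux/compiler.h and also
handles sizes the compiler cannot load atomically):

#include <stdio.h>

/* Simplified stand-ins for the kernel macros; __typeof__ is a GNU C
 * extension, as used throughout the kernel. */
#define READ_ONCE_APPROX(x)     (*(const volatile __typeof__(x) *)&(x))
#define WRITE_ONCE_APPROX(x, v) (*(volatile __typeof__(x) *)&(x) = (v))

static int mount_state;  /* stand-in for fsc->mount_state */

int main(void)
{
    WRITE_ONCE_APPROX(mount_state, 1);
    if (READ_ONCE_APPROX(mount_state) == 1)
        printf("shutdown path would be taken\n");
    return 0;
}
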
......@@ -234,7 +234,7 @@ void ceph_fscache_file_set_cookie(struct inode *inode, struct file *filp)
fscache_enable_cookie(ci->fscache, ceph_fscache_can_enable,
inode);
if (fscache_cookie_enabled(ci->fscache)) {
dout("fscache_file_set_cookie %p %p enabing cache\n",
dout("fscache_file_set_cookie %p %p enabling cache\n",
inode, filp);
}
}
......
......@@ -867,7 +867,7 @@ int __ceph_caps_file_wanted(struct ceph_inode_info *ci)
/*
* Return caps we have registered with the MDS(s) as 'wanted'.
*/
int __ceph_caps_mds_wanted(struct ceph_inode_info *ci)
int __ceph_caps_mds_wanted(struct ceph_inode_info *ci, bool check)
{
struct ceph_cap *cap;
struct rb_node *p;
......@@ -875,7 +875,7 @@ int __ceph_caps_mds_wanted(struct ceph_inode_info *ci)
for (p = rb_first(&ci->i_caps); p; p = rb_next(p)) {
cap = rb_entry(p, struct ceph_cap, ci_node);
if (!__cap_is_valid(cap))
if (check && !__cap_is_valid(cap))
continue;
if (cap == ci->i_auth_cap)
mds_wanted |= cap->mds_wanted;
......@@ -1184,6 +1184,13 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
delayed = 1;
}
ci->i_ceph_flags &= ~(CEPH_I_NODELAY | CEPH_I_FLUSH);
if (want & ~cap->mds_wanted) {
/* user space may open/close single file frequently.
* This avoids dropping mds_wanted immediately after
* requesting new mds_wanted.
*/
__cap_set_timeouts(mdsc, ci);
}
cap->issued &= retain; /* drop bits we don't want */
if (cap->implemented & ~cap->issued) {
......@@ -2084,8 +2091,6 @@ int ceph_fsync(struct file *file, loff_t start, loff_t end, int datasync)
dout("fsync %p%s\n", inode, datasync ? " datasync" : "");
ceph_sync_write_wait(inode);
ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
if (ret < 0)
goto out;
......@@ -2477,23 +2482,22 @@ static int try_get_cap_refs(struct ceph_inode_info *ci, int need, int want,
if (ci->i_ceph_flags & CEPH_I_CAP_DROPPED) {
int mds_wanted;
if (ACCESS_ONCE(mdsc->fsc->mount_state) ==
if (READ_ONCE(mdsc->fsc->mount_state) ==
CEPH_MOUNT_SHUTDOWN) {
dout("get_cap_refs %p forced umount\n", inode);
*err = -EIO;
ret = 1;
goto out_unlock;
}
mds_wanted = __ceph_caps_mds_wanted(ci);
if ((mds_wanted & need) != need) {
mds_wanted = __ceph_caps_mds_wanted(ci, false);
if (need & ~(mds_wanted & need)) {
dout("get_cap_refs %p caps were dropped"
" (session killed?)\n", inode);
*err = -ESTALE;
ret = 1;
goto out_unlock;
}
if ((mds_wanted & file_wanted) ==
(file_wanted & (CEPH_CAP_FILE_RD|CEPH_CAP_FILE_WR)))
if (!(file_wanted & ~mds_wanted))
ci->i_ceph_flags &= ~CEPH_I_CAP_DROPPED;
}
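
The reworked checks above lean on two small bitmask identities:
need & ~(mds_wanted & need) is non-zero exactly when some needed capability bit
is missing from mds_wanted (equivalent to the old (mds_wanted & need) != need),
and !(file_wanted & ~mds_wanted) holds exactly when every wanted bit is covered.
A tiny self-contained check, using illustrative bit values rather than the real
CEPH_CAP_* constants:

#include <assert.h>
#include <stdio.h>

int main(void)
{
    /* Illustrative capability bits, not Ceph's actual values. */
    const int CAP_RD = 0x1, CAP_WR = 0x2, CAP_EXCL = 0x4;

    int need = CAP_RD | CAP_WR;
    int mds_wanted = CAP_RD;             /* WR no longer wanted by the MDS */
    int file_wanted = CAP_RD | CAP_EXCL;

    /* old and new forms of "some needed cap was dropped" agree */
    assert(((mds_wanted & need) != need) == !!(need & ~(mds_wanted & need)));

    /* "everything file_wanted asks for is covered by mds_wanted" */
    printf("need satisfied: %d, file_wanted covered: %d\n",
           !(need & ~(mds_wanted & need)), !(file_wanted & ~mds_wanted));
    return 0;
}
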
......@@ -3404,6 +3408,7 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
tcap->implemented |= issued;
if (cap == ci->i_auth_cap)
ci->i_auth_cap = tcap;
if (!list_empty(&ci->i_cap_flush_list) &&
ci->i_auth_cap == tcap) {
spin_lock(&mdsc->cap_dirty_lock);
......@@ -3417,9 +3422,18 @@ static void handle_cap_export(struct inode *inode, struct ceph_mds_caps *ex,
} else if (tsession) {
/* add placeholder for the export target */
int flag = (cap == ci->i_auth_cap) ? CEPH_CAP_FLAG_AUTH : 0;
tcap = new_cap;
ceph_add_cap(inode, tsession, t_cap_id, -1, issued, 0,
t_seq - 1, t_mseq, (u64)-1, flag, &new_cap);
if (!list_empty(&ci->i_cap_flush_list) &&
ci->i_auth_cap == tcap) {
spin_lock(&mdsc->cap_dirty_lock);
list_move_tail(&ci->i_flushing_item,
&tcap->session->s_cap_flushing);
spin_unlock(&mdsc->cap_dirty_lock);
}
__ceph_remove_cap(cap, false);
goto out_unlock;
}
......@@ -3924,9 +3938,10 @@ int ceph_encode_inode_release(void **p, struct inode *inode,
}
int ceph_encode_dentry_release(void **p, struct dentry *dentry,
struct inode *dir,
int mds, int drop, int unless)
{
struct inode *dir = d_inode(dentry->d_parent);
struct dentry *parent = NULL;
struct ceph_mds_request_release *rel = *p;
struct ceph_dentry_info *di = ceph_dentry(dentry);
int force = 0;
......@@ -3941,9 +3956,14 @@ int ceph_encode_dentry_release(void **p, struct dentry *dentry,
spin_lock(&dentry->d_lock);
if (di->lease_session && di->lease_session->s_mds == mds)
force = 1;
if (!dir) {
parent = dget(dentry->d_parent);
dir = d_inode(parent);
}
spin_unlock(&dentry->d_lock);
ret = ceph_encode_inode_release(p, dir, mds, drop, unless, force);
dput(parent);
spin_lock(&dentry->d_lock);
if (ret && di->lease_session && di->lease_session->s_mds == mds) {
......
......@@ -70,7 +70,7 @@ static int mdsc_show(struct seq_file *s, void *p)
seq_printf(s, "%s", ceph_mds_op_name(req->r_op));
if (req->r_got_unsafe)
if (test_bit(CEPH_MDS_R_GOT_UNSAFE, &req->r_req_flags))
seq_puts(s, "\t(unsafe)");
else
seq_puts(s, "\t");
......
......@@ -371,7 +371,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
/* hints to request -> mds selection code */
req->r_direct_mode = USE_AUTH_MDS;
req->r_direct_hash = ceph_frag_value(frag);
req->r_direct_is_hash = true;
__set_bit(CEPH_MDS_R_DIRECT_IS_HASH, &req->r_req_flags);
if (fi->last_name) {
req->r_path2 = kstrdup(fi->last_name, GFP_KERNEL);
if (!req->r_path2) {
......@@ -417,7 +417,7 @@ static int ceph_readdir(struct file *file, struct dir_context *ctx)
fi->frag = frag;
fi->last_readdir = req;
if (req->r_did_prepopulate) {
if (test_bit(CEPH_MDS_R_DID_PREPOPULATE, &req->r_req_flags)) {
fi->readdir_cache_idx = req->r_readdir_cache_idx;
if (fi->readdir_cache_idx < 0) {
/* preclude from marking dir ordered */
......@@ -752,7 +752,8 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
mask |= CEPH_CAP_XATTR_SHARED;
req->r_args.getattr.mask = cpu_to_le32(mask);
req->r_locked_dir = dir;
req->r_parent = dir;
set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
err = ceph_mdsc_do_request(mdsc, NULL, req);
err = ceph_handle_snapdir(req, dentry, err);
dentry = ceph_finish_lookup(req, dentry, err);
......@@ -813,7 +814,8 @@ static int ceph_mknod(struct inode *dir, struct dentry *dentry,
}
req->r_dentry = dget(dentry);
req->r_num_caps = 2;
req->r_locked_dir = dir;
req->r_parent = dir;
set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
req->r_args.mknod.mode = cpu_to_le32(mode);
req->r_args.mknod.rdev = cpu_to_le32(rdev);
req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
......@@ -864,7 +866,8 @@ static int ceph_symlink(struct inode *dir, struct dentry *dentry,
ceph_mdsc_put_request(req);
goto out;
}
req->r_locked_dir = dir;
req->r_parent = dir;
set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
req->r_dentry = dget(dentry);
req->r_num_caps = 2;
req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
......@@ -913,7 +916,8 @@ static int ceph_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
req->r_dentry = dget(dentry);
req->r_num_caps = 2;
req->r_locked_dir = dir;
req->r_parent = dir;
set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
req->r_args.mkdir.mode = cpu_to_le32(mode);
req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
......@@ -957,7 +961,8 @@ static int ceph_link(struct dentry *old_dentry, struct inode *dir,
req->r_dentry = dget(dentry);
req->r_num_caps = 2;
req->r_old_dentry = dget(old_dentry);
req->r_locked_dir = dir;
req->r_parent = dir;
set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
/* release LINK_SHARED on source inode (mds will lock it) */
......@@ -1023,7 +1028,8 @@ static int ceph_unlink(struct inode *dir, struct dentry *dentry)
}
req->r_dentry = dget(dentry);
req->r_num_caps = 2;
req->r_locked_dir = dir;
req->r_parent = dir;
set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
req->r_dentry_unless = CEPH_CAP_FILE_EXCL;
req->r_inode_drop = drop_caps_for_unlink(inode);
......@@ -1066,7 +1072,8 @@ static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
req->r_num_caps = 2;
req->r_old_dentry = dget(old_dentry);
req->r_old_dentry_dir = old_dir;
req->r_locked_dir = new_dir;
req->r_parent = new_dir;
set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
req->r_old_dentry_drop = CEPH_CAP_FILE_SHARED;
req->r_old_dentry_unless = CEPH_CAP_FILE_EXCL;
req->r_dentry_drop = CEPH_CAP_FILE_SHARED;
......@@ -1194,7 +1201,7 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags)
struct inode *dir;
if (flags & LOOKUP_RCU) {
parent = ACCESS_ONCE(dentry->d_parent);
parent = READ_ONCE(dentry->d_parent);
dir = d_inode_rcu(parent);
if (!dir)
return -ECHILD;
......@@ -1237,11 +1244,12 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags)
return -ECHILD;
op = ceph_snap(dir) == CEPH_SNAPDIR ?
CEPH_MDS_OP_LOOKUPSNAP : CEPH_MDS_OP_GETATTR;
CEPH_MDS_OP_LOOKUPSNAP : CEPH_MDS_OP_LOOKUP;
req = ceph_mdsc_create_request(mdsc, op, USE_ANY_MDS);
if (!IS_ERR(req)) {
req->r_dentry = dget(dentry);
req->r_num_caps = op == CEPH_MDS_OP_GETATTR ? 1 : 2;
req->r_num_caps = 2;
req->r_parent = dir;
mask = CEPH_STAT_CAP_INODE | CEPH_CAP_AUTH_SHARED;
if (ceph_security_xattr_wanted(dir))
......
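
A recurring pattern in the dir.c hunks above is the replacement of per-request
booleans (r_got_unsafe, r_direct_is_hash, r_did_prepopulate) and the
r_locked_dir pointer with an r_parent pointer plus named bits in r_req_flags,
manipulated with set_bit()/test_bit(). The following is a loose user-space
analogue of folding independent flags into one atomic word; the bit numbers are
illustrative, not the kernel's CEPH_MDS_R_* values:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Illustrative request-state bits (not the real CEPH_MDS_R_* numbers). */
enum {
    R_DIRECT_IS_HASH = 0,
    R_GOT_UNSAFE     = 1,
    R_PARENT_LOCKED  = 2,
};

static atomic_ulong req_flags;

static void set_flag(int bit)
{
    atomic_fetch_or(&req_flags, 1UL << bit);
}

static bool test_flag(int bit)
{
    return atomic_load(&req_flags) & (1UL << bit);
}

int main(void)
{
    set_flag(R_PARENT_LOCKED);
    printf("parent locked: %d, got unsafe: %d\n",
           test_flag(R_PARENT_LOCKED), test_flag(R_GOT_UNSAFE));
    return 0;
}
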
......@@ -207,7 +207,8 @@ static int ceph_get_name(struct dentry *parent, char *name,
req->r_inode = d_inode(child);
ihold(d_inode(child));
req->r_ino2 = ceph_vino(d_inode(parent));
req->r_locked_dir = d_inode(parent);
req->r_parent = d_inode(parent);
set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
req->r_num_caps = 2;
err = ceph_mdsc_do_request(mdsc, NULL, req);
......
......@@ -283,7 +283,7 @@ int ceph_open(struct inode *inode, struct file *file)
spin_lock(&ci->i_ceph_lock);
if (__ceph_is_any_real_caps(ci) &&
(((fmode & CEPH_FILE_MODE_WR) == 0) || ci->i_auth_cap)) {
int mds_wanted = __ceph_caps_mds_wanted(ci);
int mds_wanted = __ceph_caps_mds_wanted(ci, true);
int issued = __ceph_caps_issued(ci, NULL);
dout("open %p fmode %d want %s issued %s using existing\n",
......@@ -379,7 +379,8 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
mask |= CEPH_CAP_XATTR_SHARED;
req->r_args.open.mask = cpu_to_le32(mask);
req->r_locked_dir = dir; /* caller holds dir->i_mutex */
req->r_parent = dir;
set_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags);
err = ceph_mdsc_do_request(mdsc,
(flags & (O_CREAT|O_TRUNC)) ? dir : NULL,
req);
......@@ -758,9 +759,7 @@ static void ceph_aio_retry_work(struct work_struct *work)
goto out;
}
req->r_flags = CEPH_OSD_FLAG_ORDERSNAP |
CEPH_OSD_FLAG_ONDISK |
CEPH_OSD_FLAG_WRITE;
req->r_flags = CEPH_OSD_FLAG_ORDERSNAP | CEPH_OSD_FLAG_WRITE;
ceph_oloc_copy(&req->r_base_oloc, &orig_req->r_base_oloc);
ceph_oid_copy(&req->r_base_oid, &orig_req->r_base_oid);
......@@ -794,89 +793,6 @@ static void ceph_aio_retry_work(struct work_struct *work)
kfree(aio_work);
}
/*
* Write commit request unsafe callback, called to tell us when a
* request is unsafe (that is, in flight--has been handed to the
* messenger to send to its target osd). It is called again when
* we've received a response message indicating the request is
* "safe" (its CEPH_OSD_FLAG_ONDISK flag is set), or when a request
* is completed early (and unsuccessfully) due to a timeout or
* interrupt.
*
* This is used if we requested both an ACK and ONDISK commit reply
* from the OSD.
*/
static void ceph_sync_write_unsafe(struct ceph_osd_request *req, bool unsafe)
{
struct ceph_inode_info *ci = ceph_inode(req->r_inode);
dout("%s %p tid %llu %ssafe\n", __func__, req, req->r_tid,
unsafe ? "un" : "");
if (unsafe) {
ceph_get_cap_refs(ci, CEPH_CAP_FILE_WR);
spin_lock(&ci->i_unsafe_lock);
list_add_tail(&req->r_unsafe_item,
&ci->i_unsafe_writes);
spin_unlock(&ci->i_unsafe_lock);
complete_all(&req->r_completion);
} else {
spin_lock(&ci->i_unsafe_lock);
list_del_init(&req->r_unsafe_item);
spin_unlock(&ci->i_unsafe_lock);
ceph_put_cap_refs(ci, CEPH_CAP_FILE_WR);
}
}
/*
* Wait on any unsafe replies for the given inode. First wait on the
* newest request, and make that the upper bound. Then, if there are
* more requests, keep waiting on the oldest as long as it is still older
* than the original request.
*/
void ceph_sync_write_wait(struct inode *inode)
{
struct ceph_inode_info *ci = ceph_inode(inode);
struct list_head *head = &ci->i_unsafe_writes;
struct ceph_osd_request *req;
u64 last_tid;
if (!S_ISREG(inode->i_mode))
return;
spin_lock(&ci->i_unsafe_lock);
if (list_empty(head))
goto out;
/* set upper bound as _last_ entry in chain */
req = list_last_entry(head, struct ceph_osd_request,
r_unsafe_item);
last_tid = req->r_tid;
do {
ceph_osdc_get_request(req);
spin_unlock(&ci->i_unsafe_lock);
dout("sync_write_wait on tid %llu (until %llu)\n",
req->r_tid, last_tid);
wait_for_completion(&req->r_done_completion);
ceph_osdc_put_request(req);
spin_lock(&ci->i_unsafe_lock);
/*
* from here on look at first entry in chain, since we
* only want to wait for anything older than last_tid
*/
if (list_empty(head))
break;
req = list_first_entry(head, struct ceph_osd_request,
r_unsafe_item);
} while (req->r_tid < last_tid);
out:
spin_unlock(&ci->i_unsafe_lock);
}
static ssize_t
ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
struct ceph_snap_context *snapc,
......@@ -915,9 +831,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
if (ret2 < 0)
dout("invalidate_inode_pages2_range returned %d\n", ret2);
flags = CEPH_OSD_FLAG_ORDERSNAP |
CEPH_OSD_FLAG_ONDISK |
CEPH_OSD_FLAG_WRITE;
flags = CEPH_OSD_FLAG_ORDERSNAP | CEPH_OSD_FLAG_WRITE;
} else {
flags = CEPH_OSD_FLAG_READ;
}
......@@ -1116,10 +1030,7 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos,
if (ret < 0)
dout("invalidate_inode_pages2_range returned %d\n", ret);
flags = CEPH_OSD_FLAG_ORDERSNAP |
CEPH_OSD_FLAG_ONDISK |
CEPH_OSD_FLAG_WRITE |
CEPH_OSD_FLAG_ACK;
flags = CEPH_OSD_FLAG_ORDERSNAP | CEPH_OSD_FLAG_WRITE;
while ((len = iov_iter_count(from)) > 0) {
size_t left;
......@@ -1165,8 +1076,6 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos,
goto out;
}
/* get a second commit callback */
req->r_unsafe_callback = ceph_sync_write_unsafe;
req->r_inode = inode;
osd_req_op_extent_osd_data_pages(req, 0, pages, len, 0,
......@@ -1616,8 +1525,7 @@ static int ceph_zero_partial_object(struct inode *inode,
ceph_vino(inode),
offset, length,
0, 1, op,
CEPH_OSD_FLAG_WRITE |
CEPH_OSD_FLAG_ONDISK,
CEPH_OSD_FLAG_WRITE,
NULL, 0, 0, false);
if (IS_ERR(req)) {
ret = PTR_ERR(req);
......
......@@ -499,7 +499,6 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
ci->i_rdcache_gen = 0;
ci->i_rdcache_revoking = 0;
INIT_LIST_HEAD(&ci->i_unsafe_writes);
INIT_LIST_HEAD(&ci->i_unsafe_dirops);
INIT_LIST_HEAD(&ci->i_unsafe_iops);
spin_lock_init(&ci->i_unsafe_lock);
......@@ -583,14 +582,6 @@ int ceph_drop_inode(struct inode *inode)
return 1;
}
void ceph_evict_inode(struct inode *inode)
{
/* wait unsafe sync writes */
ceph_sync_write_wait(inode);
truncate_inode_pages_final(&inode->i_data);
clear_inode(inode);
}
static inline blkcnt_t calc_inode_blocks(u64 size)
{
return (size + (1<<9) - 1) >> 9;
......@@ -1016,7 +1007,9 @@ static int fill_inode(struct inode *inode, struct page *locked_page,
static void update_dentry_lease(struct dentry *dentry,
struct ceph_mds_reply_lease *lease,
struct ceph_mds_session *session,
unsigned long from_time)
unsigned long from_time,
struct ceph_vino *tgt_vino,
struct ceph_vino *dir_vino)
{
struct ceph_dentry_info *di = ceph_dentry(dentry);
long unsigned duration = le32_to_cpu(lease->duration_ms);
......@@ -1024,13 +1017,27 @@ static void update_dentry_lease(struct dentry *dentry,
long unsigned half_ttl = from_time + (duration * HZ / 2) / 1000;
struct inode *dir;
/*
* Make sure dentry's inode matches tgt_vino. NULL tgt_vino means that
* we expect a negative dentry.
*/
if (!tgt_vino && d_really_is_positive(dentry))
return;
if (tgt_vino && (d_really_is_negative(dentry) ||
!ceph_ino_compare(d_inode(dentry), tgt_vino)))
return;
spin_lock(&dentry->d_lock);
dout("update_dentry_lease %p duration %lu ms ttl %lu\n",
dentry, duration, ttl);
/* make lease_rdcache_gen match directory */
dir = d_inode(dentry->d_parent);
/* make sure parent matches dir_vino */
if (!ceph_ino_compare(dir, dir_vino))
goto out_unlock;
/* only track leases on regular dentries */
if (ceph_snap(dir) != CEPH_NOSNAP)
goto out_unlock;
......@@ -1108,61 +1115,27 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in)
*
* Called with snap_rwsem (read).
*/
int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req,
struct ceph_mds_session *session)
int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req)
{
struct ceph_mds_session *session = req->r_session;
struct ceph_mds_reply_info_parsed *rinfo = &req->r_reply_info;
struct inode *in = NULL;
struct ceph_vino vino;
struct ceph_vino tvino, dvino;
struct ceph_fs_client *fsc = ceph_sb_to_client(sb);
int err = 0;
dout("fill_trace %p is_dentry %d is_target %d\n", req,
rinfo->head->is_dentry, rinfo->head->is_target);