Import of kernel-4.18.0-553.117.1.el8_10

This commit is contained in:
almalinux-bot-kernel 2026-04-15 05:02:44 +00:00
parent 195ec817d0
commit 79152bf4d5
45 changed files with 833 additions and 314 deletions

View File

@ -12,7 +12,7 @@ RHEL_MINOR = 10
#
# Use this spot to avoid future merge conflicts.
# Do not trim this comment.
RHEL_RELEASE = 553.115.1
RHEL_RELEASE = 553.117.1
#
# ZSTREAM

View File

@ -177,7 +177,7 @@ static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
* is unbound or probed and that userspace can't access its
* configuration space while we perform recovery.
*/
pci_dev_lock(pdev);
device_lock(&pdev->dev);
if (pdev->error_state == pci_channel_io_perm_failure) {
ers_res = PCI_ERS_RESULT_DISCONNECT;
goto out_unlock;
@ -263,7 +263,7 @@ static void zpci_event_io_failure(struct pci_dev *pdev, pci_channel_state_t es)
if (driver && driver->err_handler && driver->err_handler->error_detected)
driver->err_handler->error_detected(pdev, pdev->error_state);
out:
pci_dev_unlock(pdev);
device_unlock(&pdev->dev);
}
static void __zpci_event_error(struct zpci_ccdf_err *ccdf)

View File

@ -284,6 +284,10 @@ static int acpi_processor_get_power_info_fadt(struct acpi_processor *pr)
ACPI_CX_DESC_LEN, "ACPI P_LVL3 IOPORT 0x%x",
pr->power.states[ACPI_STATE_C3].address);
if (!pr->power.states[ACPI_STATE_C2].address &&
!pr->power.states[ACPI_STATE_C3].address)
return -ENODEV;
return 0;
}

View File

@ -221,8 +221,11 @@ nv50_instobj_acquire(struct nvkm_memory *memory)
void __iomem *map = NULL;
/* Already mapped? */
if (refcount_inc_not_zero(&iobj->maps))
if (refcount_inc_not_zero(&iobj->maps)) {
/* read barrier to match the wmb issued before the refcount is set */
smp_rmb();
return iobj->map;
}
/* Take the lock, and re-check that another thread hasn't
* already mapped the object in the meantime.
@ -249,6 +252,8 @@ nv50_instobj_acquire(struct nvkm_memory *memory)
iobj->base.memory.ptrs = &nv50_instobj_fast;
else
iobj->base.memory.ptrs = &nv50_instobj_slow;
/* barrier to ensure the ptrs are written before refcount is set */
smp_wmb();
refcount_set(&iobj->maps, 1);
}

View File

@ -254,6 +254,21 @@ int nvmf_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val)
}
EXPORT_SYMBOL_GPL(nvmf_reg_write32);
/**
 * nvmf_subsystem_reset() - subsystem_reset callback shared by fabrics
 *			     transports.
 * @ctrl: controller whose NVM subsystem should be reset
 *
 * Waits for the controller via nvme_wait_reset(), writes NVME_SUBSYS_RESET
 * to the NSSR register through the transport's reg_write32 op, and then
 * hands off to nvme_try_sched_reset().
 *
 * Return: -EBUSY if nvme_wait_reset() fails, the reg_write32 error if the
 * register write fails, otherwise the result of nvme_try_sched_reset().
 */
int nvmf_subsystem_reset(struct nvme_ctrl *ctrl)
{
	int status;

	if (!nvme_wait_reset(ctrl))
		return -EBUSY;

	status = ctrl->ops->reg_write32(ctrl, NVME_REG_NSSR, NVME_SUBSYS_RESET);

	/* Only schedule the follow-up reset when the NSSR write succeeded. */
	return status ? status : nvme_try_sched_reset(ctrl);
}
EXPORT_SYMBOL_GPL(nvmf_subsystem_reset);
/**
* nvmf_log_connect_error() - Error-parsing-diagnostic print out function for
* connect() errors.

View File

@ -182,6 +182,7 @@ nvmf_ctlr_matches_baseopts(struct nvme_ctrl *ctrl,
int nvmf_reg_read32(struct nvme_ctrl *ctrl, u32 off, u32 *val);
int nvmf_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val);
int nvmf_reg_write32(struct nvme_ctrl *ctrl, u32 off, u32 val);
int nvmf_subsystem_reset(struct nvme_ctrl *ctrl);
int nvmf_connect_admin_queue(struct nvme_ctrl *ctrl);
int nvmf_connect_io_queue(struct nvme_ctrl *ctrl, u16 qid);
int nvmf_register_transport(struct nvmf_transport_ops *ops);

View File

@ -3379,6 +3379,7 @@ static const struct nvme_ctrl_ops nvme_fc_ctrl_ops = {
.reg_read32 = nvmf_reg_read32,
.reg_read64 = nvmf_reg_read64,
.reg_write32 = nvmf_reg_write32,
.subsystem_reset = nvmf_subsystem_reset,
.free_ctrl = nvme_fc_nvme_ctrl_freed,
.submit_async_event = nvme_fc_submit_async_event,
.delete_ctrl = nvme_fc_delete_ctrl,

View File

@ -505,6 +505,7 @@ struct nvme_ctrl_ops {
int (*reg_read64)(struct nvme_ctrl *ctrl, u32 off, u64 *val);
void (*free_ctrl)(struct nvme_ctrl *ctrl);
void (*submit_async_event)(struct nvme_ctrl *ctrl);
int (*subsystem_reset)(struct nvme_ctrl *ctrl);
void (*delete_ctrl)(struct nvme_ctrl *ctrl);
void (*stop_ctrl)(struct nvme_ctrl *ctrl);
int (*get_address)(struct nvme_ctrl *ctrl, char *buf, int size);
@ -574,18 +575,9 @@ int nvme_try_sched_reset(struct nvme_ctrl *ctrl);
static inline int nvme_reset_subsystem(struct nvme_ctrl *ctrl)
{
int ret;
if (!ctrl->subsystem)
if (!ctrl->subsystem || !ctrl->ops->subsystem_reset)
return -ENOTTY;
if (!nvme_wait_reset(ctrl))
return -EBUSY;
ret = ctrl->ops->reg_write32(ctrl, NVME_REG_NSSR, 0x4E564D65);
if (ret)
return ret;
return nvme_try_sched_reset(ctrl);
return ctrl->ops->subsystem_reset(ctrl);
}
/*

View File

@ -1041,6 +1041,41 @@ static void nvme_pci_submit_async_event(struct nvme_ctrl *ctrl)
nvme_submit_cmd(nvmeq, &c, true);
}
/*
 * nvme_pci_subsystem_reset - subsystem_reset op for the PCIe transport.
 * @ctrl: controller to reset; converted to its nvme_dev via to_nvme_dev()
 *
 * Writes NVME_SUBSYS_RESET to the NSSR register directly through the mapped
 * BAR while holding dev->shutdown_lock, then reads CSTS back.
 *
 * Return: 0 on success, -ENODEV when no BAR is mapped (bar_mapped_size is
 * zero), -EBUSY when the controller cannot be moved to NVME_CTRL_RESETTING.
 */
static int nvme_pci_subsystem_reset(struct nvme_ctrl *ctrl)
{
struct nvme_dev *dev = to_nvme_dev(ctrl);
int ret = 0;
/*
* Taking the shutdown_lock ensures the BAR mapping is not being
* altered by reset_work. Holding this lock before the RESETTING state
* change, if successful, also ensures nvme_remove won't be able to
* proceed to iounmap until we're done.
*/
mutex_lock(&dev->shutdown_lock);
/* Without a mapped BAR there is no register to write to. */
if (!dev->bar_mapped_size) {
ret = -ENODEV;
goto unlock;
}
/* Refuse the request if the state machine rejects RESETTING. */
if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING)) {
ret = -EBUSY;
goto unlock;
}
writel(NVME_SUBSYS_RESET, dev->bar + NVME_REG_NSSR);
/*
* The result of this state change is not checked.
* NOTE(review): presumably recovery is driven by the error triggered
* by the read below rather than by this state - confirm.
*/
nvme_change_ctrl_state(ctrl, NVME_CTRL_LIVE);
/*
* Read controller status to flush the previous write and trigger a
* pcie read error.
*/
readl(dev->bar + NVME_REG_CSTS);
unlock:
mutex_unlock(&dev->shutdown_lock);
return ret;
}
static int adapter_delete_queue(struct nvme_dev *dev, u8 opcode, u16 id)
{
struct nvme_command c = { };
@ -2822,6 +2857,7 @@ static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
.reg_read64 = nvme_pci_reg_read64,
.free_ctrl = nvme_pci_free_ctrl,
.submit_async_event = nvme_pci_submit_async_event,
.subsystem_reset = nvme_pci_subsystem_reset,
.get_address = nvme_pci_get_address,
};

View File

@ -2320,6 +2320,7 @@ static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = {
.reg_read32 = nvmf_reg_read32,
.reg_read64 = nvmf_reg_read64,
.reg_write32 = nvmf_reg_write32,
.subsystem_reset = nvmf_subsystem_reset,
.free_ctrl = nvme_rdma_free_ctrl,
.submit_async_event = nvme_rdma_submit_async_event,
.delete_ctrl = nvme_rdma_delete_ctrl,

View File

@ -2532,6 +2532,7 @@ static const struct nvme_ctrl_ops nvme_tcp_ctrl_ops = {
.reg_read32 = nvmf_reg_read32,
.reg_read64 = nvmf_reg_read64,
.reg_write32 = nvmf_reg_write32,
.subsystem_reset = nvmf_subsystem_reset,
.free_ctrl = nvme_tcp_free_ctrl,
.submit_async_event = nvme_tcp_submit_async_event,
.delete_ctrl = nvme_tcp_delete_ctrl,

View File

@ -1553,8 +1553,9 @@ qla2x00_update_optrom(struct bsg_job *bsg_job)
ha->optrom_buffer = NULL;
ha->optrom_state = QLA_SWAITING;
mutex_unlock(&ha->optrom_mutex);
bsg_job_done(bsg_job, bsg_reply->result,
bsg_reply->reply_payload_rcv_len);
if (!rval)
bsg_job_done(bsg_job, bsg_reply->result,
bsg_reply->reply_payload_rcv_len);
return rval;
}
@ -2536,8 +2537,9 @@ qla2x00_manage_host_stats(struct bsg_job *bsg_job)
sizeof(struct ql_vnd_mng_host_stats_resp));
bsg_reply->result = DID_OK;
bsg_job_done(bsg_job, bsg_reply->result,
bsg_reply->reply_payload_rcv_len);
if (!ret)
bsg_job_done(bsg_job, bsg_reply->result,
bsg_reply->reply_payload_rcv_len);
return ret;
}
@ -2626,8 +2628,9 @@ qla2x00_get_host_stats(struct bsg_job *bsg_job)
bsg_job->reply_payload.sg_cnt,
data, response_len);
bsg_reply->result = DID_OK;
bsg_job_done(bsg_job, bsg_reply->result,
bsg_reply->reply_payload_rcv_len);
if (!ret)
bsg_job_done(bsg_job, bsg_reply->result,
bsg_reply->reply_payload_rcv_len);
kfree(data);
host_stat_out:
@ -2726,8 +2729,9 @@ reply:
bsg_job->reply_payload.sg_cnt, data,
response_len);
bsg_reply->result = DID_OK;
bsg_job_done(bsg_job, bsg_reply->result,
bsg_reply->reply_payload_rcv_len);
if (!ret)
bsg_job_done(bsg_job, bsg_reply->result,
bsg_reply->reply_payload_rcv_len);
tgt_stat_out:
kfree(data);
@ -2788,8 +2792,9 @@ qla2x00_manage_host_port(struct bsg_job *bsg_job)
bsg_job->reply_payload.sg_cnt, &rsp_data,
sizeof(struct ql_vnd_mng_host_port_resp));
bsg_reply->result = DID_OK;
bsg_job_done(bsg_job, bsg_reply->result,
bsg_reply->reply_payload_rcv_len);
if (!ret)
bsg_job_done(bsg_job, bsg_reply->result,
bsg_reply->reply_payload_rcv_len);
return ret;
}
@ -3077,7 +3082,8 @@ int qla2x00_mailbox_passthru(struct bsg_job *bsg_job)
bsg_job->reply_len = sizeof(*bsg_job->reply);
bsg_reply->result = DID_OK << 16;
bsg_job_done(bsg_job, bsg_reply->result, bsg_reply->reply_payload_rcv_len);
if (!ret)
bsg_job_done(bsg_job, bsg_reply->result, bsg_reply->reply_payload_rcv_len);
kfree(req_data);

View File

@ -795,8 +795,11 @@ void iscsit_dec_session_usage_count(struct iscsit_session *sess)
spin_lock_bh(&sess->session_usage_lock);
sess->session_usage_count--;
if (!sess->session_usage_count && sess->session_waiting_on_uc)
if (!sess->session_usage_count && sess->session_waiting_on_uc) {
spin_unlock_bh(&sess->session_usage_lock);
complete(&sess->session_waiting_on_uc_comp);
return;
}
spin_unlock_bh(&sess->session_usage_lock);
}

View File

@ -912,10 +912,9 @@ bl_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
}
if (pgio->pg_dreq == NULL)
wb_size = pnfs_num_cont_bytes(pgio->pg_inode,
req->wb_index);
wb_size = pnfs_num_cont_bytes(pgio->pg_inode, req->wb_index);
else
wb_size = nfs_dreq_bytes_left(pgio->pg_dreq);
wb_size = nfs_dreq_bytes_left(pgio->pg_dreq, req_offset(req));
pnfs_generic_pg_init_write(pgio, req, wb_size);

View File

@ -518,6 +518,8 @@ int nfs_create_rpc_client(struct nfs_client *clp,
.version = clp->rpc_ops->version,
.authflavor = flavor,
.cred = cl_init->cred,
.connect_timeout = cl_init->connect_timeout,
.reconnect_timeout = cl_init->reconnect_timeout,
};
if (test_bit(NFS_CS_DISCRTRY, &clp->cl_flags))
@ -532,6 +534,8 @@ int nfs_create_rpc_client(struct nfs_client *clp,
args.flags |= RPC_CLNT_CREATE_NOPING;
if (test_bit(NFS_CS_REUSEPORT, &clp->cl_flags))
args.flags |= RPC_CLNT_CREATE_REUSEPORT;
if (test_bit(NFS_CS_NETUNREACH_FATAL, &clp->cl_flags))
args.flags |= RPC_CLNT_CREATE_NETUNREACH_FATAL;
if (!IS_ERR(clp->cl_rpcclient))
return 0;
@ -690,6 +694,9 @@ static int nfs_init_server(struct nfs_server *server,
if (ctx->flags & NFS_MOUNT_NORESVPORT)
set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
if (ctx->flags & NFS_MOUNT_NETUNREACH_FATAL)
__set_bit(NFS_CS_NETUNREACH_FATAL, &cl_init.init_flags);
/* Allocate or find a client reference we can use */
clp = nfs_get_client(&cl_init);
if (IS_ERR(clp))

View File

@ -58,44 +58,12 @@
#include "internal.h"
#include "iostat.h"
#include "pnfs.h"
#include "nfstrace.h"
#define NFSDBG_FACILITY NFSDBG_VFS
static struct kmem_cache *nfs_direct_cachep;
struct nfs_direct_req {
struct kref kref; /* release manager */
/* I/O parameters */
struct nfs_open_context *ctx; /* file open context info */
struct nfs_lock_context *l_ctx; /* Lock context info */
struct kiocb * iocb; /* controlling i/o request */
struct inode * inode; /* target file of i/o */
/* completion state */
atomic_t io_count; /* i/os we're waiting for */
spinlock_t lock; /* protect completion state */
loff_t io_start; /* Start offset for I/O */
ssize_t count, /* bytes actually processed */
max_count, /* max expected count */
bytes_left, /* bytes left to be sent */
error; /* any reported error */
struct completion completion; /* wait for i/o completion */
/* commit state */
struct nfs_mds_commit_info mds_cinfo; /* Storage for cinfo */
struct pnfs_ds_commit_info ds_cinfo; /* Storage for cinfo */
struct work_struct work;
int flags;
/* for write */
#define NFS_ODIRECT_DO_COMMIT (1) /* an unstable reply was received */
#define NFS_ODIRECT_RESCHED_WRITES (2) /* write verification failed */
/* for read */
#define NFS_ODIRECT_SHOULD_DIRTY (3) /* dirty user-space page after read */
#define NFS_ODIRECT_DONE INT_MAX /* write verification failed */
};
static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops;
static const struct nfs_commit_completion_ops nfs_direct_commit_completion_ops;
static void nfs_direct_write_complete(struct nfs_direct_req *dreq);
@ -123,12 +91,10 @@ nfs_direct_handle_truncated(struct nfs_direct_req *dreq,
dreq->max_count = dreq_len;
if (dreq->count > dreq_len)
dreq->count = dreq_len;
if (test_bit(NFS_IOHDR_ERROR, &hdr->flags))
dreq->error = hdr->error;
else /* Clear outstanding error if this is EOF */
dreq->error = 0;
}
if (test_bit(NFS_IOHDR_ERROR, &hdr->flags) && !dreq->error)
dreq->error = hdr->error;
}
static void
@ -150,6 +116,32 @@ nfs_direct_count_bytes(struct nfs_direct_req *dreq,
dreq->count = dreq_len;
}
/*
 * Clamp the direct request's byte accounting so that it ends where @req
 * begins.
 *
 * The start of @req is expressed relative to dreq->io_start; both
 * dreq->max_count and dreq->count are lowered to that value if they
 * currently exceed it. Callers visible in this change hold dreq->lock
 * around the call.
 */
static void nfs_direct_truncate_request(struct nfs_direct_req *dreq,
					struct nfs_page *req)
{
	size_t cutoff = (size_t)(req_offset(req) - dreq->io_start);

	if (dreq->max_count > cutoff)
		dreq->max_count = cutoff;
	if (dreq->count > cutoff)
		dreq->count = cutoff;
}
/*
 * Extend the in-memory inode size to cover a direct write of @count bytes
 * starting at @offset.
 *
 * Nothing is changed unless offset + count lies beyond the current
 * i_size. When the file grows, NFS_INO_INVALID_SIZE is cleared from the
 * cache validity flags, the size-grow tracepoint fires and the
 * NFSIOS_EXTENDWRITE counter is bumped.
 *
 * NOTE(review): the "_locked" suffix suggests the caller must hold
 * inode->i_lock - confirm against the call sites.
 */
static void nfs_direct_file_adjust_size_locked(struct inode *inode,
					       loff_t offset, size_t count)
{
	loff_t end = offset + (loff_t)count;

	/* The write stayed within the current file size: nothing to do. */
	if (end <= i_size_read(inode))
		return;

	i_size_write(inode, end);
	NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_SIZE;
	trace_nfs_size_grow(inode, end);
	nfs_inc_stats(inode, NFSIOS_EXTENDWRITE);
}
/**
* nfs_direct_IO - NFS address space operation for direct I/O
* @iocb: target I/O control block
@ -228,9 +220,10 @@ static void nfs_direct_req_release(struct nfs_direct_req *dreq)
kref_put(&dreq->kref, nfs_direct_req_free);
}
ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq)
ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq, loff_t offset)
{
return dreq->bytes_left;
loff_t start = offset - dreq->io_start;
return dreq->max_count - start;
}
EXPORT_SYMBOL_GPL(nfs_dreq_bytes_left);
@ -377,14 +370,12 @@ static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
struct nfs_page *req;
unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
/* XXX do we need to do the eof zeroing found in async_filler? */
req = nfs_create_request(dreq->ctx, pagevec[i],
pgbase, req_len);
req = nfs_page_create_from_page(dreq->ctx, pagevec[i],
pgbase, pos, req_len);
if (IS_ERR(req)) {
result = PTR_ERR(req);
break;
}
req->wb_index = pos >> PAGE_SHIFT;
req->wb_offset = pos & ~PAGE_MASK;
if (!nfs_pageio_add_request(&desc, req)) {
result = desc.pg_error;
nfs_release_request(req);
@ -504,21 +495,47 @@ out:
return result;
}
static void
nfs_direct_join_group(struct list_head *list, struct inode *inode)
static void nfs_direct_add_page_head(struct list_head *list,
struct nfs_page *req)
{
struct nfs_page *req, *next;
struct nfs_page *head = req->wb_head;
if (!list_empty(&head->wb_list) || !nfs_lock_request(head))
return;
if (!list_empty(&head->wb_list)) {
nfs_unlock_request(head);
return;
}
list_add(&head->wb_list, list);
kref_get(&head->wb_kref);
kref_get(&head->wb_kref);
}
static void nfs_direct_join_group(struct list_head *list,
struct nfs_commit_info *cinfo,
struct inode *inode)
{
struct nfs_page *req, *subreq;
list_for_each_entry(req, list, wb_list) {
if (req->wb_head != req || req->wb_this_page == req)
if (req->wb_head != req) {
nfs_direct_add_page_head(&req->wb_list, req);
continue;
for (next = req->wb_this_page;
next != req->wb_head;
next = next->wb_this_page) {
nfs_list_remove_request(next);
nfs_release_request(next);
}
nfs_join_page_group(req, inode);
subreq = req->wb_this_page;
if (subreq == req)
continue;
do {
/*
* Remove subrequests from this list before freeing
* them in the call to nfs_join_page_group().
*/
if (!list_empty(&subreq->wb_list)) {
nfs_list_remove_request(subreq);
nfs_release_request(subreq);
}
} while ((subreq = subreq->wb_this_page) != req);
nfs_join_page_group(req, cinfo, inode);
}
}
@ -536,20 +553,15 @@ nfs_direct_write_scan_commit_list(struct inode *inode,
static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
{
struct nfs_pageio_descriptor desc;
struct nfs_page *req, *tmp;
struct nfs_page *req;
LIST_HEAD(reqs);
struct nfs_commit_info cinfo;
LIST_HEAD(failed);
nfs_init_cinfo_from_dreq(&cinfo, dreq);
nfs_direct_write_scan_commit_list(dreq->inode, &reqs, &cinfo);
nfs_direct_join_group(&reqs, dreq->inode);
nfs_direct_join_group(&reqs, &cinfo, dreq->inode);
dreq->count = 0;
dreq->max_count = 0;
list_for_each_entry(req, &reqs, wb_list)
dreq->max_count += req->wb_bytes;
nfs_clear_pnfs_ds_commit_verifiers(&dreq->ds_cinfo);
get_dreq(dreq);
@ -557,27 +569,40 @@ static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq)
&nfs_direct_write_completion_ops);
desc.pg_dreq = dreq;
list_for_each_entry_safe(req, tmp, &reqs, wb_list) {
while (!list_empty(&reqs)) {
req = nfs_list_entry(reqs.next);
/* Bump the transmission count */
req->wb_nio++;
if (!nfs_pageio_add_request(&desc, req)) {
nfs_list_move_request(req, &failed);
spin_lock(&cinfo.inode->i_lock);
dreq->flags = 0;
if (desc.pg_error < 0)
spin_lock(&dreq->lock);
if (dreq->error < 0) {
desc.pg_error = dreq->error;
} else if (desc.pg_error != -EAGAIN) {
dreq->flags = 0;
if (!desc.pg_error)
desc.pg_error = -EIO;
dreq->error = desc.pg_error;
else
dreq->error = -EIO;
spin_unlock(&cinfo.inode->i_lock);
} else
dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
spin_unlock(&dreq->lock);
break;
}
nfs_release_request(req);
}
nfs_pageio_complete(&desc);
while (!list_empty(&failed)) {
req = nfs_list_entry(failed.next);
while (!list_empty(&reqs)) {
req = nfs_list_entry(reqs.next);
nfs_list_remove_request(req);
nfs_unlock_and_release_request(req);
if (desc.pg_error == -EAGAIN) {
nfs_mark_request_commit(req, NULL, &cinfo, 0);
} else {
spin_lock(&dreq->lock);
nfs_direct_truncate_request(dreq, req);
spin_unlock(&dreq->lock);
nfs_release_request(req);
}
}
if (put_dreq(dreq))
@ -592,29 +617,40 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data)
struct nfs_page *req;
int status = data->task.tk_status;
trace_nfs_direct_commit_complete(dreq);
spin_lock(&dreq->lock);
if (status < 0) {
/* Errors in commit are fatal */
dreq->error = status;
dreq->max_count = 0;
dreq->count = 0;
dreq->flags = NFS_ODIRECT_DONE;
} else if (dreq->flags == NFS_ODIRECT_DONE)
} else {
status = dreq->error;
}
spin_unlock(&dreq->lock);
nfs_init_cinfo_from_dreq(&cinfo, dreq);
while (!list_empty(&data->pages)) {
req = nfs_list_entry(data->pages.next);
nfs_list_remove_request(req);
if (status >= 0 && !nfs_write_match_verf(verf, req)) {
dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
if (status < 0) {
spin_lock(&dreq->lock);
nfs_direct_truncate_request(dreq, req);
spin_unlock(&dreq->lock);
nfs_release_request(req);
} else if (!nfs_write_match_verf(verf, req)) {
spin_lock(&dreq->lock);
if (dreq->flags == 0)
dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
spin_unlock(&dreq->lock);
/*
* Despite the reboot, the write was successful,
* so reset wb_nio.
*/
req->wb_nio = 0;
nfs_mark_request_commit(req, NULL, &cinfo, 0);
} else /* Error or match */
} else
nfs_release_request(req);
nfs_unlock_and_release_request(req);
}
@ -628,6 +664,8 @@ static void nfs_direct_resched_write(struct nfs_commit_info *cinfo,
{
struct nfs_direct_req *dreq = cinfo->dreq;
trace_nfs_direct_resched_write(dreq);
spin_lock(&dreq->lock);
if (dreq->flags != NFS_ODIRECT_DONE)
dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
@ -672,6 +710,7 @@ static void nfs_direct_write_clear_reqs(struct nfs_direct_req *dreq)
while (!list_empty(&reqs)) {
req = nfs_list_entry(reqs.next);
nfs_list_remove_request(req);
nfs_direct_truncate_request(dreq, req);
nfs_release_request(req);
nfs_unlock_and_release_request(req);
}
@ -699,6 +738,7 @@ static void nfs_direct_write_schedule_work(struct work_struct *work)
static void nfs_direct_write_complete(struct nfs_direct_req *dreq)
{
trace_nfs_direct_write_complete(dreq);
queue_work(nfsiod_workqueue, &dreq->work); /* Calls nfs_direct_write_schedule_work */
}
@ -707,8 +747,11 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
struct nfs_direct_req *dreq = hdr->dreq;
struct nfs_commit_info cinfo;
struct nfs_page *req = nfs_list_entry(hdr->pages.next);
struct inode *inode = dreq->inode;
int flags = NFS_ODIRECT_DONE;
trace_nfs_direct_write_completion(dreq);
nfs_init_cinfo_from_dreq(&cinfo, dreq);
spin_lock(&dreq->lock);
@ -718,13 +761,18 @@ static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
}
nfs_direct_count_bytes(dreq, hdr);
if (hdr->good_bytes != 0 && nfs_write_need_commit(hdr)) {
if (test_bit(NFS_IOHDR_UNSTABLE_WRITES, &hdr->flags) &&
!test_bit(NFS_IOHDR_ERROR, &hdr->flags)) {
if (!dreq->flags)
dreq->flags = NFS_ODIRECT_DO_COMMIT;
flags = dreq->flags;
}
spin_unlock(&dreq->lock);
spin_lock(&inode->i_lock);
nfs_direct_file_adjust_size_locked(inode, dreq->io_start, dreq->count);
spin_unlock(&inode->i_lock);
while (!list_empty(&hdr->pages)) {
req = nfs_list_entry(hdr->pages.next);
@ -762,16 +810,23 @@ static void nfs_write_sync_pgio_error(struct list_head *head, int error)
static void nfs_direct_write_reschedule_io(struct nfs_pgio_header *hdr)
{
struct nfs_direct_req *dreq = hdr->dreq;
struct nfs_page *req;
struct nfs_commit_info cinfo;
trace_nfs_direct_write_reschedule_io(dreq);
nfs_init_cinfo_from_dreq(&cinfo, dreq);
spin_lock(&dreq->lock);
if (dreq->error == 0) {
if (dreq->error == 0)
dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
/* fake unstable write to let common nfs resend pages */
hdr->verf.committed = NFS_UNSTABLE;
hdr->good_bytes = hdr->args.offset + hdr->args.count -
hdr->io_start;
}
set_bit(NFS_IOHDR_REDO, &hdr->flags);
spin_unlock(&dreq->lock);
while (!list_empty(&hdr->pages)) {
req = nfs_list_entry(hdr->pages.next);
nfs_list_remove_request(req);
nfs_unlock_request(req);
nfs_mark_request_commit(req, NULL, &cinfo, 0);
}
}
static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = {
@ -799,9 +854,13 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
{
struct nfs_pageio_descriptor desc;
struct inode *inode = dreq->inode;
struct nfs_commit_info cinfo;
ssize_t result = 0;
size_t requested_bytes = 0;
size_t wsize = max_t(size_t, NFS_SERVER(inode)->wsize, PAGE_SIZE);
bool defer = false;
trace_nfs_direct_write_schedule_iovec(dreq);
nfs_pageio_init_write(&desc, inode, FLUSH_COND_STABLE, false,
&nfs_direct_write_completion_ops);
@ -828,8 +887,8 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
struct nfs_page *req;
unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
req = nfs_create_request(dreq->ctx, pagevec[i],
pgbase, req_len);
req = nfs_page_create_from_page(dreq->ctx, pagevec[i],
pgbase, pos, req_len);
if (IS_ERR(req)) {
result = PTR_ERR(req);
break;
@ -841,19 +900,37 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
break;
}
nfs_lock_request(req);
req->wb_index = pos >> PAGE_SHIFT;
req->wb_offset = pos & ~PAGE_MASK;
if (!nfs_pageio_add_request(&desc, req)) {
result = desc.pg_error;
nfs_unlock_and_release_request(req);
break;
}
pgbase = 0;
bytes -= req_len;
requested_bytes += req_len;
pos += req_len;
dreq->bytes_left -= req_len;
if (defer) {
nfs_mark_request_commit(req, NULL, &cinfo, 0);
continue;
}
nfs_lock_request(req);
if (nfs_pageio_add_request(&desc, req))
continue;
/* Exit on hard errors */
if (desc.pg_error < 0 && desc.pg_error != -EAGAIN) {
result = desc.pg_error;
nfs_unlock_and_release_request(req);
break;
}
/* If the error is soft, defer remaining requests */
nfs_init_cinfo_from_dreq(&cinfo, dreq);
spin_lock(&dreq->lock);
dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
spin_unlock(&dreq->lock);
nfs_unlock_request(req);
nfs_mark_request_commit(req, NULL, &cinfo, 0);
desc.pg_error = 0;
defer = true;
}
nfs_direct_release_pages(pagevec, npages);
kvfree(pagevec);

View File

@ -275,7 +275,7 @@ ff_lseg_match_mirrors(struct pnfs_layout_segment *l1,
struct pnfs_layout_segment *l2)
{
const struct nfs4_ff_layout_segment *fl1 = FF_LAYOUT_LSEG(l1);
const struct nfs4_ff_layout_segment *fl2 = FF_LAYOUT_LSEG(l1);
const struct nfs4_ff_layout_segment *fl2 = FF_LAYOUT_LSEG(l2);
u32 i;
if (fl1->mirror_array_cnt != fl2->mirror_array_cnt)
@ -744,25 +744,28 @@ ff_layout_choose_ds_for_read(struct pnfs_layout_segment *lseg,
{
struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg);
struct nfs4_ff_layout_mirror *mirror;
struct nfs4_pnfs_ds *ds;
struct nfs4_pnfs_ds *ds = ERR_PTR(-EAGAIN);
u32 idx;
/* mirrors are initially sorted by efficiency */
for (idx = start_idx; idx < fls->mirror_array_cnt; idx++) {
mirror = FF_LAYOUT_COMP(lseg, idx);
ds = nfs4_ff_layout_prepare_ds(lseg, mirror, false);
if (!ds)
if (IS_ERR(ds))
continue;
if (check_device &&
nfs4_test_deviceid_unavailable(&mirror->mirror_ds->id_node))
nfs4_test_deviceid_unavailable(&mirror->mirror_ds->id_node)) {
// reinitialize the error state in case this is the last iteration
ds = ERR_PTR(-EINVAL);
continue;
}
*best_idx = idx;
return ds;
break;
}
return NULL;
return ds;
}
static struct nfs4_pnfs_ds *
@ -786,7 +789,7 @@ ff_layout_choose_best_ds_for_read(struct pnfs_layout_segment *lseg,
struct nfs4_pnfs_ds *ds;
ds = ff_layout_choose_valid_ds_for_read(lseg, start_idx, best_idx);
if (ds)
if (!IS_ERR(ds))
return ds;
return ff_layout_choose_any_ds_for_read(lseg, start_idx, best_idx);
}
@ -800,7 +803,7 @@ ff_layout_get_ds_for_read(struct nfs_pageio_descriptor *pgio,
ds = ff_layout_choose_best_ds_for_read(lseg, pgio->pg_mirror_idx,
best_idx);
if (ds || !pgio->pg_mirror_idx)
if (!IS_ERR(ds) || !pgio->pg_mirror_idx)
return ds;
return ff_layout_choose_best_ds_for_read(lseg, 0, best_idx);
}
@ -841,6 +844,9 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio,
struct nfs4_pnfs_ds *ds;
u32 ds_idx;
if (NFS_SERVER(pgio->pg_inode)->flags &
(NFS_MOUNT_SOFT|NFS_MOUNT_SOFTERR))
pgio->pg_maxretrans = io_maxretrans;
retry:
ff_layout_pg_check_layout(pgio, req);
/* Use full layout for now */
@ -854,9 +860,11 @@ retry:
if (!pgio->pg_lseg)
goto out_nolseg;
}
/* Reset wb_nio, since getting layout segment was successful */
req->wb_nio = 0;
ds = ff_layout_get_ds_for_read(pgio, &ds_idx);
if (!ds) {
if (IS_ERR(ds)) {
if (!ff_layout_no_fallback_to_mds(pgio->pg_lseg))
goto out_mds;
pnfs_generic_pg_cleanup(pgio);
@ -870,14 +878,24 @@ retry:
pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].rsize;
pgio->pg_mirror_idx = ds_idx;
if (NFS_SERVER(pgio->pg_inode)->flags &
(NFS_MOUNT_SOFT|NFS_MOUNT_SOFTERR))
pgio->pg_maxretrans = io_maxretrans;
return;
out_nolseg:
if (pgio->pg_error < 0)
return;
if (pgio->pg_error < 0) {
if (pgio->pg_error != -EAGAIN)
return;
/* Retry getting layout segment if lower layer returned -EAGAIN */
if (pgio->pg_maxretrans && req->wb_nio++ > pgio->pg_maxretrans) {
if (NFS_SERVER(pgio->pg_inode)->flags & NFS_MOUNT_SOFTERR)
pgio->pg_error = -ETIMEDOUT;
else
pgio->pg_error = -EIO;
return;
}
pgio->pg_error = 0;
/* Sleep for 1 second before retrying */
ssleep(1);
goto retry;
}
out_mds:
trace_pnfs_mds_fallback_pg_init_read(pgio->pg_inode,
0, NFS4_MAX_UINT64, IOMODE_READ,
@ -923,7 +941,7 @@ retry:
for (i = 0; i < pgio->pg_mirror_count; i++) {
mirror = FF_LAYOUT_COMP(pgio->pg_lseg, i);
ds = nfs4_ff_layout_prepare_ds(pgio->pg_lseg, mirror, true);
if (!ds) {
if (IS_ERR(ds)) {
if (!ff_layout_no_fallback_to_mds(pgio->pg_lseg))
goto out_mds;
pnfs_generic_pg_cleanup(pgio);
@ -1056,11 +1074,13 @@ static void ff_layout_resend_pnfs_read(struct nfs_pgio_header *hdr)
{
u32 idx = hdr->pgio_mirror_idx + 1;
u32 new_idx = 0;
struct nfs4_pnfs_ds *ds;
if (ff_layout_choose_any_ds_for_read(hdr->lseg, idx, &new_idx))
ff_layout_send_layouterror(hdr->lseg);
else
ds = ff_layout_choose_any_ds_for_read(hdr->lseg, idx, &new_idx);
if (IS_ERR(ds))
pnfs_error_mark_layout_for_return(hdr->inode, hdr->lseg);
else
ff_layout_send_layouterror(hdr->lseg);
pnfs_read_resend_pnfs(hdr, new_idx);
}
@ -1089,6 +1109,7 @@ static void ff_layout_reset_read(struct nfs_pgio_header *hdr)
}
static int ff_layout_async_handle_error_v4(struct rpc_task *task,
u32 op_status,
struct nfs4_state *state,
struct nfs_client *clp,
struct pnfs_layout_segment *lseg,
@ -1099,32 +1120,42 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table;
switch (task->tk_status) {
case -NFS4ERR_BADSESSION:
case -NFS4ERR_BADSLOT:
case -NFS4ERR_BAD_HIGH_SLOT:
case -NFS4ERR_DEADSESSION:
case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
case -NFS4ERR_SEQ_FALSE_RETRY:
case -NFS4ERR_SEQ_MISORDERED:
switch (op_status) {
case NFS4_OK:
case NFS4ERR_NXIO:
break;
case NFSERR_PERM:
if (!task->tk_xprt)
break;
xprt_force_disconnect(task->tk_xprt);
goto out_retry;
case NFS4ERR_BADSESSION:
case NFS4ERR_BADSLOT:
case NFS4ERR_BAD_HIGH_SLOT:
case NFS4ERR_DEADSESSION:
case NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
case NFS4ERR_SEQ_FALSE_RETRY:
case NFS4ERR_SEQ_MISORDERED:
dprintk("%s ERROR %d, Reset session. Exchangeid "
"flags 0x%x\n", __func__, task->tk_status,
clp->cl_exchange_flags);
nfs4_schedule_session_recovery(clp->cl_session, task->tk_status);
break;
case -NFS4ERR_DELAY:
case -NFS4ERR_GRACE:
goto out_retry;
case NFS4ERR_DELAY:
nfs_inc_stats(lseg->pls_layout->plh_inode, NFSIOS_DELAY);
fallthrough;
case NFS4ERR_GRACE:
rpc_delay(task, FF_LAYOUT_POLL_RETRY_MAX);
break;
case -NFS4ERR_RETRY_UNCACHED_REP:
break;
goto out_retry;
case NFS4ERR_RETRY_UNCACHED_REP:
goto out_retry;
/* Invalidate Layout errors */
case -NFS4ERR_PNFS_NO_LAYOUT:
case -ESTALE: /* mapped NFS4ERR_STALE */
case -EBADHANDLE: /* mapped NFS4ERR_BADHANDLE */
case -EISDIR: /* mapped NFS4ERR_ISDIR */
case -NFS4ERR_FHEXPIRED:
case -NFS4ERR_WRONG_TYPE:
case NFS4ERR_PNFS_NO_LAYOUT:
case NFS4ERR_STALE:
case NFS4ERR_BADHANDLE:
case NFS4ERR_ISDIR:
case NFS4ERR_FHEXPIRED:
case NFS4ERR_WRONG_TYPE:
dprintk("%s Invalid layout error %d\n", __func__,
task->tk_status);
/*
@ -1137,11 +1168,20 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
pnfs_destroy_layout(NFS_I(inode));
rpc_wake_up(&tbl->slot_tbl_waitq);
goto reset;
default:
break;
}
switch (task->tk_status) {
/* RPC connection errors */
case -ENETDOWN:
case -ENETUNREACH:
if (test_bit(NFS_CS_NETUNREACH_FATAL, &clp->cl_flags))
return -NFS4ERR_FATAL_IOERROR;
fallthrough;
case -ECONNREFUSED:
case -EHOSTDOWN:
case -EHOSTUNREACH:
case -ENETUNREACH:
case -EIO:
case -ETIMEDOUT:
case -EPIPE:
@ -1152,26 +1192,56 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
nfs4_delete_deviceid(devid->ld, devid->nfs_client,
&devid->deviceid);
rpc_wake_up(&tbl->slot_tbl_waitq);
/* fall through */
break;
default:
if (ff_layout_avoid_mds_available_ds(lseg))
return -NFS4ERR_RESET_TO_PNFS;
reset:
dprintk("%s Retry through MDS. Error %d\n", __func__,
task->tk_status);
return -NFS4ERR_RESET_TO_MDS;
break;
}
if (ff_layout_avoid_mds_available_ds(lseg))
return -NFS4ERR_RESET_TO_PNFS;
reset:
dprintk("%s Retry through MDS. Error %d\n", __func__,
task->tk_status);
return -NFS4ERR_RESET_TO_MDS;
out_retry:
task->tk_status = 0;
return -EAGAIN;
}
/* Retry all errors through either pNFS or MDS except for -EJUKEBOX */
static int ff_layout_async_handle_error_v3(struct rpc_task *task,
u32 op_status,
struct nfs_client *clp,
struct pnfs_layout_segment *lseg,
u32 idx)
{
struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
switch (op_status) {
case NFS_OK:
case NFSERR_NXIO:
break;
case NFSERR_PERM:
if (!task->tk_xprt)
break;
xprt_force_disconnect(task->tk_xprt);
goto out_retry;
case NFSERR_ACCES:
case NFSERR_BADHANDLE:
case NFSERR_FBIG:
case NFSERR_IO:
case NFSERR_NOSPC:
case NFSERR_ROFS:
case NFSERR_STALE:
goto out_reset_to_pnfs;
case NFSERR_JUKEBOX:
nfs_inc_stats(lseg->pls_layout->plh_inode, NFSIOS_DELAY);
goto out_retry;
default:
break;
}
switch (task->tk_status) {
/* File access problems. Don't mark the device as unavailable */
case -EACCES:
@ -1184,12 +1254,18 @@ static int ff_layout_async_handle_error_v3(struct rpc_task *task,
case -EJUKEBOX:
nfs_inc_stats(lseg->pls_layout->plh_inode, NFSIOS_DELAY);
goto out_retry;
case -ENETDOWN:
case -ENETUNREACH:
if (test_bit(NFS_CS_NETUNREACH_FATAL, &clp->cl_flags))
return -NFS4ERR_FATAL_IOERROR;
fallthrough;
default:
dprintk("%s DS connection error %d\n", __func__,
task->tk_status);
nfs4_delete_deviceid(devid->ld, devid->nfs_client,
&devid->deviceid);
}
out_reset_to_pnfs:
/* FIXME: Need to prevent infinite looping here. */
return -NFS4ERR_RESET_TO_PNFS;
out_retry:
@ -1200,6 +1276,7 @@ out_retry:
}
static int ff_layout_async_handle_error(struct rpc_task *task,
u32 op_status,
struct nfs4_state *state,
struct nfs_client *clp,
struct pnfs_layout_segment *lseg,
@ -1218,10 +1295,11 @@ static int ff_layout_async_handle_error(struct rpc_task *task,
switch (vers) {
case 3:
return ff_layout_async_handle_error_v3(task, lseg, idx);
case 4:
return ff_layout_async_handle_error_v4(task, state, clp,
return ff_layout_async_handle_error_v3(task, op_status, clp,
lseg, idx);
case 4:
return ff_layout_async_handle_error_v4(task, op_status, state,
clp, lseg, idx);
default:
/* should never happen */
WARN_ON_ONCE(1);
@ -1247,6 +1325,7 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg,
case -ECONNRESET:
case -EHOSTDOWN:
case -EHOSTUNREACH:
case -ENETDOWN:
case -ENETUNREACH:
case -EADDRINUSE:
case -ENOBUFS:
@ -1272,6 +1351,7 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg,
switch (status) {
case NFS4ERR_DELAY:
case NFS4ERR_GRACE:
case NFS4ERR_PERM:
break;
case NFS4ERR_NXIO:
ff_layout_mark_ds_unreachable(lseg, idx);
@ -1301,10 +1381,11 @@ static int ff_layout_read_done_cb(struct rpc_task *task,
hdr->args.offset, hdr->args.count,
&hdr->res.op_status, OP_READ,
task->tk_status);
trace_ff_layout_read_error(hdr);
trace_ff_layout_read_error(hdr, task->tk_status);
}
err = ff_layout_async_handle_error(task, hdr->args.context->state,
err = ff_layout_async_handle_error(task, hdr->res.op_status,
hdr->args.context->state,
hdr->ds_clp, hdr->lseg,
hdr->pgio_mirror_idx);
@ -1320,6 +1401,9 @@ static int ff_layout_read_done_cb(struct rpc_task *task,
return task->tk_status;
case -EAGAIN:
goto out_eagain;
case -NFS4ERR_FATAL_IOERROR:
task->tk_status = -EIO;
return 0;
}
return 0;
@ -1466,10 +1550,11 @@ static int ff_layout_write_done_cb(struct rpc_task *task,
hdr->args.offset, hdr->args.count,
&hdr->res.op_status, OP_WRITE,
task->tk_status);
trace_ff_layout_write_error(hdr);
trace_ff_layout_write_error(hdr, task->tk_status);
}
err = ff_layout_async_handle_error(task, hdr->args.context->state,
err = ff_layout_async_handle_error(task, hdr->res.op_status,
hdr->args.context->state,
hdr->ds_clp, hdr->lseg,
hdr->pgio_mirror_idx);
@ -1485,6 +1570,9 @@ static int ff_layout_write_done_cb(struct rpc_task *task,
return task->tk_status;
case -EAGAIN:
return -EAGAIN;
case -NFS4ERR_FATAL_IOERROR:
task->tk_status = -EIO;
return 0;
}
if (hdr->res.verf->committed == NFS_FILE_SYNC ||
@ -1512,11 +1600,12 @@ static int ff_layout_commit_done_cb(struct rpc_task *task,
data->args.offset, data->args.count,
&data->res.op_status, OP_COMMIT,
task->tk_status);
trace_ff_layout_commit_error(data);
trace_ff_layout_commit_error(data, task->tk_status);
}
err = ff_layout_async_handle_error(task, NULL, data->ds_clp,
data->lseg, data->ds_commit_index);
err = ff_layout_async_handle_error(task, data->res.op_status,
NULL, data->ds_clp, data->lseg,
data->ds_commit_index);
trace_nfs4_pnfs_commit_ds(data, err);
switch (err) {
@ -1529,6 +1618,9 @@ static int ff_layout_commit_done_cb(struct rpc_task *task,
case -EAGAIN:
rpc_restart_call_prepare(task);
return -EAGAIN;
case -NFS4ERR_FATAL_IOERROR:
task->tk_status = -EIO;
return 0;
}
ff_layout_set_layoutcommit(data->inode, data->lseg, data->lwb);
@ -1759,6 +1851,7 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
u32 idx = hdr->pgio_mirror_idx;
int vers;
struct nfs_fh *fh;
bool ds_fatal_error = false;
dprintk("--> %s ino %lu pgbase %u req %zu@%llu\n",
__func__, hdr->inode->i_ino,
@ -1766,8 +1859,10 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
mirror = FF_LAYOUT_COMP(lseg, idx);
ds = nfs4_ff_layout_prepare_ds(lseg, mirror, false);
if (!ds)
if (IS_ERR(ds)) {
ds_fatal_error = nfs_error_is_fatal(PTR_ERR(ds));
goto out_failed;
}
ds_clnt = nfs4_ff_find_or_create_ds_client(mirror, ds->ds_clp,
hdr->inode);
@ -1808,7 +1903,7 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
return PNFS_ATTEMPTED;
out_failed:
if (ff_layout_avoid_mds_available_ds(lseg))
if (ff_layout_avoid_mds_available_ds(lseg) && !ds_fatal_error)
return PNFS_TRY_AGAIN;
trace_pnfs_mds_fallback_read_pagelist(hdr->inode,
hdr->args.offset, hdr->args.count,
@ -1829,11 +1924,14 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
int vers;
struct nfs_fh *fh;
u32 idx = hdr->pgio_mirror_idx;
bool ds_fatal_error = false;
mirror = FF_LAYOUT_COMP(lseg, idx);
ds = nfs4_ff_layout_prepare_ds(lseg, mirror, true);
if (!ds)
if (IS_ERR(ds)) {
ds_fatal_error = nfs_error_is_fatal(PTR_ERR(ds));
goto out_failed;
}
ds_clnt = nfs4_ff_find_or_create_ds_client(mirror, ds->ds_clp,
hdr->inode);
@ -1876,7 +1974,7 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
return PNFS_ATTEMPTED;
out_failed:
if (ff_layout_avoid_mds_available_ds(lseg))
if (ff_layout_avoid_mds_available_ds(lseg) && !ds_fatal_error)
return PNFS_TRY_AGAIN;
trace_pnfs_mds_fallback_write_pagelist(hdr->inode,
hdr->args.offset, hdr->args.count,
@ -1918,7 +2016,7 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how)
idx = calc_ds_index_from_commit(lseg, data->ds_commit_index);
mirror = FF_LAYOUT_COMP(lseg, idx);
ds = nfs4_ff_layout_prepare_ds(lseg, mirror, true);
if (!ds)
if (IS_ERR(ds))
goto out_err;
ds_clnt = nfs4_ff_find_or_create_ds_client(mirror, ds->ds_clp,

View File

@ -368,11 +368,11 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg,
struct nfs4_ff_layout_mirror *mirror,
bool fail_return)
{
struct nfs4_pnfs_ds *ds = NULL;
struct nfs4_pnfs_ds *ds;
struct inode *ino = lseg->pls_layout->plh_inode;
struct nfs_server *s = NFS_SERVER(ino);
unsigned int max_payload;
int status;
int status = -EAGAIN;
if (!ff_layout_init_mirror_ds(lseg->pls_layout, mirror))
goto noconnect;
@ -410,7 +410,7 @@ noconnect:
ff_layout_send_layouterror(lseg);
if (fail_return || !ff_layout_has_available_ds(lseg))
pnfs_error_mark_layout_for_return(ino, lseg);
ds = NULL;
ds = ERR_PTR(status);
out:
return ds;
}

View File

@ -43,6 +43,7 @@ enum nfs_param {
Opt_bsize,
Opt_clientaddr,
Opt_cto,
Opt_fatal_neterrors,
Opt_fg,
Opt_fscache,
Opt_fscache_flag,
@ -88,6 +89,20 @@ enum nfs_param {
Opt_wsize,
};
/*
 * Values produced by the "fatal_neterrors" mount option; the strings that
 * map onto them are listed in nfs_param_enums_fatal_neterrors.
 */
enum {
Opt_fatal_neterrors_default,
Opt_fatal_neterrors_enetunreach,
Opt_fatal_neterrors_none,
};
/*
 * String -> value table for the "fatal_neterrors" mount option.
 * Both orderings of the ENETDOWN/ENETUNREACH pair are accepted and map to
 * the same value. The empty entry terminates the table.
 */
static const struct constant_table nfs_param_enums_fatal_neterrors[] = {
{ "default", Opt_fatal_neterrors_default },
{ "ENETDOWN:ENETUNREACH", Opt_fatal_neterrors_enetunreach },
{ "ENETUNREACH:ENETDOWN", Opt_fatal_neterrors_enetunreach },
{ "none", Opt_fatal_neterrors_none },
{}
};
enum {
Opt_local_lock_all,
Opt_local_lock_flock,
@ -130,6 +145,8 @@ static const struct fs_parameter_spec nfs_fs_parameters[] = {
fsparam_u32 ("bsize", Opt_bsize),
fsparam_string("clientaddr", Opt_clientaddr),
fsparam_flag_no("cto", Opt_cto),
fsparam_enum("fatal_neterrors", Opt_fatal_neterrors,
nfs_param_enums_fatal_neterrors),
fsparam_flag ("fg", Opt_fg),
fsparam_flag_no("fsc", Opt_fscache_flag),
fsparam_string("fsc", Opt_fscache),
@ -776,6 +793,24 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
goto out_of_bounds;
ctx->nfs_server.max_connect = result.uint_32;
break;
case Opt_fatal_neterrors:
switch (result.uint_32) {
case Opt_fatal_neterrors_default:
if (fc->net_ns != &init_net)
ctx->flags |= NFS_MOUNT_NETUNREACH_FATAL;
else
ctx->flags &= ~NFS_MOUNT_NETUNREACH_FATAL;
break;
case Opt_fatal_neterrors_enetunreach:
ctx->flags |= NFS_MOUNT_NETUNREACH_FATAL;
break;
case Opt_fatal_neterrors_none:
ctx->flags &= ~NFS_MOUNT_NETUNREACH_FATAL;
break;
default:
goto out_invalid_value;
}
break;
case Opt_lookupcache:
switch (result.uint_32) {
case Opt_lookupcache_all:
@ -1528,6 +1563,9 @@ static int nfs_init_fs_context(struct fs_context *fc)
ctx->minorversion = 0;
ctx->need_mount = true;
if (fc->net_ns != &init_net)
ctx->flags |= NFS_MOUNT_NETUNREACH_FATAL;
fc->s_iflags |= SB_I_STABLE_WRITES;
}
fc->fs_private = ctx;

View File

@ -81,6 +81,8 @@ struct nfs_client_initdata {
struct net *net;
const struct rpc_timeout *timeparms;
const struct cred *cred;
unsigned long connect_timeout;
unsigned long reconnect_timeout;
};
/*
@ -620,7 +622,7 @@ extern int nfs_sillyrename(struct inode *dir, struct dentry *dentry);
/* direct.c */
void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo,
struct nfs_direct_req *dreq);
extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq);
extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq, loff_t offset);
/* nfs4proc.c */
extern struct nfs_client *nfs4_init_client(struct nfs_client *clp,
@ -865,3 +867,36 @@ static inline void nfs_set_port(struct sockaddr *sap, int *port,
rpc_set_port(sap, *port);
}
/*
 * State for one direct I/O request: the I/O parameters, completion
 * accounting, commit bookkeeping and a flags word that takes the
 * NFS_ODIRECT_* values defined at the end of the structure.
 */
struct nfs_direct_req {
struct kref kref; /* release manager */
/* I/O parameters */
struct nfs_open_context *ctx; /* file open context info */
struct nfs_lock_context *l_ctx; /* Lock context info */
struct kiocb * iocb; /* controlling i/o request */
struct inode * inode; /* target file of i/o */
/* completion state */
atomic_t io_count; /* i/os we're waiting for */
spinlock_t lock; /* protect completion state */
loff_t io_start; /* Start offset for I/O */
ssize_t count, /* bytes actually processed */
max_count, /* max expected count */
bytes_left, /* bytes left to be sent */
error; /* any reported error */
struct completion completion; /* wait for i/o completion */
/* commit state */
struct nfs_mds_commit_info mds_cinfo; /* Storage for cinfo */
struct pnfs_ds_commit_info ds_cinfo; /* Storage for cinfo */
struct work_struct work;
int flags; /* presumably one of the NFS_ODIRECT_* values below -- confirm */
/* for write */
#define NFS_ODIRECT_DO_COMMIT (1) /* an unstable reply was received */
#define NFS_ODIRECT_RESCHED_WRITES (2) /* write verification failed */
/* for read */
#define NFS_ODIRECT_SHOULD_DIRTY (3) /* dirty user-space page after read */
#define NFS_ODIRECT_DONE INT_MAX /* NOTE(review): old comment repeated the RESCHED_WRITES text; presumably marks the request as finished -- confirm */
};

View File

@ -81,6 +81,7 @@ struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv,
int ds_proto, unsigned int ds_timeo, unsigned int ds_retrans)
{
struct rpc_timeout ds_timeout;
unsigned long connect_timeout = ds_timeo * (ds_retrans + 1) * HZ / 10;
struct nfs_client *mds_clp = mds_srv->nfs_client;
struct nfs_client_initdata cl_init = {
.addr = ds_addr,
@ -92,6 +93,8 @@ struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv,
.net = mds_clp->cl_net,
.timeparms = &ds_timeout,
.cred = mds_srv->cred,
.connect_timeout = connect_timeout,
.reconnect_timeout = connect_timeout,
};
struct nfs_client *clp;
char buf[INET6_ADDRSTRLEN + 1];
@ -106,6 +109,8 @@ struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv,
if (mds_srv->flags & NFS_MOUNT_NORESVPORT)
__set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
if (test_bit(NFS_CS_NETUNREACH_FATAL, &mds_clp->cl_flags))
__set_bit(NFS_CS_NETUNREACH_FATAL, &cl_init.init_flags);
__set_bit(NFS_CS_DS, &cl_init.init_flags);

View File

@ -229,9 +229,10 @@ struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init)
__set_bit(NFS_CS_INFINITE_SLOTS, &clp->cl_flags);
__set_bit(NFS_CS_DISCRTRY, &clp->cl_flags);
__set_bit(NFS_CS_NO_RETRANS_TIMEOUT, &clp->cl_flags);
if (test_bit(NFS_CS_DS, &cl_init->init_flags))
__set_bit(NFS_CS_DS, &clp->cl_flags);
if (test_bit(NFS_CS_PNFS, &cl_init->init_flags))
__set_bit(NFS_CS_PNFS, &clp->cl_flags);
if (test_bit(NFS_CS_NETUNREACH_FATAL, &cl_init->init_flags))
__set_bit(NFS_CS_NETUNREACH_FATAL, &clp->cl_flags);
/*
* Set up the connection to the server before we add it to the
* global list.
@ -930,6 +931,9 @@ static int nfs4_set_client(struct nfs_server *server,
__set_bit(NFS_CS_TSM_POSSIBLE, &cl_init.init_flags);
server->port = rpc_get_port(addr);
if (server->flags & NFS_MOUNT_NETUNREACH_FATAL)
__set_bit(NFS_CS_NETUNREACH_FATAL, &cl_init.init_flags);
/* Allocate or find a client reference we can use */
clp = nfs_get_client(&cl_init);
if (IS_ERR(clp))
@ -995,8 +999,9 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv,
if (mds_srv->flags & NFS_MOUNT_NORESVPORT)
__set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
if (test_bit(NFS_CS_NETUNREACH_FATAL, &mds_clp->cl_flags))
__set_bit(NFS_CS_NETUNREACH_FATAL, &cl_init.init_flags);
__set_bit(NFS_CS_DS, &cl_init.init_flags);
__set_bit(NFS_CS_PNFS, &cl_init.init_flags);
cl_init.max_connect = NFS_MAX_TRANSPORTS;
/*

View File

@ -188,6 +188,9 @@ static int nfs4_map_errors(int err)
return -EBUSY;
case -NFS4ERR_NOT_SAME:
return -ENOTSYNC;
case -ENETDOWN:
case -ENETUNREACH:
break;
default:
dprintk("%s could not handle NFSv4 error %d\n",
__func__, -err);
@ -622,6 +625,15 @@ nfs4_async_handle_exception(struct rpc_task *task, struct nfs_server *server,
struct nfs_client *clp = server->nfs_client;
int ret;
if ((task->tk_rpc_status == -ENETDOWN ||
task->tk_rpc_status == -ENETUNREACH) &&
task->tk_flags & RPC_TASK_NETUNREACH_FATAL) {
exception->delay = 0;
exception->recovering = 0;
exception->retry = 0;
return -EIO;
}
ret = nfs4_do_handle_exception(server, errorcode, exception);
if (exception->delay) {
rpc_delay(task, nfs4_update_delay(&exception->timeout));
@ -8690,7 +8702,7 @@ nfs4_run_exchange_id(struct nfs_client *clp, const struct cred *cred,
#ifdef CONFIG_NFS_V4_1_MIGRATION
calldata->args.flags |= EXCHGID4_FLAG_SUPP_MOVED_MIGR;
#endif
if (test_bit(NFS_CS_DS, &clp->cl_flags))
if (test_bit(NFS_CS_PNFS, &clp->cl_flags))
calldata->args.flags |= EXCHGID4_FLAG_USE_PNFS_DS;
msg.rpc_argp = &calldata->args;
msg.rpc_resp = &calldata->res;

View File

@ -1993,13 +1993,15 @@ DEFINE_PNFS_DEVICEID_STATUS(nfs4_find_deviceid);
DECLARE_EVENT_CLASS(nfs4_flexfiles_io_event,
TP_PROTO(
const struct nfs_pgio_header *hdr
const struct nfs_pgio_header *hdr,
int error
),
TP_ARGS(hdr),
TP_ARGS(hdr, error),
TP_STRUCT__entry(
__field(unsigned long, error)
__field(unsigned long, nfs_error)
__field(dev_t, dev)
__field(u32, fhandle)
__field(u64, fileid)
@ -2015,7 +2017,8 @@ DECLARE_EVENT_CLASS(nfs4_flexfiles_io_event,
TP_fast_assign(
const struct inode *inode = hdr->inode;
__entry->error = hdr->res.op_status;
__entry->error = -error;
__entry->nfs_error = hdr->res.op_status;
__entry->fhandle = nfs_fhandle_hash(hdr->args.fh);
__entry->fileid = NFS_FILEID(inode);
__entry->dev = inode->i_sb->s_dev;
@ -2032,7 +2035,8 @@ DECLARE_EVENT_CLASS(nfs4_flexfiles_io_event,
TP_printk(
"error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
"offset=%llu count=%u stateid=%d:0x%08x dstaddr=%s",
"offset=%llu count=%u stateid=%d:0x%08x dstaddr=%s "
"nfs_error=%lu (%s)",
-__entry->error,
show_nfs4_status(__entry->error),
MAJOR(__entry->dev), MINOR(__entry->dev),
@ -2040,28 +2044,32 @@ DECLARE_EVENT_CLASS(nfs4_flexfiles_io_event,
__entry->fhandle,
__entry->offset, __entry->count,
__entry->stateid_seq, __entry->stateid_hash,
__get_str(dstaddr)
__get_str(dstaddr), __entry->nfs_error,
show_nfs4_status(__entry->nfs_error)
)
);
#define DEFINE_NFS4_FLEXFILES_IO_EVENT(name) \
DEFINE_EVENT(nfs4_flexfiles_io_event, name, \
TP_PROTO( \
const struct nfs_pgio_header *hdr \
const struct nfs_pgio_header *hdr, \
int error \
), \
TP_ARGS(hdr))
TP_ARGS(hdr, error))
DEFINE_NFS4_FLEXFILES_IO_EVENT(ff_layout_read_error);
DEFINE_NFS4_FLEXFILES_IO_EVENT(ff_layout_write_error);
TRACE_EVENT(ff_layout_commit_error,
TP_PROTO(
const struct nfs_commit_data *data
const struct nfs_commit_data *data,
int error
),
TP_ARGS(data),
TP_ARGS(data, error),
TP_STRUCT__entry(
__field(unsigned long, error)
__field(unsigned long, nfs_error)
__field(dev_t, dev)
__field(u32, fhandle)
__field(u64, fileid)
@ -2075,7 +2083,8 @@ TRACE_EVENT(ff_layout_commit_error,
TP_fast_assign(
const struct inode *inode = data->inode;
__entry->error = data->res.op_status;
__entry->error = -error;
__entry->nfs_error = data->res.op_status;
__entry->fhandle = nfs_fhandle_hash(data->args.fh);
__entry->fileid = NFS_FILEID(inode);
__entry->dev = inode->i_sb->s_dev;
@ -2088,14 +2097,15 @@ TRACE_EVENT(ff_layout_commit_error,
TP_printk(
"error=%ld (%s) fileid=%02x:%02x:%llu fhandle=0x%08x "
"offset=%llu count=%u dstaddr=%s",
"offset=%llu count=%u dstaddr=%s nfs_error=%lu (%s)",
-__entry->error,
show_nfs4_status(__entry->error),
MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->fileid,
__entry->fhandle,
__entry->offset, __entry->count,
__get_str(dstaddr)
__get_str(dstaddr), __entry->nfs_error,
show_nfs4_status(__entry->nfs_error)
)
);

View File

@ -1483,6 +1483,75 @@ TRACE_EVENT(nfs_commit_done,
)
);
/*
 * Render a direct request's flags value as a "|"-separated list of the
 * names below, for use in TP_printk().
 *
 * NOTE(review): the NFS_ODIRECT_* values shown elsewhere in this diff are
 * 1, 2, 3 and INT_MAX rather than distinct bits, while __print_flags()
 * decodes its argument as a bitmask; __print_symbolic() may be the better
 * fit -- confirm before changing.
 */
#define nfs_show_direct_req_flags(v) \
__print_flags(v, "|", \
{ NFS_ODIRECT_DO_COMMIT, "DO_COMMIT" }, \
{ NFS_ODIRECT_RESCHED_WRITES, "RESCHED_WRITES" }, \
{ NFS_ODIRECT_SHOULD_DIRTY, "SHOULD DIRTY" }, \
{ NFS_ODIRECT_DONE, "DONE" } )
/*
 * Event class that snapshots a struct nfs_direct_req: the request pointer,
 * the device/fileid/filehandle hash of its inode, the current kref count,
 * and the io_start, count, bytes_left, error and flags fields.
 */
DECLARE_EVENT_CLASS(nfs_direct_req_class,
TP_PROTO(
const struct nfs_direct_req *dreq
),
TP_ARGS(dreq),
TP_STRUCT__entry(
__field(const struct nfs_direct_req *, dreq)
__field(dev_t, dev)
__field(u64, fileid)
__field(u32, fhandle)
__field(int, ref)
__field(loff_t, io_start)
__field(ssize_t, count)
__field(ssize_t, bytes_left)
__field(ssize_t, error)
__field(int, flags)
),
TP_fast_assign(
const struct inode *inode = dreq->inode;
const struct nfs_inode *nfsi = NFS_I(inode);
const struct nfs_fh *fh = &nfsi->fh;
/* Identify the request and the file it targets. */
__entry->dreq = dreq;
__entry->dev = inode->i_sb->s_dev;
__entry->fileid = nfsi->fileid;
__entry->fhandle = nfs_fhandle_hash(fh);
/* Reference count as read at the time the event fires. */
__entry->ref = kref_read(&dreq->kref);
/* Copy the request's progress and status fields verbatim. */
__entry->io_start = dreq->io_start;
__entry->count = dreq->count;
__entry->bytes_left = dreq->bytes_left;
__entry->error = dreq->error;
__entry->flags = dreq->flags;
),
/* fileid is printed as major:minor:fileid; flags are decoded by name. */
TP_printk(
"dreq=%p fileid=%02x:%02x:%llu fhandle=0x%08x ref=%d "
"io_start=%lld count=%zd bytes_left=%zd error=%zd flags=%s",
__entry->dreq, MAJOR(__entry->dev), MINOR(__entry->dev),
(unsigned long long)__entry->fileid,
__entry->fhandle, __entry->ref,
__entry->io_start, __entry->count, __entry->bytes_left,
__entry->error, nfs_show_direct_req_flags(__entry->flags)
)
);
/*
 * Define a tracepoint called @name that uses nfs_direct_req_class and
 * takes a single const struct nfs_direct_req pointer.
 */
#define DEFINE_NFS_DIRECT_REQ_EVENT(name) \
DEFINE_EVENT(nfs_direct_req_class, name, \
TP_PROTO( \
const struct nfs_direct_req *dreq \
), \
TP_ARGS(dreq))
/* Tracepoints instantiated from the class. */
DEFINE_NFS_DIRECT_REQ_EVENT(nfs_direct_commit_complete);
DEFINE_NFS_DIRECT_REQ_EVENT(nfs_direct_resched_write);
DEFINE_NFS_DIRECT_REQ_EVENT(nfs_direct_write_complete);
DEFINE_NFS_DIRECT_REQ_EVENT(nfs_direct_write_completion);
DEFINE_NFS_DIRECT_REQ_EVENT(nfs_direct_write_schedule_iovec);
DEFINE_NFS_DIRECT_REQ_EVENT(nfs_direct_write_reschedule_io);
TRACE_EVENT(nfs_fh_to_dentry,
TP_PROTO(
const struct super_block *sb,

View File

@ -353,10 +353,9 @@ out:
nfs_release_request(head);
}
static struct nfs_page *
__nfs_create_request(struct nfs_lock_context *l_ctx, struct page *page,
unsigned int pgbase, unsigned int offset,
unsigned int count)
static struct nfs_page *nfs_page_create(struct nfs_lock_context *l_ctx,
unsigned int pgbase, pgoff_t index,
unsigned int offset, unsigned int count)
{
struct nfs_page *req;
struct nfs_open_context *ctx = l_ctx->open_context;
@ -375,42 +374,51 @@ __nfs_create_request(struct nfs_lock_context *l_ctx, struct page *page,
/* Initialize the request struct. Initially, we assume a
* long write-back delay. This will be adjusted in
* update_nfs_request below if the region is not locked. */
req->wb_page = page;
if (page) {
req->wb_index = page_index(page);
get_page(page);
}
req->wb_offset = offset;
req->wb_pgbase = pgbase;
req->wb_bytes = count;
req->wb_pgbase = pgbase;
req->wb_index = index;
req->wb_offset = offset;
req->wb_bytes = count;
kref_init(&req->wb_kref);
req->wb_nio = 0;
return req;
}
/*
 * Attach @page to @req and take a reference on it.
 * A NULL @page leaves @req untouched.
 */
static void nfs_page_assign_page(struct nfs_page *req, struct page *page)
{
	if (!page)
		return;

	req->wb_page = page;
	get_page(page);
}
/**
* nfs_create_request - Create an NFS read/write request.
* nfs_page_create_from_page - Create an NFS read/write request.
* @ctx: open context to use
* @page: page to write
* @offset: starting offset within the page for the write
* @pgbase: starting offset within the page for the write
* @offset: file offset for the write
* @count: number of bytes to read/write
*
* The page must be locked by the caller. This makes sure we never
* create two different requests for the same page.
* User should ensure it is safe to sleep in this function.
*/
struct nfs_page *
nfs_create_request(struct nfs_open_context *ctx, struct page *page,
unsigned int offset, unsigned int count)
struct nfs_page *nfs_page_create_from_page(struct nfs_open_context *ctx,
struct page *page,
unsigned int pgbase, loff_t offset,
unsigned int count)
{
struct nfs_lock_context *l_ctx = nfs_get_lock_context(ctx);
struct nfs_page *ret;
if (IS_ERR(l_ctx))
return ERR_CAST(l_ctx);
ret = __nfs_create_request(l_ctx, page, offset, offset, count);
if (!IS_ERR(ret))
ret = nfs_page_create(l_ctx, pgbase, offset >> PAGE_SHIFT,
offset_in_page(offset), count);
if (!IS_ERR(ret)) {
nfs_page_assign_page(ret, page);
nfs_page_group_init(ret, NULL);
}
nfs_put_lock_context(l_ctx);
return ret;
}
@ -423,10 +431,12 @@ nfs_create_subreq(struct nfs_page *req,
{
struct nfs_page *last;
struct nfs_page *ret;
struct page *page = req->wb_page;
ret = __nfs_create_request(req->wb_lock_context, req->wb_page,
pgbase, offset, count);
ret = nfs_page_create(req->wb_lock_context, pgbase, req->wb_index,
offset, count);
if (!IS_ERR(ret)) {
nfs_page_assign_page(ret, page);
/* find the last request */
for (last = req->wb_head;
last->wb_this_page != req->wb_head;
@ -434,7 +444,6 @@ nfs_create_subreq(struct nfs_page *req,
;
nfs_lock_request(ret);
ret->wb_index = req->wb_index;
nfs_page_group_init(ret, last);
ret->wb_nio = req->wb_nio;
}
@ -1340,10 +1349,10 @@ void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index)
mirror = nfs_pgio_get_mirror(desc, midx);
if (!list_empty(&mirror->pg_list)) {
prev = nfs_list_entry(mirror->pg_list.prev);
if (index != prev->wb_index + 1) {
nfs_pageio_complete(desc);
break;
}
if (index == prev->wb_index + 1)
continue;
nfs_pageio_complete(desc);
break;
}
}
}

View File

@ -1541,6 +1541,18 @@ int pnfs_roc_done(struct rpc_task *task, struct nfs4_layoutreturn_args **argpp,
/* Was there an RPC level error? If not, retry */
if (task->tk_rpc_status == 0)
break;
/*
* Is there a fatal network level error?
* If so release the layout, but flag the error.
*/
if ((task->tk_rpc_status == -ENETDOWN ||
task->tk_rpc_status == -ENETUNREACH) &&
task->tk_flags & RPC_TASK_NETUNREACH_FATAL) {
*ret = 0;
(*respp)->lrs_present = 0;
retval = -EIO;
break;
}
/* If the call was not sent, let caller handle it */
if (!RPC_WAS_SENT(task))
return 0;
@ -2715,7 +2727,8 @@ pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *r
if (pgio->pg_dreq == NULL)
rd_size = i_size_read(pgio->pg_inode) - req_offset(req);
else
rd_size = nfs_dreq_bytes_left(pgio->pg_dreq);
rd_size = nfs_dreq_bytes_left(pgio->pg_dreq,
req_offset(req));
pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
nfs_req_openctx(req),

View File

@ -851,6 +851,7 @@ static int _nfs4_pnfs_v3_ds_connect(struct nfs_server *mds_srv,
{
struct nfs_client *clp = ERR_PTR(-EIO);
struct nfs4_pnfs_ds_addr *da;
unsigned long connect_timeout = timeo * (retrans + 1) * HZ / 10;
int status = 0;
dprintk("--> %s DS %s\n", __func__, ds->ds_remotestr);
@ -869,6 +870,8 @@ static int _nfs4_pnfs_v3_ds_connect(struct nfs_server *mds_srv,
.dstaddr = (struct sockaddr *)&da->da_addr,
.addrlen = da->da_addrlen,
.servername = clp->cl_hostname,
.connect_timeout = connect_timeout,
.reconnect_timeout = connect_timeout,
};
if (da->da_transport != clp->cl_proto)

View File

@ -193,10 +193,6 @@ static void nfs_initiate_read(struct nfs_pgio_header *hdr,
const struct nfs_rpc_ops *rpc_ops,
struct rpc_task_setup *task_setup_data, int how)
{
struct inode *inode = hdr->inode;
int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0;
task_setup_data->flags |= swap_flags;
rpc_ops->read_setup(hdr, msg);
trace_nfs_initiate_read(hdr);
}
@ -304,7 +300,8 @@ readpage_async_filler(void *data, struct page *page)
aligned_len = min_t(unsigned int, ALIGN(len, rsize), PAGE_SIZE);
new = nfs_create_request(desc->ctx, page, 0, aligned_len);
new = nfs_page_create_from_page(desc->ctx, page, 0,
page_index(page) << PAGE_SHIFT, aligned_len);
if (IS_ERR(new))
goto out_error;

View File

@ -419,6 +419,9 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
{ NFS_MOUNT_FORCE_RDIRPLUS, ",rdirplus=force", "" },
{ NFS_MOUNT_UNSHARED, ",nosharecache", "" },
{ NFS_MOUNT_NORESVPORT, ",noresvport", "" },
{ NFS_MOUNT_NETUNREACH_FATAL,
",fatal_neterrors=ENETDOWN:ENETUNREACH",
",fatal_neterrors=none" },
{ 0, NULL, NULL }
};
const struct proc_nfs_info *nfs_infop;

View File

@ -57,7 +57,8 @@ static const struct nfs_pgio_completion_ops nfs_async_write_completion_ops;
static const struct nfs_commit_completion_ops nfs_commit_completion_ops;
static const struct nfs_rw_ops nfs_rw_write_ops;
static void nfs_inode_remove_request(struct nfs_page *req);
static void nfs_clear_request_commit(struct nfs_page *req);
static void nfs_clear_request_commit(struct nfs_commit_info *cinfo,
struct nfs_page *req);
static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo,
struct inode *inode);
static struct nfs_page *
@ -451,8 +452,8 @@ nfs_destroy_unlinked_subrequests(struct nfs_page *destroy_list,
* the (former) group. All subrequests are removed from any write or commit
* lists, unlinked from the group and destroyed.
*/
void
nfs_join_page_group(struct nfs_page *head, struct inode *inode)
void nfs_join_page_group(struct nfs_page *head, struct nfs_commit_info *cinfo,
struct inode *inode)
{
struct nfs_page *subreq;
struct nfs_page *destroy_list = NULL;
@ -482,7 +483,7 @@ nfs_join_page_group(struct nfs_page *head, struct inode *inode)
* Commit list removal accounting is done after locks are dropped */
subreq = head;
do {
nfs_clear_request_commit(subreq);
nfs_clear_request_commit(cinfo, subreq);
subreq = subreq->wb_this_page;
} while (subreq != head);
@ -567,8 +568,10 @@ nfs_lock_and_join_requests(struct page *page)
{
struct inode *inode = page_file_mapping(page)->host;
struct nfs_page *head, *subreq;
struct nfs_commit_info cinfo;
int ret;
nfs_init_cinfo_from_inode(&cinfo, inode);
/*
* A reference is taken only on the head request which acts as a
* reference to the whole page group - the group will not be destroyed
@ -611,7 +614,7 @@ retry:
nfs_page_group_unlock(head);
nfs_join_page_group(head, inode);
nfs_join_page_group(head, &cinfo, inode);
return head;
out_unlock:
@ -978,18 +981,16 @@ nfs_clear_page_commit(struct page *page)
}
/* Called holding the request lock on @req */
static void
nfs_clear_request_commit(struct nfs_page *req)
static void nfs_clear_request_commit(struct nfs_commit_info *cinfo,
struct nfs_page *req)
{
if (test_bit(PG_CLEAN, &req->wb_flags)) {
struct nfs_open_context *ctx = nfs_req_openctx(req);
struct inode *inode = d_inode(ctx->dentry);
struct nfs_commit_info cinfo;
nfs_init_cinfo_from_inode(&cinfo, inode);
mutex_lock(&NFS_I(inode)->commit_mutex);
if (!pnfs_clear_request_commit(req, &cinfo)) {
nfs_request_remove_commit_list(req, &cinfo);
if (!pnfs_clear_request_commit(req, cinfo)) {
nfs_request_remove_commit_list(req, cinfo);
}
mutex_unlock(&NFS_I(inode)->commit_mutex);
nfs_clear_page_commit(req->wb_page);
@ -1175,11 +1176,13 @@ static struct nfs_page * nfs_setup_write_request(struct nfs_open_context* ctx,
{
struct inode *inode = page_file_mapping(page)->host;
struct nfs_page *req;
loff_t file_offset;
req = nfs_try_to_update_request(inode, page, offset, bytes);
if (req != NULL)
goto out;
req = nfs_create_request(ctx, page, offset, bytes);
file_offset = ((loff_t)page_index(page) << PAGE_SHIFT) + offset;
req = nfs_page_create_from_page(ctx, page, offset, file_offset, bytes);
if (IS_ERR(req))
goto out;
nfs_inode_add_request(inode, req);
@ -1572,25 +1575,37 @@ static int nfs_writeback_done(struct rpc_task *task,
nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, hdr->res.count);
trace_nfs_writeback_done(task, hdr);
if (hdr->res.verf->committed < hdr->args.stable &&
task->tk_status >= 0) {
/* We tried a write call, but the server did not
* commit data to stable storage even though we
* requested it.
* Note: There is a known bug in Tru64 < 5.0 in which
* the server reports NFS_DATA_SYNC, but performs
* NFS_FILE_SYNC. We therefore implement this checking
* as a dprintk() in order to avoid filling syslog.
*/
static unsigned long complain;
if (task->tk_status >= 0) {
enum nfs3_stable_how committed = hdr->res.verf->committed;
/* Note this will print the MDS for a DS write */
if (time_before(complain, jiffies)) {
dprintk("NFS: faulty NFS server %s:"
" (committed = %d) != (stable = %d)\n",
NFS_SERVER(inode)->nfs_client->cl_hostname,
hdr->res.verf->committed, hdr->args.stable);
complain = jiffies + 300 * HZ;
if (committed == NFS_UNSTABLE) {
/*
* We have some uncommitted data on the server at
* this point, so ensure that we keep track of that
* fact irrespective of what later writes do.
*/
set_bit(NFS_IOHDR_UNSTABLE_WRITES, &hdr->flags);
}
if (committed < hdr->args.stable) {
/* We tried a write call, but the server did not
* commit data to stable storage even though we
* requested it.
* Note: There is a known bug in Tru64 < 5.0 in which
* the server reports NFS_DATA_SYNC, but performs
* NFS_FILE_SYNC. We therefore implement this checking
* as a dprintk() in order to avoid filling syslog.
*/
static unsigned long complain;
/* Note this will print the MDS for a DS write */
if (time_before(complain, jiffies)) {
dprintk("NFS: faulty NFS server %s:"
" (committed = %d) != (stable = %d)\n",
NFS_SERVER(inode)->nfs_client->cl_hostname,
committed, hdr->args.stable);
complain = jiffies + 300 * HZ;
}
}
}

View File

@ -296,6 +296,7 @@ enum nfsstat4 {
/* error codes for internal client use */
#define NFS4ERR_RESET_TO_MDS 12001
#define NFS4ERR_RESET_TO_PNFS 12002
#define NFS4ERR_FATAL_IOERROR 12003
static inline bool seqid_mutating_err(u32 err)
{

View File

@ -45,11 +45,6 @@
*/
#define NFS_MAX_TRANSPORTS 16
/*
* These are the default flags for swap requests
*/
#define NFS_RPC_SWAPFLAGS (RPC_TASK_SWAPPER|RPC_TASK_ROOTCREDS)
/*
* Size of the NFS directory verifier
*/

View File

@ -49,6 +49,7 @@ struct nfs_client {
#define NFS_CS_DS 7 /* - Server is a DS */
#define NFS_CS_REUSEPORT 8 /* - reuse src port on reconnect */
#define NFS_CS_PNFS 9 /* - Server used for pnfs */
#define NFS_CS_NETUNREACH_FATAL 10 /* - ENETUNREACH errors are fatal */
struct sockaddr_storage cl_addr; /* server identifier */
size_t cl_addrlen;
char * cl_hostname; /* hostname of server */
@ -159,6 +160,7 @@ struct nfs_server {
#define NFS_MOUNT_WRITE_WAIT 0x02000000
#define NFS_MOUNT_TRUNK_DISCOVERY 0x04000000
#define NFS_MOUNT_FORCE_RDIRPLUS 0x20000000
#define NFS_MOUNT_NETUNREACH_FATAL 0x40000000
unsigned int caps; /* server capabilities */
unsigned int rsize; /* read size */

View File

@ -117,10 +117,11 @@ struct nfs_pageio_descriptor {
#define NFS_WBACK_BUSY(req) (test_bit(PG_BUSY,&(req)->wb_flags))
extern struct nfs_page *nfs_create_request(struct nfs_open_context *ctx,
struct page *page,
unsigned int offset,
unsigned int count);
extern struct nfs_page *nfs_page_create_from_page(struct nfs_open_context *ctx,
struct page *page,
unsigned int pgbase,
loff_t offset,
unsigned int count);
extern void nfs_release_request(struct nfs_page *);
@ -144,7 +145,9 @@ extern int nfs_wait_on_request(struct nfs_page *);
extern void nfs_unlock_request(struct nfs_page *req);
extern void nfs_unlock_and_release_request(struct nfs_page *);
extern struct nfs_page *nfs_page_group_lock_head(struct nfs_page *req);
extern void nfs_join_page_group(struct nfs_page *head, struct inode *inode);
extern void nfs_join_page_group(struct nfs_page *head,
struct nfs_commit_info *cinfo,
struct inode *inode);
extern int nfs_page_group_lock(struct nfs_page *);
extern void nfs_page_group_unlock(struct nfs_page *);
extern bool nfs_page_group_sync_on_bit(struct nfs_page *, unsigned int);

View File

@ -1571,6 +1571,7 @@ enum {
NFS_IOHDR_STAT,
NFS_IOHDR_RESEND_PNFS,
NFS_IOHDR_RESEND_MDS,
NFS_IOHDR_UNSTABLE_WRITES,
};
struct nfs_io_completion;

View File

@ -34,6 +34,9 @@
#define NVME_NSID_ALL 0xffffffff
/* Special NSSR value, 'NVMe' */
#define NVME_SUBSYS_RESET 0x4E564D65
enum nvme_subsys_type {
NVME_NQN_DISC = 1, /* Discovery type target subsystem */
NVME_NQN_NVME = 2, /* NVME type target subsystem */

View File

@ -57,7 +57,9 @@ struct rpc_clnt {
cl_discrtry : 1,/* disconnect before retry */
cl_noretranstimeo: 1,/* No retransmit timeouts */
cl_autobind : 1,/* use getport() */
cl_chatty : 1;/* be verbose */
cl_chatty : 1,/* be verbose */
cl_netunreach_fatal : 1;
/* Treat ENETUNREACH errors as fatal */
struct rpc_rtt * cl_rtt; /* RTO estimator data */
const struct rpc_timeout *cl_timeout; /* Timeout strategy */
@ -140,6 +142,8 @@ struct rpc_create_args {
struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */
const struct cred *cred;
unsigned int max_connect;
unsigned long connect_timeout;
unsigned long reconnect_timeout;
};
struct rpc_add_xprt_test {
@ -162,6 +166,7 @@ struct rpc_add_xprt_test {
#define RPC_CLNT_CREATE_SOFTERR (1UL << 10)
#define RPC_CLNT_CREATE_REUSEPORT (1UL << 11)
#define RPC_CLNT_CREATE_CONNECTED (1UL << 12)
#define RPC_CLNT_CREATE_NETUNREACH_FATAL (1UL << 13)
struct rpc_clnt *rpc_create(struct rpc_create_args *args);
struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *,

View File

@ -123,7 +123,7 @@ struct rpc_task_setup {
#define RPC_TASK_MOVEABLE 0x0004 /* nfs4.1+ rpc tasks */
#define RPC_TASK_NULLCREDS 0x0010 /* Use AUTH_NULL credential */
#define RPC_CALL_MAJORSEEN 0x0020 /* major timeout seen */
#define RPC_TASK_ROOTCREDS 0x0040 /* force root creds */
#define RPC_TASK_NETUNREACH_FATAL 0x0040 /* ENETUNREACH is fatal */
#define RPC_TASK_DYNAMIC 0x0080 /* task was kmalloc'ed */
#define RPC_TASK_NO_ROUND_ROBIN 0x0100 /* send requests on "main" xprt */
#define RPC_TASK_SOFT 0x0200 /* Use soft timeouts */

View File

@ -332,6 +332,8 @@ struct xprt_create {
struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */
struct rpc_xprt_switch *bc_xps;
unsigned int flags;
unsigned long connect_timeout;
unsigned long reconnect_timeout;
};
struct xprt_class {

View File

@ -310,7 +310,7 @@ TRACE_EVENT(rpc_request,
{ RPC_TASK_SWAPPER, "SWAPPER" }, \
{ RPC_TASK_NULLCREDS, "NULLCREDS" }, \
{ RPC_CALL_MAJORSEEN, "MAJORSEEN" }, \
{ RPC_TASK_ROOTCREDS, "ROOTCREDS" }, \
{ RPC_TASK_NETUNREACH_FATAL, "NETUNREACH_FATAL"}, \
{ RPC_TASK_DYNAMIC, "DYNAMIC" }, \
{ RPC_TASK_NO_ROUND_ROBIN, "NO_ROUND_ROBIN" }, \
{ RPC_TASK_SOFT, "SOFT" }, \

View File

@ -2177,6 +2177,7 @@ err2:
nft_use_dec_restore(&table->use);
err_use:
nf_tables_unregister_hook(net, table, chain);
synchronize_rcu();
err1:
nf_tables_chain_destroy(ctx);

View File

@ -665,7 +665,7 @@ rpcauth_bindcred(struct rpc_task *task, const struct cred *cred, int flags)
/* If machine cred couldn't be bound, try a root cred */
if (new)
;
else if (cred == &machine_cred || (flags & RPC_TASK_ROOTCREDS))
else if (cred == &machine_cred)
new = rpcauth_bind_root_cred(task, lookupflags);
else if (flags & RPC_TASK_NULLCREDS)
new = authnull_ops.lookup_cred(NULL, NULL, 0);

View File

@ -509,6 +509,8 @@ static struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args,
clnt->cl_discrtry = 1;
if (!(args->flags & RPC_CLNT_CREATE_QUIET))
clnt->cl_chatty = 1;
if (args->flags & RPC_CLNT_CREATE_NETUNREACH_FATAL)
clnt->cl_netunreach_fatal = 1;
return clnt;
}
@ -534,6 +536,8 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args)
.addrlen = args->addrsize,
.servername = args->servername,
.bc_xprt = args->bc_xprt,
.connect_timeout = args->connect_timeout,
.reconnect_timeout = args->reconnect_timeout,
};
char servername[48];
struct rpc_clnt *clnt;
@ -652,6 +656,7 @@ static struct rpc_clnt *__rpc_clone_client(struct rpc_create_args *args,
new->cl_noretranstimeo = clnt->cl_noretranstimeo;
new->cl_discrtry = clnt->cl_discrtry;
new->cl_chatty = clnt->cl_chatty;
new->cl_netunreach_fatal = clnt->cl_netunreach_fatal;
new->cl_principal = clnt->cl_principal;
new->cl_max_connect = clnt->cl_max_connect;
return new;
@ -1100,6 +1105,8 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt)
task->tk_flags |= RPC_TASK_TIMEOUT;
if (clnt->cl_noretranstimeo)
task->tk_flags |= RPC_TASK_NO_RETRANS_TIMEOUT;
if (clnt->cl_netunreach_fatal)
task->tk_flags |= RPC_TASK_NETUNREACH_FATAL;
if (atomic_read(&clnt->cl_swapper))
task->tk_flags |= RPC_TASK_SWAPPER;
/* Add to the client's list of all tasks */
@ -1997,14 +2004,17 @@ call_bind_status(struct rpc_task *task)
case -EPROTONOSUPPORT:
trace_rpcb_bind_version_err(task);
goto retry_timeout;
case -ENETDOWN:
case -ENETUNREACH:
if (task->tk_flags & RPC_TASK_NETUNREACH_FATAL)
break;
fallthrough;
case -ECONNREFUSED: /* connection problems */
case -ECONNRESET:
case -ECONNABORTED:
case -ENOTCONN:
case -EHOSTDOWN:
case -ENETDOWN:
case -EHOSTUNREACH:
case -ENETUNREACH:
case -EPIPE:
trace_rpcb_unreachable_err(task);
if (!RPC_IS_SOFTCONN(task)) {
@ -2086,19 +2096,22 @@ call_connect_status(struct rpc_task *task)
task->tk_status = 0;
switch (status) {
case -ECONNREFUSED:
/* A positive refusal suggests a rebind is needed. */
if (RPC_IS_SOFTCONN(task))
break;
if (clnt->cl_autobind) {
rpc_force_rebind(clnt);
goto out_retry;
}
/* fall through */
case -ECONNRESET:
case -ECONNABORTED:
case -ENETDOWN:
case -ENETUNREACH:
if (task->tk_flags & RPC_TASK_NETUNREACH_FATAL)
break;
fallthrough;
case -ECONNREFUSED:
case -ECONNRESET:
/* A positive refusal suggests a rebind is needed. */
if (clnt->cl_autobind) {
rpc_force_rebind(clnt);
if (RPC_IS_SOFTCONN(task))
break;
goto out_retry;
}
fallthrough;
case -ECONNABORTED:
case -EHOSTUNREACH:
case -EPIPE:
case -EPROTO:
@ -2108,7 +2121,7 @@ call_connect_status(struct rpc_task *task)
break;
/* retry with existing socket, after a delay */
rpc_delay(task, 3*HZ);
/* fall through */
fallthrough;
case -EADDRINUSE:
case -ENOTCONN:
case -EAGAIN:
@ -2349,10 +2362,13 @@ call_status(struct rpc_task *task)
trace_rpc_call_status(task);
task->tk_status = 0;
switch(status) {
case -EHOSTDOWN:
case -ENETDOWN:
case -EHOSTUNREACH:
case -ENETUNREACH:
if (task->tk_flags & RPC_TASK_NETUNREACH_FATAL)
goto out_exit;
fallthrough;
case -EHOSTDOWN:
case -EHOSTUNREACH:
case -EPERM:
if (RPC_IS_SOFTCONN(task))
goto out_exit;
@ -2977,6 +2993,11 @@ int rpc_clnt_add_xprt(struct rpc_clnt *clnt,
}
xprt->resvport = resvport;
xprt->reuseport = reuseport;
if (xprtargs->connect_timeout)
connect_timeout = xprtargs->connect_timeout;
if (xprtargs->reconnect_timeout)
reconnect_timeout = xprtargs->reconnect_timeout;
if (xprt->ops->set_connect_timeout != NULL)
xprt->ops->set_connect_timeout(xprt,
connect_timeout,

View File

@ -2168,9 +2168,13 @@ static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt,
struct socket *sock)
{
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
struct net *net = sock_net(sock->sk);
unsigned long connect_timeout;
unsigned long syn_retries;
unsigned int keepidle;
unsigned int keepcnt;
unsigned int timeo;
unsigned long t;
spin_lock(&xprt->transport_lock);
keepidle = DIV_ROUND_UP(xprt->timeout->to_initval, HZ);
@ -2191,6 +2195,35 @@ static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt,
/* TCP user timeout (see RFC5482) */
kernel_setsockopt(sock, SOL_TCP, TCP_USER_TIMEOUT,
(char *)&timeo, sizeof(timeo));
/* Connect timeout */
connect_timeout = max_t(unsigned long,
DIV_ROUND_UP(xprt->connect_timeout, HZ), 1);
syn_retries = max_t(unsigned long,
READ_ONCE(net->ipv4.sysctl_tcp_syn_retries), 1);
for (t = 0; t <= syn_retries && (1UL << t) < connect_timeout; t++)
;
if (t <= syn_retries)
tcp_sock_set_syncnt(sock->sk, t - 1);
}
/*
 * xs_tcp_do_set_connect_timeout - install a connect timeout on a TCP transport
 * @xprt: transport to update
 * @connect_timeout: requested connect timeout; presumably in jiffies, since
 *	xs_tcp_set_socket_timeouts() divides xprt->connect_timeout by HZ
 *	(TODO confirm against callers)
 *
 * Builds a timeout with to_retries == 0 whose initial and maximum values are
 * both max(@connect_timeout, XS_TCP_INIT_REEST_TO), stores it in the
 * transport-private tcp_timeout slot and points xprt->timeout at that slot.
 * xprt->connect_timeout records the caller's value as given, i.e. without
 * the lower-limit clamp applied to the timeout values.
 *
 * This helper takes no locks itself; xs_tcp_set_connect_timeout() invokes it
 * with xprt->transport_lock held.
 */
static void xs_tcp_do_set_connect_timeout(struct rpc_xprt *xprt,
unsigned long connect_timeout)
{
struct sock_xprt *transport =
container_of(xprt, struct sock_xprt, xprt);
struct rpc_timeout to;
unsigned long initval;
/*
 * Start from the timeout currently in effect so that any fields not
 * overridden below keep their present values.
 */
memcpy(&to, xprt->timeout, sizeof(to));
/* Arbitrary lower limit */
initval = max_t(unsigned long, connect_timeout, XS_TCP_INIT_REEST_TO);
to.to_initval = initval;
to.to_maxval = initval;
/* No retries: the whole budget is spent in a single interval. */
to.to_retries = 0;
/* Publish the new parameters through the per-transport copy. */
memcpy(&transport->tcp_timeout, &to, sizeof(transport->tcp_timeout));
xprt->timeout = &transport->tcp_timeout;
/* Keep the unclamped request; it is read back as xprt->connect_timeout. */
xprt->connect_timeout = connect_timeout;
}
static void xs_tcp_set_connect_timeout(struct rpc_xprt *xprt,
@ -2198,25 +2231,12 @@ static void xs_tcp_set_connect_timeout(struct rpc_xprt *xprt,
unsigned long reconnect_timeout)
{
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
struct rpc_timeout to;
unsigned long initval;
spin_lock(&xprt->transport_lock);
if (reconnect_timeout < xprt->max_reconnect_timeout)
xprt->max_reconnect_timeout = reconnect_timeout;
if (connect_timeout < xprt->connect_timeout) {
memcpy(&to, xprt->timeout, sizeof(to));
initval = DIV_ROUND_UP(connect_timeout, to.to_retries + 1);
/* Arbitrary lower limit */
if (initval < XS_TCP_INIT_REEST_TO << 1)
initval = XS_TCP_INIT_REEST_TO << 1;
to.to_initval = initval;
to.to_maxval = initval;
memcpy(&transport->tcp_timeout, &to,
sizeof(transport->tcp_timeout));
xprt->timeout = &transport->tcp_timeout;
xprt->connect_timeout = connect_timeout;
}
if (connect_timeout < xprt->connect_timeout)
xs_tcp_do_set_connect_timeout(xprt, connect_timeout);
set_bit(XPRT_SOCK_UPD_TIMEOUT, &transport->sock_state);
spin_unlock(&xprt->transport_lock);
}
@ -3011,8 +3031,13 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
xprt->timeout = &xs_tcp_default_timeout;
xprt->max_reconnect_timeout = xprt->timeout->to_maxval;
if (args->reconnect_timeout)
xprt->max_reconnect_timeout = args->reconnect_timeout;
xprt->connect_timeout = xprt->timeout->to_initval *
(xprt->timeout->to_retries + 1);
if (args->connect_timeout)
xs_tcp_do_set_connect_timeout(xprt, args->connect_timeout);
INIT_WORK(&transport->recv_worker, xs_stream_data_receive_workfn);
INIT_WORK(&transport->error_worker, xs_error_handle);