diff --git a/Makefile.rhelver b/Makefile.rhelver index 76dfd0e62d..bffe118e16 100644 --- a/Makefile.rhelver +++ b/Makefile.rhelver @@ -12,7 +12,7 @@ RHEL_MINOR = 10 # # Use this spot to avoid future merge conflicts. # Do not trim this comment. -RHEL_RELEASE = 553.89.1 +RHEL_RELEASE = 553.92.1 # # ZSTREAM diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 5a342a9b69..3b66750888 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -2,12 +2,14 @@ #include #include #include +#include #include #include #include #include #include +#include #include "../perf_event.h" @@ -1541,6 +1543,27 @@ static u64 get_data_src(struct perf_event *event, u64 aux) return val; } +static void setup_pebs_time(struct perf_event *event, + struct perf_sample_data *data, + u64 tsc) +{ + /* Converting to a user-defined clock is not supported yet. */ + if (event->attr.use_clockid != 0) + return; + + /* + * Doesn't support the conversion when the TSC is unstable. + * The TSC unstable case is a corner case and very unlikely to + * happen. If it happens, the TSC in a PEBS record will be + * dropped and fall back to perf_event_clock(). + */ + if (!using_native_sched_clock() || !sched_clock_stable()) + return; + + data->time = native_sched_clock_from_tsc(tsc) + __sched_clock_offset; + data->sample_flags |= PERF_SAMPLE_TIME; +} + static void setup_pebs_fixed_sample_data(struct perf_event *event, struct pt_regs *iregs, void *__pebs, struct perf_sample_data *data, @@ -1675,9 +1698,8 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event, * * We can only do this for the default trace clock. */ - if (x86_pmu.intel_cap.pebs_format >= 3 && - event->attr.use_clockid == 0) - data->time = native_sched_clock_from_tsc(pebs->tsc); + if (x86_pmu.intel_cap.pebs_format >= 3) + setup_pebs_time(event, data, pebs->tsc); if (has_branch_stack(event)) data->br_stack = &cpuc->lbr_stack; @@ -1739,8 +1761,7 @@ static void setup_pebs_adaptive_sample_data(struct perf_event *event, perf_sample_data_init(data, 0, event->hw.last_period); data->period = event->hw.last_period; - if (event->attr.use_clockid == 0) - data->time = native_sched_clock_from_tsc(basic->tsc); + setup_pebs_time(event, data, basic->tsc); /* * We must however always use iregs for the unwinder to stay sane; the diff --git a/block/blk-mq.c b/block/blk-mq.c index b596285960..bf69e69508 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -25,6 +25,7 @@ #include #include #include +#include RH_KABI_HIDE_INCLUDE() #include @@ -1611,6 +1612,15 @@ static inline int blk_mq_first_mapped_cpu(struct blk_mq_hw_ctx *hctx) return cpu; } +/* + * ->next_cpu is always calculated from hctx->cpumask, so simply use + * it for speeding up the check + */ +static bool blk_mq_hctx_empty_cpumask(struct blk_mq_hw_ctx *hctx) +{ + return hctx->next_cpu >= nr_cpu_ids; +} + /* * It'd be great if the workqueue API had a way to pass * in a mask and had some smarts for more clever placement. @@ -1622,7 +1632,8 @@ static int blk_mq_hctx_next_cpu(struct blk_mq_hw_ctx *hctx) bool tried = false; int next_cpu = hctx->next_cpu; - if (hctx->queue->nr_hw_queues == 1) + /* Switch to unbound if no allowable CPUs in this hctx */ + if (hctx->queue->nr_hw_queues == 1 || blk_mq_hctx_empty_cpumask(hctx)) return WORK_CPU_UNBOUND; if (--hctx->next_cpu_batch <= 0) { @@ -2648,14 +2659,30 @@ static bool blk_mq_hctx_has_requests(struct blk_mq_hw_ctx *hctx) return data.has_rq; } -static inline bool blk_mq_last_cpu_in_hctx(unsigned int cpu, - struct blk_mq_hw_ctx *hctx) +static bool blk_mq_hctx_has_online_cpu(struct blk_mq_hw_ctx *hctx, + unsigned int this_cpu) { - if (cpumask_next_and(-1, hctx->cpumask, cpu_online_mask) != cpu) - return false; - if (cpumask_next_and(cpu, hctx->cpumask, cpu_online_mask) < nr_cpu_ids) - return false; - return true; + enum hctx_type type = hctx->type; + int cpu; + + /* + * hctx->cpumask has to rule out isolated CPUs, but userspace still + * might submit IOs on these isolated CPUs, so use the queue map to + * check if all CPUs mapped to this hctx are offline + */ + for_each_online_cpu(cpu) { + struct blk_mq_hw_ctx *h = blk_mq_map_queue_type(hctx->queue, + type, cpu); + + if (h != hctx) + continue; + + /* this hctx has at least one online CPU */ + if (this_cpu != cpu) + return true; + } + + return false; } static int blk_mq_hctx_notify_offline(unsigned int cpu, struct hlist_node *node) @@ -2663,8 +2690,7 @@ static int blk_mq_hctx_notify_offline(unsigned int cpu, struct hlist_node *node) struct blk_mq_hw_ctx *hctx = hlist_entry_safe(node, struct blk_mq_hw_ctx, cpuhp_online); - if (!cpumask_test_cpu(cpu, hctx->cpumask) || - !blk_mq_last_cpu_in_hctx(cpu, hctx)) + if (blk_mq_hctx_has_online_cpu(hctx, cpu)) return 0; /* @@ -2691,12 +2717,28 @@ static int blk_mq_hctx_notify_offline(unsigned int cpu, struct hlist_node *node) return 0; } +/* + * Check if one CPU is mapped to the specified hctx + * + * Isolated CPUs have been ruled out from hctx->cpumask, which is supposed + * to be used for scheduling kworker only. For other usage, please call this + * helper for checking if one CPU belongs to the specified hctx + */ +static bool blk_mq_cpu_mapped_to_hctx(unsigned int cpu, + const struct blk_mq_hw_ctx *hctx) +{ + struct blk_mq_hw_ctx *mapped_hctx = blk_mq_map_queue_type(hctx->queue, + hctx->type, cpu); + + return mapped_hctx == hctx; +} + static int blk_mq_hctx_notify_online(unsigned int cpu, struct hlist_node *node) { struct blk_mq_hw_ctx *hctx = hlist_entry_safe(node, struct blk_mq_hw_ctx, cpuhp_online); - if (cpumask_test_cpu(cpu, hctx->cpumask)) + if (blk_mq_cpu_mapped_to_hctx(cpu, hctx)) clear_bit(BLK_MQ_S_INACTIVE, &hctx->state); return 0; } @@ -2714,7 +2756,7 @@ static int blk_mq_hctx_notify_dead(unsigned int cpu, struct hlist_node *node) enum hctx_type type; hctx = hlist_entry_safe(node, struct blk_mq_hw_ctx, cpuhp_dead); - if (!cpumask_test_cpu(cpu, hctx->cpumask)) + if (!blk_mq_cpu_mapped_to_hctx(cpu, hctx)) return 0; ctx = __blk_mq_get_ctx(hctx->queue, cpu); @@ -3057,6 +3099,8 @@ static void blk_mq_map_swqueue(struct request_queue *q) } queue_for_each_hw_ctx(q, hctx, i) { + int cpu; + /* * If no software queues are mapped to this hardware queue, * disable it and free the request entries. @@ -3083,6 +3127,15 @@ static void blk_mq_map_swqueue(struct request_queue *q) */ sbitmap_resize(&hctx->ctx_map, hctx->nr_ctx); + /* + * Rule out isolated CPUs from hctx->cpumask to avoid + * running block kworker on isolated CPUs + */ + for_each_cpu(cpu, hctx->cpumask) { + if (cpu_is_isolated(cpu)) + cpumask_clear_cpu(cpu, hctx->cpumask); + } + /* * Initialize batch roundrobin counts */ @@ -3392,6 +3445,12 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, if (!q->poll_cb) goto err_exit; + /* + * ->tag_set has to be setup before initialize hctx, which cpuphp + * handler needs it for checking queue mapping + */ + q->tag_set = set; + if (blk_mq_alloc_ctxs(q)) goto err_exit; @@ -3408,8 +3467,6 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, INIT_WORK(&q->timeout_work, blk_mq_timeout_work); blk_queue_rq_timeout(q, set->timeout ? set->timeout : 30 * HZ); - q->tag_set = set; - q->queue_flags |= QUEUE_FLAG_MQ_DEFAULT; if (set->nr_maps > HCTX_TYPE_POLL && set->map[HCTX_TYPE_POLL].nr_queues) diff --git a/drivers/media/rc/imon.c b/drivers/media/rc/imon.c index 1041c05685..eb89608895 100644 --- a/drivers/media/rc/imon.c +++ b/drivers/media/rc/imon.c @@ -92,6 +92,7 @@ struct imon_usb_dev_descr { __u16 flags; #define IMON_NO_FLAGS 0 #define IMON_NEED_20MS_PKT_DELAY 1 +#define IMON_SUPPRESS_REPEATED_KEYS 2 struct imon_panel_key_table key_table[]; }; @@ -158,8 +159,27 @@ struct imon_context { struct timer_list ttimer; /* touch screen timer */ int touch_x; /* x coordinate on touchscreen */ int touch_y; /* y coordinate on touchscreen */ - struct imon_usb_dev_descr *dev_descr; /* device description with key - table for front panels */ + const struct imon_usb_dev_descr *dev_descr; + /* device description with key */ + /* table for front panels */ + /* + * Fields for deferring free_imon_context(). + * + * Since reference to "struct imon_context" is stored into + * "struct file"->private_data, we need to remember + * how many file descriptors might access this "struct imon_context". + */ + refcount_t users; + /* + * Use a flag for telling display_open()/vfd_write()/lcd_write() that + * imon_disconnect() was already called. + */ + bool disconnected; + /* + * We need to wait for RCU grace period in order to allow + * display_open() to safely check ->disconnected and increment ->users. + */ + struct rcu_head rcu; }; #define TOUCH_TIMEOUT (HZ/30) @@ -167,18 +187,18 @@ struct imon_context { /* vfd character device file operations */ static const struct file_operations vfd_fops = { .owner = THIS_MODULE, - .open = &display_open, - .write = &vfd_write, - .release = &display_close, + .open = display_open, + .write = vfd_write, + .release = display_close, .llseek = noop_llseek, }; /* lcd character device file operations */ static const struct file_operations lcd_fops = { .owner = THIS_MODULE, - .open = &display_open, - .write = &lcd_write, - .release = &display_close, + .open = display_open, + .write = lcd_write, + .release = display_close, .llseek = noop_llseek, }; @@ -324,6 +344,32 @@ static const struct imon_usb_dev_descr imon_DH102 = { } }; +/* imon ultrabay front panel key table */ +static const struct imon_usb_dev_descr ultrabay_table = { + .flags = IMON_SUPPRESS_REPEATED_KEYS, + .key_table = { + { 0x0000000f0000ffeell, KEY_MEDIA }, /* Go */ + { 0x000000000100ffeell, KEY_UP }, + { 0x000000000001ffeell, KEY_DOWN }, + { 0x000000160000ffeell, KEY_ENTER }, + { 0x0000001f0000ffeell, KEY_AUDIO }, /* Music */ + { 0x000000200000ffeell, KEY_VIDEO }, /* Movie */ + { 0x000000210000ffeell, KEY_CAMERA }, /* Photo */ + { 0x000000270000ffeell, KEY_DVD }, /* DVD */ + { 0x000000230000ffeell, KEY_TV }, /* TV */ + { 0x000000050000ffeell, KEY_PREVIOUS }, /* Previous */ + { 0x000000070000ffeell, KEY_REWIND }, + { 0x000000040000ffeell, KEY_STOP }, + { 0x000000020000ffeell, KEY_PLAYPAUSE }, + { 0x000000080000ffeell, KEY_FASTFORWARD }, + { 0x000000060000ffeell, KEY_NEXT }, /* Next */ + { 0x000100000000ffeell, KEY_VOLUMEUP }, + { 0x010000000000ffeell, KEY_VOLUMEDOWN }, + { 0x000000010000ffeell, KEY_MUTE }, + { 0, KEY_RESERVED }, + } +}; + /* * USB Device ID for iMON USB Control Boards * @@ -420,9 +466,6 @@ static struct usb_driver imon_driver = { .id_table = imon_usb_id_table, }; -/* to prevent races between open() and disconnect(), probing, etc */ -static DEFINE_MUTEX(driver_lock); - /* Module bookkeeping bits */ MODULE_AUTHOR(MOD_AUTHOR); MODULE_DESCRIPTION(MOD_DESC); @@ -462,9 +505,11 @@ static void free_imon_context(struct imon_context *ictx) struct device *dev = ictx->dev; usb_free_urb(ictx->tx_urb); + WARN_ON(ictx->dev_present_intf0); usb_free_urb(ictx->rx_urb_intf0); + WARN_ON(ictx->dev_present_intf1); usb_free_urb(ictx->rx_urb_intf1); - kfree(ictx); + kfree_rcu(ictx, rcu); dev_dbg(dev, "%s: iMON context freed\n", __func__); } @@ -480,9 +525,6 @@ static int display_open(struct inode *inode, struct file *file) int subminor; int retval = 0; - /* prevent races with disconnect */ - mutex_lock(&driver_lock); - subminor = iminor(inode); interface = usb_find_interface(&imon_driver, subminor); if (!interface) { @@ -490,17 +532,22 @@ static int display_open(struct inode *inode, struct file *file) retval = -ENODEV; goto exit; } - ictx = usb_get_intfdata(interface); - if (!ictx) { + rcu_read_lock(); + ictx = usb_get_intfdata(interface); + if (!ictx || ictx->disconnected || !refcount_inc_not_zero(&ictx->users)) { + rcu_read_unlock(); pr_err("no context found for minor %d\n", subminor); retval = -ENODEV; goto exit; } + rcu_read_unlock(); mutex_lock(&ictx->lock); - if (!ictx->display_supported) { + if (ictx->disconnected) { + retval = -ENODEV; + } else if (!ictx->display_supported) { pr_err("display not supported by device\n"); retval = -ENODEV; } else if (ictx->display_isopen) { @@ -514,8 +561,10 @@ static int display_open(struct inode *inode, struct file *file) mutex_unlock(&ictx->lock); + if (retval && refcount_dec_and_test(&ictx->users)) + free_imon_context(ictx); + exit: - mutex_unlock(&driver_lock); return retval; } @@ -525,16 +574,9 @@ exit: */ static int display_close(struct inode *inode, struct file *file) { - struct imon_context *ictx = NULL; + struct imon_context *ictx = file->private_data; int retval = 0; - ictx = file->private_data; - - if (!ictx) { - pr_err("no context for device\n"); - return -ENODEV; - } - mutex_lock(&ictx->lock); if (!ictx->display_supported) { @@ -549,6 +591,8 @@ static int display_close(struct inode *inode, struct file *file) } mutex_unlock(&ictx->lock); + if (refcount_dec_and_test(&ictx->users)) + free_imon_context(ictx); return retval; } @@ -565,6 +609,9 @@ static int send_packet(struct imon_context *ictx) int retval = 0; struct usb_ctrlrequest *control_req = NULL; + if (ictx->disconnected) + return -ENODEV; + /* Check if we need to use control or interrupt urb */ if (!ictx->tx_control) { pipe = usb_sndintpipe(ictx->usbdev_intf0, @@ -613,15 +660,14 @@ static int send_packet(struct imon_context *ictx) pr_err_ratelimited("error submitting urb(%d)\n", retval); } else { /* Wait for transmission to complete (or abort) */ - mutex_unlock(&ictx->lock); retval = wait_for_completion_interruptible( &ictx->tx.finished); if (retval) { usb_kill_urb(ictx->tx_urb); pr_err_ratelimited("task interrupted\n"); } - mutex_lock(&ictx->lock); + ictx->tx.busy = false; retval = ictx->tx.status; if (retval) pr_err_ratelimited("packet tx failed (%d)\n", retval); @@ -918,17 +964,17 @@ static ssize_t vfd_write(struct file *file, const char __user *buf, int offset; int seq; int retval = 0; - struct imon_context *ictx; + struct imon_context *ictx = file->private_data; static const unsigned char vfd_packet6[] = { 0x01, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF }; - ictx = file->private_data; - if (!ictx) { - pr_err_ratelimited("no context for device\n"); - return -ENODEV; - } + if (mutex_lock_interruptible(&ictx->lock)) + return -ERESTARTSYS; - mutex_lock(&ictx->lock); + if (ictx->disconnected) { + retval = -ENODEV; + goto exit; + } if (!ictx->dev_present_intf0) { pr_err_ratelimited("no iMON device present\n"); @@ -1002,16 +1048,15 @@ static ssize_t lcd_write(struct file *file, const char __user *buf, size_t n_bytes, loff_t *pos) { int retval = 0; - struct imon_context *ictx; - - ictx = file->private_data; - if (!ictx) { - pr_err_ratelimited("no context for device\n"); - return -ENODEV; - } + struct imon_context *ictx = file->private_data; mutex_lock(&ictx->lock); + if (ictx->disconnected) { + retval = -ENODEV; + goto exit; + } + if (!ictx->display_supported) { pr_err_ratelimited("no iMON display present\n"); retval = -ENODEV; @@ -1273,9 +1318,11 @@ static u32 imon_mce_key_lookup(struct imon_context *ictx, u32 scancode) static u32 imon_panel_key_lookup(struct imon_context *ictx, u64 code) { - int i; + const struct imon_panel_key_table *key_table; u32 keycode = KEY_RESERVED; - struct imon_panel_key_table *key_table = ictx->dev_descr->key_table; + int i; + + key_table = ictx->dev_descr->key_table; for (i = 0; key_table[i].hw_code != 0; i++) { if (key_table[i].hw_code == (code | 0xffee)) { @@ -1559,7 +1606,6 @@ static void imon_incoming_packet(struct imon_context *ictx, u32 kc; u64 scancode; int press_type = 0; - long msec; ktime_t t; static ktime_t prev_time; u8 ktype; @@ -1662,14 +1708,16 @@ static void imon_incoming_packet(struct imon_context *ictx, spin_lock_irqsave(&ictx->kc_lock, flags); t = ktime_get(); - /* KEY_MUTE repeats from knob need to be suppressed */ - if (ictx->kc == KEY_MUTE && ictx->kc == ictx->last_keycode) { - msec = ktime_ms_delta(t, prev_time); - if (msec < ictx->idev->rep[REP_DELAY]) { + /* KEY repeats from knob and panel that need to be suppressed */ + if (ictx->kc == KEY_MUTE || + ictx->dev_descr->flags & IMON_SUPPRESS_REPEATED_KEYS) { + if (ictx->kc == ictx->last_keycode && + ktime_ms_delta(t, prev_time) < ictx->idev->rep[REP_DELAY]) { spin_unlock_irqrestore(&ictx->kc_lock, flags); return; } } + prev_time = t; kc = ictx->kc; @@ -1852,6 +1900,14 @@ static void imon_get_ffdc_type(struct imon_context *ictx) dev_info(ictx->dev, "0xffdc iMON Inside, iMON IR"); ictx->display_supported = false; break; + /* Soundgraph iMON UltraBay */ + case 0x98: + dev_info(ictx->dev, "0xffdc iMON UltraBay, LCD + IR"); + detected_display_type = IMON_DISPLAY_TYPE_LCD; + allowed_protos = RC_PROTO_BIT_IMON | RC_PROTO_BIT_RC6_MCE; + ictx->dev_descr = &ultrabay_table; + break; + default: dev_info(ictx->dev, "Unknown 0xffdc device, defaulting to VFD and iMON IR"); detected_display_type = IMON_DISPLAY_TYPE_VFD; @@ -1983,10 +2039,12 @@ out: static struct input_dev *imon_init_idev(struct imon_context *ictx) { - struct imon_panel_key_table *key_table = ictx->dev_descr->key_table; + const struct imon_panel_key_table *key_table; struct input_dev *idev; int ret, i; + key_table = ictx->dev_descr->key_table; + idev = input_allocate_device(); if (!idev) goto out; @@ -2369,7 +2427,6 @@ static int imon_probe(struct usb_interface *interface, int ifnum, sysfs_err; int ret = 0; struct imon_context *ictx = NULL; - struct imon_context *first_if_ctx = NULL; u16 vendor, product; usbdev = usb_get_dev(interface_to_usbdev(interface)); @@ -2381,17 +2438,12 @@ static int imon_probe(struct usb_interface *interface, dev_dbg(dev, "%s: found iMON device (%04x:%04x, intf%d)\n", __func__, vendor, product, ifnum); - /* prevent races probing devices w/multiple interfaces */ - mutex_lock(&driver_lock); - first_if = usb_ifnum_to_if(usbdev, 0); if (!first_if) { ret = -ENODEV; goto fail; } - first_if_ctx = usb_get_intfdata(first_if); - if (ifnum == 0) { ictx = imon_init_intf0(interface, id); if (!ictx) { @@ -2399,9 +2451,11 @@ static int imon_probe(struct usb_interface *interface, ret = -ENODEV; goto fail; } + refcount_set(&ictx->users, 1); } else { /* this is the secondary interface on the device */ + struct imon_context *first_if_ctx = usb_get_intfdata(first_if); /* fail early if first intf failed to register */ if (!first_if_ctx) { @@ -2415,14 +2469,13 @@ static int imon_probe(struct usb_interface *interface, ret = -ENODEV; goto fail; } + refcount_inc(&ictx->users); } usb_set_intfdata(interface, ictx); if (ifnum == 0) { - mutex_lock(&ictx->lock); - if (product == 0xffdc && ictx->rf_device) { sysfs_err = sysfs_create_group(&interface->dev.kobj, &imon_rf_attr_group); @@ -2433,21 +2486,17 @@ static int imon_probe(struct usb_interface *interface, if (ictx->display_supported) imon_init_display(ictx, interface); - - mutex_unlock(&ictx->lock); } dev_info(dev, "iMON device (%04x:%04x, intf%d) on usb<%d:%d> initialized\n", vendor, product, ifnum, usbdev->bus->busnum, usbdev->devnum); - mutex_unlock(&driver_lock); usb_put_dev(usbdev); return 0; fail: - mutex_unlock(&driver_lock); usb_put_dev(usbdev); dev_err(dev, "unable to register, err %d\n", ret); @@ -2463,10 +2512,12 @@ static void imon_disconnect(struct usb_interface *interface) struct device *dev; int ifnum; - /* prevent races with multi-interface device probing and display_open */ - mutex_lock(&driver_lock); - ictx = usb_get_intfdata(interface); + + mutex_lock(&ictx->lock); + ictx->disconnected = true; + mutex_unlock(&ictx->lock); + dev = ictx->dev; ifnum = interface->cur_altsetting->desc.bInterfaceNumber; @@ -2488,7 +2539,6 @@ static void imon_disconnect(struct usb_interface *interface) if (ifnum == 0) { ictx->dev_present_intf0 = false; usb_kill_urb(ictx->rx_urb_intf0); - usb_put_dev(ictx->usbdev_intf0); input_unregister_device(ictx->idev); rc_unregister_device(ictx->rdev); if (ictx->display_supported) { @@ -2497,21 +2547,20 @@ static void imon_disconnect(struct usb_interface *interface) else if (ictx->display_type == IMON_DISPLAY_TYPE_VFD) usb_deregister_dev(interface, &imon_vfd_class); } + usb_put_dev(ictx->usbdev_intf0); } else { ictx->dev_present_intf1 = false; usb_kill_urb(ictx->rx_urb_intf1); - usb_put_dev(ictx->usbdev_intf1); if (ictx->display_type == IMON_DISPLAY_TYPE_VGA) { input_unregister_device(ictx->touch); del_timer_sync(&ictx->ttimer); } + usb_put_dev(ictx->usbdev_intf1); } - if (!ictx->dev_present_intf0 && !ictx->dev_present_intf1) + if (refcount_dec_and_test(&ictx->users)) free_imon_context(ictx); - mutex_unlock(&driver_lock); - dev_dbg(dev, "%s: iMON device (intf%d) disconnected\n", __func__, ifnum); } diff --git a/fs/ceph/debugfs.c b/fs/ceph/debugfs.c index fbe0681b83..842589d2e9 100644 --- a/fs/ceph/debugfs.c +++ b/fs/ceph/debugfs.c @@ -55,8 +55,6 @@ static int mdsc_show(struct seq_file *s, void *p) struct ceph_mds_client *mdsc = fsc->mdsc; struct ceph_mds_request *req; struct rb_node *rp; - int pathlen = 0; - u64 pathbase; char *path; mutex_lock(&mdsc->mutex); @@ -81,8 +79,10 @@ static int mdsc_show(struct seq_file *s, void *p) if (req->r_inode) { seq_printf(s, " #%llx", ceph_ino(req->r_inode)); } else if (req->r_dentry) { - path = ceph_mdsc_build_path(req->r_dentry, &pathlen, - &pathbase, 0); + struct ceph_path_info path_info = {0}; + + path = ceph_mdsc_build_path(mdsc, req->r_dentry, + &path_info, 0); if (IS_ERR(path)) path = NULL; spin_lock(&req->r_dentry->d_lock); @@ -91,7 +91,7 @@ static int mdsc_show(struct seq_file *s, void *p) req->r_dentry, path ? path : ""); spin_unlock(&req->r_dentry->d_lock); - ceph_mdsc_free_path(path, pathlen); + ceph_mdsc_free_path_info(&path_info); } else if (req->r_path1) { seq_printf(s, " #%llx/%s", req->r_ino1.ino, req->r_path1); @@ -100,8 +100,10 @@ static int mdsc_show(struct seq_file *s, void *p) } if (req->r_old_dentry) { - path = ceph_mdsc_build_path(req->r_old_dentry, &pathlen, - &pathbase, 0); + struct ceph_path_info path_info = {0}; + + path = ceph_mdsc_build_path(mdsc, req->r_old_dentry, + &path_info, 0); if (IS_ERR(path)) path = NULL; spin_lock(&req->r_old_dentry->d_lock); @@ -111,7 +113,7 @@ static int mdsc_show(struct seq_file *s, void *p) req->r_old_dentry, path ? path : ""); spin_unlock(&req->r_old_dentry->d_lock); - ceph_mdsc_free_path(path, pathlen); + ceph_mdsc_free_path_info(&path_info); } else if (req->r_path2 && req->r_op != CEPH_MDS_OP_SYMLINK) { if (req->r_ino2.ino) seq_printf(s, " #%llx/%s", req->r_ino2.ino, diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 448a51eb60..20771a80f8 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -1116,10 +1116,8 @@ static void ceph_async_unlink_cb(struct ceph_mds_client *mdsc, /* If op failed, mark everyone involved for errors */ if (result) { - int pathlen = 0; - u64 base = 0; - char *path = ceph_mdsc_build_path(dentry, &pathlen, - &base, 0); + struct ceph_path_info path_info = {0}; + char *path = ceph_mdsc_build_path(mdsc, dentry, &path_info, 0); /* mark error on parent + clear complete */ mapping_set_error(req->r_parent->i_mapping, result); @@ -1133,8 +1131,9 @@ static void ceph_async_unlink_cb(struct ceph_mds_client *mdsc, mapping_set_error(req->r_old_inode->i_mapping, result); pr_warn("async unlink failure path=(%llx)%s result=%d!\n", - base, IS_ERR(path) ? "<>" : path, result); - ceph_mdsc_free_path(path, pathlen); + path_info.vino.ino, IS_ERR(path) ? "<>" : path, + result); + ceph_mdsc_free_path_info(&path_info); } out: iput(req->r_old_inode); diff --git a/fs/ceph/file.c b/fs/ceph/file.c index bb6630d2ce..f57cba906d 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -525,11 +525,10 @@ static void ceph_async_create_cb(struct ceph_mds_client *mdsc, if (result) { struct dentry *dentry = req->r_dentry; + struct ceph_path_info path_info = {0}; struct inode *inode = d_inode(dentry); - int pathlen = 0; - u64 base = 0; - char *path = ceph_mdsc_build_path(req->r_dentry, &pathlen, - &base, 0); + char *path = ceph_mdsc_build_path(mdsc, req->r_dentry, + &path_info, 0); ceph_dir_clear_complete(req->r_parent); if (!d_unhashed(dentry)) @@ -538,8 +537,9 @@ static void ceph_async_create_cb(struct ceph_mds_client *mdsc, ceph_inode_shutdown(inode); pr_warn("async create failure path=(%llx)%s result=%d!\n", - base, IS_ERR(path) ? "<>" : path, result); - ceph_mdsc_free_path(path, pathlen); + path_info.vino.ino, IS_ERR(path) ? "<>" : path, + result); + ceph_mdsc_free_path_info(&path_info); } if (req->r_target_inode) { diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 696abab74f..c210845d31 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -52,6 +52,52 @@ static int ceph_set_ino_cb(struct inode *inode, void *data) return 0; } +/* + * Check if the parent inode matches the vino from directory reply info + */ +static inline bool ceph_vino_matches_parent(struct inode *parent, + struct ceph_vino vino) +{ + return ceph_ino(parent) == vino.ino && ceph_snap(parent) == vino.snap; +} + +/* + * Validate that the directory inode referenced by @req->r_parent matches the + * inode number and snapshot id contained in the reply's directory record. If + * they do not match - which can theoretically happen if the parent dentry was + * moved between the time the request was issued and the reply arrived - fall + * back to looking up the correct inode in the inode cache. + * + * A reference is *always* returned. Callers that receive a different inode + * than the original @parent are responsible for dropping the extra reference + * once the reply has been processed. + */ +static struct inode *ceph_get_reply_dir(struct super_block *sb, + struct inode *parent, + struct ceph_mds_reply_info_parsed *rinfo) +{ + struct ceph_vino vino; + + if (unlikely(!rinfo->diri.in)) + return parent; /* nothing to compare against */ + + /* If we didn't have a cached parent inode to begin with, just bail out. */ + if (!parent) + return NULL; + + vino.ino = le64_to_cpu(rinfo->diri.in->ino); + vino.snap = le64_to_cpu(rinfo->diri.in->snapid); + + if (likely(ceph_vino_matches_parent(parent, vino))) + return parent; /* matches - use the original reference */ + + /* Mismatch - this should be rare. Emit a WARN and obtain the correct inode. */ + WARN_ONCE(1, "ceph: reply dir mismatch (parent valid %llx.%llx reply %llx.%llx)\n", + ceph_ino(parent), ceph_snap(parent), vino.ino, vino.snap); + + return ceph_get_inode(sb, vino); +} + struct inode *ceph_get_inode(struct super_block *sb, struct ceph_vino vino) { struct inode *inode; @@ -1271,6 +1317,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) struct inode *in = NULL; struct ceph_vino tvino, dvino; struct ceph_fs_client *fsc = ceph_sb_to_client(sb); + struct inode *parent_dir = NULL; int err = 0; dout("fill_trace %p is_dentry %d is_target %d\n", req, @@ -1284,10 +1331,17 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) } if (rinfo->head->is_dentry) { - struct inode *dir = req->r_parent; - - if (dir) { - err = ceph_fill_inode(dir, NULL, &rinfo->diri, + /* + * r_parent may be stale, in cases when R_PARENT_LOCKED is not set, + * so we need to get the correct inode + */ + parent_dir = ceph_get_reply_dir(sb, req->r_parent, rinfo); + if (unlikely(IS_ERR(parent_dir))) { + err = PTR_ERR(parent_dir); + goto done; + } + if (parent_dir) { + err = ceph_fill_inode(parent_dir, NULL, &rinfo->diri, rinfo->dirfrag, session, -1, &req->r_caps_reservation); if (err < 0) @@ -1296,7 +1350,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) WARN_ON_ONCE(1); } - if (dir && req->r_op == CEPH_MDS_OP_LOOKUPNAME && + if (parent_dir && req->r_op == CEPH_MDS_OP_LOOKUPNAME && test_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags) && !test_bit(CEPH_MDS_R_ABORTED, &req->r_req_flags)) { struct qstr dname; @@ -1305,7 +1359,7 @@ int ceph_fill_trace(struct super_block *sb, struct ceph_mds_request *req) BUG_ON(!rinfo->head->is_target); BUG_ON(req->r_dentry); - parent = d_find_any_alias(dir); + parent = d_find_any_alias(parent_dir); BUG_ON(!parent); dname.name = rinfo->dname; @@ -1333,7 +1387,7 @@ retry_lookup: ceph_snap(d_inode(dn)) != tvino.snap)) { dout(" dn %p points to wrong inode %p\n", dn, d_inode(dn)); - ceph_dir_clear_ordered(dir); + ceph_dir_clear_ordered(parent_dir); d_delete(dn); dput(dn); goto retry_lookup; @@ -1517,6 +1571,9 @@ retry_lookup: &dvino, ptvino); } done: + /* Drop extra ref from ceph_get_reply_dir() if it returned a new inode */ + if (unlikely(!IS_ERR_OR_NULL(parent_dir) && parent_dir != req->r_parent)) + iput(parent_dir); dout("fill_trace done err=%d\n", err); return err; } diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index d1ac6368a4..368b391efe 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -2390,16 +2390,18 @@ static inline u64 __get_oldest_tid(struct ceph_mds_client *mdsc) * Build a dentry's path. Allocate on heap; caller must kfree. Based * on build_path_from_dentry in fs/cifs/dir.c. * - * If @stop_on_nosnap, generate path relative to the first non-snapped + * If @for_wire, generate path relative to the first non-snapped * inode. * * Encode hidden .snap dirs as a double /, i.e. * foo/.snap/bar -> foo//bar */ -char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *pbase, - int stop_on_nosnap) +char *ceph_mdsc_build_path(struct ceph_mds_client *mdsc, + struct dentry *dentry, + struct ceph_path_info *path_info, int for_wire) { - struct dentry *temp; + struct dentry *cur; + struct inode *inode; char *path; int pos; unsigned seq; @@ -2417,33 +2419,34 @@ retry: seq = read_seqbegin(&rename_lock); rcu_read_lock(); - temp = dentry; + cur = dentry; for (;;) { - struct inode *inode; + struct dentry *parent; - spin_lock(&temp->d_lock); - inode = d_inode(temp); + spin_lock(&cur->d_lock); + inode = d_inode(cur); if (inode && ceph_snap(inode) == CEPH_SNAPDIR) { dout("build_path path+%d: %p SNAPDIR\n", - pos, temp); - } else if (stop_on_nosnap && inode && dentry != temp && + pos, cur); + } else if (for_wire && inode && dentry != cur && ceph_snap(inode) == CEPH_NOSNAP) { - spin_unlock(&temp->d_lock); + spin_unlock(&cur->d_lock); pos++; /* get rid of any prepended '/' */ break; } else { - pos -= temp->d_name.len; + pos -= cur->d_name.len; if (pos < 0) { - spin_unlock(&temp->d_lock); + spin_unlock(&cur->d_lock); break; } - memcpy(path + pos, temp->d_name.name, temp->d_name.len); + memcpy(path + pos, cur->d_name.name, cur->d_name.len); } - spin_unlock(&temp->d_lock); - temp = READ_ONCE(temp->d_parent); + parent = cur->d_parent; + spin_unlock(&cur->d_lock); + cur = parent; /* Are we at the root? */ - if (IS_ROOT(temp)) + if (IS_ROOT(cur)) break; /* Are we out of buffer? */ @@ -2452,32 +2455,47 @@ retry: path[pos] = '/'; } - base = ceph_ino(d_inode(temp)); + inode = d_inode(cur); + base = inode ? ceph_ino(inode) : 0; rcu_read_unlock(); if (read_seqretry(&rename_lock, seq)) goto retry; - if (pos < 0) { + if (pos < 0 || !inode) { /* * A rename didn't occur, but somehow we didn't end up where * we thought we would. Throw a warning and try again. */ - pr_warn("build_path did not end path lookup where " - "expected, pos is %d\n", pos); - goto retry; + pr_warn("ceph: build_path did not end path lookup where expected (pos = %d, inode = %p)\n", + pos, inode); + __putname(path); + return ERR_PTR(-ENAMETOOLONG); } - *pbase = base; - *plen = PATH_MAX - 1 - pos; + /* Initialize the output structure */ + memset(path_info, 0, sizeof(*path_info)); + + path_info->vino.ino = base; + path_info->pathlen = PATH_MAX - 1 - pos; + path_info->path = path + pos; + path_info->freepath = true; + + /* Set snap from dentry if available */ + if (d_inode(dentry)) + path_info->vino.snap = ceph_snap(d_inode(dentry)); + else + path_info->vino.snap = CEPH_NOSNAP; + dout("build_path on %p %d built %llx '%.*s'\n", - dentry, d_count(dentry), base, *plen, path + pos); + dentry, d_count(dentry), base, path_info->pathlen, path + pos); return path + pos; } -static int build_dentry_path(struct dentry *dentry, struct inode *dir, - const char **ppath, int *ppathlen, u64 *pino, - bool *pfreepath, bool parent_locked) +static int build_dentry_path(struct ceph_mds_client *mdsc, + struct dentry *dentry, struct inode *dir, + struct ceph_path_info *path_info, + bool parent_locked) { char *path; @@ -2485,40 +2503,48 @@ static int build_dentry_path(struct dentry *dentry, struct inode *dir, if (!dir) dir = d_inode_rcu(dentry->d_parent); if (dir && parent_locked && ceph_snap(dir) == CEPH_NOSNAP) { - *pino = ceph_ino(dir); + path_info->vino.ino = ceph_ino(dir); + path_info->vino.snap = ceph_snap(dir); rcu_read_unlock(); - *ppath = dentry->d_name.name; - *ppathlen = dentry->d_name.len; + path_info->path = dentry->d_name.name; + path_info->pathlen = dentry->d_name.len; + path_info->freepath = false; return 0; } rcu_read_unlock(); - path = ceph_mdsc_build_path(dentry, ppathlen, pino, 1); + path = ceph_mdsc_build_path(mdsc, dentry, path_info, 1); if (IS_ERR(path)) return PTR_ERR(path); - *ppath = path; - *pfreepath = true; + /* + * ceph_mdsc_build_path already fills path_info, including snap handling. + */ return 0; } static int build_inode_path(struct inode *inode, - const char **ppath, int *ppathlen, u64 *pino, - bool *pfreepath) + struct ceph_path_info *path_info) { + struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb); struct dentry *dentry; char *path; if (ceph_snap(inode) == CEPH_NOSNAP) { - *pino = ceph_ino(inode); - *ppathlen = 0; + path_info->vino.ino = ceph_ino(inode); + path_info->vino.snap = ceph_snap(inode); + path_info->pathlen = 0; + path_info->freepath = false; return 0; } dentry = d_find_alias(inode); - path = ceph_mdsc_build_path(dentry, ppathlen, pino, 1); + path = ceph_mdsc_build_path(mdsc, dentry, path_info, 1); dput(dentry); if (IS_ERR(path)) return PTR_ERR(path); - *ppath = path; - *pfreepath = true; + /* + * ceph_mdsc_build_path already fills path_info, including snap from dentry. + * Override with inode's snap since that's what this function is for. + */ + path_info->vino.snap = ceph_snap(inode); return 0; } @@ -2526,27 +2552,33 @@ static int build_inode_path(struct inode *inode, * request arguments may be specified via an inode *, a dentry *, or * an explicit ino+path. */ -static int set_request_path_attr(struct inode *rinode, struct dentry *rdentry, - struct inode *rdiri, const char *rpath, - u64 rino, const char **ppath, int *pathlen, - u64 *ino, bool *freepath, bool parent_locked) +static int set_request_path_attr(struct ceph_mds_client *mdsc, + struct inode *rinode, struct dentry *rdentry, + struct inode *rdiri, const char *rpath, + u64 rino, struct ceph_path_info *path_info, + bool parent_locked) { int r = 0; + /* Initialize the output structure */ + memset(path_info, 0, sizeof(*path_info)); + if (rinode) { - r = build_inode_path(rinode, ppath, pathlen, ino, freepath); + r = build_inode_path(rinode, path_info); dout(" inode %p %llx.%llx\n", rinode, ceph_ino(rinode), ceph_snap(rinode)); } else if (rdentry) { - r = build_dentry_path(rdentry, rdiri, ppath, pathlen, ino, - freepath, parent_locked); - dout(" dentry %p %llx/%.*s\n", rdentry, *ino, *pathlen, - *ppath); + r = build_dentry_path(mdsc, rdentry, rdiri, path_info, + parent_locked); + dout(" dentry %p %llx/%.*s\n", rdentry, path_info->vino.ino, + path_info->pathlen, path_info->path); } else if (rpath || rino) { - *ino = rino; - *ppath = rpath; - *pathlen = rpath ? strlen(rpath) : 0; - dout(" path %.*s\n", *pathlen, rpath); + path_info->vino.ino = rino; + path_info->vino.snap = CEPH_NOSNAP; + path_info->path = rpath; + path_info->pathlen = rpath ? strlen(rpath) : 0; + path_info->freepath = false; + dout(" path %.*s\n", path_info->pathlen, rpath); } return r; @@ -2579,43 +2611,65 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, struct ceph_mds_client *mdsc = session->s_mdsc; struct ceph_msg *msg; struct ceph_mds_request_head_old *head; - const char *path1 = NULL; - const char *path2 = NULL; - u64 ino1 = 0, ino2 = 0; - int pathlen1 = 0, pathlen2 = 0; - bool freepath1 = false, freepath2 = false; + struct ceph_path_info path_info1 = {0}; + struct ceph_path_info path_info2 = {0}; struct dentry *old_dentry = NULL; int len; u16 releases; void *p, *end; int ret; bool legacy = !(session->s_con.peer_features & CEPH_FEATURE_FS_BTIME); + bool parent_locked = test_bit(CEPH_MDS_R_PARENT_LOCKED, &req->r_req_flags); - ret = set_request_path_attr(req->r_inode, req->r_dentry, - req->r_parent, req->r_path1, req->r_ino1.ino, - &path1, &pathlen1, &ino1, &freepath1, - test_bit(CEPH_MDS_R_PARENT_LOCKED, - &req->r_req_flags)); + ret = set_request_path_attr(mdsc, req->r_inode, req->r_dentry, + req->r_parent, req->r_path1, req->r_ino1.ino, + &path_info1, parent_locked); if (ret < 0) { msg = ERR_PTR(ret); goto out; } + /* + * When the parent directory's i_rwsem is *not* locked, req->r_parent may + * have become stale (e.g. after a concurrent rename) between the time the + * dentry was looked up and now. If we detect that the stored r_parent + * does not match the inode number we just encoded for the request, switch + * to the correct inode so that the MDS receives a valid parent reference. + */ + if (!parent_locked && req->r_parent && path_info1.vino.ino && + ceph_ino(req->r_parent) != path_info1.vino.ino) { + struct inode *old_parent = req->r_parent; + struct inode *correct_dir = ceph_get_inode(mdsc->fsc->sb, + path_info1.vino); + if (!IS_ERR(correct_dir)) { + WARN_ONCE(1, "ceph: r_parent mismatch (had %llx wanted %llx) - updating\n", + ceph_ino(old_parent), path_info1.vino.ino); + /* + * Transfer CEPH_CAP_PIN from the old parent to the new one. + * The pin was taken earlier in ceph_mdsc_submit_request(). + */ + ceph_put_cap_refs(ceph_inode(old_parent), CEPH_CAP_PIN); + iput(old_parent); + req->r_parent = correct_dir; + ceph_get_cap_refs(ceph_inode(req->r_parent), CEPH_CAP_PIN); + } + } + /* If r_old_dentry is set, then assume that its parent is locked */ if (req->r_old_dentry && !(req->r_old_dentry->d_flags & DCACHE_DISCONNECTED)) old_dentry = req->r_old_dentry; - ret = set_request_path_attr(NULL, old_dentry, - req->r_old_dentry_dir, - req->r_path2, req->r_ino2.ino, - &path2, &pathlen2, &ino2, &freepath2, true); + ret = set_request_path_attr(mdsc, NULL, old_dentry, + req->r_old_dentry_dir, + req->r_path2, req->r_ino2.ino, + &path_info2, true); if (ret < 0) { msg = ERR_PTR(ret); goto out_free1; } len = legacy ? sizeof(*head) : sizeof(struct ceph_mds_request_head); - len += pathlen1 + pathlen2 + 2*(1 + sizeof(u32) + sizeof(u64)) + + len += path_info1.pathlen + path_info2.pathlen + 2*(1 + sizeof(u32) + sizeof(u64)) + sizeof(struct ceph_timespec); len += sizeof(u32) + (sizeof(u64) * req->r_cred->group_info->ngroups); @@ -2625,9 +2679,9 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, !!req->r_old_inode_drop + !!req->r_old_dentry_drop); if (req->r_dentry_drop) - len += pathlen1; + len += path_info1.pathlen; if (req->r_old_dentry_drop) - len += pathlen2; + len += path_info2.pathlen; msg = ceph_msg_new2(CEPH_MSG_CLIENT_REQUEST, len, 1, GFP_NOFS, false); if (!msg) { @@ -2665,8 +2719,8 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, head->ino = cpu_to_le64(req->r_deleg_ino); head->args = req->r_args; - ceph_encode_filepath(&p, end, ino1, path1); - ceph_encode_filepath(&p, end, ino2, path2); + ceph_encode_filepath(&p, end, path_info1.vino.ino, path_info1.path); + ceph_encode_filepath(&p, end, path_info2.vino.ino, path_info2.path); /* make note of release offset, in case we need to replay */ req->r_request_release_offset = p - msg->front.iov_base; @@ -2721,11 +2775,9 @@ static struct ceph_msg *create_request_message(struct ceph_mds_session *session, msg->hdr.data_off = cpu_to_le16(0); out_free2: - if (freepath2) - ceph_mdsc_free_path((char *)path2, pathlen2); + ceph_mdsc_free_path_info(&path_info2); out_free1: - if (freepath1) - ceph_mdsc_free_path((char *)path1, pathlen1); + ceph_mdsc_free_path_info(&path_info1); out: return msg; } @@ -3922,24 +3974,21 @@ static int reconnect_caps_cb(struct inode *inode, int mds, void *arg) struct ceph_pagelist *pagelist = recon_state->pagelist; struct dentry *dentry; struct ceph_cap *cap; - char *path; - int pathlen = 0, err; - u64 pathbase; + struct ceph_path_info path_info = {0}; + int err; u64 snap_follows; dentry = d_find_primary(inode); if (dentry) { + struct ceph_mds_client *mdsc = ceph_inode_to_client(inode)->mdsc; /* set pathbase to parent dir when msg_version >= 2 */ - path = ceph_mdsc_build_path(dentry, &pathlen, &pathbase, - recon_state->msg_version >= 2); + ceph_mdsc_build_path(mdsc, dentry, &path_info, + recon_state->msg_version >= 2); dput(dentry); - if (IS_ERR(path)) { - err = PTR_ERR(path); + if (IS_ERR(path_info.path)) { + err = PTR_ERR(path_info.path); goto out_err; } - } else { - path = NULL; - pathbase = 0; } spin_lock(&ci->i_ceph_lock); @@ -3972,7 +4021,7 @@ static int reconnect_caps_cb(struct inode *inode, int mds, void *arg) rec.v2.wanted = cpu_to_le32(__ceph_caps_wanted(ci)); rec.v2.issued = cpu_to_le32(cap->issued); rec.v2.snaprealm = cpu_to_le64(ci->i_snap_realm->ino); - rec.v2.pathbase = cpu_to_le64(pathbase); + rec.v2.pathbase = cpu_to_le64(path_info.vino.ino); rec.v2.flock_len = (__force __le32) ((ci->i_ceph_flags & CEPH_I_ERROR_FILELOCK) ? 0 : 1); } else { @@ -3983,7 +4032,7 @@ static int reconnect_caps_cb(struct inode *inode, int mds, void *arg) ceph_encode_timespec64(&rec.v1.mtime, &inode->i_mtime); ceph_encode_timespec64(&rec.v1.atime, &inode->i_atime); rec.v1.snaprealm = cpu_to_le64(ci->i_snap_realm->ino); - rec.v1.pathbase = cpu_to_le64(pathbase); + rec.v1.pathbase = cpu_to_le64(path_info.vino.ino); } if (list_empty(&ci->i_cap_snaps)) { @@ -4045,7 +4094,7 @@ encode_again: sizeof(struct ceph_filelock); rec.v2.flock_len = cpu_to_le32(struct_len); - struct_len += sizeof(u32) + pathlen + sizeof(rec.v2); + struct_len += sizeof(u32) + path_info.pathlen + sizeof(rec.v2); if (struct_v >= 2) struct_len += sizeof(u64); /* snap_follows */ @@ -4069,7 +4118,7 @@ encode_again: ceph_pagelist_encode_8(pagelist, 1); ceph_pagelist_encode_32(pagelist, struct_len); } - ceph_pagelist_encode_string(pagelist, path, pathlen); + ceph_pagelist_encode_string(pagelist, (char *)path_info.path, path_info.pathlen); ceph_pagelist_append(pagelist, &rec, sizeof(rec.v2)); ceph_locks_to_pagelist(flocks, pagelist, num_fcntl_locks, num_flock_locks); @@ -4080,17 +4129,17 @@ out_freeflocks: } else { err = ceph_pagelist_reserve(pagelist, sizeof(u64) + sizeof(u32) + - pathlen + sizeof(rec.v1)); + path_info.pathlen + sizeof(rec.v1)); if (err) goto out_err; ceph_pagelist_encode_64(pagelist, ceph_ino(inode)); - ceph_pagelist_encode_string(pagelist, path, pathlen); + ceph_pagelist_encode_string(pagelist, (char *)path_info.path, path_info.pathlen); ceph_pagelist_append(pagelist, &rec, sizeof(rec.v1)); } out_err: - ceph_mdsc_free_path(path, pathlen); + ceph_mdsc_free_path_info(&path_info); if (!err) recon_state->nr_caps++; return err; diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index b79bc8576f..184a3462ef 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -539,14 +539,26 @@ extern int ceph_iterate_session_caps(struct ceph_mds_session *session, void *arg); extern void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc); -static inline void ceph_mdsc_free_path(char *path, int len) +/* + * Structure to group path-related output parameters for build_*_path functions + */ +struct ceph_path_info { + const char *path; + int pathlen; + struct ceph_vino vino; + bool freepath; +}; + +static inline void ceph_mdsc_free_path_info(const struct ceph_path_info *path_info) { - if (!IS_ERR_OR_NULL(path)) - __putname(path - (PATH_MAX - 1 - len)); + if (path_info && path_info->freepath && !IS_ERR_OR_NULL(path_info->path)) + __putname((char *)path_info->path - (PATH_MAX - 1 - path_info->pathlen)); } -extern char *ceph_mdsc_build_path(struct dentry *dentry, int *plen, u64 *base, - int stop_on_nosnap); +extern char *ceph_mdsc_build_path(struct ceph_mds_client *mdsc, + struct dentry *dentry, + struct ceph_path_info *path_info, + int for_wire); extern void __ceph_mdsc_drop_dentry_lease(struct dentry *dentry); extern void ceph_mdsc_lease_send_msg(struct ceph_mds_session *session, diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c index c09778de99..ed89117376 100644 --- a/fs/cifs/cifs_dfs_ref.c +++ b/fs/cifs/cifs_dfs_ref.c @@ -145,10 +145,9 @@ char *cifs_compose_mount_options(const char *sb_mountdata, char *mountdata = NULL; const char *prepath = NULL; int md_len; - char *tkn_e; + const char *start, *end; char *srvIP = NULL; char sep = ','; - int off, noff; if (sb_mountdata == NULL) return ERR_PTR(-EINVAL); @@ -199,45 +198,31 @@ char *cifs_compose_mount_options(const char *sb_mountdata, goto compose_mount_options_err; } - /* copy all options except of unc,ip,prefixpath */ - off = 0; - if (strncmp(sb_mountdata, "sep=", 4) == 0) { - sep = sb_mountdata[4]; - strncpy(mountdata, sb_mountdata, 5); - off += 5; + /* copy all options except of unc,ip,prefixpath,cruid */ + start = end = sb_mountdata; + for (;;) { + end = strchrnul(end, sep); + while (*end && end[0] == sep && end[1] == sep) + end += 2; + + if (strncasecmp(start, "prefixpath=", 11) == 0 || + strncasecmp(start, "cruid=", 6) == 0 || + strncasecmp(start, "unc=", 4) == 0 || + strncasecmp(start, "ip=", 3) == 0) + goto next_opt; + + if (*mountdata) + strncat(mountdata, &sep, 1); + strncat(mountdata, start, end - start); +next_opt: + if (!*end) + break; + start = ++end; } - - do { - tkn_e = strchr(sb_mountdata + off, sep); - if (tkn_e == NULL) - noff = strlen(sb_mountdata + off); - else - noff = tkn_e - (sb_mountdata + off) + 1; - - if (strncasecmp(sb_mountdata + off, "cruid=", 6) == 0) { - off += noff; - continue; - } - if (strncasecmp(sb_mountdata + off, "unc=", 4) == 0) { - off += noff; - continue; - } - if (strncasecmp(sb_mountdata + off, "ip=", 3) == 0) { - off += noff; - continue; - } - if (strncasecmp(sb_mountdata + off, "prefixpath=", 11) == 0) { - off += noff; - continue; - } - strncat(mountdata, sb_mountdata + off, noff); - off += noff; - } while (tkn_e); - strcat(mountdata, sb_mountdata + off); mountdata[md_len] = '\0'; /* copy new IP and ref share name */ - if (mountdata[strlen(mountdata) - 1] != sep) + if (*mountdata) strncat(mountdata, &sep, 1); strcat(mountdata, "ip="); strcat(mountdata, srvIP); @@ -247,8 +232,8 @@ char *cifs_compose_mount_options(const char *sb_mountdata, else kfree(name); - /*cifs_dbg(FYI, "%s: parent mountdata: %s\n", __func__, sb_mountdata);*/ - /*cifs_dbg(FYI, "%s: submount mountdata: %s\n", __func__, mountdata );*/ + cifs_dbg(FYI, "%s: parent mountdata: %s\n", __func__, sb_mountdata); + cifs_dbg(FYI, "%s: submount mountdata: %s\n", __func__, mountdata); compose_mount_options_out: kfree(srvIP); diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 8581006a10..8cc089e815 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -61,7 +61,8 @@ struct gfs2_glock_iter { typedef void (*glock_examiner) (struct gfs2_glock * gl); -static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target); +static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, + unsigned int target, bool may_cancel); static void request_demote(struct gfs2_glock *gl, unsigned int state, unsigned long delay, bool remote); @@ -616,14 +617,6 @@ static void finish_xmote(struct gfs2_glock *gl, unsigned int ret) list_del_init(&gh->gh_list); trace_gfs2_glock_queue(gh, 0); gl->gl_target = gl->gl_state; - gh = find_first_waiter(gl); - if (gh) { - gl->gl_target = gh->gh_state; - if (do_promote(gl)) - goto out; - do_xmote(gl, gh, gl->gl_target); - return; - } goto out; } /* Some error or failed "try lock" - report it */ @@ -637,12 +630,14 @@ static void finish_xmote(struct gfs2_glock *gl, unsigned int ret) switch(state) { /* Unlocked due to conversion deadlock, try again */ case LM_ST_UNLOCKED: - do_xmote(gl, gh, gl->gl_target); + do_xmote(gl, gh, gl->gl_target, + !test_bit(GLF_DEMOTE_IN_PROGRESS, + &gl->gl_flags)); break; /* Conversion fails, unlock and try again */ case LM_ST_SHARED: case LM_ST_DEFERRED: - do_xmote(gl, gh, LM_ST_UNLOCKED); + do_xmote(gl, gh, LM_ST_UNLOCKED, false); break; default: /* Everything else */ fs_err(gl->gl_name.ln_sbd, "wanted %u got %u\n", @@ -671,7 +666,7 @@ static void finish_xmote(struct gfs2_glock *gl, unsigned int ret) } out: if (!test_bit(GLF_CANCELING, &gl->gl_flags)) - clear_bit(GLF_LOCK, &gl->gl_flags); + clear_and_wake_up_bit(GLF_LOCK, &gl->gl_flags); } static bool is_system_glock(struct gfs2_glock *gl) @@ -689,11 +684,12 @@ static bool is_system_glock(struct gfs2_glock *gl) * @gl: The lock state * @gh: The holder (only for promotes) * @target: The target lock state + * @may_cancel: Operation may be canceled * */ static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, - unsigned int target) + unsigned int target, bool may_cancel) __releases(&gl->gl_lockref.lock) __acquires(&gl->gl_lockref.lock) { @@ -806,7 +802,8 @@ skip_inval: * We skip telling dlm to do the locking, so we won't get a * reply that would otherwise clear GLF_LOCK. So we clear it here. */ - clear_bit(GLF_LOCK, &gl->gl_flags); + if (!test_bit(GLF_CANCELING, &gl->gl_flags)) + clear_and_wake_up_bit(GLF_LOCK, &gl->gl_flags); clear_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags); gfs2_glock_queue_work(gl, GL_GLOCK_DFT_HOLD); return; @@ -816,16 +813,19 @@ skip_inval: } if (ls->ls_ops->lm_lock) { - set_bit(GLF_PENDING_REPLY, &gl->gl_flags); spin_unlock(&gl->gl_lockref.lock); ret = ls->ls_ops->lm_lock(gl, target, lck_flags); spin_lock(&gl->gl_lockref.lock); if (!ret) { + if (may_cancel) { + set_bit(GLF_MAY_CANCEL, &gl->gl_flags); + smp_mb__after_atomic(); + wake_up_bit(&gl->gl_flags, GLF_LOCK); + } /* The operation will be completed asynchronously. */ return; } - clear_bit(GLF_PENDING_REPLY, &gl->gl_flags); if (ret == -ENODEV && gl->gl_target == LM_ST_UNLOCKED && target == LM_ST_UNLOCKED) { @@ -857,7 +857,7 @@ __acquires(&gl->gl_lockref.lock) { struct gfs2_holder *gh; - if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) + if (test_bit(GLF_LOCK, &gl->gl_flags)) return; /* While a demote is in progress, the GLF_LOCK flag must be set. */ @@ -866,39 +866,34 @@ __acquires(&gl->gl_lockref.lock) if (test_bit(GLF_DEMOTE, &gl->gl_flags) && gl->gl_demote_state != gl->gl_state) { if (find_first_holder(gl)) - goto out_unlock; + return; if (nonblock) goto out_sched; set_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags); GLOCK_BUG_ON(gl, gl->gl_demote_state == LM_ST_EXCLUSIVE); gl->gl_target = gl->gl_demote_state; - do_xmote(gl, NULL, gl->gl_target); + set_bit(GLF_LOCK, &gl->gl_flags); + do_xmote(gl, NULL, gl->gl_target, false); return; } else { if (test_bit(GLF_DEMOTE, &gl->gl_flags)) gfs2_demote_wake(gl); if (do_promote(gl)) - goto out_unlock; + return; gh = find_first_waiter(gl); if (!gh) - goto out_unlock; + return; gl->gl_target = gh->gh_state; if (!(gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) do_error(gl, 0); /* Fail queued try locks */ - do_xmote(gl, gh, gl->gl_target); + set_bit(GLF_LOCK, &gl->gl_flags); + do_xmote(gl, gh, gl->gl_target, true); return; } out_sched: - clear_bit(GLF_LOCK, &gl->gl_flags); - smp_mb__after_atomic(); gl->gl_lockref.count++; gfs2_glock_queue_work(gl, 0); - return; - -out_unlock: - clear_bit(GLF_LOCK, &gl->gl_flags); - smp_mb__after_atomic(); } /** @@ -1372,31 +1367,45 @@ static int glocks_pending(unsigned int num_gh, struct gfs2_holder *ghs) * gfs2_glock_async_wait - wait on multiple asynchronous glock acquisitions * @num_gh: the number of holders in the array * @ghs: the glock holder array + * @retries: number of retries attempted so far * * Returns: 0 on success, meaning all glocks have been granted and are held. * -ESTALE if the request timed out, meaning all glocks were released, * and the caller should retry the operation. */ -int gfs2_glock_async_wait(unsigned int num_gh, struct gfs2_holder *ghs) +int gfs2_glock_async_wait(unsigned int num_gh, struct gfs2_holder *ghs, + unsigned int retries) { struct gfs2_sbd *sdp = ghs[0].gh_gl->gl_name.ln_sbd; - int i, ret = 0, timeout = 0; unsigned long start_time = jiffies; + int i, ret = 0; + long timeout; might_sleep(); - /* - * Total up the (minimum hold time * 2) of all glocks and use that to - * determine the max amount of time we should wait. - */ - for (i = 0; i < num_gh; i++) - timeout += ghs[i].gh_gl->gl_hold_time << 1; - if (!wait_event_timeout(sdp->sd_async_glock_wait, + timeout = GL_GLOCK_MIN_HOLD; + if (retries) { + unsigned int max_shift; + long incr; + + /* Add a random delay and increase the timeout exponentially. */ + max_shift = BITS_PER_LONG - 2 - __fls(GL_GLOCK_HOLD_INCR); + incr = min(GL_GLOCK_HOLD_INCR << min(retries - 1, max_shift), + 10 * HZ - GL_GLOCK_MIN_HOLD); + schedule_timeout_interruptible(get_random_long() % (incr / 3)); + if (signal_pending(current)) + goto interrupted; + timeout += (incr / 3) + get_random_long() % (incr / 3); + } + + if (!wait_event_interruptible_timeout(sdp->sd_async_glock_wait, !glocks_pending(num_gh, ghs), timeout)) { ret = -ESTALE; /* request timed out. */ goto out; } + if (signal_pending(current)) + goto interrupted; for (i = 0; i < num_gh; i++) { struct gfs2_holder *gh = &ghs[i]; @@ -1420,6 +1429,10 @@ out: } } return ret; + +interrupted: + ret = -EINTR; + goto out; } /** @@ -1611,6 +1624,8 @@ static void __gfs2_glock_dq(struct gfs2_holder *gh) list_del_init(&gh->gh_list); clear_bit(HIF_HOLDER, &gh->gh_iflags); trace_gfs2_glock_queue(gh, 0); + if (test_bit(HIF_WAIT, &gh->gh_iflags)) + gfs2_holder_wake(gh); /* * If there hasn't been a demote request we are done. @@ -1641,6 +1656,7 @@ void gfs2_glock_dq(struct gfs2_holder *gh) struct gfs2_glock *gl = gh->gh_gl; struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; +again: spin_lock(&gl->gl_lockref.lock); if (!gfs2_holder_queued(gh)) { /* @@ -1655,13 +1671,25 @@ void gfs2_glock_dq(struct gfs2_holder *gh) test_bit(GLF_LOCK, &gl->gl_flags) && !test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags) && !test_bit(GLF_CANCELING, &gl->gl_flags)) { + if (!test_bit(GLF_MAY_CANCEL, &gl->gl_flags)) { + struct wait_queue_head *wq; + DEFINE_WAIT(wait); + + wq = bit_waitqueue(&gl->gl_flags, GLF_LOCK); + prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE); + spin_unlock(&gl->gl_lockref.lock); + schedule(); + finish_wait(wq, &wait); + goto again; + } + set_bit(GLF_CANCELING, &gl->gl_flags); spin_unlock(&gl->gl_lockref.lock); gl->gl_name.ln_sbd->sd_lockstruct.ls_ops->lm_cancel(gl); wait_on_bit(&gh->gh_iflags, HIF_WAIT, TASK_UNINTERRUPTIBLE); spin_lock(&gl->gl_lockref.lock); clear_bit(GLF_CANCELING, &gl->gl_flags); - clear_bit(GLF_LOCK, &gl->gl_flags); + clear_and_wake_up_bit(GLF_LOCK, &gl->gl_flags); if (!gfs2_holder_queued(gh)) goto out; } @@ -1910,7 +1938,7 @@ void gfs2_glock_complete(struct gfs2_glock *gl, int ret) struct lm_lockstruct *ls = &gl->gl_name.ln_sbd->sd_lockstruct; spin_lock(&gl->gl_lockref.lock); - clear_bit(GLF_PENDING_REPLY, &gl->gl_flags); + clear_bit(GLF_MAY_CANCEL, &gl->gl_flags); gl->gl_reply = ret; if (unlikely(test_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags))) { @@ -2314,8 +2342,8 @@ static const char *gflags2str(char *buf, const struct gfs2_glock *gl) *p++ = 'f'; if (test_bit(GLF_INVALIDATE_IN_PROGRESS, gflags)) *p++ = 'i'; - if (test_bit(GLF_PENDING_REPLY, gflags)) - *p++ = 'R'; + if (test_bit(GLF_MAY_CANCEL, gflags)) + *p++ = 'c'; if (test_bit(GLF_HAVE_REPLY, gflags)) *p++ = 'r'; if (test_bit(GLF_INITIAL, gflags)) diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index 508610ddde..7ef7c62dd7 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -200,7 +200,8 @@ int gfs2_glock_poll(struct gfs2_holder *gh); int gfs2_instantiate(struct gfs2_holder *gh); int gfs2_glock_holder_ready(struct gfs2_holder *gh); int gfs2_glock_wait(struct gfs2_holder *gh); -int gfs2_glock_async_wait(unsigned int num_gh, struct gfs2_holder *ghs); +int gfs2_glock_async_wait(unsigned int num_gh, struct gfs2_holder *ghs, + unsigned int retries); void gfs2_glock_dq(struct gfs2_holder *gh); void gfs2_glock_dq_wait(struct gfs2_holder *gh); void gfs2_glock_dq_uninit(struct gfs2_holder *gh); diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 36018c48b8..ab08ee9f43 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -334,7 +334,7 @@ enum { GLF_UNLOCKED = 16, /* Wait for glock to be unlocked */ GLF_TRY_TO_EVICT = 17, /* iopen glocks only */ GLF_VERIFY_DELETE = 18, /* iopen glocks only */ - GLF_PENDING_REPLY = 19, + GLF_MAY_CANCEL = 19, GLF_DEFER_DELETE = 20, /* iopen glocks only */ GLF_CANCELING = 21, }; diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index d885e51beb..c71c0e97c9 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -1481,7 +1481,7 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, unsigned int num_gh; int dir_rename = 0; struct gfs2_diradd da = { .nr_blocks = 0, .save_loc = 0, }; - unsigned int x; + unsigned int retries = 0, x; int error; gfs2_holder_mark_uninitialized(&r_gh); @@ -1531,12 +1531,17 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, num_gh++; } +again: for (x = 0; x < num_gh; x++) { error = gfs2_glock_nq(ghs + x); if (error) goto out_gunlock; } - error = gfs2_glock_async_wait(num_gh, ghs); + error = gfs2_glock_async_wait(num_gh, ghs, retries); + if (error == -ESTALE) { + retries++; + goto again; + } if (error) goto out_gunlock; @@ -1723,7 +1728,7 @@ static int gfs2_exchange(struct inode *odir, struct dentry *odentry, struct gfs2_sbd *sdp = GFS2_SB(odir); struct gfs2_holder ghs[4], r_gh; unsigned int num_gh; - unsigned int x; + unsigned int retries = 0, x; umode_t old_mode = oip->i_inode.i_mode; umode_t new_mode = nip->i_inode.i_mode; int error; @@ -1767,13 +1772,18 @@ static int gfs2_exchange(struct inode *odir, struct dentry *odentry, gfs2_holder_init(nip->i_gl, LM_ST_EXCLUSIVE, GL_ASYNC, ghs + num_gh); num_gh++; +again: for (x = 0; x < num_gh; x++) { error = gfs2_glock_nq(ghs + x); if (error) goto out_gunlock; } - error = gfs2_glock_async_wait(num_gh, ghs); + error = gfs2_glock_async_wait(num_gh, ghs, retries); + if (error == -ESTALE) { + retries++; + goto again; + } if (error) goto out_gunlock; diff --git a/fs/gfs2/trace_gfs2.h b/fs/gfs2/trace_gfs2.h index 26036ffc3f..afa3b16396 100644 --- a/fs/gfs2/trace_gfs2.h +++ b/fs/gfs2/trace_gfs2.h @@ -53,7 +53,7 @@ {(1UL << GLF_DIRTY), "y" }, \ {(1UL << GLF_LFLUSH), "f" }, \ {(1UL << GLF_INVALIDATE_IN_PROGRESS), "i" }, \ - {(1UL << GLF_PENDING_REPLY), "R" }, \ + {(1UL << GLF_MAY_CANCEL), "c" }, \ {(1UL << GLF_HAVE_REPLY), "r" }, \ {(1UL << GLF_INITIAL), "a" }, \ {(1UL << GLF_HAVE_FROZEN_REPLY), "F" }, \ diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 0663a8ca3e..a74fadadbe 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5977,6 +5977,8 @@ static ssize_t nfs4_proc_get_acl(struct inode *inode, void *buf, size_t buflen) struct nfs_server *server = NFS_SERVER(inode); int ret; + if (unlikely(NFS_FH(inode)->size == 0)) + return -ENODATA; if (!nfs4_server_supports_acls(server)) return -EOPNOTSUPP; ret = nfs_revalidate_inode(inode, NFS_INO_INVALID_CHANGE); @@ -6048,6 +6050,9 @@ static int nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t buflen { struct nfs4_exception exception = { }; int err; + + if (unlikely(NFS_FH(inode)->size == 0)) + return -ENODATA; do { err = __nfs4_proc_set_acl(inode, buf, buflen); trace_nfs4_set_acl(inode, err); diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h index cc9f393e2a..8f34042498 100644 --- a/include/linux/sched/isolation.h +++ b/include/linux/sched/isolation.h @@ -45,6 +45,10 @@ static inline bool housekeeping_enabled(enum hk_flags flags) static inline void housekeeping_affine(struct task_struct *t, enum hk_flags flags) { } +static inline bool housekeeping_test_cpu(int cpu, enum hk_flags flags) +{ + return true; +} static inline void housekeeping_init(void) { } #endif /* CONFIG_CPU_ISOLATION */ @@ -57,4 +61,10 @@ static inline bool housekeeping_cpu(int cpu, enum hk_flags flags) return true; } +static inline bool cpu_is_isolated(int cpu) +{ + return !housekeeping_test_cpu(cpu, HK_FLAG_DOMAIN) || + !housekeeping_test_cpu(cpu, HK_FLAG_TICK); +} + #endif /* _LINUX_SCHED_ISOLATION_H */ diff --git a/net/ceph/ceph_common.c b/net/ceph/ceph_common.c index 49a94417b7..b8b620898a 100644 --- a/net/ceph/ceph_common.c +++ b/net/ceph/ceph_common.c @@ -821,42 +821,53 @@ void ceph_reset_client_addr(struct ceph_client *client) } EXPORT_SYMBOL(ceph_reset_client_addr); -/* - * true if we have the mon map (and have thus joined the cluster) - */ -static bool have_mon_and_osd_map(struct ceph_client *client) -{ - return client->monc.monmap && client->monc.monmap->epoch && - client->osdc.osdmap && client->osdc.osdmap->epoch; -} - /* * mount: join the ceph cluster, and open root directory. */ int __ceph_open_session(struct ceph_client *client, unsigned long started) { - unsigned long timeout = client->options->mount_timeout; - long err; + DEFINE_WAIT_FUNC(wait, woken_wake_function); + long timeout = ceph_timeout_jiffies(client->options->mount_timeout); + bool have_monmap, have_osdmap; + int err; /* open session, and wait for mon and osd maps */ err = ceph_monc_open_session(&client->monc); if (err < 0) return err; - while (!have_mon_and_osd_map(client)) { - if (timeout && time_after_eq(jiffies, started + timeout)) - return -ETIMEDOUT; + add_wait_queue(&client->auth_wq, &wait); + for (;;) { + mutex_lock(&client->monc.mutex); + err = client->auth_err; + have_monmap = client->monc.monmap && client->monc.monmap->epoch; + mutex_unlock(&client->monc.mutex); + + down_read(&client->osdc.lock); + have_osdmap = client->osdc.osdmap && client->osdc.osdmap->epoch; + up_read(&client->osdc.lock); + + if (err || (have_monmap && have_osdmap)) + break; + + if (signal_pending(current)) { + err = -ERESTARTSYS; + break; + } + + if (!timeout) { + err = -ETIMEDOUT; + break; + } /* wait */ dout("mount waiting for mon_map\n"); - err = wait_event_interruptible_timeout(client->auth_wq, - have_mon_and_osd_map(client) || (client->auth_err < 0), - ceph_timeout_jiffies(timeout)); - if (err < 0) - return err; - if (client->auth_err < 0) - return client->auth_err; + timeout = wait_woken(&wait, TASK_INTERRUPTIBLE, timeout); } + remove_wait_queue(&client->auth_wq, &wait); + + if (err) + return err; pr_info("client%llu fsid %pU\n", ceph_client_gid(client), &client->fsid); diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c index 2110439f8a..83c270bce6 100644 --- a/net/ceph/debugfs.c +++ b/net/ceph/debugfs.c @@ -36,8 +36,9 @@ static int monmap_show(struct seq_file *s, void *p) int i; struct ceph_client *client = s->private; + mutex_lock(&client->monc.mutex); if (client->monc.monmap == NULL) - return 0; + goto out_unlock; seq_printf(s, "epoch %d\n", client->monc.monmap->epoch); for (i = 0; i < client->monc.monmap->num_mon; i++) { @@ -48,6 +49,9 @@ static int monmap_show(struct seq_file *s, void *p) ENTITY_NAME(inst->name), ceph_pr_addr(&inst->addr)); } + +out_unlock: + mutex_unlock(&client->monc.mutex); return 0; } @@ -56,13 +60,14 @@ static int osdmap_show(struct seq_file *s, void *p) int i; struct ceph_client *client = s->private; struct ceph_osd_client *osdc = &client->osdc; - struct ceph_osdmap *map = osdc->osdmap; + struct ceph_osdmap *map; struct rb_node *n; - if (map == NULL) - return 0; - down_read(&osdc->lock); + map = osdc->osdmap; + if (map == NULL) + goto out_unlock; + seq_printf(s, "epoch %u barrier %u flags 0x%x\n", map->epoch, osdc->epoch_barrier, map->flags); @@ -131,6 +136,7 @@ static int osdmap_show(struct seq_file *s, void *p) seq_printf(s, "]\n"); } +out_unlock: up_read(&osdc->lock); return 0; } diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c index eb93ffe240..dffb0dbcb7 100644 --- a/net/sctp/inqueue.c +++ b/net/sctp/inqueue.c @@ -178,13 +178,14 @@ next_chunk: chunk->head_skb = chunk->skb; /* skbs with "cover letter" */ - if (chunk->head_skb && chunk->skb->data_len == chunk->skb->len) + if (chunk->head_skb && chunk->skb->data_len == chunk->skb->len) { + if (WARN_ON(!skb_shinfo(chunk->skb)->frag_list)) { + __SCTP_INC_STATS(dev_net(chunk->skb->dev), + SCTP_MIB_IN_PKT_DISCARDS); + sctp_chunk_free(chunk); + goto next_chunk; + } chunk->skb = skb_shinfo(chunk->skb)->frag_list; - - if (WARN_ON(!chunk->skb)) { - __SCTP_INC_STATS(dev_net(chunk->skb->dev), SCTP_MIB_IN_PKT_DISCARDS); - sctp_chunk_free(chunk); - goto next_chunk; } } diff --git a/scripts/basic/fixdep b/scripts/basic/fixdep index dc25c3056f..f56f11be62 100755 Binary files a/scripts/basic/fixdep and b/scripts/basic/fixdep differ diff --git a/scripts/kconfig/conf b/scripts/kconfig/conf index f3b04c0014..ed045d399d 100755 Binary files a/scripts/kconfig/conf and b/scripts/kconfig/conf differ