diff --git a/Documentation/scsi/st.txt b/Documentation/scsi/st.txt index ec0acf6acc..0d21722cd6 100644 --- a/Documentation/scsi/st.txt +++ b/Documentation/scsi/st.txt @@ -147,6 +147,11 @@ enabled driver and mode options. The value in the file is a bit mask where the bit definitions are the same as those used with MTSETDRVBUFFER in setting the options. +Each directory contains the entry 'position_lost_in_reset'. If this value is +one, reading and writing to the device is blocked after device reset. Most +devices rewind the tape after reset and the writes/read don't access the +tape position the user expects. + A link named 'tape' is made from the SCSI device directory to the class directory corresponding to the mode 0 auto-rewind device (e.g., st0). diff --git a/Makefile.rhelver b/Makefile.rhelver index 4ad74d2e67..a867467338 100644 --- a/Makefile.rhelver +++ b/Makefile.rhelver @@ -12,7 +12,7 @@ RHEL_MINOR = 10 # # Use this spot to avoid future merge conflicts. # Do not trim this comment. -RHEL_RELEASE = 553.111.1 +RHEL_RELEASE = 553.115.1 # # ZSTREAM diff --git a/arch/s390/boot/head.S b/arch/s390/boot/head.S index c7cbf21dfc..1bd4f5436d 100644 --- a/arch/s390/boot/head.S +++ b/arch/s390/boot/head.S @@ -350,6 +350,7 @@ ENTRY(startup_kdump) .quad 0 # INITRD_SIZE .quad 0 # OLDMEM_BASE .quad 0 # OLDMEM_SIZE + .quad 0 # KERNEL_VERSION .quad COMMAND_LINE_SIZE .org COMMAND_LINE diff --git a/arch/s390/kernel/machine_kexec_file.c b/arch/s390/kernel/machine_kexec_file.c index 2fc92a2a9c..8c42ae07e0 100644 --- a/arch/s390/kernel/machine_kexec_file.c +++ b/arch/s390/kernel/machine_kexec_file.c @@ -254,8 +254,10 @@ void *kexec_file_add_components(struct kimage *image, if (image->kernel_buf_len < minsize + max_command_line_size) goto out; - if (image->cmdline_buf_len >= max_command_line_size) + if (image->cmdline_buf_len >= max_command_line_size) { + pr_err("Kernel command line exceeds supported limit of %lu", max_command_line_size); goto out; + } memcpy(data.parm->command_line, image->cmdline_buf, image->cmdline_buf_len); diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 3b87bca027..708de629e1 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -1107,3 +1107,27 @@ bool arch_uretprobe_is_alive(struct return_instance *ret, enum rp_check ctx, else return regs->sp <= ret->stack; } + +#ifdef CONFIG_IA32_EMULATION +unsigned long arch_uprobe_get_xol_area(void) +{ + struct thread_info *ti = current_thread_info(); + unsigned long vaddr; + + /* + * HACK: we are not in a syscall, but x86 get_unmapped_area() paths + * ignore TIF_ADDR32 and rely on in_32bit_syscall() to calculate + * vm_unmapped_area_info.high_limit. + * + * The #ifdef above doesn't cover the CONFIG_X86_X32_ABI=y case, + * but in this case in_32bit_syscall() -> in_x32_syscall() always + * (falsely) returns true because ->orig_ax == -1. + */ + if (test_thread_flag(TIF_ADDR32)) + ti->status |= TS_COMPAT; + vaddr = get_unmapped_area(NULL, TASK_SIZE - PAGE_SIZE, PAGE_SIZE, 0, 0); + ti->status &= ~TS_COMPAT; + + return vaddr; +} +#endif diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index def83a317a..644b33a8fd 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -1565,10 +1565,16 @@ destroy_macvlan_port: /* the macvlan port may be freed by macvlan_uninit when fail to register. * so we destroy the macvlan port only when it's valid. */ - if (create && macvlan_port_get_rtnl(lowerdev)) { + if (macvlan_port_get_rtnl(lowerdev)) { macvlan_flush_sources(port, vlan); - macvlan_port_destroy(port->dev); + if (create) + macvlan_port_destroy(port->dev); } + /* @dev might have been made visible before an error was detected. + * Make sure to observe an RCU grace period before our caller + * (rtnl_newlink()) frees it. + */ + synchronize_net(); return err; } EXPORT_SYMBOL_GPL(macvlan_common_newlink); diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 2da16b98dd..3f97c4a02c 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -541,6 +541,18 @@ int scsi_check_sense(struct scsi_cmnd *scmd) scsi_report_sense(sdev, &sshdr); + if (sshdr.sense_key == UNIT_ATTENTION) { + /* + * Increment the counters for Power on/Reset or New Media so + * that all ULDs interested in these can see that those have + * happened, even if someone else gets the sense data. + */ + if (sshdr.asc == 0x28) + atomic_inc(&sdev->ua_new_media_ctr); + else if (sshdr.asc == 0x29) + atomic_inc(&sdev->ua_por_ctr); + } + if (scsi_sense_is_deferred(&sshdr)) return NEEDS_RETRY; diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c index ed43e7ee78..fd7ce9c041 100644 --- a/drivers/scsi/st.c +++ b/drivers/scsi/st.c @@ -161,9 +161,11 @@ static const char *st_formats[] = { static int debugging = DEBUG; +/* Setting these non-zero may risk recognizing resets */ #define MAX_RETRIES 0 #define MAX_WRITE_RETRIES 0 #define MAX_READY_RETRIES 0 + #define NO_TAPE NOT_READY #define ST_TIMEOUT (900 * HZ) @@ -362,10 +364,18 @@ static int st_chk_result(struct scsi_tape *STp, struct st_request * SRpnt) { int result = SRpnt->result; u8 scode; + unsigned int ctr; DEB(const char *stp;) char *name = tape_name(STp); struct st_cmdstatus *cmdstatp; + ctr = scsi_get_ua_por_ctr(STp->device); + if (ctr != STp->por_ctr) { + STp->por_ctr = ctr; + STp->pos_unknown = 1; /* ASC => power on / reset */ + st_printk(KERN_WARNING, STp, "Power on/reset recognized."); + } + if (!result) return 0; @@ -418,10 +428,11 @@ static int st_chk_result(struct scsi_tape *STp, struct st_request * SRpnt) if (cmdstatp->have_sense && cmdstatp->sense_hdr.asc == 0 && cmdstatp->sense_hdr.ascq == 0x17) STp->cleaning_req = 1; /* ASC and ASCQ => cleaning requested */ - if (cmdstatp->have_sense && scode == UNIT_ATTENTION && cmdstatp->sense_hdr.asc == 0x29) + if (cmdstatp->have_sense && scode == UNIT_ATTENTION && + cmdstatp->sense_hdr.asc == 0x29 && !STp->pos_unknown) { STp->pos_unknown = 1; /* ASC => power on / reset */ - - STp->pos_unknown |= STp->device->was_reset; + st_printk(KERN_WARNING, STp, "Power on/reset recognized."); + } if (cmdstatp->have_sense && scode == RECOVERED_ERROR @@ -836,6 +847,9 @@ static int flush_buffer(struct scsi_tape *STp, int seek_next) int backspace, result; struct st_partstat *STps; + if (STp->ready != ST_READY) + return 0; + /* * If there was a bus reset, block further access * to this device. @@ -843,8 +857,6 @@ static int flush_buffer(struct scsi_tape *STp, int seek_next) if (STp->pos_unknown) return (-EIO); - if (STp->ready != ST_READY) - return 0; STps = &(STp->ps[STp->partition]); if (STps->rw == ST_WRITING) /* Writing */ return st_flush_write_buffer(STp); @@ -953,7 +965,6 @@ static void reset_state(struct scsi_tape *STp) STp->partition = find_partition(STp); if (STp->partition < 0) STp->partition = 0; - STp->new_partition = STp->partition; } } @@ -970,6 +981,7 @@ static int test_ready(struct scsi_tape *STp, int do_wait) { int attentions, waits, max_wait, scode; int retval = CHKRES_READY, new_session = 0; + unsigned int ctr; unsigned char cmd[MAX_COMMAND_SIZE]; struct st_request *SRpnt = NULL; struct st_cmdstatus *cmdstatp = &STp->buffer->cmdstat; @@ -1026,6 +1038,13 @@ static int test_ready(struct scsi_tape *STp, int do_wait) } } + ctr = scsi_get_ua_new_media_ctr(STp->device); + if (ctr != STp->new_media_ctr) { + STp->new_media_ctr = ctr; + new_session = 1; + DEBC_printk(STp, "New tape session."); + } + retval = (STp->buffer)->syscall_result; if (!retval) retval = new_session ? CHKRES_NEW_SESSION : CHKRES_READY; @@ -2898,7 +2917,6 @@ static int st_int_ioctl(struct scsi_tape *STp, unsigned int cmd_in, unsigned lon timeout = STp->long_timeout * 8; DEBC_printk(STp, "Erasing tape.\n"); - fileno = blkno = at_sm = 0; break; case MTSETBLK: /* Set block length */ case MTSETDENSITY: /* Set tape density */ @@ -2931,14 +2949,17 @@ static int st_int_ioctl(struct scsi_tape *STp, unsigned int cmd_in, unsigned lon if (cmd_in == MTSETDENSITY) { (STp->buffer)->b_data[4] = arg; STp->density_changed = 1; /* At least we tried ;-) */ + STp->changed_density = arg; } else if (cmd_in == SET_DENS_AND_BLK) (STp->buffer)->b_data[4] = arg >> 24; else (STp->buffer)->b_data[4] = STp->density; if (cmd_in == MTSETBLK || cmd_in == SET_DENS_AND_BLK) { ltmp = arg & MT_ST_BLKSIZE_MASK; - if (cmd_in == MTSETBLK) + if (cmd_in == MTSETBLK) { STp->blksize_changed = 1; /* At least we tried ;-) */ + STp->changed_blksize = arg; + } } else ltmp = STp->block_size; (STp->buffer)->b_data[9] = (ltmp >> 16); @@ -3085,7 +3106,9 @@ static int st_int_ioctl(struct scsi_tape *STp, unsigned int cmd_in, unsigned lon cmd_in == MTSETDRVBUFFER || cmd_in == SET_DENS_AND_BLK) { if (cmdstatp->sense_hdr.sense_key == ILLEGAL_REQUEST && - !(STp->use_pf & PF_TESTED)) { + cmdstatp->sense_hdr.asc == 0x24 && + (STp->device)->scsi_level <= SCSI_2 && + !(STp->use_pf & PF_TESTED)) { /* Try the other possible state of Page Format if not already tried */ STp->use_pf = (STp->use_pf ^ USE_PF) | PF_TESTED; @@ -3505,8 +3528,69 @@ static int partition_tape(struct scsi_tape *STp, int size) out: return result; } - +/* + * Handles any extra state needed for ioctls which are not st-specific. + * Called with the scsi_tape lock held, released before return + */ +static long st_common_ioctl(struct scsi_tape *STp, struct st_modedef *STm, + struct file *file, unsigned int cmd_in, + unsigned long arg) +{ + int i, retval = 0; + void __user *p = (void __user *)arg; + + if (!STm->defined) { + retval = -ENXIO; + goto out; + } + + switch (cmd_in) { + case SCSI_IOCTL_GET_IDLUN: + case SCSI_IOCTL_GET_BUS_NUMBER: + case SCSI_IOCTL_GET_PCI: + break; + case SG_IO: + case SCSI_IOCTL_SEND_COMMAND: + case CDROM_SEND_PACKET: + if (!capable(CAP_SYS_RAWIO)) { + retval = -EPERM; + goto out; + } + retval = scsi_cmd_ioctl(STp->disk->queue, STp->disk, + file->f_mode, cmd_in, p); + + if (retval != -ENOTTY) + goto out; + + fallthrough; + default: + if ((i = flush_buffer(STp, 0)) < 0) { + retval = i; + goto out; + } else { /* flush_buffer succeeds */ + if (STp->can_partitions) { + i = switch_partition(STp); + if (i < 0) { + retval = i; + goto out; + } + } + } + } + mutex_unlock(&STp->lock); + + retval = scsi_ioctl(STp->device, cmd_in, p); + if (!retval && cmd_in == SCSI_IOCTL_STOP_UNIT) { + /* unload */ + STp->rew_at_close = 0; + STp->ready = ST_NO_TAPE; + } + return retval; +out: + mutex_unlock(&STp->lock); + return retval; +} /* The ioctl command */ static long st_ioctl(struct file *file, unsigned int cmd_in, unsigned long arg) @@ -3544,6 +3628,15 @@ static long st_ioctl(struct file *file, unsigned int cmd_in, unsigned long arg) if (retval) goto out; + switch (cmd_in) { + case MTIOCPOS: + case MTIOCGET: + case MTIOCTOP: + break; + default: + return st_common_ioctl(STp, STm, file, cmd_in, arg); + } + cmd_type = _IOC_TYPE(cmd_in); cmd_nr = _IOC_NR(cmd_in); @@ -3637,9 +3730,23 @@ static long st_ioctl(struct file *file, unsigned int cmd_in, unsigned long arg) retval = (-EIO); goto out; } - reset_state(STp); - /* remove this when the midlevel properly clears was_reset */ - STp->device->was_reset = 0; + reset_state(STp); /* Clears pos_unknown */ + + /* Fix the device settings after reset, ignore errors */ + if (mtc.mt_op == MTREW || mtc.mt_op == MTSEEK || + mtc.mt_op == MTEOM) { + if (STp->can_partitions) { + /* STp->new_partition contains the + * latest partition set + */ + STp->partition = 0; + switch_partition(STp); + } + if (STp->density_changed) + st_int_ioctl(STp, MTSETDENSITY, STp->changed_density); + if (STp->blksize_changed) + st_int_ioctl(STp, MTSETBLK, STp->changed_blksize); + } } if (mtc.mt_op != MTNOP && mtc.mt_op != MTSETBLK && @@ -3846,33 +3953,7 @@ static long st_ioctl(struct file *file, unsigned int cmd_in, unsigned long arg) i = copy_to_user(p, &mt_pos, sizeof(struct mtpos)); if (i) retval = (-EFAULT); - goto out; } - mutex_unlock(&STp->lock); - switch (cmd_in) { - case SCSI_IOCTL_GET_IDLUN: - case SCSI_IOCTL_GET_BUS_NUMBER: - break; - default: - if ((cmd_in == SG_IO || - cmd_in == SCSI_IOCTL_SEND_COMMAND || - cmd_in == CDROM_SEND_PACKET) && - !capable(CAP_SYS_RAWIO)) - i = -EPERM; - else - i = scsi_cmd_ioctl(STp->disk->queue, STp->disk, - file->f_mode, cmd_in, p); - if (i != -ENOTTY) - return i; - break; - } - retval = scsi_ioctl(STp->device, cmd_in, p); - if (!retval && cmd_in == SCSI_IOCTL_STOP_UNIT) { /* unload */ - STp->rew_at_close = 0; - STp->ready = ST_NO_TAPE; - } - return retval; - out: mutex_unlock(&STp->lock); return retval; @@ -4133,7 +4214,7 @@ static void validate_options(void) */ static int __init st_setup(char *str) { - int i, len, ints[5]; + int i, len, ints[ARRAY_SIZE(parms) + 1]; char *stp; stp = get_options(str, ARRAY_SIZE(ints), ints); @@ -4412,6 +4493,9 @@ static int st_probe(struct device *dev) goto out_idr_remove; } + tpnt->new_media_ctr = scsi_get_ua_new_media_ctr(SDp); + tpnt->por_ctr = scsi_get_ua_por_ctr(SDp); + dev_set_drvdata(dev, tpnt); @@ -4699,6 +4783,24 @@ options_show(struct device *dev, struct device_attribute *attr, char *buf) } static DEVICE_ATTR_RO(options); +/** + * position_lost_in_reset_show - Value 1 indicates that reads, writes, etc. + * are blocked because a device reset has occurred and no operation positioning + * the tape has been issued. + * @dev: struct device + * @attr: attribute structure + * @buf: buffer to return formatted data in + */ +static ssize_t position_lost_in_reset_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct st_modedef *STm = dev_get_drvdata(dev); + struct scsi_tape *STp = STm->tape; + + return sprintf(buf, "%d", STp->pos_unknown); +} +static DEVICE_ATTR_RO(position_lost_in_reset); + /* Support for tape stats */ /** @@ -4883,6 +4985,7 @@ static struct attribute *st_dev_attrs[] = { &dev_attr_default_density.attr, &dev_attr_default_compression.attr, &dev_attr_options.attr, + &dev_attr_position_lost_in_reset.attr, NULL, }; diff --git a/drivers/scsi/st.h b/drivers/scsi/st.h index 355e55f681..729131a9dc 100644 --- a/drivers/scsi/st.h +++ b/drivers/scsi/st.h @@ -168,6 +168,7 @@ struct scsi_tape { unsigned char compression_changed; unsigned char drv_buffer; unsigned char density; + unsigned char changed_density; unsigned char door_locked; unsigned char autorew_dev; /* auto-rewind device */ unsigned char rew_at_close; /* rewind necessary at close */ @@ -175,11 +176,16 @@ struct scsi_tape { unsigned char cleaning_req; /* cleaning requested? */ unsigned char first_tur; /* first TEST UNIT READY */ int block_size; + int changed_blksize; int min_block; int max_block; int recover_count; /* From tape opening */ int recover_reg; /* From last status call */ + /* The saved values of midlevel counters */ + unsigned int new_media_ctr; + unsigned int por_ctr; + #if DEBUG unsigned char write_pending; int nbr_finished; diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile index 3ee3b7de4d..c6257c8e90 100644 --- a/fs/cifs/Makefile +++ b/fs/cifs/Makefile @@ -11,13 +11,13 @@ cifs-y := trace.o cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o \ readdir.o ioctl.o sess.o export.o smb1ops.o unc.o winucase.o \ smb2ops.o smb2maperror.o smb2transport.o \ smb2misc.o smb2pdu.o smb2inode.o smb2file.o cifsacl.o fs_context.o \ - dns_resolve.o + dns_resolve.o namespace.o cifs-$(CONFIG_CIFS_XATTR) += xattr.o cifs-$(CONFIG_CIFS_UPCALL) += cifs_spnego.o -cifs-$(CONFIG_CIFS_DFS_UPCALL) += cifs_dfs_ref.o dfs_cache.o +cifs-$(CONFIG_CIFS_DFS_UPCALL) += dfs_cache.o dfs.o cifs-$(CONFIG_CIFS_SWN_UPCALL) += netlink.o cifs_swn.o diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c index 6aa15832a8..904b0a3574 100644 --- a/fs/cifs/cifs_debug.c +++ b/fs/cifs/cifs_debug.c @@ -134,6 +134,12 @@ static void cifs_debug_tcon(struct seq_file *m, struct cifs_tcon *tcon) if (tcon->need_reconnect) seq_puts(m, "\tDISCONNECTED "); + spin_lock(&tcon->tc_lock); + if (tcon->origin_fullpath) { + seq_printf(m, "\n\tDFS origin fullpath: %s", + tcon->origin_fullpath); + } + spin_unlock(&tcon->tc_lock); seq_putc(m, '\n'); } @@ -398,6 +404,10 @@ skip_rdma: seq_printf(m, "\nIn Send: %d In MaxReq Wait: %d", atomic_read(&server->in_send), atomic_read(&server->num_waiters)); + if (server->leaf_fullpath) { + seq_printf(m, "\nDFS leaf full path: %s", + server->leaf_fullpath); + } seq_printf(m, "\n\n\tSessions: "); i = 0; @@ -443,6 +453,11 @@ skip_rdma: from_kuid(&init_user_ns, ses->linux_uid), from_kuid(&init_user_ns, ses->cred_uid)); + if (ses->dfs_root_ses) { + seq_printf(m, "\n\tDFS root session id: 0x%llx", + ses->dfs_root_ses->Suid); + } + spin_lock(&ses->chan_lock); if (CIFS_CHAN_NEEDS_RECONNECT(ses, 0)) seq_puts(m, "\tPrimary channel: DISCONNECTED "); diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c deleted file mode 100644 index ed89117376..0000000000 --- a/fs/cifs/cifs_dfs_ref.c +++ /dev/null @@ -1,361 +0,0 @@ -/* - * Contains the CIFS DFS referral mounting routines used for handling - * traversal via DFS junction point - * - * Copyright (c) 2007 Igor Mammedov - * Copyright (C) International Business Machines Corp., 2008 - * Author(s): Igor Mammedov (niallain@gmail.com) - * Steve French (sfrench@us.ibm.com) - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include -#include -#include -#include -#include -#include -#include -#include "cifsglob.h" -#include "cifsproto.h" -#include "cifsfs.h" -#include "dns_resolve.h" -#include "cifs_debug.h" -#include "cifs_unicode.h" -#include "dfs_cache.h" -#include "fs_context.h" - -static LIST_HEAD(cifs_dfs_automount_list); - -static void cifs_dfs_expire_automounts(struct work_struct *work); -static DECLARE_DELAYED_WORK(cifs_dfs_automount_task, - cifs_dfs_expire_automounts); -static int cifs_dfs_mountpoint_expiry_timeout = 500 * HZ; - -static void cifs_dfs_expire_automounts(struct work_struct *work) -{ - struct list_head *list = &cifs_dfs_automount_list; - - mark_mounts_for_expiry(list); - if (!list_empty(list)) - schedule_delayed_work(&cifs_dfs_automount_task, - cifs_dfs_mountpoint_expiry_timeout); -} - -void cifs_dfs_release_automount_timer(void) -{ - BUG_ON(!list_empty(&cifs_dfs_automount_list)); - cancel_delayed_work_sync(&cifs_dfs_automount_task); -} - -/** - * cifs_build_devname - build a devicename from a UNC and optional prepath - * @nodename: pointer to UNC string - * @prepath: pointer to prefixpath (or NULL if there isn't one) - * - * Build a new cifs devicename after chasing a DFS referral. Allocate a buffer - * big enough to hold the final thing. Copy the UNC from the nodename, and - * concatenate the prepath onto the end of it if there is one. - * - * Returns pointer to the built string, or a ERR_PTR. Caller is responsible - * for freeing the returned string. - */ -static char * -cifs_build_devname(char *nodename, const char *prepath) -{ - size_t pplen; - size_t unclen; - char *dev; - char *pos; - - /* skip over any preceding delimiters */ - nodename += strspn(nodename, "\\"); - if (!*nodename) - return ERR_PTR(-EINVAL); - - /* get length of UNC and set pos to last char */ - unclen = strlen(nodename); - pos = nodename + unclen - 1; - - /* trim off any trailing delimiters */ - while (*pos == '\\') { - --pos; - --unclen; - } - - /* allocate a buffer: - * +2 for preceding "//" - * +1 for delimiter between UNC and prepath - * +1 for trailing NULL - */ - pplen = prepath ? strlen(prepath) : 0; - dev = kmalloc(2 + unclen + 1 + pplen + 1, GFP_KERNEL); - if (!dev) - return ERR_PTR(-ENOMEM); - - pos = dev; - /* add the initial "//" */ - *pos = '/'; - ++pos; - *pos = '/'; - ++pos; - - /* copy in the UNC portion from referral */ - memcpy(pos, nodename, unclen); - pos += unclen; - - /* copy the prefixpath remainder (if there is one) */ - if (pplen) { - *pos = '/'; - ++pos; - memcpy(pos, prepath, pplen); - pos += pplen; - } - - /* NULL terminator */ - *pos = '\0'; - - convert_delimiter(dev, '/'); - return dev; -} - - -/** - * cifs_compose_mount_options - creates mount options for referral - * @sb_mountdata: parent/root DFS mount options (template) - * @fullpath: full path in UNC format - * @ref: optional server's referral - * @devname: return the built cifs device name if passed pointer not NULL - * creates mount options for submount based on template options sb_mountdata - * and replacing unc,ip,prefixpath options with ones we've got form ref_unc. - * - * Returns: pointer to new mount options or ERR_PTR. - * Caller is responsible for freeing returned value if it is not error. - */ -char *cifs_compose_mount_options(const char *sb_mountdata, - const char *fullpath, - const struct dfs_info3_param *ref, - char **devname) -{ - int rc; - char *name; - char *mountdata = NULL; - const char *prepath = NULL; - int md_len; - const char *start, *end; - char *srvIP = NULL; - char sep = ','; - - if (sb_mountdata == NULL) - return ERR_PTR(-EINVAL); - - if (ref) { - if (WARN_ON_ONCE(!ref->node_name || ref->path_consumed < 0)) - return ERR_PTR(-EINVAL); - - if (strlen(fullpath) - ref->path_consumed) { - prepath = fullpath + ref->path_consumed; - /* skip initial delimiter */ - if (*prepath == '/' || *prepath == '\\') - prepath++; - } - - name = cifs_build_devname(ref->node_name, prepath); - if (IS_ERR(name)) { - rc = PTR_ERR(name); - name = NULL; - goto compose_mount_options_err; - } - } else { - name = cifs_build_devname((char *)fullpath, NULL); - if (IS_ERR(name)) { - rc = PTR_ERR(name); - name = NULL; - goto compose_mount_options_err; - } - } - - rc = dns_resolve_server_name_to_ip(name, &srvIP, NULL); - if (rc < 0) { - cifs_dbg(FYI, "%s: Failed to resolve server part of %s to IP: %d\n", - __func__, name, rc); - goto compose_mount_options_err; - } - - /* - * In most cases, we'll be building a shorter string than the original, - * but we do have to assume that the address in the ip= option may be - * much longer than the original. Add the max length of an address - * string to the length of the original string to allow for worst case. - */ - md_len = strlen(sb_mountdata) + INET6_ADDRSTRLEN; - mountdata = kzalloc(md_len + sizeof("ip=") + 1, GFP_KERNEL); - if (mountdata == NULL) { - rc = -ENOMEM; - goto compose_mount_options_err; - } - - /* copy all options except of unc,ip,prefixpath,cruid */ - start = end = sb_mountdata; - for (;;) { - end = strchrnul(end, sep); - while (*end && end[0] == sep && end[1] == sep) - end += 2; - - if (strncasecmp(start, "prefixpath=", 11) == 0 || - strncasecmp(start, "cruid=", 6) == 0 || - strncasecmp(start, "unc=", 4) == 0 || - strncasecmp(start, "ip=", 3) == 0) - goto next_opt; - - if (*mountdata) - strncat(mountdata, &sep, 1); - strncat(mountdata, start, end - start); -next_opt: - if (!*end) - break; - start = ++end; - } - mountdata[md_len] = '\0'; - - /* copy new IP and ref share name */ - if (*mountdata) - strncat(mountdata, &sep, 1); - strcat(mountdata, "ip="); - strcat(mountdata, srvIP); - - if (devname) - *devname = name; - else - kfree(name); - - cifs_dbg(FYI, "%s: parent mountdata: %s\n", __func__, sb_mountdata); - cifs_dbg(FYI, "%s: submount mountdata: %s\n", __func__, mountdata); - -compose_mount_options_out: - kfree(srvIP); - return mountdata; - -compose_mount_options_err: - kfree(mountdata); - mountdata = ERR_PTR(rc); - kfree(name); - goto compose_mount_options_out; -} - -/** - * cifs_dfs_do_mount - mounts specified path using DFS full path - * - * Always pass down @fullpath to smb3_do_mount() so we can use the root server - * to perform failover in case we failed to connect to the first target in the - * referral. - * - * @cifs_sb: parent/root superblock - * @fullpath: full path in UNC format - */ -static struct vfsmount *cifs_dfs_do_mount(struct dentry *mntpt, - struct cifs_sb_info *cifs_sb, - const char *fullpath) -{ - struct vfsmount *mnt; - char *mountdata; - char *devname; - - devname = kstrdup(fullpath, GFP_KERNEL); - if (!devname) - return ERR_PTR(-ENOMEM); - - convert_delimiter(devname, '/'); - - /* TODO: change to call fs_context_for_mount(), fill in context directly, call fc_mount */ - - /* See afs_mntpt_do_automount in fs/afs/mntpt.c for an example */ - - /* strip first '\' from fullpath */ - mountdata = cifs_compose_mount_options(cifs_sb->ctx->mount_options, - fullpath + 1, NULL, NULL); - if (IS_ERR(mountdata)) { - kfree(devname); - return (struct vfsmount *)mountdata; - } - - mnt = vfs_submount(mntpt, &cifs_fs_type, devname, mountdata); - kfree(mountdata); - kfree(devname); - return mnt; -} - -/* - * Create a vfsmount that we can automount - */ -static struct vfsmount *cifs_dfs_do_automount(struct dentry *mntpt) -{ - struct cifs_sb_info *cifs_sb; - void *page; - char *full_path; - struct vfsmount *mnt; - - cifs_dbg(FYI, "in %s\n", __func__); - BUG_ON(IS_ROOT(mntpt)); - - /* - * The MSDFS spec states that paths in DFS referral requests and - * responses must be prefixed by a single '\' character instead of - * the double backslashes usually used in the UNC. This function - * gives us the latter, so we must adjust the result. - */ - cifs_sb = CIFS_SB(mntpt->d_sb); - if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS) { - mnt = ERR_PTR(-EREMOTE); - goto cdda_exit; - } - - page = alloc_dentry_path(); - /* always use tree name prefix */ - full_path = build_path_from_dentry_optional_prefix(mntpt, page, true); - if (IS_ERR(full_path)) { - mnt = ERR_CAST(full_path); - goto free_full_path; - } - - convert_delimiter(full_path, '\\'); - cifs_dbg(FYI, "%s: full_path: %s\n", __func__, full_path); - - mnt = cifs_dfs_do_mount(mntpt, cifs_sb, full_path); - cifs_dbg(FYI, "%s: cifs_dfs_do_mount:%s , mnt:%p\n", __func__, full_path + 1, mnt); - -free_full_path: - free_dentry_path(page); -cdda_exit: - cifs_dbg(FYI, "leaving %s\n" , __func__); - return mnt; -} - -/* - * Attempt to automount the referral - */ -struct vfsmount *cifs_dfs_d_automount(struct path *path) -{ - struct vfsmount *newmnt; - - cifs_dbg(FYI, "in %s\n", __func__); - - newmnt = cifs_dfs_do_automount(path->dentry); - if (IS_ERR(newmnt)) { - cifs_dbg(FYI, "leaving %s [automount failed]\n" , __func__); - return newmnt; - } - - mntget(newmnt); /* prevent immediate expiration */ - mnt_set_expiry(newmnt, &cifs_dfs_automount_list); - schedule_delayed_work(&cifs_dfs_automount_task, - cifs_dfs_mountpoint_expiry_timeout); - cifs_dbg(FYI, "leaving %s [ok]\n" , __func__); - return newmnt; -} - -const struct inode_operations cifs_dfs_referral_inode_operations = { -}; diff --git a/fs/cifs/cifs_fs_sb.h b/fs/cifs/cifs_fs_sb.h index 05ee26f75b..67008db5da 100644 --- a/fs/cifs/cifs_fs_sb.h +++ b/fs/cifs/cifs_fs_sb.h @@ -60,8 +60,6 @@ struct cifs_sb_info { /* only used when CIFS_MOUNT_USE_PREFIX_PATH is set */ char *prepath; - /* randomly generated 128-bit number for indexing dfs mount groups in referral cache */ - uuid_t dfs_mount_id; /* * Indicate whether serverino option was turned off later * (cifs_autodisable_serverino) in order to match new mounts. diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c index c7d8c4cfdf..6678d9aeff 100644 --- a/fs/cifs/cifsencrypt.c +++ b/fs/cifs/cifsencrypt.c @@ -384,59 +384,72 @@ build_avpair_blob(struct cifs_ses *ses, const struct nls_table *nls_cp) return 0; } -/* Server has provided av pairs/target info in the type 2 challenge - * packet and we have plucked it and stored within smb session. - * We parse that blob here to find netbios domain name to be used - * as part of ntlmv2 authentication (in Target String), if not already - * specified on the command line. - * If this function returns without any error but without fetching - * domain name, authentication may fail against some server but - * may not fail against other (those who are not very particular - * about target string i.e. for some, just user name might suffice. - */ -static int -find_domain_name(struct cifs_ses *ses, const struct nls_table *nls_cp) +#define AV_TYPE(av) (le16_to_cpu(av->type)) +#define AV_LEN(av) (le16_to_cpu(av->length)) +#define AV_DATA_PTR(av) ((void *)av->data) + +#define av_for_each_entry(ses, av) \ + for (av = NULL; (av = find_next_av(ses, av));) + +static struct ntlmssp2_name *find_next_av(struct cifs_ses *ses, + struct ntlmssp2_name *av) { - unsigned int attrsize; - unsigned int type; - unsigned int onesize = sizeof(struct ntlmssp2_name); - unsigned char *blobptr; - unsigned char *blobend; - struct ntlmssp2_name *attrptr; + u16 len; + u8 *end; - if (!ses->auth_key.len || !ses->auth_key.response) - return 0; - - blobptr = ses->auth_key.response; - blobend = blobptr + ses->auth_key.len; - - while (blobptr + onesize < blobend) { - attrptr = (struct ntlmssp2_name *) blobptr; - type = le16_to_cpu(attrptr->type); - if (type == NTLMSSP_AV_EOL) - break; - blobptr += 2; /* advance attr type */ - attrsize = le16_to_cpu(attrptr->length); - blobptr += 2; /* advance attr size */ - if (blobptr + attrsize > blobend) - break; - if (type == NTLMSSP_AV_NB_DOMAIN_NAME) { - if (!attrsize || attrsize >= CIFS_MAX_DOMAINNAME_LEN) - break; - if (!ses->domainName) { - ses->domainName = - kmalloc(attrsize + 1, GFP_KERNEL); - if (!ses->domainName) - return -ENOMEM; - cifs_from_utf16(ses->domainName, - (__le16 *)blobptr, attrsize, attrsize, - nls_cp, NO_MAP_UNI_RSVD); - break; - } - } - blobptr += attrsize; /* advance attr value */ + end = (u8 *)ses->auth_key.response + ses->auth_key.len; + if (!av) { + if (unlikely(!ses->auth_key.response || !ses->auth_key.len)) + return NULL; + av = (void *)ses->auth_key.response; + } else { + av = (void *)((u8 *)av + sizeof(*av) + AV_LEN(av)); } + if ((u8 *)av + sizeof(*av) > end) + return NULL; + + len = AV_LEN(av); + if (AV_TYPE(av) == NTLMSSP_AV_EOL) + return NULL; + if ((u8 *)av + sizeof(*av) + len > end) + return NULL; + return av; +} + +/* + * Check if server has provided av pair of @type in the NTLMSSP + * CHALLENGE_MESSAGE blob. + */ +static int find_av_name(struct cifs_ses *ses, u16 type, char **name, u16 maxlen) +{ + const struct nls_table *nlsc = ses->local_nls; + struct ntlmssp2_name *av; + u16 len, nlen; + + if (*name) + return 0; + + av_for_each_entry(ses, av) { + len = AV_LEN(av); + if (AV_TYPE(av) != type || !len) + continue; + if (!IS_ALIGNED(len, sizeof(__le16))) { + cifs_dbg(VFS | ONCE, "%s: bad length(%u) for type %u\n", + __func__, len, type); + continue; + } + nlen = len / sizeof(__le16); + if (nlen <= maxlen) { + ++nlen; + *name = kmalloc(nlen, GFP_KERNEL); + if (!*name) + return -ENOMEM; + cifs_from_utf16(*name, AV_DATA_PTR(av), nlen, + len, nlsc, NO_MAP_UNI_RSVD); + break; + } + } return 0; } @@ -446,40 +459,16 @@ find_domain_name(struct cifs_ses *ses, const struct nls_table *nls_cp) * as part of ntlmv2 authentication (or local current time as * default in case of failure) */ -static __le64 -find_timestamp(struct cifs_ses *ses) +static __le64 find_timestamp(struct cifs_ses *ses) { - unsigned int attrsize; - unsigned int type; - unsigned int onesize = sizeof(struct ntlmssp2_name); - unsigned char *blobptr; - unsigned char *blobend; - struct ntlmssp2_name *attrptr; + struct ntlmssp2_name *av; struct timespec64 ts; - if (!ses->auth_key.len || !ses->auth_key.response) - return 0; - - blobptr = ses->auth_key.response; - blobend = blobptr + ses->auth_key.len; - - while (blobptr + onesize < blobend) { - attrptr = (struct ntlmssp2_name *) blobptr; - type = le16_to_cpu(attrptr->type); - if (type == NTLMSSP_AV_EOL) - break; - blobptr += 2; /* advance attr type */ - attrsize = le16_to_cpu(attrptr->length); - blobptr += 2; /* advance attr size */ - if (blobptr + attrsize > blobend) - break; - if (type == NTLMSSP_AV_TIMESTAMP) { - if (attrsize == sizeof(u64)) - return *((__le64 *)blobptr); - } - blobptr += attrsize; /* advance attr value */ + av_for_each_entry(ses, av) { + if (AV_TYPE(av) == NTLMSSP_AV_TIMESTAMP && + AV_LEN(av) == sizeof(u64)) + return *((__le64 *)AV_DATA_PTR(av)); } - ktime_get_real_ts64(&ts); return cpu_to_le64(cifs_UnixTimeToNT(ts)); } @@ -660,16 +649,29 @@ setup_ntlmv2_rsp(struct cifs_ses *ses, const struct nls_table *nls_cp) if (ses->server->negflavor == CIFS_NEGFLAVOR_EXTENDED) { if (!ses->domainName) { if (ses->domainAuto) { - rc = find_domain_name(ses, nls_cp); - if (rc) { - cifs_dbg(VFS, "error %d finding domain name\n", - rc); + /* + * Domain (workgroup) hasn't been specified in + * mount options, so try to find it in + * CHALLENGE_MESSAGE message and then use it as + * part of NTLMv2 authentication. + */ + rc = find_av_name(ses, NTLMSSP_AV_NB_DOMAIN_NAME, + &ses->domainName, + CIFS_MAX_DOMAINNAME_LEN); + if (rc) goto setup_ntlmv2_rsp_ret; - } } else { ses->domainName = kstrdup("", GFP_KERNEL); + if (!ses->domainName) { + rc = -ENOMEM; + goto setup_ntlmv2_rsp_ret; + } } } + rc = find_av_name(ses, NTLMSSP_AV_DNS_DOMAIN_NAME, + &ses->dns_dom, CIFS_MAX_DOMAINNAME_LEN); + if (rc) + goto setup_ntlmv2_rsp_ret; } else { rc = build_avpair_blob(ses, nls_cp); if (rc) { diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 223f909b0d..dbd2bd1fd3 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -713,13 +713,16 @@ static void cifs_umount_begin(struct super_block *sb) spin_lock(&tcon->tc_lock); if ((tcon->tc_count > 1) || (tcon->status == TID_EXITING)) { /* we have other mounts to same share or we have - already tried to force umount this and woken up + already tried to umount this and woken up all waiting network requests, nothing to do */ spin_unlock(&tcon->tc_lock); spin_unlock(&cifs_tcp_ses_lock); return; - } else if (tcon->tc_count == 1) - tcon->status = TID_EXITING; + } + /* + * can not set tcon->status to TID_EXITING yet since we don't know if umount -f will + * fail later (e.g. due to open files). TID_EXITING will be set just before tdis req sent + */ spin_unlock(&tcon->tc_lock); spin_unlock(&cifs_tcp_ses_lock); @@ -868,12 +871,6 @@ cifs_smb3_do_mount(struct file_system_type *fs_type, goto out; } - rc = cifs_setup_volume_info(cifs_sb->ctx, NULL, NULL); - if (rc) { - root = ERR_PTR(rc); - goto out; - } - rc = cifs_setup_cifs_sb(cifs_sb); if (rc) { root = ERR_PTR(rc); @@ -1720,7 +1717,7 @@ exit_cifs(void) cifs_dbg(NOISY, "exit_smb3\n"); unregister_filesystem(&cifs_fs_type); unregister_filesystem(&smb3_fs_type); - cifs_dfs_release_automount_timer(); + cifs_release_automount_timer(); exit_cifs_idmap(); #ifdef CONFIG_CIFS_SWN_UPCALL cifs_genl_exit(); diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index 1c7028ff1f..1f50d2b349 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -77,7 +77,7 @@ extern int cifs_fiemap(struct inode *, struct fiemap_extent_info *, u64 start, extern const struct inode_operations cifs_file_inode_ops; extern const struct inode_operations cifs_symlink_inode_ops; -extern const struct inode_operations cifs_dfs_referral_inode_operations; +extern const struct inode_operations cifs_namespace_inode_operations; /* Functions related to files and directories */ @@ -111,11 +111,7 @@ extern int cifs_readdir(struct file *file, struct dir_context *ctx); extern const struct dentry_operations cifs_dentry_ops; extern const struct dentry_operations cifs_ci_dentry_ops; -#ifdef CONFIG_CIFS_DFS_UPCALL -extern struct vfsmount *cifs_dfs_d_automount(struct path *path); -#else -#define cifs_dfs_d_automount NULL -#endif +extern struct vfsmount *cifs_d_automount(struct path *path); /* Functions related to symlinks */ extern const char *cifs_get_link(struct dentry *, struct inode *, diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index d3acbd984b..6d4493723b 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -19,6 +19,7 @@ #include #include #include +#include #include "cifs_fs_sb.h" #include "cifsacl.h" #include @@ -105,6 +106,8 @@ #define CIFS_MAX_WORKSTATION_LEN (__NEW_UTS_LEN + 1) /* reasonable max for client */ +#define CIFS_DFS_ROOT_SES(ses) ((ses)->dfs_root_ses ?: (ses)) + /* * CIFS vfs client Status information (based on what we know.) */ @@ -737,22 +740,13 @@ struct TCP_Server_Info { bool use_swn_dstaddr; struct sockaddr_storage swn_dstaddr; #endif -#ifdef CONFIG_CIFS_DFS_UPCALL - bool is_dfs_conn; /* if a dfs connection */ - struct mutex refpath_lock; /* protects leaf_fullpath */ /* - * Canonical DFS full paths that were used to chase referrals in mount and reconnect. - * - * origin_fullpath: first or original referral path - * leaf_fullpath: last referral path (might be changed due to nested links in reconnect) - * - * current_fullpath: pointer to either origin_fullpath or leaf_fullpath - * NOTE: cannot be accessed outside cifs_reconnect() and smb2_reconnect() - * - * format: \\HOST\SHARE\[OPTIONAL PATH] + * Canonical DFS referral path used in cifs_reconnect() for failover as + * well as in DFS cache refresher. */ - char *origin_fullpath, *leaf_fullpath, *current_fullpath; -#endif + char *leaf_fullpath; + bool dfs_conn:1; + char dns_dom[CIFS_MAX_DOMAINNAME_LEN + 1]; }; static inline void cifs_server_lock(struct TCP_Server_Info *server) @@ -970,43 +964,6 @@ release_iface(struct kref *ref) kfree(iface); } -/* - * compare two interfaces a and b - * return 0 if everything matches. - * return 1 if a has higher link speed, or rdma capable, or rss capable - * return -1 otherwise. - */ -static inline int -iface_cmp(struct cifs_server_iface *a, struct cifs_server_iface *b) -{ - int cmp_ret = 0; - - WARN_ON(!a || !b); - if (a->speed == b->speed) { - if (a->rdma_capable == b->rdma_capable) { - if (a->rss_capable == b->rss_capable) { - cmp_ret = memcmp(&a->sockaddr, &b->sockaddr, - sizeof(a->sockaddr)); - if (!cmp_ret) - return 0; - else if (cmp_ret > 0) - return 1; - else - return -1; - } else if (a->rss_capable > b->rss_capable) - return 1; - else - return -1; - } else if (a->rdma_capable > b->rdma_capable) - return 1; - else - return -1; - } else if (a->speed > b->speed) - return 1; - else - return -1; -} - struct cifs_chan { unsigned int in_reconnect : 1; /* if session setup in progress for this channel */ struct TCP_Server_Info *server; @@ -1021,6 +978,7 @@ struct cifs_ses { struct list_head smb_ses_list; struct list_head rlist; /* reconnect list */ struct list_head tcon_list; + struct list_head dlist; /* dfs list */ struct cifs_tcon *tcon_ipc; spinlock_t ses_lock; /* protect anything here that is not protected */ struct mutex session_mutex; @@ -1102,6 +1060,9 @@ struct cifs_ses { */ unsigned long chans_need_reconnect; /* ========= end: protected by chan_lock ======== */ + struct cifs_ses *dfs_root_ses; + struct nls_table *local_nls; + char *dns_dom; /* FQDN of the domain */ }; static inline bool @@ -1218,9 +1179,11 @@ struct cifs_tcon { struct cached_fid crfid; /* Cached root fid */ /* BB add field for back pointer to sb struct(s)? */ #ifdef CONFIG_CIFS_DFS_UPCALL - struct list_head ulist; /* cache update list */ + struct delayed_work dfs_cache_work; + struct list_head dfs_ses_list; #endif struct delayed_work query_interfaces; /* query interfaces workqueue job */ + char *origin_fullpath; /* canonical copy of smb3_fs_context::source */ }; /* @@ -1754,11 +1717,32 @@ struct cifs_fattr { u32 cf_cifstag; }; +struct cifs_mount_ctx { + struct cifs_sb_info *cifs_sb; + struct smb3_fs_context *fs_ctx; + unsigned int xid; + struct TCP_Server_Info *server; + struct cifs_ses *ses; + struct cifs_tcon *tcon; +}; + +static inline void __free_dfs_info_param(struct dfs_info3_param *param) +{ + kfree(param->path_name); + kfree(param->node_name); +} + static inline void free_dfs_info_param(struct dfs_info3_param *param) +{ + if (param) + __free_dfs_info_param(param); +} + +static inline void zfree_dfs_info_param(struct dfs_info3_param *param) { if (param) { - kfree(param->path_name); - kfree(param->node_name); + __free_dfs_info_param(param); + memset(param, 0, sizeof(*param)); } } @@ -2208,4 +2192,24 @@ static inline struct scatterlist *cifs_sg_set_buf(struct scatterlist *sg, return sg; } +static inline bool cifs_netbios_name(const char *name, size_t namelen) +{ + bool ret = false; + size_t i; + + if (namelen >= 1 && namelen <= RFC1001_NAME_LEN) { + for (i = 0; i < namelen; i++) { + const unsigned char c = name[i]; + + if (c == '\\' || c == '/' || c == ':' || c == '*' || + c == '?' || c == '"' || c == '<' || c == '>' || + c == '|' || c == '.') + return false; + if (!ret && isalpha(c)) + ret = true; + } + } + return ret; +} + #endif /* _CIFS_GLOB_H */ diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h index 03e355b677..ba4f4a6efb 100644 --- a/fs/cifs/cifspdu.h +++ b/fs/cifs/cifspdu.h @@ -688,7 +688,7 @@ typedef union smb_com_session_setup_andx { struct ntlmssp2_name { __le16 type; __le16 length; -/* char name[length]; */ + __u8 data[]; } __attribute__((packed)); struct ntlmv2_resp { diff --git a/fs/cifs/cifsproto.h b/fs/cifs/cifsproto.h index a88cc26bd0..9e45861e64 100644 --- a/fs/cifs/cifsproto.h +++ b/fs/cifs/cifsproto.h @@ -8,6 +8,7 @@ #ifndef _CIFSPROTO_H #define _CIFSPROTO_H #include +#include #include "trace.h" #ifdef CONFIG_CIFS_DFS_UPCALL #include "dfs_cache.h" @@ -57,6 +58,9 @@ extern void exit_cifs_idmap(void); extern int init_cifs_spnego(void); extern void exit_cifs_spnego(void); extern const char *build_path_from_dentry(struct dentry *, void *); +char *__build_path_from_dentry_optional_prefix(struct dentry *direntry, void *page, + const char *tree, int tree_len, + bool prefix); extern char *build_path_from_dentry_optional_prefix(struct dentry *direntry, void *page, bool prefix); static inline void *alloc_dentry_path(void) @@ -75,16 +79,17 @@ extern char *cifs_build_path_to_root(struct smb3_fs_context *ctx, struct cifs_tcon *tcon, int add_treename); extern char *build_wildcard_path_from_dentry(struct dentry *direntry); -extern char *cifs_compose_mount_options(const char *sb_mountdata, - const char *fullpath, const struct dfs_info3_param *ref, - char **devname); +char *cifs_build_devname(char *nodename, const char *prepath); extern void delete_mid(struct mid_q_entry *mid); void __release_mid(struct kref *refcount); extern void cifs_wake_up_task(struct mid_q_entry *mid); extern int cifs_handle_standard(struct TCP_Server_Info *server, struct mid_q_entry *mid); +extern char *smb3_fs_context_fullpath(const struct smb3_fs_context *ctx, + char dirsep); extern int smb3_parse_devname(const char *devname, struct smb3_fs_context *ctx); extern int smb3_parse_opt(const char *options, const char *key, char **val); +extern int cifs_ipaddr_cmp(struct sockaddr *srcaddr, struct sockaddr *rhs); extern bool cifs_match_ipaddr(struct sockaddr *srcaddr, struct sockaddr *rhs); extern int cifs_discard_remaining_data(struct TCP_Server_Info *server); extern int cifs_call_async(struct TCP_Server_Info *server, @@ -241,6 +246,10 @@ extern int cifs_read_page_from_socket(struct TCP_Server_Info *server, unsigned int page_offset, unsigned int to_read); extern int cifs_setup_cifs_sb(struct cifs_sb_info *cifs_sb); +void cifs_mount_put_conns(struct cifs_mount_ctx *mnt_ctx); +int cifs_mount_get_session(struct cifs_mount_ctx *mnt_ctx); +int cifs_is_path_remote(struct cifs_mount_ctx *mnt_ctx); +int cifs_mount_get_tcon(struct cifs_mount_ctx *mnt_ctx); extern int cifs_match_super(struct super_block *, void *); extern int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx); extern void cifs_umount(struct cifs_sb_info *); @@ -265,11 +274,7 @@ extern void cifs_put_tcp_session(struct TCP_Server_Info *server, int from_reconnect); extern void cifs_put_tcon(struct cifs_tcon *tcon); -#if IS_ENABLED(CONFIG_CIFS_DFS_UPCALL) -extern void cifs_dfs_release_automount_timer(void); -#else /* ! IS_ENABLED(CONFIG_CIFS_DFS_UPCALL) */ -#define cifs_dfs_release_automount_timer() do { } while (0) -#endif /* ! IS_ENABLED(CONFIG_CIFS_DFS_UPCALL) */ +extern void cifs_release_automount_timer(void); void cifs_proc_init(void); void cifs_proc_clean(void); @@ -278,8 +283,7 @@ extern void cifs_move_llist(struct list_head *source, struct list_head *dest); extern void cifs_free_llist(struct list_head *llist); extern void cifs_del_lock_waiters(struct cifsLockInfo *lock); -extern int cifs_tree_connect(const unsigned int xid, struct cifs_tcon *tcon, - const struct nls_table *nlsc); +int cifs_tree_connect(const unsigned int xid, struct cifs_tcon *tcon); extern int cifs_negotiate_protocol(const unsigned int xid, struct cifs_ses *ses, @@ -551,13 +555,12 @@ extern int E_md4hash(const unsigned char *passwd, unsigned char *p16, extern int SMBencrypt(unsigned char *passwd, const unsigned char *c8, unsigned char *p24); -extern int -cifs_setup_volume_info(struct smb3_fs_context *ctx, const char *mntopts, const char *devname); - extern struct TCP_Server_Info * cifs_find_tcp_session(struct smb3_fs_context *ctx); -extern void cifs_put_smb_ses(struct cifs_ses *ses); +struct cifs_tcon *cifs_setup_ipc(struct cifs_ses *ses, bool seal); + +void __cifs_put_smb_ses(struct cifs_ses *ses); extern struct cifs_ses * cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx); @@ -641,7 +644,7 @@ int smb2_parse_query_directory(struct cifs_tcon *tcon, struct kvec *rsp_iov, int resp_buftype, struct cifs_search_info *srch_inf); -struct super_block *cifs_get_tcp_super(struct TCP_Server_Info *server); +struct super_block *cifs_get_dfs_tcon_super(struct cifs_tcon *tcon); void cifs_put_tcp_super(struct super_block *sb); int cifs_update_super_prepath(struct cifs_sb_info *cifs_sb, char *prefix); char *extract_hostname(const char *unc); @@ -658,7 +661,7 @@ static inline int get_dfs_path(const unsigned int xid, struct cifs_ses *ses, } int match_target_ip(struct TCP_Server_Info *server, - const char *share, size_t share_len, + const char *host, size_t hostlen, bool *result); #endif @@ -679,4 +682,37 @@ static inline void release_mid(struct mid_q_entry *mid) kref_put(&mid->refcount, __release_mid); } +static inline void cifs_put_smb_ses(struct cifs_ses *ses) +{ + __cifs_put_smb_ses(ses); +} + +/* Get an active reference of @ses and its children. + * + * NOTE: make sure to call this function when incrementing reference count of + * @ses to ensure that any DFS root session attached to it (@ses->dfs_root_ses) + * will also get its reference count incremented. + * + * cifs_put_smb_ses() will put all references, so call it when you're done. + */ +static inline void cifs_smb_ses_inc_refcount(struct cifs_ses *ses) +{ + lockdep_assert_held(&cifs_tcp_ses_lock); + ses->ses_count++; +} + +static inline bool dfs_src_pathname_equal(const char *s1, const char *s2) +{ + if (strlen(s1) != strlen(s2)) + return false; + for (; *s1; s1++, s2++) { + if (*s1 == '/' || *s1 == '\\') { + if (*s2 != '/' && *s2 != '\\') + return false; + } else if (tolower(*s1) != tolower(*s2)) + return false; + } + return true; +} + #endif /* _CIFSPROTO_H */ diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index 323d060676..e912d44732 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -90,13 +90,16 @@ cifs_mark_open_files_invalid(struct cifs_tcon *tcon) struct list_head *tmp1; /* only send once per connect */ - spin_lock(&tcon->ses->ses_lock); - if ((tcon->ses->ses_status != SES_GOOD) || (tcon->status != TID_NEED_RECON)) { - spin_unlock(&tcon->ses->ses_lock); + spin_lock(&tcon->tc_lock); + if (tcon->need_reconnect) + tcon->status = TID_NEED_RECON; + + if (tcon->status != TID_NEED_RECON) { + spin_unlock(&tcon->tc_lock); return; } tcon->status = TID_IN_FILES_INVALIDATE; - spin_unlock(&tcon->ses->ses_lock); + spin_unlock(&tcon->tc_lock); /* list all files open on tree connection and mark them invalid */ spin_lock(&tcon->open_file_lock); @@ -129,10 +132,9 @@ cifs_mark_open_files_invalid(struct cifs_tcon *tcon) static int cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command) { - int rc; - struct cifs_ses *ses; struct TCP_Server_Info *server; - struct nls_table *nls_codepage = NULL; + struct cifs_ses *ses; + int rc; /* * SMBs NegProt, SessSetup, uLogoff do not have tcon yet so check for @@ -147,13 +149,11 @@ cifs_reconnect_tcon(struct cifs_tcon *tcon, int smb_command) /* * only tree disconnect, open, and write, (and ulogoff which does not - * have tcon) are allowed as we start force umount + * have tcon) are allowed as we start umount */ spin_lock(&tcon->tc_lock); if (tcon->status == TID_EXITING) { - if (smb_command != SMB_COM_WRITE_ANDX && - smb_command != SMB_COM_OPEN_ANDX && - smb_command != SMB_COM_TREE_DISCONNECT) { + if (smb_command != SMB_COM_TREE_DISCONNECT) { spin_unlock(&tcon->tc_lock); cifs_dbg(FYI, "can not send cmd %d while umounting\n", smb_command); @@ -192,8 +192,6 @@ again: } spin_unlock(&server->srv_lock); - nls_codepage = load_nls_default(); - /* * need to prevent multiple threads trying to simultaneously * reconnect the same SMB session @@ -217,7 +215,7 @@ again: rc = cifs_negotiate_protocol(0, ses, server); if (!rc) - rc = cifs_setup_session(0, ses, server, nls_codepage); + rc = cifs_setup_session(0, ses, server, ses->local_nls); /* do we need to reconnect tcon? */ if (rc || !tcon->need_reconnect) { @@ -227,7 +225,7 @@ again: skip_sess_setup: cifs_mark_open_files_invalid(tcon); - rc = cifs_tree_connect(0, tcon, nls_codepage); + rc = cifs_tree_connect(0, tcon); mutex_unlock(&ses->session_mutex); cifs_dbg(FYI, "reconnect tcon rc = %d\n", rc); @@ -263,7 +261,6 @@ out: rc = -EAGAIN; } - unload_nls(nls_codepage); return rc; } @@ -4882,8 +4879,8 @@ getDFSRetry: * CIFSGetDFSRefer() may be called from cifs_reconnect_tcon() and thus * causing an infinite recursion. */ - rc = smb_init_no_reconnect(SMB_COM_TRANSACTION2, 15, ses->tcon_ipc, - (void **)&pSMB, (void **)&pSMBr); + rc = smb_init(SMB_COM_TRANSACTION2, 15, ses->tcon_ipc, + (void **)&pSMB, (void **)&pSMBr); if (rc) return rc; diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 987db14468..ca4e7f4785 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -45,6 +45,7 @@ #include "smbdirect.h" #include "dns_resolve.h" #ifdef CONFIG_CIFS_DFS_UPCALL +#include "dfs.h" #include "dfs_cache.h" #endif #include "fs_context.h" @@ -60,20 +61,6 @@ extern bool disable_legacy_dialects; /* Drop the connection to not overload the server */ #define MAX_STATUS_IO_TIMEOUT 5 -struct mount_ctx { - struct cifs_sb_info *cifs_sb; - struct smb3_fs_context *fs_ctx; - unsigned int xid; - struct TCP_Server_Info *server; - struct cifs_ses *ses; - struct cifs_tcon *tcon; -#ifdef CONFIG_CIFS_DFS_UPCALL - struct cifs_ses *root_ses; - uuid_t mount_id; - char *origin_fullpath, *leaf_fullpath; -#endif -}; - static int ip_connect(struct TCP_Server_Info *server); static int generic_ip_connect(struct TCP_Server_Info *server); static void tlink_rb_insert(struct rb_root *root, struct tcon_link *new_tlink); @@ -87,9 +74,8 @@ static void cifs_prune_tlinks(struct work_struct *work); */ static int reconn_set_ipaddr_from_hostname(struct TCP_Server_Info *server) { + struct sockaddr_storage ss; int rc; - int len; - char *unc, *ipaddr = NULL; if (!server->hostname) return -EINVAL; @@ -98,27 +84,17 @@ static int reconn_set_ipaddr_from_hostname(struct TCP_Server_Info *server) if (server->hostname[0] == '\0') return 0; - len = strlen(server->hostname) + 3; + spin_lock(&server->srv_lock); + ss = server->dstaddr; + spin_unlock(&server->srv_lock); - unc = kmalloc(len, GFP_KERNEL); - if (!unc) { - cifs_dbg(FYI, "%s: failed to create UNC path\n", __func__); - return -ENOMEM; - } - scnprintf(unc, len, "\\\\%s", server->hostname); - - rc = dns_resolve_server_name_to_ip(unc, &ipaddr, NULL); - kfree(unc); - - if (rc < 0) { - cifs_dbg(FYI, "%s: failed to resolve server part of %s to IP: %d\n", - __func__, server->hostname, rc); - } else { + rc = dns_resolve_name(server->dns_dom, server->hostname, + strlen(server->hostname), + (struct sockaddr *)&ss); + if (!rc) { spin_lock(&server->srv_lock); - rc = cifs_convert_address((struct sockaddr *)&server->dstaddr, ipaddr, - strlen(ipaddr)); + memcpy(&server->dstaddr, &ss, sizeof(server->dstaddr)); spin_unlock(&server->srv_lock); - kfree(ipaddr); } return rc; } @@ -431,7 +407,8 @@ static int __cifs_reconnect(struct TCP_Server_Info *server, } #ifdef CONFIG_CIFS_DFS_UPCALL -static int __reconnect_target_unlocked(struct TCP_Server_Info *server, const char *target) +static int __reconnect_target_locked(struct TCP_Server_Info *server, + const char *target) { int rc; char *hostname; @@ -464,37 +441,46 @@ static int __reconnect_target_unlocked(struct TCP_Server_Info *server, const cha return rc; } -static int reconnect_target_unlocked(struct TCP_Server_Info *server, struct dfs_cache_tgt_list *tl, - struct dfs_cache_tgt_iterator **target_hint) +static int reconnect_target_locked(struct TCP_Server_Info *server, + struct dfs_cache_tgt_list *tl, + struct dfs_cache_tgt_iterator **target_hint) { - int rc; struct dfs_cache_tgt_iterator *tit; + int rc; *target_hint = NULL; /* If dfs target list is empty, then reconnect to last server */ tit = dfs_cache_get_tgt_iterator(tl); if (!tit) - return __reconnect_target_unlocked(server, server->hostname); + return __reconnect_target_locked(server, server->hostname); /* Otherwise, try every dfs target in @tl */ - for (; tit; tit = dfs_cache_get_next_tgt(tl, tit)) { - rc = __reconnect_target_unlocked(server, dfs_cache_get_tgt_name(tit)); + do { + const char *target = dfs_cache_get_tgt_name(tit); + + spin_lock(&server->srv_lock); + if (server->tcpStatus != CifsNeedReconnect) { + spin_unlock(&server->srv_lock); + return -ECONNRESET; + } + spin_unlock(&server->srv_lock); + rc = __reconnect_target_locked(server, target); if (!rc) { *target_hint = tit; break; } - } + } while ((tit = dfs_cache_get_next_tgt(tl, tit))); return rc; } static int reconnect_dfs_server(struct TCP_Server_Info *server) { - int rc = 0; - const char *refpath = server->current_fullpath + 1; - struct dfs_cache_tgt_list tl = DFS_CACHE_TGT_LIST_INIT(tl); struct dfs_cache_tgt_iterator *target_hint = NULL; + const char *ref_path = server->leaf_fullpath + 1; + DFS_CACHE_TGT_LIST(tl); int num_targets = 0; + int rc = 0; /* * Determine the number of dfs targets the referral path in @cifs_sb resolves to. @@ -504,7 +490,7 @@ static int reconnect_dfs_server(struct TCP_Server_Info *server) * through /proc/fs/cifs/dfscache or the target list is empty due to server settings after * refreshing the referral, so, in this case, default it to 1. */ - if (!dfs_cache_noreq_find(refpath, NULL, &tl)) + if (!dfs_cache_noreq_find(ref_path, NULL, &tl)) num_targets = dfs_cache_get_nr_tgts(&tl); if (!num_targets) num_targets = 1; @@ -525,7 +511,7 @@ static int reconnect_dfs_server(struct TCP_Server_Info *server) try_to_freeze(); cifs_server_lock(server); - rc = reconnect_target_unlocked(server, &tl, &target_hint); + rc = reconnect_target_locked(server, &tl, &target_hint); if (rc) { /* Failed to reconnect socket */ cifs_server_unlock(server); @@ -549,9 +535,7 @@ static int reconnect_dfs_server(struct TCP_Server_Info *server) mod_delayed_work(cifsiod_wq, &server->reconnect, 0); } while (server->tcpStatus == CifsNeedReconnect); - if (target_hint) - dfs_cache_noreq_update_tgthint(refpath, target_hint); - + dfs_cache_noreq_update_tgthint(ref_path, target_hint); dfs_cache_free_tgts(&tl); /* Need to set up echo worker again once connection has been established */ @@ -566,21 +550,8 @@ static int reconnect_dfs_server(struct TCP_Server_Info *server) int cifs_reconnect(struct TCP_Server_Info *server, bool mark_smb_session) { - /* If tcp session is not an dfs connection, then reconnect to last target server */ - spin_lock(&server->srv_lock); - if (!server->is_dfs_conn) { - spin_unlock(&server->srv_lock); + if (!server->leaf_fullpath) return __cifs_reconnect(server, mark_smb_session); - } - spin_unlock(&server->srv_lock); - - mutex_lock(&server->refpath_lock); - if (!server->origin_fullpath || !server->leaf_fullpath) { - mutex_unlock(&server->refpath_lock); - return __cifs_reconnect(server, mark_smb_session); - } - mutex_unlock(&server->refpath_lock); - return reconnect_dfs_server(server); } #else @@ -978,12 +949,10 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server) */ } -#ifdef CONFIG_CIFS_DFS_UPCALL /* Release netns reference for this server. */ put_net(cifs_net_ns(server)); - kfree(server->origin_fullpath); kfree(server->leaf_fullpath); -#endif + kfree(server->hostname); kfree(server); length = atomic_dec_return(&tcpSesAllocCount); @@ -1287,6 +1256,56 @@ next_pdu: module_put_and_exit(0); } +int +cifs_ipaddr_cmp(struct sockaddr *srcaddr, struct sockaddr *rhs) +{ + struct sockaddr_in *saddr4 = (struct sockaddr_in *)srcaddr; + struct sockaddr_in *vaddr4 = (struct sockaddr_in *)rhs; + struct sockaddr_in6 *saddr6 = (struct sockaddr_in6 *)srcaddr; + struct sockaddr_in6 *vaddr6 = (struct sockaddr_in6 *)rhs; + + switch (srcaddr->sa_family) { + case AF_UNSPEC: + switch (rhs->sa_family) { + case AF_UNSPEC: + return 0; + case AF_INET: + case AF_INET6: + return 1; + default: + return -1; + } + case AF_INET: { + switch (rhs->sa_family) { + case AF_UNSPEC: + return -1; + case AF_INET: + return memcmp(saddr4, vaddr4, + sizeof(struct sockaddr_in)); + case AF_INET6: + return 1; + default: + return -1; + } + } + case AF_INET6: { + switch (rhs->sa_family) { + case AF_UNSPEC: + case AF_INET: + return -1; + case AF_INET6: + return memcmp(saddr6, + vaddr6, + sizeof(struct sockaddr_in6)); + default: + return -1; + } + } + default: + return -1; /* don't expect to be here */ + } +} + /* * Returns true if srcaddr isn't specified and rhs isn't specified, or * if srcaddr is specified and matches the IP address of the rhs argument @@ -1353,16 +1372,11 @@ match_port(struct TCP_Server_Info *server, struct sockaddr *addr) return port == *sport; } -static bool -match_address(struct TCP_Server_Info *server, struct sockaddr *addr, - struct sockaddr *srcaddr) +static bool match_server_address(struct TCP_Server_Info *server, struct sockaddr *addr) { if (!cifs_match_ipaddr(addr, (struct sockaddr *)&server->dstaddr)) return false; - if (!cifs_match_ipaddr(srcaddr, (struct sockaddr *)&server->srcaddr)) - return false; - return true; } @@ -1390,7 +1404,9 @@ match_security(struct TCP_Server_Info *server, struct smb3_fs_context *ctx) } /* this function must be called with srv_lock held */ -static int match_server(struct TCP_Server_Info *server, struct smb3_fs_context *ctx) +static int match_server(struct TCP_Server_Info *server, + struct smb3_fs_context *ctx, + bool match_super) { struct sockaddr *addr = (struct sockaddr *)&ctx->dstaddr; @@ -1403,6 +1419,9 @@ static int match_server(struct TCP_Server_Info *server, struct smb3_fs_context * if (server->nosharesock) return 0; + if (!match_super && (ctx->dfs_conn || server->dfs_conn)) + return 0; + /* If multidialect negotiation see if existing sessions match one */ if (strcmp(ctx->vals->version_string, SMB3ANY_VERSION_STRING) == 0) { if (server->vals->protocol_id < SMB30_PROT_ID) @@ -1417,14 +1436,13 @@ static int match_server(struct TCP_Server_Info *server, struct smb3_fs_context * if (!net_eq(cifs_net_ns(server), current->nsproxy->net_ns)) return 0; - if (strcasecmp(server->hostname, ctx->server_hostname)) + if (!cifs_match_ipaddr((struct sockaddr *)&ctx->srcaddr, + (struct sockaddr *)&server->srcaddr)) return 0; - if (!match_address(server, addr, - (struct sockaddr *)&ctx->srcaddr)) - return 0; - - if (!match_port(server, addr)) + if (strcasecmp(server->hostname, ctx->server_hostname) || + !match_server_address(server, addr) || + !match_port(server, addr)) return 0; if (!match_security(server, ctx)) @@ -1453,23 +1471,12 @@ cifs_find_tcp_session(struct smb3_fs_context *ctx) spin_lock(&cifs_tcp_ses_lock); list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) { spin_lock(&server->srv_lock); -#ifdef CONFIG_CIFS_DFS_UPCALL - /* - * DFS failover implementation in cifs_reconnect() requires unique tcp sessions for - * DFS connections to do failover properly, so avoid sharing them with regular - * shares or even links that may connect to same server but having completely - * different failover targets. - */ - if (server->is_dfs_conn) { - spin_unlock(&server->srv_lock); - continue; - } -#endif /* * Skip ses channels since they're only handled in lower layers * (e.g. cifs_send_recv). */ - if (CIFS_SERVER_IS_CHAN(server) || !match_server(server, ctx)) { + if (CIFS_SERVER_IS_CHAN(server) || + !match_server(server, ctx, false)) { spin_unlock(&server->srv_lock); continue; } @@ -1531,8 +1538,6 @@ cifs_put_tcp_session(struct TCP_Server_Info *server, int from_reconnect) kfree(server->session_key.response); server->session_key.response = NULL; server->session_key.len = 0; - kfree(server->hostname); - server->hostname = NULL; task = xchg(&server->tsk, NULL); if (task) @@ -1565,8 +1570,19 @@ cifs_get_tcp_session(struct smb3_fs_context *ctx, goto out_err; } + if (ctx->leaf_fullpath) { + tcp_ses->leaf_fullpath = kstrdup(ctx->leaf_fullpath, GFP_KERNEL); + if (!tcp_ses->leaf_fullpath) { + rc = -ENOMEM; + goto out_err; + } + } + if (ctx->dns_dom) + strscpy(tcp_ses->dns_dom, ctx->dns_dom, sizeof(tcp_ses->dns_dom)); + if (ctx->nosharesock) tcp_ses->nosharesock = true; + tcp_ses->dfs_conn = ctx->dfs_conn; tcp_ses->ops = ctx->ops; tcp_ses->vals = ctx->vals; @@ -1608,9 +1624,6 @@ cifs_get_tcp_session(struct smb3_fs_context *ctx, INIT_DELAYED_WORK(&tcp_ses->echo, cifs_echo_request); INIT_DELAYED_WORK(&tcp_ses->reconnect, smb2_reconnect_server); mutex_init(&tcp_ses->reconnect_mutex); -#ifdef CONFIG_CIFS_DFS_UPCALL - mutex_init(&tcp_ses->refpath_lock); -#endif memcpy(&tcp_ses->srcaddr, &ctx->srcaddr, sizeof(tcp_ses->srcaddr)); memcpy(&tcp_ses->dstaddr, &ctx->dstaddr, @@ -1714,6 +1727,7 @@ out_err: if (CIFS_SERVER_IS_CHAN(tcp_ses)) cifs_put_tcp_session(tcp_ses->primary_server, false); kfree(tcp_ses->hostname); + kfree(tcp_ses->leaf_fullpath); if (tcp_ses->ssocket) sock_release(tcp_ses->ssocket); kfree(tcp_ses); @@ -1722,10 +1736,14 @@ out_err: } /* this function must be called with ses_lock held */ -static int match_session(struct cifs_ses *ses, struct smb3_fs_context *ctx) +static int match_session(struct cifs_ses *ses, + struct smb3_fs_context *ctx, + bool match_super) { - if (ctx->sectype != Unspecified && - ctx->sectype != ses->sectype) + struct TCP_Server_Info *server = ses->server; + enum securityEnum ctx_sec, ses_sec; + + if (!match_super && ctx->dfs_root_ses != ses->dfs_root_ses) return 0; /* @@ -1735,11 +1753,19 @@ static int match_session(struct cifs_ses *ses, struct smb3_fs_context *ctx) if (ses->chan_max < ctx->max_channels) return 0; - switch (ses->sectype) { + ctx_sec = server->ops->select_sectype(server, ctx->sectype); + ses_sec = server->ops->select_sectype(server, ses->sectype); + + if (ctx_sec != ses_sec) + return 0; + + switch (ctx_sec) { case Kerberos: if (!uid_eq(ctx->cred_uid, ses->cred_uid)) return 0; break; + case NTLMv2: + case RawNTLMSSP: default: /* NULL username means anonymous session */ if (ses->user_name == NULL) { @@ -1760,44 +1786,40 @@ static int match_session(struct cifs_ses *ses, struct smb3_fs_context *ctx) CIFS_MAX_PASSWORD_LEN)) return 0; } + + if (strcmp(ctx->local_nls->charset, ses->local_nls->charset)) + return 0; + return 1; } /** * cifs_setup_ipc - helper to setup the IPC tcon for the session * @ses: smb session to issue the request on - * @ctx: the superblock configuration context to use for building the - * new tree connection for the IPC (interprocess communication RPC) + * @seal: if encryption is requested * * A new IPC connection is made and stored in the session * tcon_ipc. The IPC tcon has the same lifetime as the session. */ -static int -cifs_setup_ipc(struct cifs_ses *ses, struct smb3_fs_context *ctx) +struct cifs_tcon *cifs_setup_ipc(struct cifs_ses *ses, bool seal) { int rc = 0, xid; struct cifs_tcon *tcon; char unc[SERVER_NAME_LENGTH + sizeof("//x/IPC$")] = {0}; - bool seal = false; struct TCP_Server_Info *server = ses->server; /* * If the mount request that resulted in the creation of the * session requires encryption, force IPC to be encrypted too. */ - if (ctx->seal) { - if (server->capabilities & SMB2_GLOBAL_CAP_ENCRYPTION) - seal = true; - else { - cifs_server_dbg(VFS, - "IPC: server doesn't support encryption\n"); - return -EOPNOTSUPP; - } + if (seal && !(server->capabilities & SMB2_GLOBAL_CAP_ENCRYPTION)) { + cifs_server_dbg(VFS, "IPC: server doesn't support encryption\n"); + return ERR_PTR(-EOPNOTSUPP); } tcon = tconInfoAlloc(); if (tcon == NULL) - return -ENOMEM; + return ERR_PTR(-ENOMEM); spin_lock(&server->srv_lock); scnprintf(unc, sizeof(unc), "\\\\%s\\IPC$", server->hostname); @@ -1807,13 +1829,13 @@ cifs_setup_ipc(struct cifs_ses *ses, struct smb3_fs_context *ctx) tcon->ses = ses; tcon->ipc = true; tcon->seal = seal; - rc = server->ops->tree_connect(xid, ses, unc, tcon, ctx->local_nls); + rc = server->ops->tree_connect(xid, ses, unc, tcon, ses->local_nls); free_xid(xid); if (rc) { - cifs_server_dbg(VFS, "failed to connect to IPC (rc=%d)\n", rc); + cifs_server_dbg(VFS | ONCE, "failed to connect to IPC (rc=%d)\n", rc); tconInfoFree(tcon); - goto out; + return ERR_PTR(rc); } cifs_dbg(FYI, "IPC tcon rc = %d ipc tid = %d\n", rc, tcon->tid); @@ -1821,15 +1843,13 @@ cifs_setup_ipc(struct cifs_ses *ses, struct smb3_fs_context *ctx) spin_lock(&tcon->tc_lock); tcon->status = TID_GOOD; spin_unlock(&tcon->tc_lock); - ses->tcon_ipc = tcon; -out: - return rc; + return tcon; } static struct cifs_ses * cifs_find_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx) { - struct cifs_ses *ses; + struct cifs_ses *ses, *ret = NULL; spin_lock(&cifs_tcp_ses_lock); list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { @@ -1839,23 +1859,22 @@ cifs_find_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx) continue; } spin_lock(&ses->chan_lock); - if (!match_session(ses, ctx)) { + if (match_session(ses, ctx, false)) { spin_unlock(&ses->chan_lock); spin_unlock(&ses->ses_lock); - continue; + ret = ses; + break; } spin_unlock(&ses->chan_lock); spin_unlock(&ses->ses_lock); - - ++ses->ses_count; - spin_unlock(&cifs_tcp_ses_lock); - return ses; } + if (ret) + cifs_smb_ses_inc_refcount(ret); spin_unlock(&cifs_tcp_ses_lock); - return NULL; + return ret; } -void cifs_put_smb_ses(struct cifs_ses *ses) +void __cifs_put_smb_ses(struct cifs_ses *ses) { struct TCP_Server_Info *server = ses->server; unsigned int chan_count; @@ -2100,11 +2119,13 @@ cifs_set_cifscreds(struct smb3_fs_context *ctx __attribute__((unused)), struct cifs_ses * cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx) { - int rc = -ENOMEM; - unsigned int xid; - struct cifs_ses *ses; - struct sockaddr_in *addr = (struct sockaddr_in *)&server->dstaddr; struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)&server->dstaddr; + struct sockaddr_in *addr = (struct sockaddr_in *)&server->dstaddr; + struct cifs_tcon *ipc; + struct cifs_ses *ses; + unsigned int xid; + size_t len; + int rc = 0; xid = get_xid(); @@ -2149,6 +2170,8 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx) return ses; } + rc = -ENOMEM; + cifs_dbg(FYI, "Existing smb sess not found\n"); ses = sesInfoAlloc(); if (ses == NULL) @@ -2177,6 +2200,14 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx) ses->domainName = kstrdup(ctx->domainname, GFP_KERNEL); if (!ses->domainName) goto get_ses_fail; + + len = strnlen(ctx->domainname, CIFS_MAX_DOMAINNAME_LEN); + if (!cifs_netbios_name(ctx->domainname, len)) { + ses->dns_dom = kstrndup(ctx->domainname, + len, GFP_KERNEL); + if (!ses->dns_dom) + goto get_ses_fail; + } } strscpy(ses->workstation_name, ctx->workstation_name, sizeof(ses->workstation_name)); @@ -2188,6 +2219,7 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx) ses->sectype = ctx->sectype; ses->sign = ctx->sign; + ses->local_nls = load_nls(ctx->local_nls->charset); /* add server as first channel */ spin_lock(&ses->chan_lock); @@ -2218,10 +2250,16 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx) * need to lock before changing something in the session. */ spin_lock(&cifs_tcp_ses_lock); + ses->dfs_root_ses = ctx->dfs_root_ses; list_add(&ses->smb_ses_list, &server->smb_ses_list); spin_unlock(&cifs_tcp_ses_lock); - cifs_setup_ipc(ses, ctx); + ipc = cifs_setup_ipc(ses, ctx->seal); + spin_lock(&cifs_tcp_ses_lock); + spin_lock(&ses->ses_lock); + ses->tcon_ipc = !IS_ERR(ipc) ? ipc : NULL; + spin_unlock(&ses->ses_lock); + spin_unlock(&cifs_tcp_ses_lock); free_xid(xid); @@ -2236,10 +2274,20 @@ get_ses_fail: /* this function must be called with tc_lock held */ static int match_tcon(struct cifs_tcon *tcon, struct smb3_fs_context *ctx) { + struct TCP_Server_Info *server = tcon->ses->server; + if (tcon->status == TID_EXITING) return 0; - if (strncmp(tcon->treeName, ctx->UNC, MAX_TREE_SIZE)) + + if (tcon->origin_fullpath) { + if (!ctx->source || + !dfs_src_pathname_equal(ctx->source, + tcon->origin_fullpath)) + return 0; + } else if (!server->leaf_fullpath && + strncmp(tcon->treeName, ctx->UNC, MAX_TREE_SIZE)) { return 0; + } if (tcon->seal != ctx->seal) return 0; if (tcon->snapshot_time != ctx->snapshot_time) @@ -2279,6 +2327,7 @@ cifs_put_tcon(struct cifs_tcon *tcon) { unsigned int xid; struct cifs_ses *ses; + LIST_HEAD(ses_list); /* * IPC tcon share the lifetime of their session and are @@ -2301,11 +2350,16 @@ cifs_put_tcon(struct cifs_tcon *tcon) WARN_ON(tcon->tc_count < 0); list_del_init(&tcon->tcon_list); + tcon->status = TID_EXITING; spin_unlock(&tcon->tc_lock); spin_unlock(&cifs_tcp_ses_lock); /* cancel polling of interfaces */ cancel_delayed_work_sync(&tcon->query_interfaces); +#ifdef CONFIG_CIFS_DFS_UPCALL + cancel_delayed_work_sync(&tcon->dfs_cache_work); + list_replace_init(&tcon->dfs_ses_list, &ses_list); +#endif if (tcon->use_witness) { int rc; @@ -2325,6 +2379,9 @@ cifs_put_tcon(struct cifs_tcon *tcon) cifs_fscache_release_super_cookie(tcon); tconInfoFree(tcon); cifs_put_smb_ses(ses); +#ifdef CONFIG_CIFS_DFS_UPCALL + dfs_put_root_smb_sessions(&ses_list); +#endif } /** @@ -2543,7 +2600,9 @@ cifs_get_tcon(struct cifs_ses *ses, struct smb3_fs_context *ctx) queue_delayed_work(cifsiod_wq, &tcon->query_interfaces, (SMB_INTERFACE_POLL_INTERVAL * HZ)); } - +#ifdef CONFIG_CIFS_DFS_UPCALL + INIT_DELAYED_WORK(&tcon->dfs_cache_work, dfs_cache_refresh); +#endif spin_lock(&cifs_tcp_ses_lock); list_add(&tcon->tcon_list, &ses->tcon_list); spin_unlock(&cifs_tcp_ses_lock); @@ -2619,9 +2678,11 @@ compare_mount_options(struct super_block *sb, struct cifs_mnt_data *mnt_data) return 1; } -static int -match_prepath(struct super_block *sb, struct cifs_mnt_data *mnt_data) +static int match_prepath(struct super_block *sb, + struct cifs_tcon *tcon, + struct cifs_mnt_data *mnt_data) { + struct smb3_fs_context *ctx = mnt_data->ctx; struct cifs_sb_info *old = CIFS_SB(sb); struct cifs_sb_info *new = mnt_data->cifs_sb; bool old_set = (old->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH) && @@ -2629,6 +2690,10 @@ match_prepath(struct super_block *sb, struct cifs_mnt_data *mnt_data) bool new_set = (new->mnt_cifs_flags & CIFS_MOUNT_USE_PREFIX_PATH) && new->prepath; + if (tcon->origin_fullpath && + dfs_src_pathname_equal(tcon->origin_fullpath, ctx->source)) + return 1; + if (old_set && new_set && !strcmp(new->prepath, old->prepath)) return 1; else if (!old_set && !new_set) @@ -2652,8 +2717,9 @@ cifs_match_super(struct super_block *sb, void *data) spin_lock(&cifs_tcp_ses_lock); cifs_sb = CIFS_SB(sb); tlink = cifs_get_tlink(cifs_sb_master_tlink(cifs_sb)); - if (tlink == NULL) { - /* can not match superblock if tlink were ever null */ + if (IS_ERR_OR_NULL(tlink)) { + pr_warn_once("%s: skip super matching due to bad tlink(%p)\n", + __func__, tlink); spin_unlock(&cifs_tcp_ses_lock); return 0; } @@ -2667,10 +2733,10 @@ cifs_match_super(struct super_block *sb, void *data) spin_lock(&ses->ses_lock); spin_lock(&ses->chan_lock); spin_lock(&tcon->tc_lock); - if (!match_server(tcp_srv, ctx) || - !match_session(ses, ctx) || + if (!match_server(tcp_srv, ctx, true) || + !match_session(ses, ctx, true) || !match_tcon(tcon, ctx) || - !match_prepath(sb, mnt_data)) { + !match_prepath(sb, tcon, mnt_data)) { rc = 0; goto out; } @@ -3127,7 +3193,7 @@ int cifs_setup_cifs_sb(struct cifs_sb_info *cifs_sb) } /* Release all succeed connections */ -static inline void mount_put_conns(struct mount_ctx *mnt_ctx) +void cifs_mount_put_conns(struct cifs_mount_ctx *mnt_ctx) { int rc = 0; @@ -3137,23 +3203,29 @@ static inline void mount_put_conns(struct mount_ctx *mnt_ctx) cifs_put_smb_ses(mnt_ctx->ses); else if (mnt_ctx->server) cifs_put_tcp_session(mnt_ctx->server, 0); + mnt_ctx->ses = NULL; + mnt_ctx->tcon = NULL; + mnt_ctx->server = NULL; mnt_ctx->cifs_sb->mnt_cifs_flags &= ~CIFS_MOUNT_POSIX_PATHS; free_xid(mnt_ctx->xid); } -/* Get connections for tcp, ses and tcon */ -static int mount_get_conns(struct mount_ctx *mnt_ctx) +int cifs_mount_get_session(struct cifs_mount_ctx *mnt_ctx) { - int rc = 0; struct TCP_Server_Info *server = NULL; + struct smb3_fs_context *ctx; struct cifs_ses *ses = NULL; - struct cifs_tcon *tcon = NULL; - struct smb3_fs_context *ctx = mnt_ctx->fs_ctx; - struct cifs_sb_info *cifs_sb = mnt_ctx->cifs_sb; unsigned int xid; + int rc = 0; xid = get_xid(); + if (WARN_ON_ONCE(!mnt_ctx || !mnt_ctx->fs_ctx)) { + rc = -EINVAL; + goto out; + } + ctx = mnt_ctx->fs_ctx; + /* get a reference to a tcp session */ server = cifs_get_tcp_session(ctx, NULL); if (IS_ERR(server)) { @@ -3174,11 +3246,36 @@ static int mount_get_conns(struct mount_ctx *mnt_ctx) SMB2_GLOBAL_CAP_PERSISTENT_HANDLES))) { cifs_server_dbg(VFS, "persistent handles not supported by server\n"); rc = -EOPNOTSUPP; - goto out; } +out: + mnt_ctx->xid = xid; + mnt_ctx->server = server; + mnt_ctx->ses = ses; + mnt_ctx->tcon = NULL; + + return rc; +} + +int cifs_mount_get_tcon(struct cifs_mount_ctx *mnt_ctx) +{ + struct TCP_Server_Info *server; + struct cifs_sb_info *cifs_sb; + struct smb3_fs_context *ctx; + struct cifs_tcon *tcon = NULL; + int rc = 0; + + if (WARN_ON_ONCE(!mnt_ctx || !mnt_ctx->server || !mnt_ctx->ses || !mnt_ctx->fs_ctx || + !mnt_ctx->cifs_sb)) { + rc = -EINVAL; + goto out; + } + server = mnt_ctx->server; + ctx = mnt_ctx->fs_ctx; + cifs_sb = mnt_ctx->cifs_sb; + /* search for existing tcon to this server share */ - tcon = cifs_get_tcon(ses, ctx); + tcon = cifs_get_tcon(mnt_ctx->ses, ctx); if (IS_ERR(tcon)) { rc = PTR_ERR(tcon); tcon = NULL; @@ -3195,7 +3292,7 @@ static int mount_get_conns(struct mount_ctx *mnt_ctx) * reset of caps checks mount to see if unix extensions disabled * for just this mount. */ - reset_cifs_unix_caps(xid, tcon, cifs_sb, ctx); + reset_cifs_unix_caps(mnt_ctx->xid, tcon, cifs_sb, ctx); spin_lock(&tcon->ses->server->srv_lock); if ((tcon->ses->server->tcpStatus == CifsNeedReconnect) && (le64_to_cpu(tcon->fsUnixInfo.Capability) & @@ -3210,7 +3307,7 @@ static int mount_get_conns(struct mount_ctx *mnt_ctx) /* do not care if a following call succeed - informational */ if (!tcon->pipe && server->ops->qfs_tcon) { - server->ops->qfs_tcon(xid, tcon, cifs_sb); + server->ops->qfs_tcon(mnt_ctx->xid, tcon, cifs_sb); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RO_CACHE) { if (tcon->fsDevInfo.DeviceCharacteristics & cpu_to_le32(FILE_READ_ONLY_DEVICE)) @@ -3241,11 +3338,7 @@ static int mount_get_conns(struct mount_ctx *mnt_ctx) cifs_sb->ctx->rsize = server->ops->negotiate_rsize(tcon, ctx); out: - mnt_ctx->server = server; - mnt_ctx->ses = ses; mnt_ctx->tcon = tcon; - mnt_ctx->xid = xid; - return rc; } @@ -3275,146 +3368,6 @@ static int mount_setup_tlink(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses, return 0; } -#ifdef CONFIG_CIFS_DFS_UPCALL -/* Get unique dfs connections */ -static int mount_get_dfs_conns(struct mount_ctx *mnt_ctx) -{ - int rc; - - mnt_ctx->fs_ctx->nosharesock = true; - rc = mount_get_conns(mnt_ctx); - if (mnt_ctx->server) { - cifs_dbg(FYI, "%s: marking tcp session as a dfs connection\n", __func__); - spin_lock(&mnt_ctx->server->srv_lock); - mnt_ctx->server->is_dfs_conn = true; - spin_unlock(&mnt_ctx->server->srv_lock); - } - return rc; -} - -/* - * cifs_build_path_to_root returns full path to root when we do not have an - * existing connection (tcon) - */ -static char * -build_unc_path_to_root(const struct smb3_fs_context *ctx, - const struct cifs_sb_info *cifs_sb, bool useppath) -{ - char *full_path, *pos; - unsigned int pplen = useppath && ctx->prepath ? - strlen(ctx->prepath) + 1 : 0; - unsigned int unc_len = strnlen(ctx->UNC, MAX_TREE_SIZE + 1); - - if (unc_len > MAX_TREE_SIZE) - return ERR_PTR(-EINVAL); - - full_path = kmalloc(unc_len + pplen + 1, GFP_KERNEL); - if (full_path == NULL) - return ERR_PTR(-ENOMEM); - - memcpy(full_path, ctx->UNC, unc_len); - pos = full_path + unc_len; - - if (pplen) { - *pos = CIFS_DIR_SEP(cifs_sb); - memcpy(pos + 1, ctx->prepath, pplen); - pos += pplen; - } - - *pos = '\0'; /* add trailing null */ - convert_delimiter(full_path, CIFS_DIR_SEP(cifs_sb)); - cifs_dbg(FYI, "%s: full_path=%s\n", __func__, full_path); - return full_path; -} - -/* - * expand_dfs_referral - Update cifs_sb from dfs referral path - * - * cifs_sb->ctx->mount_options will be (re-)allocated to a string containing updated options for the - * submount. Otherwise it will be left untouched. - */ -static int expand_dfs_referral(struct mount_ctx *mnt_ctx, const char *full_path, - struct dfs_info3_param *referral) -{ - int rc; - struct cifs_sb_info *cifs_sb = mnt_ctx->cifs_sb; - struct smb3_fs_context *ctx = mnt_ctx->fs_ctx; - char *fake_devname = NULL, *mdata = NULL; - - mdata = cifs_compose_mount_options(cifs_sb->ctx->mount_options, full_path + 1, referral, - &fake_devname); - if (IS_ERR(mdata)) { - rc = PTR_ERR(mdata); - mdata = NULL; - } else { - /* - * We can not clear out the whole structure since we no longer have an explicit - * function to parse a mount-string. Instead we need to clear out the individual - * fields that are no longer valid. - */ - kfree(ctx->prepath); - ctx->prepath = NULL; - rc = cifs_setup_volume_info(ctx, mdata, fake_devname); - } - kfree(fake_devname); - kfree(cifs_sb->ctx->mount_options); - cifs_sb->ctx->mount_options = mdata; - - return rc; -} -#endif - -/* TODO: all callers to this are broken. We are not parsing mount_options here - * we should pass a clone of the original context? - */ -int -cifs_setup_volume_info(struct smb3_fs_context *ctx, const char *mntopts, const char *devname) -{ - int rc; - - if (devname) { - cifs_dbg(FYI, "%s: devname=%s\n", __func__, devname); - rc = smb3_parse_devname(devname, ctx); - if (rc) { - cifs_dbg(VFS, "%s: failed to parse %s: %d\n", __func__, devname, rc); - return rc; - } - } - - if (mntopts) { - char *ip; - - rc = smb3_parse_opt(mntopts, "ip", &ip); - if (rc) { - cifs_dbg(VFS, "%s: failed to parse ip options: %d\n", __func__, rc); - return rc; - } - - rc = cifs_convert_address((struct sockaddr *)&ctx->dstaddr, ip, strlen(ip)); - kfree(ip); - if (!rc) { - cifs_dbg(VFS, "%s: failed to convert ip address\n", __func__); - return -EINVAL; - } - } - - if (ctx->nullauth) { - cifs_dbg(FYI, "Anonymous login\n"); - kfree(ctx->username); - ctx->username = NULL; - } else if (ctx->username) { - /* BB fixme parse for domain name here */ - cifs_dbg(FYI, "Username: %s\n", ctx->username); - } else { - cifs_dbg(VFS, "No username specified\n"); - /* In userspace mount helper we can get user name from alternate - locations such as env variables and files on disk */ - return -EINVAL; - } - - return 0; -} - static int cifs_are_all_path_components_accessible(struct TCP_Server_Info *server, unsigned int xid, @@ -3467,7 +3420,7 @@ cifs_are_all_path_components_accessible(struct TCP_Server_Info *server, * * Return -EREMOTE if it is, otherwise 0 or -errno. */ -static int is_path_remote(struct mount_ctx *mnt_ctx) +int cifs_is_path_remote(struct cifs_mount_ctx *mnt_ctx) { int rc; struct cifs_sb_info *cifs_sb = mnt_ctx->cifs_sb; @@ -3512,253 +3465,17 @@ static int is_path_remote(struct mount_ctx *mnt_ctx) } #ifdef CONFIG_CIFS_DFS_UPCALL -static void set_root_ses(struct mount_ctx *mnt_ctx) -{ - if (mnt_ctx->ses) { - spin_lock(&cifs_tcp_ses_lock); - mnt_ctx->ses->ses_count++; - spin_unlock(&cifs_tcp_ses_lock); - dfs_cache_add_refsrv_session(&mnt_ctx->mount_id, mnt_ctx->ses); - } - mnt_ctx->root_ses = mnt_ctx->ses; -} - -static int is_dfs_mount(struct mount_ctx *mnt_ctx, bool *isdfs, struct dfs_cache_tgt_list *root_tl) -{ - int rc; - struct cifs_sb_info *cifs_sb = mnt_ctx->cifs_sb; - struct smb3_fs_context *ctx = mnt_ctx->fs_ctx; - - *isdfs = true; - - rc = mount_get_conns(mnt_ctx); - /* - * If called with 'nodfs' mount option, then skip DFS resolving. Otherwise unconditionally - * try to get an DFS referral (even cached) to determine whether it is an DFS mount. - * - * Skip prefix path to provide support for DFS referrals from w2k8 servers which don't seem - * to respond with PATH_NOT_COVERED to requests that include the prefix. - */ - if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS) || - dfs_cache_find(mnt_ctx->xid, mnt_ctx->ses, cifs_sb->local_nls, cifs_remap(cifs_sb), - ctx->UNC + 1, NULL, root_tl)) { - if (rc) - return rc; - /* Check if it is fully accessible and then mount it */ - rc = is_path_remote(mnt_ctx); - if (!rc) - *isdfs = false; - else if (rc != -EREMOTE) - return rc; - } - return 0; -} - -static int connect_dfs_target(struct mount_ctx *mnt_ctx, const char *full_path, - const char *ref_path, struct dfs_cache_tgt_iterator *tit) -{ - int rc; - struct dfs_info3_param ref = {}; - struct cifs_sb_info *cifs_sb = mnt_ctx->cifs_sb; - char *oldmnt = cifs_sb->ctx->mount_options; - - cifs_dbg(FYI, "%s: full_path=%s ref_path=%s target=%s\n", __func__, full_path, ref_path, - dfs_cache_get_tgt_name(tit)); - - rc = dfs_cache_get_tgt_referral(ref_path, tit, &ref); - if (rc) - goto out; - - rc = expand_dfs_referral(mnt_ctx, full_path, &ref); - if (rc) - goto out; - - /* Connect to new target only if we were redirected (e.g. mount options changed) */ - if (oldmnt != cifs_sb->ctx->mount_options) { - mount_put_conns(mnt_ctx); - rc = mount_get_dfs_conns(mnt_ctx); - } - if (!rc) { - if (cifs_is_referral_server(mnt_ctx->tcon, &ref)) - set_root_ses(mnt_ctx); - rc = dfs_cache_update_tgthint(mnt_ctx->xid, mnt_ctx->root_ses, cifs_sb->local_nls, - cifs_remap(cifs_sb), ref_path, tit); - } - -out: - free_dfs_info_param(&ref); - return rc; -} - -static int connect_dfs_root(struct mount_ctx *mnt_ctx, struct dfs_cache_tgt_list *root_tl) -{ - int rc; - char *full_path; - struct cifs_sb_info *cifs_sb = mnt_ctx->cifs_sb; - struct smb3_fs_context *ctx = mnt_ctx->fs_ctx; - struct dfs_cache_tgt_iterator *tit; - - /* Put initial connections as they might be shared with other mounts. We need unique dfs - * connections per mount to properly failover, so mount_get_dfs_conns() must be used from - * now on. - */ - mount_put_conns(mnt_ctx); - mount_get_dfs_conns(mnt_ctx); - set_root_ses(mnt_ctx); - - full_path = build_unc_path_to_root(ctx, cifs_sb, true); - if (IS_ERR(full_path)) - return PTR_ERR(full_path); - - mnt_ctx->origin_fullpath = dfs_cache_canonical_path(ctx->UNC, cifs_sb->local_nls, - cifs_remap(cifs_sb)); - if (IS_ERR(mnt_ctx->origin_fullpath)) { - rc = PTR_ERR(mnt_ctx->origin_fullpath); - mnt_ctx->origin_fullpath = NULL; - goto out; - } - - /* Try all dfs root targets */ - for (rc = -ENOENT, tit = dfs_cache_get_tgt_iterator(root_tl); - tit; tit = dfs_cache_get_next_tgt(root_tl, tit)) { - rc = connect_dfs_target(mnt_ctx, full_path, mnt_ctx->origin_fullpath + 1, tit); - if (!rc) { - mnt_ctx->leaf_fullpath = kstrdup(mnt_ctx->origin_fullpath, GFP_KERNEL); - if (!mnt_ctx->leaf_fullpath) - rc = -ENOMEM; - break; - } - } - -out: - kfree(full_path); - return rc; -} - -static int __follow_dfs_link(struct mount_ctx *mnt_ctx) -{ - int rc; - struct cifs_sb_info *cifs_sb = mnt_ctx->cifs_sb; - struct smb3_fs_context *ctx = mnt_ctx->fs_ctx; - char *full_path; - struct dfs_cache_tgt_list tl = DFS_CACHE_TGT_LIST_INIT(tl); - struct dfs_cache_tgt_iterator *tit; - - full_path = build_unc_path_to_root(ctx, cifs_sb, true); - if (IS_ERR(full_path)) - return PTR_ERR(full_path); - - kfree(mnt_ctx->leaf_fullpath); - mnt_ctx->leaf_fullpath = dfs_cache_canonical_path(full_path, cifs_sb->local_nls, - cifs_remap(cifs_sb)); - if (IS_ERR(mnt_ctx->leaf_fullpath)) { - rc = PTR_ERR(mnt_ctx->leaf_fullpath); - mnt_ctx->leaf_fullpath = NULL; - goto out; - } - - /* Get referral from dfs link */ - rc = dfs_cache_find(mnt_ctx->xid, mnt_ctx->root_ses, cifs_sb->local_nls, - cifs_remap(cifs_sb), mnt_ctx->leaf_fullpath + 1, NULL, &tl); - if (rc) - goto out; - - /* Try all dfs link targets. If an I/O fails from currently connected DFS target with an - * error other than STATUS_PATH_NOT_COVERED (-EREMOTE), then retry it from other targets as - * specified in MS-DFSC "3.1.5.2 I/O Operation to Target Fails with an Error Other Than - * STATUS_PATH_NOT_COVERED." - */ - for (rc = -ENOENT, tit = dfs_cache_get_tgt_iterator(&tl); - tit; tit = dfs_cache_get_next_tgt(&tl, tit)) { - rc = connect_dfs_target(mnt_ctx, full_path, mnt_ctx->leaf_fullpath + 1, tit); - if (!rc) { - rc = is_path_remote(mnt_ctx); - if (!rc || rc == -EREMOTE) - break; - } - } - -out: - kfree(full_path); - dfs_cache_free_tgts(&tl); - return rc; -} - -static int follow_dfs_link(struct mount_ctx *mnt_ctx) -{ - int rc; - struct cifs_sb_info *cifs_sb = mnt_ctx->cifs_sb; - struct smb3_fs_context *ctx = mnt_ctx->fs_ctx; - char *full_path; - int num_links = 0; - - full_path = build_unc_path_to_root(ctx, cifs_sb, true); - if (IS_ERR(full_path)) - return PTR_ERR(full_path); - - kfree(mnt_ctx->origin_fullpath); - mnt_ctx->origin_fullpath = dfs_cache_canonical_path(full_path, cifs_sb->local_nls, - cifs_remap(cifs_sb)); - kfree(full_path); - - if (IS_ERR(mnt_ctx->origin_fullpath)) { - rc = PTR_ERR(mnt_ctx->origin_fullpath); - mnt_ctx->origin_fullpath = NULL; - return rc; - } - - do { - rc = __follow_dfs_link(mnt_ctx); - if (!rc || rc != -EREMOTE) - break; - } while (rc = -ELOOP, ++num_links < MAX_NESTED_LINKS); - - return rc; -} - -/* Set up DFS referral paths for failover */ -static void setup_server_referral_paths(struct mount_ctx *mnt_ctx) -{ - struct TCP_Server_Info *server = mnt_ctx->server; - - mutex_lock(&server->refpath_lock); - server->origin_fullpath = mnt_ctx->origin_fullpath; - server->leaf_fullpath = mnt_ctx->leaf_fullpath; - server->current_fullpath = mnt_ctx->leaf_fullpath; - mutex_unlock(&server->refpath_lock); - mnt_ctx->origin_fullpath = mnt_ctx->leaf_fullpath = NULL; -} - int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx) { + struct cifs_mount_ctx mnt_ctx = { .cifs_sb = cifs_sb, .fs_ctx = ctx, }; int rc; - struct mount_ctx mnt_ctx = { .cifs_sb = cifs_sb, .fs_ctx = ctx, }; - struct dfs_cache_tgt_list tl = DFS_CACHE_TGT_LIST_INIT(tl); - bool isdfs; - rc = is_dfs_mount(&mnt_ctx, &isdfs, &tl); + rc = dfs_mount_share(&mnt_ctx); if (rc) goto error; - if (!isdfs) + if (!ctx->dfs_conn) goto out; - /* proceed as DFS mount */ - uuid_gen(&mnt_ctx.mount_id); - rc = connect_dfs_root(&mnt_ctx, &tl); - dfs_cache_free_tgts(&tl); - - if (rc) - goto error; - - rc = is_path_remote(&mnt_ctx); - if (rc) - rc = follow_dfs_link(&mnt_ctx); - if (rc || !mnt_ctx.tcon || !mnt_ctx.ses) { - rc = rc ? rc : -ENOENT; - goto error; - } - - setup_server_referral_paths(&mnt_ctx); /* * After reconnecting to a different server, unique ids won't match anymore, so we disable * serverino. This prevents dentry revalidation to think the dentry are stale (ESTALE). @@ -3772,7 +3489,6 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx) kfree(cifs_sb->prepath); cifs_sb->prepath = ctx->prepath; ctx->prepath = NULL; - uuid_copy(&cifs_sb->dfs_mount_id, &mnt_ctx.mount_id); out: cifs_try_adding_channels(cifs_sb, mnt_ctx.ses); @@ -3784,29 +3500,40 @@ out: return rc; error: - dfs_cache_put_refsrv_sessions(&mnt_ctx.mount_id); - kfree(mnt_ctx.origin_fullpath); - kfree(mnt_ctx.leaf_fullpath); - mount_put_conns(&mnt_ctx); + cifs_mount_put_conns(&mnt_ctx); return rc; } #else int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx) { int rc = 0; - struct mount_ctx mnt_ctx = { .cifs_sb = cifs_sb, .fs_ctx = ctx, }; + struct cifs_mount_ctx mnt_ctx = { .cifs_sb = cifs_sb, .fs_ctx = ctx, }; - rc = mount_get_conns(&mnt_ctx); + rc = cifs_mount_get_session(&mnt_ctx); if (rc) goto error; - if (mnt_ctx.tcon) { - rc = is_path_remote(&mnt_ctx); - if (rc == -EREMOTE) - rc = -EOPNOTSUPP; - if (rc) - goto error; + rc = cifs_mount_get_tcon(&mnt_ctx); + if (!rc) { + /* + * Prevent superblock from being created with any missing + * connections. + */ + if (WARN_ON(!mnt_ctx.server)) + rc = -EHOSTDOWN; + else if (WARN_ON(!mnt_ctx.ses)) + rc = -EACCES; + else if (WARN_ON(!mnt_ctx.tcon)) + rc = -ENOENT; } + if (rc) + goto error; + + rc = cifs_is_path_remote(&mnt_ctx); + if (rc == -EREMOTE) + rc = -EOPNOTSUPP; + if (rc) + goto error; rc = mount_setup_tlink(cifs_sb, mnt_ctx.ses, mnt_ctx.tcon); if (rc) @@ -3816,7 +3543,7 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx) return rc; error: - mount_put_conns(&mnt_ctx); + cifs_mount_put_conns(&mnt_ctx); return rc; } #endif @@ -4013,9 +3740,6 @@ cifs_umount(struct cifs_sb_info *cifs_sb) spin_unlock(&cifs_sb->tlink_tree_lock); kfree(cifs_sb->prepath); -#ifdef CONFIG_CIFS_DFS_UPCALL - dfs_cache_put_refsrv_sessions(&cifs_sb->dfs_mount_id); -#endif call_rcu(&cifs_sb->rcu, delayed_free); } @@ -4179,6 +3903,7 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid) struct cifs_ses *ses; struct cifs_tcon *tcon = NULL; struct smb3_fs_context *ctx; + char *origin_fullpath = NULL; ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); if (ctx == NULL) @@ -4202,6 +3927,7 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid) ctx->sign = master_tcon->ses->sign; ctx->seal = master_tcon->seal; ctx->witness = master_tcon->use_witness; + ctx->dfs_root_ses = master_tcon->ses->dfs_root_ses; rc = cifs_set_vol_auth(ctx, master_tcon->ses); if (rc) { @@ -4221,18 +3947,46 @@ cifs_construct_tcon(struct cifs_sb_info *cifs_sb, kuid_t fsuid) goto out; } +#ifdef CONFIG_CIFS_DFS_UPCALL + spin_lock(&master_tcon->tc_lock); + if (master_tcon->origin_fullpath) { + spin_unlock(&master_tcon->tc_lock); + origin_fullpath = dfs_get_path(cifs_sb, cifs_sb->ctx->source); + if (IS_ERR(origin_fullpath)) { + tcon = ERR_CAST(origin_fullpath); + origin_fullpath = NULL; + cifs_put_smb_ses(ses); + goto out; + } + } else { + spin_unlock(&master_tcon->tc_lock); + } +#endif + tcon = cifs_get_tcon(ses, ctx); if (IS_ERR(tcon)) { cifs_put_smb_ses(ses); goto out; } +#ifdef CONFIG_CIFS_DFS_UPCALL + if (origin_fullpath) { + spin_lock(&tcon->tc_lock); + tcon->origin_fullpath = origin_fullpath; + spin_unlock(&tcon->tc_lock); + origin_fullpath = NULL; + queue_delayed_work(dfscache_wq, &tcon->dfs_cache_work, + dfs_cache_get_ttl() * HZ); + } +#endif + if (cap_unix(ses)) reset_cifs_unix_caps(0, tcon, NULL, ctx); out: kfree(ctx->username); kfree_sensitive(ctx->password); + kfree(origin_fullpath); kfree(ctx); return tcon; @@ -4304,9 +4058,9 @@ tlink_rb_insert(struct rb_root *root, struct tcon_link *new_tlink) struct tcon_link * cifs_sb_tlink(struct cifs_sb_info *cifs_sb) { - int ret; - kuid_t fsuid = current_fsuid(); struct tcon_link *tlink, *newtlink; + kuid_t fsuid = current_fsuid(); + int err; if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MULTIUSER)) return cifs_get_tlink(cifs_sb_master_tlink(cifs_sb)); @@ -4341,9 +4095,9 @@ cifs_sb_tlink(struct cifs_sb_info *cifs_sb) spin_unlock(&cifs_sb->tlink_tree_lock); } else { wait_for_construction: - ret = wait_on_bit(&tlink->tl_flags, TCON_LINK_PENDING, + err = wait_on_bit(&tlink->tl_flags, TCON_LINK_PENDING, TASK_INTERRUPTIBLE); - if (ret) { + if (err) { cifs_put_tlink(tlink); return ERR_PTR(-ERESTARTSYS); } @@ -4354,8 +4108,9 @@ wait_for_construction: /* return error if we tried this already recently */ if (time_before(jiffies, tlink->tl_time + TLINK_ERROR_EXPIRE)) { + err = PTR_ERR(tlink->tl_tcon); cifs_put_tlink(tlink); - return ERR_PTR(-EACCES); + return ERR_PTR(err); } if (test_and_set_bit(TCON_LINK_PENDING, &tlink->tl_flags)) @@ -4367,8 +4122,11 @@ wait_for_construction: wake_up_bit(&tlink->tl_flags, TCON_LINK_PENDING); if (IS_ERR(tlink->tl_tcon)) { + err = PTR_ERR(tlink->tl_tcon); + if (err == -ENOKEY) + err = -EACCES; cifs_put_tlink(tlink); - return ERR_PTR(-EACCES); + return ERR_PTR(err); } return tlink; @@ -4421,185 +4179,35 @@ cifs_prune_tlinks(struct work_struct *work) TLINK_IDLE_EXPIRE); } -#ifdef CONFIG_CIFS_DFS_UPCALL -static int target_share_matches_server(struct TCP_Server_Info *server, const char *tcp_host, - size_t tcp_host_len, char *share, bool *target_match) +#ifndef CONFIG_CIFS_DFS_UPCALL +int cifs_tree_connect(const unsigned int xid, struct cifs_tcon *tcon) { - int rc = 0; - const char *dfs_host; - size_t dfs_host_len; - - *target_match = true; - extract_unc_hostname(share, &dfs_host, &dfs_host_len); - - /* Check if hostnames or addresses match */ - cifs_server_lock(server); - if (dfs_host_len != tcp_host_len || strncasecmp(dfs_host, tcp_host, dfs_host_len) != 0) { - cifs_dbg(FYI, "%s: %.*s doesn't match %.*s\n", __func__, (int)dfs_host_len, - dfs_host, (int)tcp_host_len, tcp_host); - rc = match_target_ip(server, dfs_host, dfs_host_len, target_match); - if (rc) - cifs_dbg(VFS, "%s: failed to match target ip: %d\n", __func__, rc); - } - cifs_server_unlock(server); - return rc; -} - -static int tree_connect_dfs_target(const unsigned int xid, struct cifs_tcon *tcon, - struct cifs_sb_info *cifs_sb, char *tree, bool islink, - struct dfs_cache_tgt_list *tl) -{ - int rc; - struct TCP_Server_Info *server = tcon->ses->server; - const struct smb_version_operations *ops = server->ops; - struct cifs_tcon *ipc = tcon->ses->tcon_ipc; - char *share = NULL, *prefix = NULL; - const char *tcp_host; - size_t tcp_host_len; - struct dfs_cache_tgt_iterator *tit; - bool target_match; - - cifs_server_lock(server); - extract_unc_hostname(server->hostname, &tcp_host, &tcp_host_len); - cifs_server_unlock(server); - - tit = dfs_cache_get_tgt_iterator(tl); - if (!tit) { - rc = -ENOENT; - goto out; - } - - /* Try to tree connect to all dfs targets */ - for (; tit; tit = dfs_cache_get_next_tgt(tl, tit)) { - kfree(share); - kfree(prefix); - share = prefix = NULL; - - /* Check if share matches with tcp ses */ - rc = dfs_cache_get_tgt_share(server->current_fullpath + 1, tit, &share, &prefix); - if (rc) { - cifs_dbg(VFS, "%s: failed to parse target share: %d\n", __func__, rc); - break; - } - - rc = target_share_matches_server(server, tcp_host, tcp_host_len, share, - &target_match); - if (rc) - break; - if (!target_match) { - rc = -EHOSTUNREACH; - continue; - } - - dfs_cache_noreq_update_tgthint(server->current_fullpath + 1, tit); - if (ipc->need_reconnect) { - scnprintf(tree, MAX_TREE_SIZE, "\\\\%s\\IPC$", server->hostname); - ops->tree_connect(xid, ipc->ses, tree, ipc, cifs_sb->local_nls); - } - - scnprintf(tree, MAX_TREE_SIZE, "\\%s", share); - rc = ops->tree_connect(xid, tcon->ses, tree, tcon, cifs_sb->local_nls); - if (islink && !rc) - rc = cifs_update_super_prepath(cifs_sb, prefix); - break; - } - -out: - kfree(share); - kfree(prefix); - dfs_cache_free_tgts(tl); - return rc; -} - -int cifs_tree_connect(const unsigned int xid, struct cifs_tcon *tcon, const struct nls_table *nlsc) -{ - int rc; - struct TCP_Server_Info *server = tcon->ses->server; - const struct smb_version_operations *ops = server->ops; - struct super_block *sb = NULL; - struct cifs_sb_info *cifs_sb = NULL; - struct dfs_cache_tgt_list tl = DFS_CACHE_TGT_LIST_INIT(tl); - char *tree; - struct dfs_info3_param ref = {0}; - - /* only send once per connect */ - spin_lock(&tcon->tc_lock); - if (tcon->ses->ses_status != SES_GOOD || - (tcon->status != TID_NEW && - tcon->status != TID_NEED_TCON)) { - spin_unlock(&tcon->tc_lock); - return 0; - } - tcon->status = TID_IN_TCON; - spin_unlock(&tcon->tc_lock); - - tree = kzalloc(MAX_TREE_SIZE, GFP_KERNEL); - if (!tree) { - rc = -ENOMEM; - goto out; - } - - if (tcon->ipc) { - cifs_server_lock(server); - scnprintf(tree, MAX_TREE_SIZE, "\\\\%s\\IPC$", server->hostname); - cifs_server_unlock(server); - rc = ops->tree_connect(xid, tcon->ses, tree, tcon, nlsc); - goto out; - } - - sb = cifs_get_tcp_super(server); - if (!IS_ERR(sb)) - cifs_sb = CIFS_SB(sb); - - /* If it is not dfs or there was no cached dfs referral, then reconnect to same share */ - if (!cifs_sb || !server->current_fullpath || - dfs_cache_noreq_find(server->current_fullpath + 1, &ref, &tl)) { - rc = ops->tree_connect(xid, tcon->ses, tcon->treeName, tcon, - cifs_sb ? cifs_sb->local_nls : nlsc); - goto out; - } - - rc = tree_connect_dfs_target(xid, tcon, cifs_sb, tree, ref.server_type == DFS_TYPE_LINK, - &tl); - free_dfs_info_param(&ref); - -out: - kfree(tree); - cifs_put_tcp_super(sb); - - if (rc) { - spin_lock(&tcon->tc_lock); - if (tcon->status == TID_IN_TCON) - tcon->status = TID_NEED_TCON; - spin_unlock(&tcon->tc_lock); - } else { - spin_lock(&tcon->tc_lock); - if (tcon->status == TID_IN_TCON) - tcon->status = TID_GOOD; - spin_unlock(&tcon->tc_lock); - tcon->need_reconnect = false; - } - - return rc; -} -#else -int cifs_tree_connect(const unsigned int xid, struct cifs_tcon *tcon, const struct nls_table *nlsc) -{ - int rc; const struct smb_version_operations *ops = tcon->ses->server->ops; + int rc; /* only send once per connect */ spin_lock(&tcon->tc_lock); - if (tcon->ses->ses_status != SES_GOOD || - (tcon->status != TID_NEW && - tcon->status != TID_NEED_TCON)) { + + /* if tcon is marked for needing reconnect, update state */ + if (tcon->need_reconnect) + tcon->status = TID_NEED_TCON; + + if (tcon->status == TID_GOOD) { spin_unlock(&tcon->tc_lock); return 0; } + + if (tcon->status != TID_NEW && + tcon->status != TID_NEED_TCON) { + spin_unlock(&tcon->tc_lock); + return -EHOSTDOWN; + } + tcon->status = TID_IN_TCON; spin_unlock(&tcon->tc_lock); - rc = ops->tree_connect(xid, tcon->ses, tcon->treeName, tcon, nlsc); + rc = ops->tree_connect(xid, tcon->ses, tcon->treeName, + tcon, tcon->ses->local_nls); if (rc) { spin_lock(&tcon->tc_lock); if (tcon->status == TID_IN_TCON) diff --git a/fs/cifs/dfs.c b/fs/cifs/dfs.c new file mode 100644 index 0000000000..974f69463f --- /dev/null +++ b/fs/cifs/dfs.c @@ -0,0 +1,467 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2022 Paulo Alcantara + */ + +#include "cifsproto.h" +#include "cifs_debug.h" +#include "dns_resolve.h" +#include "fs_context.h" +#include "dfs.h" + +#define DFS_DOM(ctx) (ctx->dfs_root_ses ? ctx->dfs_root_ses->dns_dom : NULL) + +/** + * dfs_parse_target_referral - set fs context for dfs target referral + * + * @full_path: full path in UNC format. + * @ref: dfs referral pointer. + * @ctx: smb3 fs context pointer. + * + * Return zero if dfs referral was parsed correctly, otherwise non-zero. + */ +int dfs_parse_target_referral(const char *full_path, const struct dfs_info3_param *ref, + struct smb3_fs_context *ctx) +{ + int rc; + const char *prepath = NULL; + char *path; + + if (!full_path || !*full_path || !ref || !ctx) + return -EINVAL; + + if (WARN_ON_ONCE(!ref->node_name || ref->path_consumed < 0)) + return -EINVAL; + + if (strlen(full_path) - ref->path_consumed) { + prepath = full_path + ref->path_consumed; + /* skip initial delimiter */ + if (*prepath == '/' || *prepath == '\\') + prepath++; + } + + path = cifs_build_devname(ref->node_name, prepath); + if (IS_ERR(path)) + return PTR_ERR(path); + + rc = smb3_parse_devname(path, ctx); + if (rc) + goto out; + + rc = dns_resolve_unc(DFS_DOM(ctx), path, + (struct sockaddr *)&ctx->dstaddr); +out: + kfree(path); + return rc; +} + +static int get_session(struct cifs_mount_ctx *mnt_ctx, const char *full_path) +{ + struct smb3_fs_context *ctx = mnt_ctx->fs_ctx; + int rc; + + ctx->leaf_fullpath = (char *)full_path; + ctx->dns_dom = DFS_DOM(ctx); + rc = cifs_mount_get_session(mnt_ctx); + ctx->leaf_fullpath = ctx->dns_dom = NULL; + + return rc; +} + +/* + * Get an active reference of @ses so that next call to cifs_put_tcon() won't + * release it as any new DFS referrals must go through its IPC tcon. + */ +static void set_root_smb_session(struct cifs_mount_ctx *mnt_ctx) +{ + struct smb3_fs_context *ctx = mnt_ctx->fs_ctx; + struct cifs_ses *ses = mnt_ctx->ses; + + if (ses) { + spin_lock(&cifs_tcp_ses_lock); + cifs_smb_ses_inc_refcount(ses); + spin_unlock(&cifs_tcp_ses_lock); + } + ctx->dfs_root_ses = ses; +} + +static inline int parse_dfs_target(struct smb3_fs_context *ctx, + struct dfs_ref_walk *rw, + struct dfs_info3_param *tgt) +{ + int rc; + const char *fpath = ref_walk_fpath(rw) + 1; + + rc = ref_walk_get_tgt(rw, tgt); + if (!rc) + rc = dfs_parse_target_referral(fpath, tgt, ctx); + return rc; +} + +static int setup_dfs_ref(struct dfs_info3_param *tgt, struct dfs_ref_walk *rw) +{ + struct cifs_sb_info *cifs_sb = rw->mnt_ctx->cifs_sb; + struct smb3_fs_context *ctx = rw->mnt_ctx->fs_ctx; + char *ref_path, *full_path; + int rc; + + set_root_smb_session(rw->mnt_ctx); + ref_walk_ses(rw) = ctx->dfs_root_ses; + + full_path = smb3_fs_context_fullpath(ctx, CIFS_DIR_SEP(cifs_sb)); + if (IS_ERR(full_path)) + return PTR_ERR(full_path); + + if (!tgt || (tgt->server_type == DFS_TYPE_LINK && + DFS_INTERLINK(tgt->flags))) + ref_path = dfs_get_path(cifs_sb, ctx->UNC); + else + ref_path = dfs_get_path(cifs_sb, full_path); + if (IS_ERR(ref_path)) { + rc = PTR_ERR(ref_path); + kfree(full_path); + return rc; + } + ref_walk_path(rw) = ref_path; + ref_walk_fpath(rw) = full_path; + + return dfs_get_referral(rw->mnt_ctx, + ref_walk_path(rw) + 1, + ref_walk_tl(rw)); +} + +static int __dfs_referral_walk(struct dfs_ref_walk *rw) +{ + struct smb3_fs_context *ctx = rw->mnt_ctx->fs_ctx; + struct cifs_mount_ctx *mnt_ctx = rw->mnt_ctx; + struct dfs_info3_param tgt = {}; + int rc = -ENOENT; + +again: + do { + ctx->dfs_root_ses = ref_walk_ses(rw); + while (ref_walk_next_tgt(rw)) { + rc = parse_dfs_target(ctx, rw, &tgt); + if (rc) + continue; + + cifs_mount_put_conns(mnt_ctx); + rc = get_session(mnt_ctx, ref_walk_path(rw)); + if (rc) + continue; + + rc = cifs_mount_get_tcon(mnt_ctx); + if (rc) { + if (tgt.server_type == DFS_TYPE_LINK && + DFS_INTERLINK(tgt.flags)) + rc = -EREMOTE; + } else { + rc = cifs_is_path_remote(mnt_ctx); + if (!rc) { + ref_walk_set_tgt_hint(rw); + break; + } + } + if (rc == -EREMOTE) { + rc = ref_walk_advance(rw); + if (!rc) { + rc = setup_dfs_ref(&tgt, rw); + if (rc) + break; + ref_walk_mark_end(rw); + goto again; + } + } + } + } while (rc && ref_walk_descend(rw)); + + free_dfs_info_param(&tgt); + return rc; +} + +static int dfs_referral_walk(struct cifs_mount_ctx *mnt_ctx, + struct dfs_ref_walk **rw) +{ + int rc; + + *rw = ref_walk_alloc(); + if (IS_ERR(*rw)) { + rc = PTR_ERR(*rw); + *rw = NULL; + return rc; + } + + ref_walk_init(*rw, mnt_ctx); + rc = setup_dfs_ref(NULL, *rw); + if (!rc) + rc = __dfs_referral_walk(*rw); + return rc; +} + +static int __dfs_mount_share(struct cifs_mount_ctx *mnt_ctx) +{ + struct cifs_sb_info *cifs_sb = mnt_ctx->cifs_sb; + struct smb3_fs_context *ctx = mnt_ctx->fs_ctx; + struct dfs_ref_walk *rw = NULL; + struct cifs_tcon *tcon; + char *origin_fullpath; + int rc; + + origin_fullpath = dfs_get_path(cifs_sb, ctx->source); + if (IS_ERR(origin_fullpath)) + return PTR_ERR(origin_fullpath); + + rc = dfs_referral_walk(mnt_ctx, &rw); + if (!rc) { + /* + * Prevent superblock from being created with any missing + * connections. + */ + if (WARN_ON(!mnt_ctx->server)) + rc = -EHOSTDOWN; + else if (WARN_ON(!mnt_ctx->ses)) + rc = -EACCES; + else if (WARN_ON(!mnt_ctx->tcon)) + rc = -ENOENT; + } + if (rc) + goto out; + + tcon = mnt_ctx->tcon; + spin_lock(&tcon->tc_lock); + tcon->origin_fullpath = origin_fullpath; + origin_fullpath = NULL; + ref_walk_set_tcon(rw, tcon); + spin_unlock(&tcon->tc_lock); + queue_delayed_work(dfscache_wq, &tcon->dfs_cache_work, + dfs_cache_get_ttl() * HZ); + +out: + kfree(origin_fullpath); + ref_walk_free(rw); + return rc; +} + +/* + * If @ctx->dfs_automount, then update @ctx->dstaddr earlier with the DFS root + * server from where we'll start following any referrals. Otherwise rely on the + * value provided by mount(2) as the user might not have dns_resolver key set up + * and therefore failing to upcall to resolve UNC hostname under @ctx->source. + */ +static int update_fs_context_dstaddr(struct smb3_fs_context *ctx) +{ + struct sockaddr *addr = (struct sockaddr *)&ctx->dstaddr; + int rc = 0; + + if (!ctx->nodfs && ctx->dfs_automount) { + rc = dns_resolve_unc(NULL, ctx->source, addr); + if (!rc) + cifs_set_port(addr, ctx->port); + ctx->dfs_automount = false; + } + return rc; +} + +int dfs_mount_share(struct cifs_mount_ctx *mnt_ctx) +{ + struct smb3_fs_context *ctx = mnt_ctx->fs_ctx; + bool nodfs = ctx->nodfs; + int rc; + + rc = update_fs_context_dstaddr(ctx); + if (rc) + return rc; + + rc = get_session(mnt_ctx, NULL); + if (rc) + return rc; + + /* + * If called with 'nodfs' mount option, then skip DFS resolving. Otherwise unconditionally + * try to get an DFS referral (even cached) to determine whether it is an DFS mount. + * + * Skip prefix path to provide support for DFS referrals from w2k8 servers which don't seem + * to respond with PATH_NOT_COVERED to requests that include the prefix. + */ + if (!nodfs) { + rc = dfs_get_referral(mnt_ctx, ctx->UNC + 1, NULL); + if (rc) { + cifs_dbg(FYI, "%s: no dfs referral for %s: %d\n", + __func__, ctx->UNC + 1, rc); + cifs_dbg(FYI, "%s: assuming non-dfs mount...\n", __func__); + nodfs = true; + } + } + if (nodfs) { + rc = cifs_mount_get_tcon(mnt_ctx); + if (!rc) + rc = cifs_is_path_remote(mnt_ctx); + return rc; + } + + if (!ctx->dfs_conn) { + ctx->dfs_conn = true; + cifs_mount_put_conns(mnt_ctx); + rc = get_session(mnt_ctx, NULL); + } + if (!rc) + rc = __dfs_mount_share(mnt_ctx); + return rc; +} + +static int target_share_matches_server(struct TCP_Server_Info *server, char *share, + bool *target_match) +{ + int rc = 0; + const char *dfs_host; + size_t dfs_host_len; + + *target_match = true; + extract_unc_hostname(share, &dfs_host, &dfs_host_len); + + /* Check if hostnames or addresses match */ + cifs_server_lock(server); + if (dfs_host_len != strlen(server->hostname) || + strncasecmp(dfs_host, server->hostname, dfs_host_len)) { + cifs_dbg(FYI, "%s: %.*s doesn't match %s\n", __func__, + (int)dfs_host_len, dfs_host, server->hostname); + rc = match_target_ip(server, dfs_host, dfs_host_len, target_match); + if (rc) + cifs_dbg(VFS, "%s: failed to match target ip: %d\n", __func__, rc); + } + cifs_server_unlock(server); + return rc; +} + +static int tree_connect_dfs_target(const unsigned int xid, + struct cifs_tcon *tcon, + struct cifs_sb_info *cifs_sb, + char *tree, bool islink, + struct dfs_cache_tgt_list *tl) +{ + const struct smb_version_operations *ops = tcon->ses->server->ops; + struct TCP_Server_Info *server = tcon->ses->server; + struct dfs_cache_tgt_iterator *tit; + char *share = NULL, *prefix = NULL; + bool target_match; + int rc = -ENOENT; + + /* Try to tree connect to all dfs targets */ + for (tit = dfs_cache_get_tgt_iterator(tl); + tit; tit = dfs_cache_get_next_tgt(tl, tit)) { + kfree(share); + kfree(prefix); + share = prefix = NULL; + + /* Check if share matches with tcp ses */ + rc = dfs_cache_get_tgt_share(server->leaf_fullpath + 1, tit, &share, &prefix); + if (rc) { + cifs_dbg(VFS, "%s: failed to parse target share: %d\n", __func__, rc); + break; + } + + rc = target_share_matches_server(server, share, &target_match); + if (rc) + break; + if (!target_match) { + rc = -EHOSTUNREACH; + continue; + } + + dfs_cache_noreq_update_tgthint(server->leaf_fullpath + 1, tit); + scnprintf(tree, MAX_TREE_SIZE, "\\%s", share); + rc = ops->tree_connect(xid, tcon->ses, tree, + tcon, tcon->ses->local_nls); + if (islink && !rc && cifs_sb) + rc = cifs_update_super_prepath(cifs_sb, prefix); + break; + } + + kfree(share); + kfree(prefix); + dfs_cache_free_tgts(tl); + return rc; +} + +int cifs_tree_connect(const unsigned int xid, struct cifs_tcon *tcon) +{ + int rc; + struct TCP_Server_Info *server = tcon->ses->server; + const struct smb_version_operations *ops = server->ops; + DFS_CACHE_TGT_LIST(tl); + struct cifs_sb_info *cifs_sb = NULL; + struct super_block *sb = NULL; + struct dfs_info3_param ref = {0}; + char *tree; + + /* only send once per connect */ + spin_lock(&tcon->tc_lock); + + /* if tcon is marked for needing reconnect, update state */ + if (tcon->need_reconnect) + tcon->status = TID_NEED_TCON; + + if (tcon->status == TID_GOOD) { + spin_unlock(&tcon->tc_lock); + return 0; + } + + if (tcon->status != TID_NEW && + tcon->status != TID_NEED_TCON) { + spin_unlock(&tcon->tc_lock); + return -EHOSTDOWN; + } + + tcon->status = TID_IN_TCON; + spin_unlock(&tcon->tc_lock); + + tree = kzalloc(MAX_TREE_SIZE, GFP_KERNEL); + if (!tree) { + rc = -ENOMEM; + goto out; + } + + if (tcon->ipc) { + cifs_server_lock(server); + scnprintf(tree, MAX_TREE_SIZE, "\\\\%s\\IPC$", server->hostname); + cifs_server_unlock(server); + rc = ops->tree_connect(xid, tcon->ses, tree, + tcon, tcon->ses->local_nls); + goto out; + } + + sb = cifs_get_dfs_tcon_super(tcon); + if (!IS_ERR(sb)) + cifs_sb = CIFS_SB(sb); + + /* Tree connect to last share in @tcon->tree_name if no DFS referral */ + if (!server->leaf_fullpath || + dfs_cache_noreq_find(server->leaf_fullpath + 1, &ref, &tl)) { + rc = ops->tree_connect(xid, tcon->ses, tcon->treeName, + tcon, tcon->ses->local_nls); + goto out; + } + + rc = tree_connect_dfs_target(xid, tcon, cifs_sb, tree, ref.server_type == DFS_TYPE_LINK, + &tl); + free_dfs_info_param(&ref); + +out: + kfree(tree); + cifs_put_tcp_super(sb); + + if (rc) { + spin_lock(&tcon->tc_lock); + if (tcon->status == TID_IN_TCON) + tcon->status = TID_NEED_TCON; + spin_unlock(&tcon->tc_lock); + } else { + spin_lock(&tcon->tc_lock); + if (tcon->status == TID_IN_TCON) + tcon->status = TID_GOOD; + tcon->need_reconnect = false; + spin_unlock(&tcon->tc_lock); + } + + return rc; +} diff --git a/fs/cifs/dfs.h b/fs/cifs/dfs.h new file mode 100644 index 0000000000..e60f0a24a8 --- /dev/null +++ b/fs/cifs/dfs.h @@ -0,0 +1,198 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (c) 2022 Paulo Alcantara + */ + +#ifndef _CIFS_DFS_H +#define _CIFS_DFS_H + +#include "cifsglob.h" +#include "cifsproto.h" +#include "fs_context.h" +#include "dfs_cache.h" +#include "cifs_unicode.h" +#include +#include + +#define DFS_INTERLINK(v) \ + (((v) & DFSREF_REFERRAL_SERVER) && !((v) & DFSREF_STORAGE_SERVER)) + +struct dfs_ref { + char *path; + char *full_path; + struct cifs_ses *ses; + struct dfs_cache_tgt_list tl; + struct dfs_cache_tgt_iterator *tit; +}; + +struct dfs_ref_walk { + struct cifs_mount_ctx *mnt_ctx; + struct dfs_ref *ref; + struct dfs_ref refs[MAX_NESTED_LINKS]; +}; + +#define ref_walk_start(w) ((w)->refs) +#define ref_walk_end(w) (&(w)->refs[ARRAY_SIZE((w)->refs) - 1]) +#define ref_walk_cur(w) ((w)->ref) +#define ref_walk_descend(w) (--ref_walk_cur(w) >= ref_walk_start(w)) + +#define ref_walk_tit(w) (ref_walk_cur(w)->tit) +#define ref_walk_path(w) (ref_walk_cur(w)->path) +#define ref_walk_fpath(w) (ref_walk_cur(w)->full_path) +#define ref_walk_tl(w) (&ref_walk_cur(w)->tl) +#define ref_walk_ses(w) (ref_walk_cur(w)->ses) + +static inline struct dfs_ref_walk *ref_walk_alloc(void) +{ + struct dfs_ref_walk *rw; + + rw = kmalloc(sizeof(*rw), GFP_KERNEL); + if (!rw) + return ERR_PTR(-ENOMEM); + return rw; +} + +static inline void ref_walk_init(struct dfs_ref_walk *rw, + struct cifs_mount_ctx *mnt_ctx) +{ + memset(rw, 0, sizeof(*rw)); + rw->mnt_ctx = mnt_ctx; + ref_walk_cur(rw) = ref_walk_start(rw); +} + +static inline void __ref_walk_free(struct dfs_ref *ref) +{ + kfree(ref->path); + kfree(ref->full_path); + dfs_cache_free_tgts(&ref->tl); + if (ref->ses) + cifs_put_smb_ses(ref->ses); + memset(ref, 0, sizeof(*ref)); +} + +static inline void ref_walk_free(struct dfs_ref_walk *rw) +{ + struct dfs_ref *ref; + + if (!rw) + return; + + for (ref = ref_walk_start(rw); ref <= ref_walk_end(rw); ref++) + __ref_walk_free(ref); + kfree(rw); +} + +static inline int ref_walk_advance(struct dfs_ref_walk *rw) +{ + struct dfs_ref *ref = ref_walk_cur(rw) + 1; + + if (ref > ref_walk_end(rw)) + return -ELOOP; + __ref_walk_free(ref); + ref_walk_cur(rw) = ref; + return 0; +} + +static inline struct dfs_cache_tgt_iterator * +ref_walk_next_tgt(struct dfs_ref_walk *rw) +{ + struct dfs_ref *ref = ref_walk_cur(rw); + struct dfs_cache_tgt_iterator *tit; + + if (IS_ERR(ref->tit)) + return NULL; + + if (!ref->tit) + tit = dfs_cache_get_tgt_iterator(&ref->tl); + else + tit = dfs_cache_get_next_tgt(&ref->tl, ref->tit); + + if (!tit) { + ref->tit = ERR_PTR(-ENOENT); + return NULL; + } + ref->tit = tit; + return ref->tit; +} + +static inline int ref_walk_get_tgt(struct dfs_ref_walk *rw, + struct dfs_info3_param *tgt) +{ + zfree_dfs_info_param(tgt); + return dfs_cache_get_tgt_referral(ref_walk_path(rw) + 1, + ref_walk_tit(rw), tgt); +} + +static inline void ref_walk_set_tgt_hint(struct dfs_ref_walk *rw) +{ + dfs_cache_noreq_update_tgthint(ref_walk_path(rw) + 1, + ref_walk_tit(rw)); +} + +static inline void ref_walk_set_tcon(struct dfs_ref_walk *rw, + struct cifs_tcon *tcon) +{ + struct dfs_ref *ref = ref_walk_start(rw); + + for (; ref <= ref_walk_cur(rw); ref++) { + if (WARN_ON_ONCE(!ref->ses)) + continue; + list_add(&ref->ses->dlist, &tcon->dfs_ses_list); + ref->ses = NULL; + } +} + +static inline void ref_walk_mark_end(struct dfs_ref_walk *rw) +{ + struct dfs_ref *ref = ref_walk_cur(rw) - 1; + + WARN_ON_ONCE(ref < ref_walk_start(rw)); + dfs_cache_noreq_update_tgthint(ref->path + 1, ref->tit); + ref->tit = ERR_PTR(-ENOENT); /* end marker */ +} + +int dfs_parse_target_referral(const char *full_path, const struct dfs_info3_param *ref, + struct smb3_fs_context *ctx); +int dfs_mount_share(struct cifs_mount_ctx *mnt_ctx); + +static inline char *dfs_get_path(struct cifs_sb_info *cifs_sb, const char *path) +{ + return dfs_cache_canonical_path(path, cifs_sb->local_nls, cifs_remap(cifs_sb)); +} + +static inline int dfs_get_referral(struct cifs_mount_ctx *mnt_ctx, + const char *path, + struct dfs_cache_tgt_list *tl) +{ + struct smb3_fs_context *ctx = mnt_ctx->fs_ctx; + struct cifs_sb_info *cifs_sb = mnt_ctx->cifs_sb; + struct cifs_ses *rses = ctx->dfs_root_ses ?: mnt_ctx->ses; + + return dfs_cache_find(mnt_ctx->xid, rses, cifs_sb->local_nls, + cifs_remap(cifs_sb), path, NULL, tl); +} + +/* + * cifs_get_smb_ses() already guarantees an active reference of + * @ses->dfs_root_ses when a new session is created, so we need to put extra + * references of all DFS root sessions that were used across the mount process + * in dfs_mount_share(). + */ +static inline void dfs_put_root_smb_sessions(struct list_head *head) +{ + struct cifs_ses *ses, *n; + + list_for_each_entry_safe(ses, n, head, dlist) { + list_del_init(&ses->dlist); + cifs_put_smb_ses(ses); + } +} + +static inline const char *dfs_ses_refpath(struct cifs_ses *ses) +{ + const char *path = ses->server->leaf_fullpath; + + return path ? path + 1 : ERR_PTR(-ENOENT); +} + +#endif /* _CIFS_DFS_H */ diff --git a/fs/cifs/dfs_cache.c b/fs/cifs/dfs_cache.c index f047927707..bb0615407b 100644 --- a/fs/cifs/dfs_cache.c +++ b/fs/cifs/dfs_cache.c @@ -19,14 +19,14 @@ #include "cifs_unicode.h" #include "smb2glob.h" #include "dns_resolve.h" +#include "dfs.h" #include "dfs_cache.h" -#define CACHE_HTABLE_SIZE 32 -#define CACHE_MAX_ENTRIES 64 -#define CACHE_MIN_TTL 120 /* 2 minutes */ - -#define IS_DFS_INTERLINK(v) (((v) & DFSREF_REFERRAL_SERVER) && !((v) & DFSREF_STORAGE_SERVER)) +#define CACHE_HTABLE_SIZE 512 +#define CACHE_MAX_ENTRIES 1024 +#define CACHE_MIN_TTL 120 /* 2 minutes */ +#define CACHE_DEFAULT_TTL 300 /* 5 minutes */ struct cache_dfs_tgt { char *name; @@ -48,22 +48,10 @@ struct cache_entry { struct cache_dfs_tgt *tgthint; }; -/* List of referral server sessions per dfs mount */ -struct mount_group { - struct list_head list; - uuid_t id; - struct cifs_ses *sessions[CACHE_MAX_ENTRIES]; - int num_sessions; - spinlock_t lock; - struct list_head refresh_list; - struct kref refcount; -}; - static struct kmem_cache *cache_slab __read_mostly; -static struct workqueue_struct *dfscache_wq __read_mostly; +struct workqueue_struct *dfscache_wq; -static int cache_ttl; -static DEFINE_SPINLOCK(cache_ttl_lock); +atomic_t dfs_cache_ttl; static struct nls_table *cache_cp; @@ -75,106 +63,6 @@ static atomic_t cache_count; static struct hlist_head cache_htable[CACHE_HTABLE_SIZE]; static DECLARE_RWSEM(htable_rw_lock); -static LIST_HEAD(mount_group_list); -static DEFINE_MUTEX(mount_group_list_lock); - -static void refresh_cache_worker(struct work_struct *work); - -static DECLARE_DELAYED_WORK(refresh_task, refresh_cache_worker); - -static void get_ipc_unc(const char *ref_path, char *ipc, size_t ipclen) -{ - const char *host; - size_t len; - - extract_unc_hostname(ref_path, &host, &len); - scnprintf(ipc, ipclen, "\\\\%.*s\\IPC$", (int)len, host); -} - -static struct cifs_ses *find_ipc_from_server_path(struct cifs_ses **ses, const char *path) -{ - char unc[SERVER_NAME_LENGTH + sizeof("//x/IPC$")] = {0}; - - get_ipc_unc(path, unc, sizeof(unc)); - for (; *ses; ses++) { - if (!strcasecmp(unc, (*ses)->tcon_ipc->treeName)) - return *ses; - } - return ERR_PTR(-ENOENT); -} - -static void __mount_group_release(struct mount_group *mg) -{ - int i; - - for (i = 0; i < mg->num_sessions; i++) - cifs_put_smb_ses(mg->sessions[i]); - kfree(mg); -} - -static void mount_group_release(struct kref *kref) -{ - struct mount_group *mg = container_of(kref, struct mount_group, refcount); - - mutex_lock(&mount_group_list_lock); - list_del(&mg->list); - mutex_unlock(&mount_group_list_lock); - __mount_group_release(mg); -} - -static struct mount_group *find_mount_group_locked(const uuid_t *id) -{ - struct mount_group *mg; - - list_for_each_entry(mg, &mount_group_list, list) { - if (uuid_equal(&mg->id, id)) - return mg; - } - return ERR_PTR(-ENOENT); -} - -static struct mount_group *__get_mount_group_locked(const uuid_t *id) -{ - struct mount_group *mg; - - mg = find_mount_group_locked(id); - if (!IS_ERR(mg)) - return mg; - - mg = kmalloc(sizeof(*mg), GFP_KERNEL); - if (!mg) - return ERR_PTR(-ENOMEM); - kref_init(&mg->refcount); - uuid_copy(&mg->id, id); - mg->num_sessions = 0; - spin_lock_init(&mg->lock); - list_add(&mg->list, &mount_group_list); - return mg; -} - -static struct mount_group *get_mount_group(const uuid_t *id) -{ - struct mount_group *mg; - - mutex_lock(&mount_group_list_lock); - mg = __get_mount_group_locked(id); - if (!IS_ERR(mg)) - kref_get(&mg->refcount); - mutex_unlock(&mount_group_list_lock); - - return mg; -} - -static void free_mount_group_list(void) -{ - struct mount_group *mg, *tmp_mg; - - list_for_each_entry_safe(mg, tmp_mg, &mount_group_list, list) { - list_del_init(&mg->list); - __mount_group_release(mg); - } -} - /** * dfs_cache_canonical_path - get a canonical DFS path * @@ -237,6 +125,7 @@ static inline void free_tgts(struct cache_entry *ce) static inline void flush_cache_ent(struct cache_entry *ce) { + cifs_dbg(FYI, "%s: %s\n", __func__, ce->path); hlist_del_init(&ce->hlist); kfree(ce->path); free_tgts(ce); @@ -283,7 +172,7 @@ static int dfscache_proc_show(struct seq_file *m, void *v) "cache entry: path=%s,type=%s,ttl=%d,etime=%ld,hdr_flags=0x%x,ref_flags=0x%x,interlink=%s,path_consumed=%d,expired=%s\n", ce->path, ce->srvtype == DFS_TYPE_ROOT ? "root" : "link", ce->ttl, ce->etime.tv_nsec, ce->hdr_flags, ce->ref_flags, - IS_DFS_INTERLINK(ce->hdr_flags) ? "yes" : "no", + DFS_INTERLINK(ce->hdr_flags) ? "yes" : "no", ce->path_consumed, cache_entry_expired(ce) ? "yes" : "no"); list_for_each_entry(t, &ce->tlist, list) { @@ -352,7 +241,7 @@ static inline void dump_ce(const struct cache_entry *ce) ce->srvtype == DFS_TYPE_ROOT ? "root" : "link", ce->ttl, ce->etime.tv_nsec, ce->hdr_flags, ce->ref_flags, - IS_DFS_INTERLINK(ce->hdr_flags) ? "yes" : "no", + DFS_INTERLINK(ce->hdr_flags) ? "yes" : "no", ce->path_consumed, cache_entry_expired(ce) ? "yes" : "no"); dump_tgts(ce); @@ -396,7 +285,9 @@ int dfs_cache_init(void) int rc; int i; - dfscache_wq = alloc_workqueue("cifs-dfscache", WQ_FREEZABLE | WQ_UNBOUND, 1); + dfscache_wq = alloc_workqueue("cifs-dfscache", + WQ_UNBOUND|WQ_FREEZABLE|WQ_MEM_RECLAIM, + 0); if (!dfscache_wq) return -ENOMEM; @@ -412,6 +303,7 @@ int dfs_cache_init(void) INIT_HLIST_HEAD(&cache_htable[i]); atomic_set(&cache_count, 0); + atomic_set(&dfs_cache_ttl, CACHE_DEFAULT_TTL); cache_cp = load_nls("utf8"); if (!cache_cp) cache_cp = load_nls_default(); @@ -549,34 +441,31 @@ static struct cache_entry *alloc_cache_entry(struct dfs_info3_param *refs, int n return ce; } -static void remove_oldest_entry_locked(void) +/* Remove all referrals that have a single target or oldest entry */ +static void purge_cache(void) { int i; struct cache_entry *ce; - struct cache_entry *to_del = NULL; - - WARN_ON(!rwsem_is_locked(&htable_rw_lock)); + struct cache_entry *oldest = NULL; for (i = 0; i < CACHE_HTABLE_SIZE; i++) { struct hlist_head *l = &cache_htable[i]; + struct hlist_node *n; - hlist_for_each_entry(ce, l, hlist) { + hlist_for_each_entry_safe(ce, n, l, hlist) { if (hlist_unhashed(&ce->hlist)) continue; - if (!to_del || timespec64_compare(&ce->etime, - &to_del->etime) < 0) - to_del = ce; + if (ce->numtgts == 1) + flush_cache_ent(ce); + else if (!oldest || + timespec64_compare(&ce->etime, + &oldest->etime) < 0) + oldest = ce; } } - if (!to_del) { - cifs_dbg(FYI, "%s: no entry to remove\n", __func__); - return; - } - - cifs_dbg(FYI, "%s: removing entry\n", __func__); - dump_ce(to_del); - flush_cache_ent(to_del); + if (atomic_read(&cache_count) >= CACHE_MAX_ENTRIES && oldest) + flush_cache_ent(oldest); } /* Add a new DFS cache entry */ @@ -586,12 +475,13 @@ static struct cache_entry *add_cache_entry_locked(struct dfs_info3_param *refs, int rc; struct cache_entry *ce; unsigned int hash; + int ttl; WARN_ON(!rwsem_is_locked(&htable_rw_lock)); if (atomic_read(&cache_count) >= CACHE_MAX_ENTRIES) { cifs_dbg(FYI, "%s: reached max cache size (%d)\n", __func__, CACHE_MAX_ENTRIES); - remove_oldest_entry_locked(); + purge_cache(); } rc = cache_entry_hash(refs[0].path_name, strlen(refs[0].path_name), &hash); @@ -602,15 +492,8 @@ static struct cache_entry *add_cache_entry_locked(struct dfs_info3_param *refs, if (IS_ERR(ce)) return ce; - spin_lock(&cache_ttl_lock); - if (!cache_ttl) { - cache_ttl = ce->ttl; - queue_delayed_work(dfscache_wq, &refresh_task, cache_ttl * HZ); - } else { - cache_ttl = min_t(int, cache_ttl, ce->ttl); - mod_delayed_work(dfscache_wq, &refresh_task, cache_ttl * HZ); - } - spin_unlock(&cache_ttl_lock); + ttl = min_t(int, atomic_read(&dfs_cache_ttl), ce->ttl); + atomic_set(&dfs_cache_ttl, ttl); hlist_add_head(&ce->hlist, &cache_htable[hash]); dump_ce(ce); @@ -722,9 +605,7 @@ static struct cache_entry *lookup_cache_entry(const char *path) */ void dfs_cache_destroy(void) { - cancel_delayed_work_sync(&refresh_task); unload_nls(cache_cp); - free_mount_group_list(); flush_cache_ents(); kmem_cache_destroy(cache_slab); destroy_workqueue(dfscache_wq); @@ -765,8 +646,6 @@ static int get_dfs_referral(const unsigned int xid, struct cifs_ses *ses, const int rc; int i; - cifs_dbg(FYI, "%s: get an DFS referral for %s\n", __func__, path); - *refs = NULL; *numrefs = 0; @@ -775,6 +654,7 @@ static int get_dfs_referral(const unsigned int xid, struct cifs_ses *ses, const if (unlikely(!cache_cp)) return -EINVAL; + cifs_dbg(FYI, "%s: ipc=%s referral=%s\n", __func__, ses->tcon_ipc->treeName, path); rc = ses->server->ops->get_dfs_refer(xid, ses, path, refs, numrefs, cache_cp, NO_MAP_UNI_RSVD); if (!rc) { @@ -799,7 +679,8 @@ static int get_dfs_referral(const unsigned int xid, struct cifs_ses *ses, const */ static struct cache_entry *cache_refresh_path(const unsigned int xid, struct cifs_ses *ses, - const char *path) + const char *path, + bool force_refresh) { struct dfs_info3_param *refs = NULL; struct cache_entry *ce; @@ -812,7 +693,7 @@ static struct cache_entry *cache_refresh_path(const unsigned int xid, ce = lookup_cache_entry(path); if (!IS_ERR(ce)) { - if (!cache_entry_expired(ce)) + if (!force_refresh && !cache_entry_expired(ce)) return ce; } else if (PTR_ERR(ce) != -ENOENT) { up_read(&htable_rw_lock); @@ -828,7 +709,8 @@ static struct cache_entry *cache_refresh_path(const unsigned int xid, up_read(&htable_rw_lock); /* - * Either the entry was not found, or it is expired. + * Either the entry was not found, or it is expired, or it is a forced + * refresh. * Request a new DFS referral in order to create or update a cache entry. */ rc = get_dfs_referral(xid, ses, path, &refs, &numrefs); @@ -843,7 +725,7 @@ static struct cache_entry *cache_refresh_path(const unsigned int xid, /* Re-check as another task might have it added or refreshed already */ ce = lookup_cache_entry(path); if (!IS_ERR(ce)) { - if (cache_entry_expired(ce)) { + if (force_refresh || cache_entry_expired(ce)) { rc = update_cache_entry_locked(ce, refs, numrefs); if (rc) ce = ERR_PTR(rc); @@ -980,7 +862,7 @@ int dfs_cache_find(const unsigned int xid, struct cifs_ses *ses, const struct nl if (IS_ERR(npath)) return PTR_ERR(npath); - ce = cache_refresh_path(xid, ses, npath); + ce = cache_refresh_path(xid, ses, npath, false); if (IS_ERR(ce)) { rc = PTR_ERR(ce); goto out_free_path; @@ -1044,66 +926,6 @@ out_unlock: return rc; } -/** - * dfs_cache_update_tgthint - update target hint of a DFS cache entry - * - * If it doesn't find the cache entry, then it will get a DFS referral for @path - * and create a new entry. - * - * In case the cache entry exists but expired, it will get a DFS referral - * for @path and then update the respective cache entry. - * - * @xid: syscall id - * @ses: smb session - * @cp: codepage - * @remap: type of character remapping for paths - * @path: path to lookup in DFS referral cache - * @it: DFS target iterator - * - * Return zero if the target hint was updated successfully, otherwise non-zero. - */ -int dfs_cache_update_tgthint(const unsigned int xid, struct cifs_ses *ses, - const struct nls_table *cp, int remap, const char *path, - const struct dfs_cache_tgt_iterator *it) -{ - struct cache_dfs_tgt *t; - struct cache_entry *ce; - const char *npath; - int rc = 0; - - npath = dfs_cache_canonical_path(path, cp, remap); - if (IS_ERR(npath)) - return PTR_ERR(npath); - - cifs_dbg(FYI, "%s: update target hint - path: %s\n", __func__, npath); - - ce = cache_refresh_path(xid, ses, npath); - if (IS_ERR(ce)) { - rc = PTR_ERR(ce); - goto out_free_path; - } - - t = READ_ONCE(ce->tgthint); - - if (likely(!strcasecmp(it->it_name, t->name))) - goto out_unlock; - - list_for_each_entry(t, &ce->tlist, list) { - if (!strcasecmp(t->name, it->it_name)) { - WRITE_ONCE(ce->tgthint, t); - cifs_dbg(FYI, "%s: new target hint: %s\n", __func__, - it->it_name); - break; - } - } - -out_unlock: - up_read(&htable_rw_lock); -out_free_path: - kfree(npath); - return rc; -} - /** * dfs_cache_noreq_update_tgthint - update target hint of a DFS cache entry * without sending any requests to the currently connected server. @@ -1118,26 +940,22 @@ out_free_path: * * Return zero if the target hint was updated successfully, otherwise non-zero. */ -int dfs_cache_noreq_update_tgthint(const char *path, const struct dfs_cache_tgt_iterator *it) +void dfs_cache_noreq_update_tgthint(const char *path, const struct dfs_cache_tgt_iterator *it) { - int rc; - struct cache_entry *ce; struct cache_dfs_tgt *t; + struct cache_entry *ce; - if (!it) - return -EINVAL; + if (!path || !it) + return; cifs_dbg(FYI, "%s: path: %s\n", __func__, path); down_read(&htable_rw_lock); ce = lookup_cache_entry(path); - if (IS_ERR(ce)) { - rc = PTR_ERR(ce); + if (IS_ERR(ce)) goto out_unlock; - } - rc = 0; t = READ_ONCE(ce->tgthint); if (unlikely(!strcasecmp(it->it_name, t->name))) @@ -1154,7 +972,6 @@ int dfs_cache_noreq_update_tgthint(const char *path, const struct dfs_cache_tgt_ out_unlock: up_read(&htable_rw_lock); - return rc; } /** @@ -1195,54 +1012,6 @@ out_unlock: return rc; } -/** - * dfs_cache_add_refsrv_session - add SMB session of referral server - * - * @mount_id: mount group uuid to lookup. - * @ses: reference counted SMB session of referral server. - */ -void dfs_cache_add_refsrv_session(const uuid_t *mount_id, struct cifs_ses *ses) -{ - struct mount_group *mg; - - if (WARN_ON_ONCE(!mount_id || uuid_is_null(mount_id) || !ses)) - return; - - mg = get_mount_group(mount_id); - if (WARN_ON_ONCE(IS_ERR(mg))) - return; - - spin_lock(&mg->lock); - if (mg->num_sessions < ARRAY_SIZE(mg->sessions)) - mg->sessions[mg->num_sessions++] = ses; - spin_unlock(&mg->lock); - kref_put(&mg->refcount, mount_group_release); -} - -/** - * dfs_cache_put_refsrv_sessions - put all referral server sessions - * - * Put all SMB sessions from the given mount group id. - * - * @mount_id: mount group uuid to lookup. - */ -void dfs_cache_put_refsrv_sessions(const uuid_t *mount_id) -{ - struct mount_group *mg; - - if (!mount_id || uuid_is_null(mount_id)) - return; - - mutex_lock(&mount_group_list_lock); - mg = find_mount_group_locked(mount_id); - if (IS_ERR(mg)) { - mutex_unlock(&mount_group_list_lock); - return; - } - mutex_unlock(&mount_group_list_lock); - kref_put(&mg->refcount, mount_group_release); -} - /* Extract share from DFS target and return a pointer to prefix path or NULL */ static const char *parse_target_share(const char *target, char **share) { @@ -1323,142 +1092,208 @@ int dfs_cache_get_tgt_share(char *path, const struct dfs_cache_tgt_iterator *it, return 0; } -static bool target_share_equal(struct TCP_Server_Info *server, const char *s1, const char *s2) +static bool target_share_equal(struct cifs_tcon *tcon, const char *s1) { - char unc[sizeof("\\\\") + SERVER_NAME_LENGTH] = {0}; - const char *host; - size_t hostlen; - char *ip = NULL; - struct sockaddr sa; + struct TCP_Server_Info *server = tcon->ses->server; + const char *s2 = &tcon->treeName[1]; + struct sockaddr_storage ss; bool match; int rc; - if (strcasecmp(s1, s2)) + if (strcasecmp(s2, s1)) return false; /* * Resolve share's hostname and check if server address matches. Otherwise just ignore it * as we could not have upcall to resolve hostname or failed to convert ip address. */ - match = true; - extract_unc_hostname(s1, &host, &hostlen); - scnprintf(unc, sizeof(unc), "\\\\%.*s", (int)hostlen, host); - - rc = dns_resolve_server_name_to_ip(unc, &ip, NULL); - if (rc < 0) { - cifs_dbg(FYI, "%s: could not resolve %.*s. assuming server address matches.\n", - __func__, (int)hostlen, host); + rc = dns_resolve_unc(server->dns_dom, s1, (struct sockaddr *)&ss); + if (rc < 0) return true; - } - if (!cifs_convert_address(&sa, ip, strlen(ip))) { - cifs_dbg(VFS, "%s: failed to convert address \'%s\'. skip address matching.\n", - __func__, ip); - } else { - cifs_server_lock(server); - match = cifs_match_ipaddr((struct sockaddr *)&server->dstaddr, &sa); - cifs_server_unlock(server); - } + cifs_server_lock(server); + match = cifs_match_ipaddr((struct sockaddr *)&server->dstaddr, (struct sockaddr *)&ss); + cifs_dbg(FYI, "%s: [share=%s] ipaddr matched: %s\n", __func__, s1, match ? "yes" : "no"); + cifs_server_unlock(server); - kfree(ip); return match; } -/* - * Mark dfs tcon for reconnecting when the currently connected tcon does not match any of the new - * target shares in @refs. - */ -static void mark_for_reconnect_if_needed(struct cifs_tcon *tcon, struct dfs_cache_tgt_list *tl, - const struct dfs_info3_param *refs, int numrefs) +static bool is_ses_good(struct cifs_tcon *tcon, struct cifs_ses *ses) { - struct dfs_cache_tgt_iterator *it; - int i; + struct TCP_Server_Info *server = ses->server; + struct cifs_tcon *ipc = NULL; + bool ret; - for (it = dfs_cache_get_tgt_iterator(tl); it; it = dfs_cache_get_next_tgt(tl, it)) { - for (i = 0; i < numrefs; i++) { - if (target_share_equal(tcon->ses->server, dfs_cache_get_tgt_name(it), - refs[i].node_name)) - return; + spin_lock(&cifs_tcp_ses_lock); + spin_lock(&ses->ses_lock); + spin_lock(&ses->chan_lock); + + ret = !cifs_chan_needs_reconnect(ses, server) && + ses->ses_status == SES_GOOD; + + spin_unlock(&ses->chan_lock); + + if (!ret) + goto out; + + if (likely(ses->tcon_ipc)) { + if (ses->tcon_ipc->need_reconnect) { + ret = false; + goto out; + } + } else { + spin_unlock(&ses->ses_lock); + spin_unlock(&cifs_tcp_ses_lock); + + ipc = cifs_setup_ipc(ses, tcon->seal); + + spin_lock(&cifs_tcp_ses_lock); + spin_lock(&ses->ses_lock); + if (!IS_ERR(ipc)) { + if (!ses->tcon_ipc) { + ses->tcon_ipc = ipc; + ipc = NULL; + } + } else { + ret = false; + ipc = NULL; } } - cifs_dbg(FYI, "%s: no cached or matched targets. mark dfs share for reconnect.\n", __func__); - cifs_signal_cifsd_for_reconnect(tcon->ses->server, true); +out: + spin_unlock(&ses->ses_lock); + spin_unlock(&cifs_tcp_ses_lock); + if (ipc && server->ops->tree_disconnect) { + unsigned int xid = get_xid(); + + (void)server->ops->tree_disconnect(xid, ipc); + _free_xid(xid); + } + tconInfoFree(ipc); + return ret; } -/* Refresh dfs referral of tcon and mark it for reconnect if needed */ -static int __refresh_tcon(const char *path, struct cifs_ses **sessions, struct cifs_tcon *tcon, - bool force_refresh) +/* Refresh dfs referral of @ses */ +static void refresh_ses_referral(struct cifs_tcon *tcon, struct cifs_ses *ses) { - struct cifs_ses *ses; struct cache_entry *ce; - struct dfs_info3_param *refs = NULL; - int numrefs = 0; - bool needs_refresh = false; - struct dfs_cache_tgt_list tl = DFS_CACHE_TGT_LIST_INIT(tl); - int rc = 0; unsigned int xid; + const char *path; + int rc = 0; - ses = find_ipc_from_server_path(sessions, path); - if (IS_ERR(ses)) { - cifs_dbg(FYI, "%s: could not find ipc session\n", __func__); - return PTR_ERR(ses); + xid = get_xid(); + + path = dfs_ses_refpath(ses); + if (IS_ERR(path)) { + rc = PTR_ERR(path); + goto out; + } + + ses = CIFS_DFS_ROOT_SES(ses); + if (!is_ses_good(tcon, ses)) { + cifs_dbg(FYI, "%s: skip cache refresh due to disconnected ipc\n", + __func__); + goto out; + } + + ce = cache_refresh_path(xid, ses, path, false); + if (!IS_ERR(ce)) + up_read(&htable_rw_lock); + else + rc = PTR_ERR(ce); + +out: + free_xid(xid); +} + +static int __refresh_tcon_referral(struct cifs_tcon *tcon, + const char *path, + struct dfs_info3_param *refs, + int numrefs, bool force_refresh) +{ + struct cache_entry *ce; + bool reconnect = force_refresh; + int rc = 0; + int i; + + if (unlikely(!numrefs)) + return 0; + + if (force_refresh) { + for (i = 0; i < numrefs; i++) { + /* TODO: include prefix paths in the matching */ + if (target_share_equal(tcon, refs[i].node_name)) { + reconnect = false; + break; + } + } + } + + down_write(&htable_rw_lock); + ce = lookup_cache_entry(path); + if (!IS_ERR(ce)) { + if (force_refresh || cache_entry_expired(ce)) + rc = update_cache_entry_locked(ce, refs, numrefs); + } else if (PTR_ERR(ce) == -ENOENT) { + ce = add_cache_entry_locked(refs, numrefs); + } + up_write(&htable_rw_lock); + + if (IS_ERR(ce)) + rc = PTR_ERR(ce); + if (reconnect) { + cifs_tcon_dbg(FYI, "%s: mark for reconnect\n", __func__); + cifs_signal_cifsd_for_reconnect(tcon->ses->server, true); + } + return rc; +} + +static void refresh_tcon_referral(struct cifs_tcon *tcon, bool force_refresh) +{ + struct dfs_info3_param *refs = NULL; + struct cache_entry *ce; + struct cifs_ses *ses; + bool needs_refresh; + const char *path; + unsigned int xid; + int numrefs = 0; + int rc = 0; + + xid = get_xid(); + ses = tcon->ses; + + path = dfs_ses_refpath(ses); + if (IS_ERR(path)) { + rc = PTR_ERR(path); + goto out; } down_read(&htable_rw_lock); ce = lookup_cache_entry(path); needs_refresh = force_refresh || IS_ERR(ce) || cache_entry_expired(ce); - if (!IS_ERR(ce)) { - rc = get_targets(ce, &tl); - if (rc) - cifs_dbg(FYI, "%s: could not get dfs targets: %d\n", __func__, rc); + if (!needs_refresh) { + up_read(&htable_rw_lock); + goto out; } up_read(&htable_rw_lock); - if (!needs_refresh) { - rc = 0; + ses = CIFS_DFS_ROOT_SES(ses); + if (!is_ses_good(tcon, ses)) { + cifs_dbg(FYI, "%s: skip cache refresh due to disconnected ipc\n", + __func__); goto out; } - xid = get_xid(); rc = get_dfs_referral(xid, ses, path, &refs, &numrefs); - free_xid(xid); - - /* Create or update a cache entry with the new referral */ if (!rc) { - dump_refs(refs, numrefs); - - down_write(&htable_rw_lock); - ce = lookup_cache_entry(path); - if (IS_ERR(ce)) - add_cache_entry_locked(refs, numrefs); - else if (force_refresh || cache_entry_expired(ce)) - update_cache_entry_locked(ce, refs, numrefs); - up_write(&htable_rw_lock); - - mark_for_reconnect_if_needed(tcon, &tl, refs, numrefs); + rc = __refresh_tcon_referral(tcon, path, refs, + numrefs, force_refresh); } out: - dfs_cache_free_tgts(&tl); + free_xid(xid); free_dfs_info_array(refs, numrefs); - return rc; -} - -static int refresh_tcon(struct cifs_ses **sessions, struct cifs_tcon *tcon, bool force_refresh) -{ - struct TCP_Server_Info *server = tcon->ses->server; - - mutex_lock(&server->refpath_lock); - if (server->origin_fullpath) { - if (server->leaf_fullpath && strcasecmp(server->leaf_fullpath, - server->origin_fullpath)) - __refresh_tcon(server->leaf_fullpath + 1, sessions, tcon, force_refresh); - __refresh_tcon(server->origin_fullpath + 1, sessions, tcon, force_refresh); - } - mutex_unlock(&server->refpath_lock); - - return 0; } /** @@ -1474,40 +1309,19 @@ static int refresh_tcon(struct cifs_ses **sessions, struct cifs_tcon *tcon, bool int dfs_cache_remount_fs(struct cifs_sb_info *cifs_sb) { struct cifs_tcon *tcon; - struct TCP_Server_Info *server; - struct mount_group *mg; - struct cifs_ses *sessions[CACHE_MAX_ENTRIES + 1] = {NULL}; - int rc; if (!cifs_sb || !cifs_sb->master_tlink) return -EINVAL; tcon = cifs_sb_master_tcon(cifs_sb); - server = tcon->ses->server; - if (!server->origin_fullpath) { + spin_lock(&tcon->tc_lock); + if (!tcon->origin_fullpath) { + spin_unlock(&tcon->tc_lock); cifs_dbg(FYI, "%s: not a dfs mount\n", __func__); return 0; } - - if (uuid_is_null(&cifs_sb->dfs_mount_id)) { - cifs_dbg(FYI, "%s: no dfs mount group id\n", __func__); - return -EINVAL; - } - - mutex_lock(&mount_group_list_lock); - mg = find_mount_group_locked(&cifs_sb->dfs_mount_id); - if (IS_ERR(mg)) { - mutex_unlock(&mount_group_list_lock); - cifs_dbg(FYI, "%s: no ipc session for refreshing referral\n", __func__); - return PTR_ERR(mg); - } - kref_get(&mg->refcount); - mutex_unlock(&mount_group_list_lock); - - spin_lock(&mg->lock); - memcpy(&sessions, mg->sessions, mg->num_sessions * sizeof(mg->sessions[0])); - spin_unlock(&mg->lock); + spin_unlock(&tcon->tc_lock); /* * After reconnecting to a different server, unique ids won't match anymore, so we disable @@ -1519,111 +1333,23 @@ int dfs_cache_remount_fs(struct cifs_sb_info *cifs_sb) * that have different prefix paths. */ cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_USE_PREFIX_PATH; - rc = refresh_tcon(sessions, tcon, true); - kref_put(&mg->refcount, mount_group_release); - return rc; + refresh_tcon_referral(tcon, true); + return 0; } -/* - * Refresh all active dfs mounts regardless of whether they are in cache or not. - * (cache can be cleared) - */ -static void refresh_mounts(struct cifs_ses **sessions) +/* Refresh all DFS referrals related to DFS tcon */ +void dfs_cache_refresh(struct work_struct *work) { - struct TCP_Server_Info *server; + struct cifs_tcon *tcon; struct cifs_ses *ses; - struct cifs_tcon *tcon, *ntcon; - struct list_head tcons; - INIT_LIST_HEAD(&tcons); + tcon = container_of(work, struct cifs_tcon, dfs_cache_work.work); - spin_lock(&cifs_tcp_ses_lock); - list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) { - spin_lock(&server->srv_lock); - if (!server->is_dfs_conn) { - spin_unlock(&server->srv_lock); - continue; - } - spin_unlock(&server->srv_lock); + list_for_each_entry(ses, &tcon->dfs_ses_list, dlist) + refresh_ses_referral(tcon, ses); + refresh_tcon_referral(tcon, false); - list_for_each_entry(ses, &server->smb_ses_list, smb_ses_list) { - list_for_each_entry(tcon, &ses->tcon_list, tcon_list) { - spin_lock(&tcon->tc_lock); - if (!tcon->ipc && !tcon->need_reconnect) { - tcon->tc_count++; - list_add_tail(&tcon->ulist, &tcons); - } - spin_unlock(&tcon->tc_lock); - } - } - } - spin_unlock(&cifs_tcp_ses_lock); - - list_for_each_entry_safe(tcon, ntcon, &tcons, ulist) { - struct TCP_Server_Info *server = tcon->ses->server; - - list_del_init(&tcon->ulist); - - mutex_lock(&server->refpath_lock); - if (server->origin_fullpath) { - if (server->leaf_fullpath && strcasecmp(server->leaf_fullpath, - server->origin_fullpath)) - __refresh_tcon(server->leaf_fullpath + 1, sessions, tcon, false); - __refresh_tcon(server->origin_fullpath + 1, sessions, tcon, false); - } - mutex_unlock(&server->refpath_lock); - - cifs_put_tcon(tcon); - } -} - -/* - * Worker that will refresh DFS cache and active mounts based on lowest TTL value from a DFS - * referral. - */ -static void refresh_cache_worker(struct work_struct *work) -{ - struct list_head mglist; - struct mount_group *mg, *tmp_mg; - struct cifs_ses *sessions[CACHE_MAX_ENTRIES + 1] = {NULL}; - int max_sessions = ARRAY_SIZE(sessions) - 1; - int i = 0, count; - - INIT_LIST_HEAD(&mglist); - - /* Get refereces of mount groups */ - mutex_lock(&mount_group_list_lock); - list_for_each_entry(mg, &mount_group_list, list) { - kref_get(&mg->refcount); - list_add(&mg->refresh_list, &mglist); - } - mutex_unlock(&mount_group_list_lock); - - /* Fill in local array with an NULL-terminated list of all referral server sessions */ - list_for_each_entry(mg, &mglist, refresh_list) { - if (i >= max_sessions) - break; - - spin_lock(&mg->lock); - if (i + mg->num_sessions > max_sessions) - count = max_sessions - i; - else - count = mg->num_sessions; - memcpy(&sessions[i], mg->sessions, count * sizeof(mg->sessions[0])); - spin_unlock(&mg->lock); - i += count; - } - - if (sessions[0]) - refresh_mounts(sessions); - - list_for_each_entry_safe(mg, tmp_mg, &mglist, refresh_list) { - list_del_init(&mg->refresh_list); - kref_put(&mg->refcount, mount_group_release); - } - - spin_lock(&cache_ttl_lock); - queue_delayed_work(dfscache_wq, &refresh_task, cache_ttl * HZ); - spin_unlock(&cache_ttl_lock); + queue_delayed_work(dfscache_wq, &tcon->dfs_cache_work, + atomic_read(&dfs_cache_ttl) * HZ); } diff --git a/fs/cifs/dfs_cache.h b/fs/cifs/dfs_cache.h index 2b8c2526de..ee20cfedc0 100644 --- a/fs/cifs/dfs_cache.h +++ b/fs/cifs/dfs_cache.h @@ -13,7 +13,14 @@ #include #include "cifsglob.h" -#define DFS_CACHE_TGT_LIST_INIT(var) { .tl_numtgts = 0, .tl_list = LIST_HEAD_INIT((var).tl_list), } +extern struct workqueue_struct *dfscache_wq; +extern atomic_t dfs_cache_ttl; + +#define DFS_CACHE_TGT_LIST_INIT(var) \ + { .tl_numtgts = 0, .tl_list = LIST_HEAD_INIT((var).tl_list), } + +#define DFS_CACHE_TGT_LIST(var) \ + struct dfs_cache_tgt_list var = DFS_CACHE_TGT_LIST_INIT(var) struct dfs_cache_tgt_list { int tl_numtgts; @@ -35,25 +42,21 @@ int dfs_cache_find(const unsigned int xid, struct cifs_ses *ses, const struct nl struct dfs_cache_tgt_list *tgt_list); int dfs_cache_noreq_find(const char *path, struct dfs_info3_param *ref, struct dfs_cache_tgt_list *tgt_list); -int dfs_cache_update_tgthint(const unsigned int xid, struct cifs_ses *ses, - const struct nls_table *cp, int remap, const char *path, - const struct dfs_cache_tgt_iterator *it); -int dfs_cache_noreq_update_tgthint(const char *path, const struct dfs_cache_tgt_iterator *it); +void dfs_cache_noreq_update_tgthint(const char *path, const struct dfs_cache_tgt_iterator *it); int dfs_cache_get_tgt_referral(const char *path, const struct dfs_cache_tgt_iterator *it, struct dfs_info3_param *ref); int dfs_cache_get_tgt_share(char *path, const struct dfs_cache_tgt_iterator *it, char **share, char **prefix); -void dfs_cache_put_refsrv_sessions(const uuid_t *mount_id); -void dfs_cache_add_refsrv_session(const uuid_t *mount_id, struct cifs_ses *ses); char *dfs_cache_canonical_path(const char *path, const struct nls_table *cp, int remap); int dfs_cache_remount_fs(struct cifs_sb_info *cifs_sb); +void dfs_cache_refresh(struct work_struct *work); static inline struct dfs_cache_tgt_iterator * dfs_cache_get_next_tgt(struct dfs_cache_tgt_list *tl, struct dfs_cache_tgt_iterator *it) { - if (!tl || list_empty(&tl->tl_list) || !it || - list_is_last(&it->it_list, &tl->tl_list)) + if (!tl || !tl->tl_numtgts || list_empty(&tl->tl_list) || + !it || list_is_last(&it->it_list, &tl->tl_list)) return NULL; return list_next_entry(it, it_list); } @@ -72,7 +75,7 @@ static inline void dfs_cache_free_tgts(struct dfs_cache_tgt_list *tl) { struct dfs_cache_tgt_iterator *it, *nit; - if (!tl || list_empty(&tl->tl_list)) + if (!tl || !tl->tl_numtgts || list_empty(&tl->tl_list)) return; list_for_each_entry_safe(it, nit, &tl->tl_list, it_list) { list_del(&it->it_list); @@ -94,4 +97,9 @@ dfs_cache_get_nr_tgts(const struct dfs_cache_tgt_list *tl) return tl ? tl->tl_numtgts : 0; } +static inline int dfs_cache_get_ttl(void) +{ + return atomic_read(&dfs_cache_ttl); +} + #endif /* _CIFS_DFS_CACHE_H */ diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index 82b8a84d41..eafa8ef942 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -76,14 +76,13 @@ build_path_from_dentry(struct dentry *direntry, void *page) prefix); } -char * -build_path_from_dentry_optional_prefix(struct dentry *direntry, void *page, - bool prefix) +char *__build_path_from_dentry_optional_prefix(struct dentry *direntry, void *page, + const char *tree, int tree_len, + bool prefix) { int dfsplen; int pplen = 0; struct cifs_sb_info *cifs_sb = CIFS_SB(direntry->d_sb); - struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); char dirsep = CIFS_DIR_SEP(cifs_sb); char *s; @@ -91,7 +90,7 @@ build_path_from_dentry_optional_prefix(struct dentry *direntry, void *page, return ERR_PTR(-ENOMEM); if (prefix) - dfsplen = strnlen(tcon->treeName, MAX_TREE_SIZE + 1); + dfsplen = strnlen(tree, tree_len + 1); else dfsplen = 0; @@ -121,7 +120,7 @@ build_path_from_dentry_optional_prefix(struct dentry *direntry, void *page, } if (dfsplen) { s -= dfsplen; - memcpy(s, tcon->treeName, dfsplen); + memcpy(s, tree, dfsplen); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_POSIX_PATHS) { int i; for (i = 0; i < dfsplen; i++) { @@ -133,6 +132,16 @@ build_path_from_dentry_optional_prefix(struct dentry *direntry, void *page, return s; } +char *build_path_from_dentry_optional_prefix(struct dentry *direntry, void *page, + bool prefix) +{ + struct cifs_sb_info *cifs_sb = CIFS_SB(direntry->d_sb); + struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); + + return __build_path_from_dentry_optional_prefix(direntry, page, tcon->treeName, + MAX_TREE_SIZE, prefix); +} + /* * Don't allow path components longer than the server max. * Don't allow the separator character in a path component. @@ -772,7 +781,7 @@ cifs_d_revalidate(struct dentry *direntry, unsigned int flags) const struct dentry_operations cifs_dentry_ops = { .d_revalidate = cifs_d_revalidate, - .d_automount = cifs_dfs_d_automount, + .d_automount = cifs_d_automount, /* d_delete: cifs_d_delete, */ /* not needed except for debugging */ }; @@ -847,5 +856,5 @@ const struct dentry_operations cifs_ci_dentry_ops = { .d_revalidate = cifs_d_revalidate, .d_hash = cifs_ci_hash, .d_compare = cifs_ci_compare, - .d_automount = cifs_dfs_d_automount, + .d_automount = cifs_d_automount, }; diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c index d0d9ae881f..c317efcf17 100644 --- a/fs/cifs/dns_resolve.c +++ b/fs/cifs/dns_resolve.c @@ -12,6 +12,7 @@ * */ +#include #include #include #include "dns_resolve.h" @@ -19,70 +20,76 @@ #include "cifsproto.h" #include "cifs_debug.h" -/** - * dns_resolve_server_name_to_ip - Resolve UNC server name to ip address. - * @unc: UNC path specifying the server (with '/' as delimiter) - * @ip_addr: Where to return the IP address. - * @expiry: Where to return the expiry time for the dns record. - * - * The IP address will be returned in string form, and the caller is - * responsible for freeing it. - * - * Returns length of result on success, -ve on error. - */ -int -dns_resolve_server_name_to_ip(const char *unc, char **ip_addr, time64_t *expiry) +static int resolve_name(const char *name, size_t namelen, struct sockaddr *addr) { - struct sockaddr_storage ss; - const char *hostname, *sep; - char *name; - int len, rc; + char *ip; + int rc; - if (!ip_addr || !unc) + rc = dns_query(NULL, name, namelen, NULL, &ip, NULL); + if (rc < 0) { + cifs_dbg(FYI, "%s: unable to resolve: %*.*s\n", + __func__, (int)namelen, (int)namelen, name); + } else { + cifs_dbg(FYI, "%s: resolved: %*.*s to %s\n", + __func__, (int)namelen, (int)namelen, name, ip); + + rc = cifs_convert_address(addr, ip, strlen(ip)); + kfree(ip); + if (!rc) { + cifs_dbg(FYI, "%s: unable to determine ip address\n", + __func__); + rc = -EHOSTUNREACH; + } else { + rc = 0; + } + } + return rc; +} + +/** + * dns_resolve_name - Perform an upcall to resolve hostname to an ip address. + * @dom: DNS domain name (or NULL) + * @name: Name to look up + * @namelen: Length of name + * @ip_addr: Where to return the IP address + * + * Returns zero on success, -ve code otherwise. + */ +int dns_resolve_name(const char *dom, const char *name, + size_t namelen, struct sockaddr *ip_addr) +{ + size_t len; + char *s; + int rc; + + cifs_dbg(FYI, "%s: dom=%s name=%.*s\n", __func__, dom, (int)namelen, name); + if (!ip_addr || !name || !*name || !namelen) return -EINVAL; - len = strlen(unc); - if (len < 3) { - cifs_dbg(FYI, "%s: unc is too short: %s\n", __func__, unc); - return -EINVAL; + cifs_dbg(FYI, "%s: hostname=%.*s\n", __func__, (int)namelen, name); + /* Try to interpret hostname as an IPv4 or IPv6 address */ + rc = cifs_convert_address(ip_addr, name, namelen); + if (rc > 0) { + cifs_dbg(FYI, "%s: unc is IP, skipping dns upcall: %*.*s\n", + __func__, (int)namelen, (int)namelen, name); + return 0; } - /* Discount leading slashes for cifs */ - len -= 2; - hostname = unc + 2; + /* + * If @name contains a NetBIOS name and @dom has been specified, then + * convert @name to an FQDN and try resolving it first. + */ + if (dom && *dom && cifs_netbios_name(name, namelen)) { + len = strnlen(dom, CIFS_MAX_DOMAINNAME_LEN) + namelen + 2; + s = kmalloc(len, GFP_KERNEL); + if (!s) + return -ENOMEM; - /* Search for server name delimiter */ - sep = memchr(hostname, '/', len); - if (sep) - len = sep - hostname; - else - cifs_dbg(FYI, "%s: probably server name is whole unc: %s\n", - __func__, unc); - - /* Try to interpret hostname as an IPv4 or IPv6 address */ - rc = cifs_convert_address((struct sockaddr *)&ss, hostname, len); - if (rc > 0) - goto name_is_IP_address; - - /* Perform the upcall */ - rc = dns_query(NULL, hostname, len, NULL, ip_addr, expiry); - if (rc < 0) - cifs_dbg(FYI, "%s: unable to resolve: %*.*s\n", - __func__, len, len, hostname); - else - cifs_dbg(FYI, "%s: resolved: %*.*s to %s expiry %llu\n", - __func__, len, len, hostname, *ip_addr, - expiry ? (*expiry) : 0); - return rc; - -name_is_IP_address: - name = kmalloc(len + 1, GFP_KERNEL); - if (!name) - return -ENOMEM; - memcpy(name, hostname, len); - name[len] = 0; - cifs_dbg(FYI, "%s: unc is IP, skipping dns upcall: %s\n", - __func__, name); - *ip_addr = name; - return 0; + scnprintf(s, len, "%.*s.%s", (int)namelen, name, dom); + rc = resolve_name(s, len - 1, ip_addr); + kfree(s); + if (!rc) + return 0; + } + return resolve_name(name, namelen, ip_addr); } diff --git a/fs/cifs/dns_resolve.h b/fs/cifs/dns_resolve.h index afc0df3812..0dc706f2c4 100644 --- a/fs/cifs/dns_resolve.h +++ b/fs/cifs/dns_resolve.h @@ -11,8 +11,31 @@ #ifndef _DNS_RESOLVE_H #define _DNS_RESOLVE_H +#include +#include "cifsglob.h" +#include "cifsproto.h" + #ifdef __KERNEL__ -extern int dns_resolve_server_name_to_ip(const char *unc, char **ip_addr, time64_t *expiry); + +int dns_resolve_name(const char *dom, const char *name, + size_t namelen, struct sockaddr *ip_addr); + +static inline int dns_resolve_unc(const char *dom, const char *unc, + struct sockaddr *ip_addr) +{ + const char *name; + size_t namelen; + + if (!unc || strlen(unc) < 3) + return -EINVAL; + + extract_unc_hostname(unc, &name, &namelen); + if (!namelen) + return -EINVAL; + + return dns_resolve_name(dom, name, namelen, ip_addr); +} + #endif /* KERNEL */ #endif /* _DNS_RESOLVE_H */ diff --git a/fs/cifs/file.c b/fs/cifs/file.c index d2f231baa1..0580416165 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -405,6 +405,8 @@ static void cifsFileInfo_put_work(struct work_struct *work) * cifsFileInfo_put - release a reference of file priv data * * Always potentially wait for oplock handler. See _cifsFileInfo_put(). + * + * @cifs_file: cifs/smb3 specific info (eg refcounts) for an open file */ void cifsFileInfo_put(struct cifsFileInfo *cifs_file) { @@ -420,8 +422,11 @@ void cifsFileInfo_put(struct cifsFileInfo *cifs_file) * * If @wait_for_oplock_handler is true and we are releasing the last * reference, wait for any running oplock break handler of the file - * and cancel any pending one. If calling this function from the - * oplock break handler, you need to pass false. + * and cancel any pending one. + * + * @cifs_file: cifs/smb3 specific info (eg refcounts) for an open file + * @wait_oplock_handler: must be false if called from oplock_break_handler + * @offload: not offloaded on close and oplock breaks * */ void _cifsFileInfo_put(struct cifsFileInfo *cifs_file, diff --git a/fs/cifs/fs_context.c b/fs/cifs/fs_context.c index 3a6802f56b..2ecdd4c94c 100644 --- a/fs/cifs/fs_context.c +++ b/fs/cifs/fs_context.c @@ -37,7 +37,7 @@ #include "rfc1002pdu.h" #include "fs_context.h" -static DEFINE_MUTEX(cifs_mount_mutex); +DEFINE_MUTEX(cifs_mount_mutex); static const match_table_t cifs_smb_version_tokens = { { Smb_1, SMB1_VERSION_STRING }, @@ -241,6 +241,8 @@ cifs_parse_security_flavors(struct fs_context *fc, char *value, struct smb3_fs_c #endif case Opt_sec_none: ctx->nullauth = 1; + kfree(ctx->username); + ctx->username = NULL; break; default: cifs_errorf(fc, "bad security option: %s\n", value); @@ -318,7 +320,6 @@ smb3_fs_context_dup(struct smb3_fs_context *new_ctx, struct smb3_fs_context *ctx { memcpy(new_ctx, ctx, sizeof(*ctx)); new_ctx->prepath = NULL; - new_ctx->mount_options = NULL; new_ctx->nodename = NULL; new_ctx->username = NULL; new_ctx->password = NULL; @@ -327,11 +328,12 @@ smb3_fs_context_dup(struct smb3_fs_context *new_ctx, struct smb3_fs_context *ctx new_ctx->UNC = NULL; new_ctx->source = NULL; new_ctx->iocharset = NULL; + new_ctx->leaf_fullpath = NULL; + new_ctx->dns_dom = NULL; /* * Make sure to stay in sync with smb3_cleanup_fs_context_contents() */ DUP_CTX_STR(prepath); - DUP_CTX_STR(mount_options); DUP_CTX_STR(username); DUP_CTX_STR(password); DUP_CTX_STR(server_hostname); @@ -340,6 +342,8 @@ smb3_fs_context_dup(struct smb3_fs_context *new_ctx, struct smb3_fs_context *ctx DUP_CTX_STR(domainname); DUP_CTX_STR(nodename); DUP_CTX_STR(iocharset); + DUP_CTX_STR(leaf_fullpath); + DUP_CTX_STR(dns_dom); return 0; } @@ -451,14 +455,17 @@ out: * but there are some bugs that prevent rename from working if there are * multiple delimiters. * - * Returns a sanitized duplicate of @path. @gfp indicates the GFP_* flags - * for kstrdup. + * Return a sanitized duplicate of @path or NULL for empty prefix paths. + * Otherwise, return ERR_PTR. + * + * @gfp indicates the GFP_* flags for kstrdup. * The caller is responsible for freeing the original. */ #define IS_DELIM(c) ((c) == '/' || (c) == '\\') char *cifs_sanitize_prepath(char *prepath, gfp_t gfp) { char *cursor1 = prepath, *cursor2 = prepath; + char *s; /* skip all prepended delimiters */ while (IS_DELIM(*cursor1)) @@ -479,8 +486,39 @@ char *cifs_sanitize_prepath(char *prepath, gfp_t gfp) if (IS_DELIM(*(cursor2 - 1))) cursor2--; - *(cursor2) = '\0'; - return kstrdup(prepath, gfp); + *cursor2 = '\0'; + if (!*prepath) + return NULL; + s = kstrdup(prepath, gfp); + if (!s) + return ERR_PTR(-ENOMEM); + return s; +} + +/* + * Return full path based on the values of @ctx->{UNC,prepath}. + * + * It is assumed that both values were already parsed by smb3_parse_devname(). + */ +char *smb3_fs_context_fullpath(const struct smb3_fs_context *ctx, char dirsep) +{ + size_t ulen, plen; + char *s; + + ulen = strlen(ctx->UNC); + plen = ctx->prepath ? strlen(ctx->prepath) + 1 : 0; + + s = kmalloc(ulen + plen + 1, GFP_KERNEL); + if (!s) + return ERR_PTR(-ENOMEM); + memcpy(s, ctx->UNC, ulen); + if (plen) { + s[ulen] = dirsep; + memcpy(s + ulen + 1, ctx->prepath, plen); + } + s[ulen + plen] = '\0'; + convert_delimiter(s, dirsep); + return s; } /* @@ -494,6 +532,7 @@ smb3_parse_devname(const char *devname, struct smb3_fs_context *ctx) char *pos; const char *delims = "/\\"; size_t len; + int rc; if (unlikely(!devname || !*devname)) { cifs_dbg(VFS, "Device name not specified\n"); @@ -521,6 +560,8 @@ smb3_parse_devname(const char *devname, struct smb3_fs_context *ctx) /* now go until next delimiter or end of string */ len = strcspn(pos, delims); + if (!len) + return -EINVAL; /* move "pos" up to delimiter or NULL */ pos += len; @@ -543,8 +584,11 @@ smb3_parse_devname(const char *devname, struct smb3_fs_context *ctx) return 0; ctx->prepath = cifs_sanitize_prepath(pos, GFP_KERNEL); - if (!ctx->prepath) - return -ENOMEM; + if (IS_ERR(ctx->prepath)) { + rc = PTR_ERR(ctx->prepath); + ctx->prepath = NULL; + return rc; + } return 0; } @@ -580,17 +624,12 @@ static const struct fs_context_operations smb3_fs_context_ops = { static int smb3_fs_context_parse_monolithic(struct fs_context *fc, void *data) { - struct smb3_fs_context *ctx = smb3_fc2context(fc); char *options = data, *key; int ret = 0; if (!options) return 0; - ctx->mount_options = kstrdup(data, GFP_KERNEL); - if (ctx->mount_options == NULL) - return -ENOMEM; - ret = security_sb_eat_lsm_opts(options, &fc->security); if (ret) return ret; @@ -729,9 +768,9 @@ static int smb3_get_tree(struct fs_context *fc) if (err) return err; - mutex_lock(&cifs_mount_mutex); + cifs_mount_lock(); ret = smb3_get_tree_common(fc); - mutex_unlock(&cifs_mount_mutex); + cifs_mount_unlock(); return ret; } @@ -1116,12 +1155,13 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, cifs_errorf(fc, "Unknown error parsing devname\n"); goto cifs_parse_mount_err; } - ctx->source = kstrdup(param->string, GFP_KERNEL); - if (ctx->source == NULL) { + ctx->source = smb3_fs_context_fullpath(ctx, '/'); + if (IS_ERR(ctx->source)) { + ctx->source = NULL; cifs_errorf(fc, "OOM when copying UNC string\n"); goto cifs_parse_mount_err; } - fc->source = kstrdup(param->string, GFP_KERNEL); + fc->source = kstrdup(ctx->source, GFP_KERNEL); if (fc->source == NULL) { cifs_errorf(fc, "OOM when copying UNC string\n"); goto cifs_parse_mount_err; @@ -1130,6 +1170,8 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, case Opt_user: kfree(ctx->username); ctx->username = NULL; + if (ctx->nullauth) + break; if (strlen(param->string) == 0) { /* null user, ie. anonymous authentication */ ctx->nullauth = 1; @@ -1542,8 +1584,6 @@ smb3_cleanup_fs_context_contents(struct smb3_fs_context *ctx) /* * Make sure this stays in sync with smb3_fs_context_dup() */ - kfree(ctx->mount_options); - ctx->mount_options = NULL; kfree(ctx->username); ctx->username = NULL; kfree_sensitive(ctx->password); @@ -1562,6 +1602,10 @@ smb3_cleanup_fs_context_contents(struct smb3_fs_context *ctx) ctx->iocharset = NULL; kfree(ctx->prepath); ctx->prepath = NULL; + kfree(ctx->leaf_fullpath); + ctx->leaf_fullpath = NULL; + kfree(ctx->dns_dom); + ctx->dns_dom = NULL; } void diff --git a/fs/cifs/fs_context.h b/fs/cifs/fs_context.h index fbe31355fe..1df6f10276 100644 --- a/fs/cifs/fs_context.h +++ b/fs/cifs/fs_context.h @@ -259,8 +259,11 @@ struct smb3_fs_context { __u16 compression; /* compression algorithm 0xFFFF default 0=disabled */ bool rootfs:1; /* if it's a SMB root file system */ bool witness:1; /* use witness protocol */ - - char *mount_options; + char *leaf_fullpath; + struct cifs_ses *dfs_root_ses; + bool dfs_automount:1; /* set for dfs automount only */ + bool dfs_conn:1; /* set for dfs mounts */ + char *dns_dom; }; extern const struct fs_parameter_spec smb3_fs_parameters[]; @@ -279,4 +282,16 @@ extern void smb3_update_mnt_flags(struct cifs_sb_info *cifs_sb); extern char *cifs_sanitize_prepath(char *prepath, gfp_t gfp); +extern struct mutex cifs_mount_mutex; + +static inline void cifs_mount_lock(void) +{ + mutex_lock(&cifs_mount_mutex); +} + +static inline void cifs_mount_unlock(void) +{ + mutex_unlock(&cifs_mount_mutex); +} + #endif diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index b7645178ad..afce456729 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -57,13 +57,9 @@ static void cifs_set_ops(struct inode *inode) inode->i_data.a_ops = &cifs_addr_ops; break; case S_IFDIR: -#ifdef CONFIG_CIFS_DFS_UPCALL if (IS_AUTOMOUNT(inode)) { - inode->i_op = &cifs_dfs_referral_inode_operations; + inode->i_op = &cifs_namespace_inode_operations; } else { -#else /* NO DFS support, treat as a directory */ - { -#endif inode->i_op = &cifs_dir_inode_ops; inode->i_fop = &cifs_dir_ops; } @@ -796,11 +792,15 @@ static __u64 simple_hashstr(const char *str) * cifs_backup_query_path_info - SMB1 fallback code to get ino * * Fallback code to get file metadata when we don't have access to - * @full_path (EACCES) and have backup creds. + * full_path (EACCES) and have backup creds. * - * @data will be set to search info result buffer - * @resp_buf will be set to cifs resp buf and needs to be freed with - * cifs_buf_release() when done with @data. + * @xid: transaction id used to identify original request in logs + * @tcon: information about the server share we have mounted + * @sb: the superblock stores info such as disk space available + * @full_path: name of the file we are getting the metadata for + * @resp_buf: will be set to cifs resp buf and needs to be freed with + * cifs_buf_release() when done with @data + * @data: will be set to search info result buffer */ static int cifs_backup_query_path_info(int xid, @@ -2263,7 +2263,9 @@ cifs_invalidate_mapping(struct inode *inode) /** * cifs_wait_bit_killable - helper for functions that are sleeping on bit locks - * @word: long word containing the bit lock + * + * @key: currently unused + * @mode: the task state to sleep in */ static int cifs_wait_bit_killable(struct wait_bit_key *key, int mode) diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c index c17d5d57f5..3570e9f16b 100644 --- a/fs/cifs/misc.c +++ b/fs/cifs/misc.c @@ -21,6 +21,7 @@ #include "cifsfs.h" #ifdef CONFIG_CIFS_DFS_UPCALL #include "dns_resolve.h" +#include "dfs.h" #endif #include "fs_context.h" @@ -92,6 +93,7 @@ sesInfoFree(struct cifs_ses *buf_to_free) return; } + unload_nls(buf_to_free->local_nls); atomic_dec(&sesInfoAllocCount); kfree(buf_to_free->serverOS); kfree(buf_to_free->serverDomain); @@ -99,6 +101,7 @@ sesInfoFree(struct cifs_ses *buf_to_free) kfree_sensitive(buf_to_free->password); kfree(buf_to_free->user_name); kfree(buf_to_free->domainName); + kfree(buf_to_free->dns_dom); kfree_sensitive(buf_to_free->auth_key.response); spin_lock(&buf_to_free->iface_lock); list_for_each_entry_safe(iface, niface, &buf_to_free->iface_list, @@ -133,6 +136,9 @@ tconInfoAlloc(void) spin_lock_init(&ret_buf->stat_lock); atomic_set(&ret_buf->num_local_opens, 0); atomic_set(&ret_buf->num_remote_opens, 0); +#ifdef CONFIG_CIFS_DFS_UPCALL + INIT_LIST_HEAD(&ret_buf->dfs_ses_list); +#endif return ret_buf; } @@ -148,6 +154,7 @@ tconInfoFree(struct cifs_tcon *buf_to_free) kfree(buf_to_free->nativeFileSystem); kfree_sensitive(buf_to_free->password); kfree(buf_to_free->crfid.fid); + kfree(buf_to_free->origin_fullpath); kfree(buf_to_free); } @@ -691,9 +698,9 @@ parse_dfs_referrals(struct get_dfs_referral_rsp *rsp, u32 rsp_size, *num_of_nodes = le16_to_cpu(rsp->NumberOfReferrals); if (*num_of_nodes < 1) { - cifs_dbg(VFS, "num_referrals: must be at least > 0, but we get num_referrals = %d\n", - *num_of_nodes); - rc = -EINVAL; + cifs_dbg(VFS | ONCE, "%s: [path=%s] num_referrals must be at least > 0, but we got %d\n", + __func__, searchName, *num_of_nodes); + rc = -ENOENT; goto parse_DFS_referrals_exit; } @@ -1040,20 +1047,26 @@ struct super_cb_data { struct super_block *sb; }; -static void tcp_super_cb(struct super_block *sb, void *arg) +static void tcon_super_cb(struct super_block *sb, void *arg) { struct super_cb_data *sd = arg; - struct TCP_Server_Info *server = sd->data; struct cifs_sb_info *cifs_sb; - struct cifs_tcon *tcon; + struct cifs_tcon *t1 = sd->data, *t2; if (sd->sb) return; cifs_sb = CIFS_SB(sb); - tcon = cifs_sb_master_tcon(cifs_sb); - if (tcon->ses->server == server) + t2 = cifs_sb_master_tcon(cifs_sb); + + spin_lock(&t2->tc_lock); + if ((t1->ses == t2->ses || + t1->ses->dfs_root_ses == t2->ses->dfs_root_ses) && + t1->ses->server == t2->ses->server && + t2->origin_fullpath && + dfs_src_pathname_equal(t2->origin_fullpath, t1->origin_fullpath)) sd->sb = sb; + spin_unlock(&t2->tc_lock); } static struct super_block *__cifs_get_super(void (*f)(struct super_block *, void *), @@ -1079,6 +1092,7 @@ static struct super_block *__cifs_get_super(void (*f)(struct super_block *, void return sd.sb; } } + pr_warn_once("%s: could not find dfs superblock\n", __func__); return ERR_PTR(-EINVAL); } @@ -1088,9 +1102,15 @@ static void __cifs_put_super(struct super_block *sb) cifs_sb_deactive(sb); } -struct super_block *cifs_get_tcp_super(struct TCP_Server_Info *server) +struct super_block *cifs_get_dfs_tcon_super(struct cifs_tcon *tcon) { - return __cifs_get_super(tcp_super_cb, server); + spin_lock(&tcon->tc_lock); + if (!tcon->origin_fullpath) { + spin_unlock(&tcon->tc_lock); + return ERR_PTR(-ENOENT); + } + spin_unlock(&tcon->tc_lock); + return __cifs_get_super(tcon_super_cb, tcon); } void cifs_put_tcp_super(struct super_block *sb) @@ -1100,64 +1120,45 @@ void cifs_put_tcp_super(struct super_block *sb) #ifdef CONFIG_CIFS_DFS_UPCALL int match_target_ip(struct TCP_Server_Info *server, - const char *share, size_t share_len, + const char *host, size_t hostlen, bool *result) { + struct sockaddr_storage ss; int rc; - char *target, *tip = NULL; - struct sockaddr tipaddr; + + cifs_dbg(FYI, "%s: hostname=%.*s\n", __func__, (int)hostlen, host); *result = false; - target = kzalloc(share_len + 3, GFP_KERNEL); - if (!target) { - rc = -ENOMEM; - goto out; - } - - scnprintf(target, share_len + 3, "\\\\%.*s", (int)share_len, share); - - cifs_dbg(FYI, "%s: target name: %s\n", __func__, target + 2); - - rc = dns_resolve_server_name_to_ip(target, &tip, NULL); + rc = dns_resolve_name(server->dns_dom, host, hostlen, + (struct sockaddr *)&ss); if (rc < 0) - goto out; - - cifs_dbg(FYI, "%s: target ip: %s\n", __func__, tip); - - if (!cifs_convert_address(&tipaddr, tip, strlen(tip))) { - cifs_dbg(VFS, "%s: failed to convert target ip address\n", - __func__); - rc = -EINVAL; - goto out; - } + return rc; spin_lock(&server->srv_lock); - *result = cifs_match_ipaddr((struct sockaddr *)&server->dstaddr, - &tipaddr); + *result = cifs_match_ipaddr((struct sockaddr *)&server->dstaddr, (struct sockaddr *)&ss); spin_unlock(&server->srv_lock); - cifs_dbg(FYI, "%s: ip addresses match: %u\n", __func__, *result); - rc = 0; - -out: - kfree(target); - kfree(tip); - - return rc; + cifs_dbg(FYI, "%s: ip addresses matched: %s\n", __func__, *result ? "yes" : "no"); + return 0; } int cifs_update_super_prepath(struct cifs_sb_info *cifs_sb, char *prefix) { + int rc; + kfree(cifs_sb->prepath); + cifs_sb->prepath = NULL; if (prefix && *prefix) { cifs_sb->prepath = cifs_sanitize_prepath(prefix, GFP_ATOMIC); - if (!cifs_sb->prepath) - return -ENOMEM; - - convert_delimiter(cifs_sb->prepath, CIFS_DIR_SEP(cifs_sb)); - } else - cifs_sb->prepath = NULL; + if (IS_ERR(cifs_sb->prepath)) { + rc = PTR_ERR(cifs_sb->prepath); + cifs_sb->prepath = NULL; + return rc; + } + if (cifs_sb->prepath) + convert_delimiter(cifs_sb->prepath, CIFS_DIR_SEP(cifs_sb)); + } cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_USE_PREFIX_PATH; return 0; diff --git a/fs/cifs/namespace.c b/fs/cifs/namespace.c new file mode 100644 index 0000000000..c0ba43c961 --- /dev/null +++ b/fs/cifs/namespace.c @@ -0,0 +1,290 @@ +/* + * Contains mounting routines used for handling traversal via SMB junctions. + * + * Copyright (c) 2007 Igor Mammedov + * Copyright (C) International Business Machines Corp., 2008 + * Author(s): Igor Mammedov (niallain@gmail.com) + * Steve French (sfrench@us.ibm.com) + * Copyright (c) 2023 Paulo Alcantara + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include "cifsglob.h" +#include "cifsproto.h" +#include "cifsfs.h" +#include "cifs_debug.h" +#include "fs_context.h" + +static LIST_HEAD(cifs_automount_list); + +static void cifs_expire_automounts(struct work_struct *work); +static DECLARE_DELAYED_WORK(cifs_automount_task, + cifs_expire_automounts); +static int cifs_mountpoint_expiry_timeout = 500 * HZ; + +static void cifs_expire_automounts(struct work_struct *work) +{ + struct list_head *list = &cifs_automount_list; + + mark_mounts_for_expiry(list); + if (!list_empty(list)) + schedule_delayed_work(&cifs_automount_task, + cifs_mountpoint_expiry_timeout); +} + +void cifs_release_automount_timer(void) +{ + if (WARN_ON(!list_empty(&cifs_automount_list))) + return; + cancel_delayed_work_sync(&cifs_automount_task); +} + +/** + * cifs_build_devname - build a devicename from a UNC and optional prepath + * @nodename: pointer to UNC string + * @prepath: pointer to prefixpath (or NULL if there isn't one) + * + * Build a new cifs devicename after chasing a DFS referral. Allocate a buffer + * big enough to hold the final thing. Copy the UNC from the nodename, and + * concatenate the prepath onto the end of it if there is one. + * + * Returns pointer to the built string, or a ERR_PTR. Caller is responsible + * for freeing the returned string. + */ +char * +cifs_build_devname(char *nodename, const char *prepath) +{ + size_t pplen; + size_t unclen; + char *dev; + char *pos; + + /* skip over any preceding delimiters */ + nodename += strspn(nodename, "\\"); + if (!*nodename) + return ERR_PTR(-EINVAL); + + /* get length of UNC and set pos to last char */ + unclen = strlen(nodename); + pos = nodename + unclen - 1; + + /* trim off any trailing delimiters */ + while (*pos == '\\') { + --pos; + --unclen; + } + + /* allocate a buffer: + * +2 for preceding "//" + * +1 for delimiter between UNC and prepath + * +1 for trailing NULL + */ + pplen = prepath ? strlen(prepath) : 0; + dev = kmalloc(2 + unclen + 1 + pplen + 1, GFP_KERNEL); + if (!dev) + return ERR_PTR(-ENOMEM); + + pos = dev; + /* add the initial "//" */ + *pos = '/'; + ++pos; + *pos = '/'; + ++pos; + + /* copy in the UNC portion from referral */ + memcpy(pos, nodename, unclen); + pos += unclen; + + /* copy the prefixpath remainder (if there is one) */ + if (pplen) { + *pos = '/'; + ++pos; + memcpy(pos, prepath, pplen); + pos += pplen; + } + + /* NULL terminator */ + *pos = '\0'; + + convert_delimiter(dev, '/'); + return dev; +} + +static bool is_dfs_mount(struct dentry *dentry) +{ + struct cifs_sb_info *cifs_sb = CIFS_SB(dentry->d_sb); + struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); + bool ret; + + spin_lock(&tcon->tc_lock); + ret = !!tcon->origin_fullpath; + spin_unlock(&tcon->tc_lock); + return ret; +} + +/* Return full path out of a dentry set for automount */ +static char *automount_fullpath(struct dentry *dentry, void *page) +{ + struct cifs_sb_info *cifs_sb = CIFS_SB(dentry->d_sb); + struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); + size_t len; + char *s; + + spin_lock(&tcon->tc_lock); + if (unlikely(!tcon->origin_fullpath)) { + spin_unlock(&tcon->tc_lock); + return ERR_PTR(-EREMOTE); + } + spin_unlock(&tcon->tc_lock); + + s = dentry_path_raw(dentry, page, PATH_MAX); + if (IS_ERR(s)) + return s; + /* for root, we want "" */ + if (!s[1]) + s++; + + spin_lock(&tcon->tc_lock); + len = strlen(tcon->origin_fullpath); + if (s < (char *)page + len) { + spin_unlock(&tcon->tc_lock); + return ERR_PTR(-ENAMETOOLONG); + } + + s -= len; + memcpy(s, tcon->origin_fullpath, len); + spin_unlock(&tcon->tc_lock); + convert_delimiter(s, '/'); + + return s; +} + +static void fs_context_set_ids(struct smb3_fs_context *ctx) +{ + kuid_t uid = current_fsuid(); + kgid_t gid = current_fsgid(); + + if (ctx->multiuser) { + if (!ctx->uid_specified) + ctx->linux_uid = uid; + if (!ctx->gid_specified) + ctx->linux_gid = gid; + } + if (!ctx->cruid_specified) + ctx->cred_uid = uid; +} + +/* + * Create a vfsmount that we can automount + */ +static struct vfsmount *cifs_do_automount(struct path *path) +{ + int rc; + struct dentry *mntpt = path->dentry; + struct fs_context *fc; + struct cifs_sb_info *cifs_sb; + void *page = NULL; + struct smb3_fs_context *ctx, *cur_ctx; + struct smb3_fs_context tmp; + char *full_path; + struct vfsmount *mnt; + + if (IS_ROOT(mntpt)) + return ERR_PTR(-ESTALE); + + /* + * The MSDFS spec states that paths in DFS referral requests and + * responses must be prefixed by a single '\' character instead of + * the double backslashes usually used in the UNC. This function + * gives us the latter, so we must adjust the result. + */ + cifs_sb = CIFS_SB(mntpt->d_sb); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS) + return ERR_PTR(-EREMOTE); + + cur_ctx = cifs_sb->ctx; + + fc = fs_context_for_submount(path->mnt->mnt_sb->s_type, mntpt); + if (IS_ERR(fc)) + return ERR_CAST(fc); + + ctx = smb3_fc2context(fc); + + page = alloc_dentry_path(); + full_path = automount_fullpath(mntpt, page); + if (IS_ERR(full_path)) { + mnt = ERR_CAST(full_path); + goto out; + } + + tmp = *cur_ctx; + tmp.source = NULL; + tmp.leaf_fullpath = NULL; + tmp.UNC = tmp.prepath = NULL; + tmp.dfs_root_ses = NULL; + fs_context_set_ids(&tmp); + + rc = smb3_fs_context_dup(ctx, &tmp); + if (rc) { + mnt = ERR_PTR(rc); + goto out; + } + + rc = smb3_parse_devname(full_path, ctx); + if (rc) { + mnt = ERR_PTR(rc); + goto out; + } + + ctx->source = smb3_fs_context_fullpath(ctx, '/'); + if (IS_ERR(ctx->source)) { + mnt = ERR_CAST(ctx->source); + ctx->source = NULL; + goto out; + } + ctx->dfs_automount = ctx->dfs_conn = is_dfs_mount(mntpt); + cifs_dbg(FYI, "%s: ctx: source=%s UNC=%s prepath=%s dfs_automount=%d\n", + __func__, ctx->source, ctx->UNC, ctx->prepath, ctx->dfs_automount); + + mnt = fc_mount(fc); +out: + put_fs_context(fc); + free_dentry_path(page); + return mnt; +} + +/* + * Attempt to automount the referral + */ +struct vfsmount *cifs_d_automount(struct path *path) +{ + struct vfsmount *newmnt; + + cifs_dbg(FYI, "%s: %pd\n", __func__, path->dentry); + + newmnt = cifs_do_automount(path); + if (IS_ERR(newmnt)) { + cifs_dbg(FYI, "leaving %s [automount failed]\n" , __func__); + return newmnt; + } + + mntget(newmnt); /* prevent immediate expiration */ + mnt_set_expiry(newmnt, &cifs_automount_list); + schedule_delayed_work(&cifs_automount_task, + cifs_mountpoint_expiry_timeout); + cifs_dbg(FYI, "leaving %s [ok]\n" , __func__); + return newmnt; +} + +const struct inode_operations cifs_namespace_inode_operations = { +}; diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c index 818ec67254..af24615723 100644 --- a/fs/cifs/sess.c +++ b/fs/cifs/sess.c @@ -219,7 +219,7 @@ int cifs_try_adding_channels(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses) spin_lock(&ses->iface_lock); if (!ses->iface_count) { spin_unlock(&ses->iface_lock); - cifs_dbg(VFS, "server %s does not advertise interfaces\n", + cifs_dbg(ONCE, "server %s does not advertise interfaces\n", ses->server->hostname); break; } @@ -330,7 +330,7 @@ cifs_chan_update_iface(struct cifs_ses *ses, struct TCP_Server_Info *server) spin_lock(&ses->iface_lock); if (!ses->iface_count) { spin_unlock(&ses->iface_lock); - cifs_dbg(VFS, "server %s does not advertise interfaces\n", ses->server->hostname); + cifs_dbg(ONCE, "server %s does not advertise interfaces\n", ses->server->hostname); return 0; } diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index 8dd994f9f2..2001ea29a0 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -864,6 +864,10 @@ smb2_handle_cancelled_mid(struct mid_q_entry *mid, struct TCP_Server_Info *serve * * Assumes @iov does not contain the rfc1002 length and iov[0] has the * SMB2 header. + * + * @ses: server session structure + * @iov: array containing the SMB request we will send to the server + * @nvec: number of array entries for the iov */ int smb311_update_preauth_hash(struct cifs_ses *ses, struct TCP_Server_Info *server, diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index c61bf61081..48e2004216 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -43,6 +43,8 @@ static int change_conf(struct TCP_Server_Info *server) { server->credits += server->echo_credits + server->oplock_credits; + if (server->credits > server->max_credits) + server->credits = server->max_credits; server->oplock_credits = server->echo_credits = 0; switch (server->credits) { case 0: @@ -519,6 +521,43 @@ smb3_negotiate_rsize(struct cifs_tcon *tcon, struct smb3_fs_context *ctx) return rsize; } +/* + * compare two interfaces a and b + * return 0 if everything matches. + * return 1 if a is rdma capable, or rss capable, or has higher link speed + * return -1 otherwise. + */ +static int +iface_cmp(struct cifs_server_iface *a, struct cifs_server_iface *b) +{ + int cmp_ret = 0; + + WARN_ON(!a || !b); + if (a->rdma_capable == b->rdma_capable) { + if (a->rss_capable == b->rss_capable) { + if (a->speed == b->speed) { + cmp_ret = cifs_ipaddr_cmp((struct sockaddr *) &a->sockaddr, + (struct sockaddr *) &b->sockaddr); + if (!cmp_ret) + return 0; + else if (cmp_ret > 0) + return 1; + else + return -1; + } else if (a->speed > b->speed) + return 1; + else + return -1; + } else if (a->rss_capable > b->rss_capable) + return 1; + else + return -1; + } else if (a->rdma_capable > b->rdma_capable) + return 1; + else + return -1; +} + static int parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf, size_t buf_len, struct cifs_ses *ses, bool in_mount) @@ -2909,7 +2948,7 @@ smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses *ses, struct fsctl_get_dfs_referral_req *dfs_req = NULL; struct get_dfs_referral_rsp *dfs_rsp = NULL; u32 dfs_req_size = 0, dfs_rsp_size = 0; - int retry_count = 0; + int retry_once = 0; cifs_dbg(FYI, "%s: path: %s\n", __func__, search_name); @@ -2955,19 +2994,25 @@ smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses *ses, /* Path to resolve in an UTF-16 null-terminated string */ memcpy(dfs_req->RequestFileName, utf16_path, utf16_path_len); - do { + for (;;) { rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID, FSCTL_DFS_GET_REFERRALS, (char *)dfs_req, dfs_req_size, CIFSMaxBufSize, (char **)&dfs_rsp, &dfs_rsp_size); - if (!is_retryable_error(rc)) + if (fatal_signal_pending(current)) { + rc = -EINTR; + break; + } + if (!is_retryable_error(rc) || retry_once++) break; usleep_range(512, 2048); - } while (++retry_count < 5); + } + if (!rc && !dfs_rsp) + rc = -EIO; if (rc) { if (!is_retryable_error(rc) && rc != -ENOENT && rc != -EOPNOTSUPP) - cifs_tcon_dbg(VFS, "%s: ioctl error: rc=%d\n", __func__, rc); + cifs_tcon_dbg(FYI, "%s: ioctl error: rc=%d\n", __func__, rc); goto out; } @@ -2975,9 +3020,9 @@ smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses *ses, num_of_nodes, target_nodes, nls_codepage, remap, search_name, true /* is_unicode */); - if (rc) { - cifs_tcon_dbg(VFS, "parse error in %s rc=%d\n", __func__, rc); - goto out; + if (rc && rc != -ENOENT) { + cifs_tcon_dbg(VFS, "%s: failed to parse DFS referral %s: %d\n", + __func__, search_name, rc); } out: diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index ac70d013b2..418af8fa66 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -142,9 +142,8 @@ static int smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, struct TCP_Server_Info *server) { - int rc = 0; - struct nls_table *nls_codepage = NULL; struct cifs_ses *ses; + int rc = 0; /* * SMB2s NegProt, SessSetup, Logoff do not have tcon yet so @@ -154,23 +153,15 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon, if (tcon == NULL) return 0; - /* - * Need to also skip SMB2_IOCTL because it is used for checking nested dfs links in - * cifs_tree_connect(). - */ - if (smb2_command == SMB2_TREE_CONNECT || smb2_command == SMB2_IOCTL) + if (smb2_command == SMB2_TREE_CONNECT) return 0; spin_lock(&tcon->tc_lock); if (tcon->status == TID_EXITING) { /* - * only tree disconnect, open, and write, - * (and ulogoff which does not have tcon) - * are allowed as we start force umount. + * only tree disconnect allowed when disconnecting ... */ - if ((smb2_command != SMB2_WRITE) && - (smb2_command != SMB2_CREATE) && - (smb2_command != SMB2_TREE_DISCONNECT)) { + if (smb2_command != SMB2_TREE_DISCONNECT) { spin_unlock(&tcon->tc_lock); cifs_dbg(FYI, "can not send cmd %d while umounting\n", smb2_command); @@ -245,8 +236,6 @@ again: } spin_unlock(&server->srv_lock); - nls_codepage = load_nls_default(); - /* * need to prevent multiple threads trying to simultaneously * reconnect the same SMB session @@ -269,7 +258,7 @@ again: rc = cifs_negotiate_protocol(0, ses, server); if (!rc) { - rc = cifs_setup_session(0, ses, server, nls_codepage); + rc = cifs_setup_session(0, ses, server, ses->local_nls); if ((rc == -EACCES) && !tcon->retry) { mutex_unlock(&ses->session_mutex); rc = -EHOSTDOWN; @@ -292,7 +281,7 @@ skip_sess_setup: if (tcon->use_persistent) tcon->need_reopen_files = true; - rc = cifs_tree_connect(0, tcon, nls_codepage); + rc = cifs_tree_connect(0, tcon); mutex_unlock(&ses->session_mutex); cifs_dbg(FYI, "reconnect tcon rc = %d\n", rc); @@ -324,10 +313,10 @@ out: case SMB2_CHANGE_NOTIFY: case SMB2_QUERY_INFO: case SMB2_SET_INFO: + case SMB2_IOCTL: rc = -EAGAIN; } failed: - unload_nls(nls_codepage); return rc; } @@ -1290,7 +1279,12 @@ SMB2_sess_alloc_buffer(struct SMB2_sess_data *sess_data) } /* enough to enable echos and oplocks and one max size write */ - req->hdr.CreditRequest = cpu_to_le16(130); + if (server->credits >= server->max_credits) + req->hdr.CreditRequest = cpu_to_le16(0); + else + req->hdr.CreditRequest = cpu_to_le16( + min_t(int, server->max_credits - + server->credits, 130)); /* only one of SMB2 signing flags may be set in SMB2 request */ if (server->sign) @@ -1872,7 +1866,12 @@ SMB2_tcon(const unsigned int xid, struct cifs_ses *ses, const char *tree, rqst.rq_nvec = 2; /* Need 64 for max size write so ask for more in case not there yet */ - req->hdr.CreditRequest = cpu_to_le16(64); + if (server->credits >= server->max_credits) + req->hdr.CreditRequest = cpu_to_le16(0); + else + req->hdr.CreditRequest = cpu_to_le16( + min_t(int, server->max_credits - + server->credits, 64)); rc = cifs_send_recv(xid, ses, server, &rqst, &resp_buftype, flags, &rsp_iov); @@ -1928,7 +1927,7 @@ tcon_exit: tcon_error_exit: if (rsp && rsp->hdr.Status == STATUS_BAD_NETWORK_NAME) - cifs_tcon_dbg(VFS, "BAD_NETWORK_NAME: %s\n", tree); + cifs_dbg(VFS | ONCE, "BAD_NETWORK_NAME: %s\n", tree); goto tcon_exit; } @@ -3821,7 +3820,7 @@ void smb2_reconnect_server(struct work_struct *work) if (ses->tcon_ipc && ses->tcon_ipc->need_reconnect) { list_add_tail(&ses->tcon_ipc->rlist, &tmp_list); tcon_selected = tcon_exist = true; - ses->ses_count++; + cifs_smb_ses_inc_refcount(ses); } /* * handle the case where channel needs to reconnect @@ -3832,7 +3831,7 @@ void smb2_reconnect_server(struct work_struct *work) if (!tcon_selected && cifs_chan_needs_reconnect(ses, server)) { list_add_tail(&ses->rlist, &tmp_ses_list); ses_exist = true; - ses->ses_count++; + cifs_smb_ses_inc_refcount(ses); } spin_unlock(&ses->chan_lock); } @@ -4217,6 +4216,7 @@ smb2_async_readv(struct cifs_readdata *rdata) struct TCP_Server_Info *server; struct cifs_tcon *tcon = tlink_tcon(rdata->cfile->tlink); unsigned int total_len; + int credit_request; cifs_dbg(FYI, "%s: offset=%llu bytes=%u\n", __func__, rdata->offset, rdata->bytes); @@ -4248,7 +4248,13 @@ smb2_async_readv(struct cifs_readdata *rdata) if (rdata->credits.value > 0) { shdr->CreditCharge = cpu_to_le16(DIV_ROUND_UP(rdata->bytes, SMB2_MAX_BUFFER_SIZE)); - shdr->CreditRequest = cpu_to_le16(le16_to_cpu(shdr->CreditCharge) + 8); + credit_request = le16_to_cpu(shdr->CreditCharge) + 8; + if (server->credits >= server->max_credits) + shdr->CreditRequest = cpu_to_le16(0); + else + shdr->CreditRequest = cpu_to_le16( + min_t(int, server->max_credits - + server->credits, credit_request)); rc = adjust_credits(server, &rdata->credits, rdata->bytes); if (rc) @@ -4456,6 +4462,7 @@ smb2_async_writev(struct cifs_writedata *wdata, struct kvec iov[1]; struct smb_rqst rqst = { }; unsigned int total_len; + int credit_request; if (!wdata->server) server = wdata->server = cifs_pick_channel(tcon->ses); @@ -4555,7 +4562,13 @@ smb2_async_writev(struct cifs_writedata *wdata, if (wdata->credits.value > 0) { shdr->CreditCharge = cpu_to_le16(DIV_ROUND_UP(wdata->bytes, SMB2_MAX_BUFFER_SIZE)); - shdr->CreditRequest = cpu_to_le16(le16_to_cpu(shdr->CreditCharge) + 8); + credit_request = le16_to_cpu(shdr->CreditCharge) + 8; + if (server->credits >= server->max_credits) + shdr->CreditRequest = cpu_to_le16(0); + else + shdr->CreditRequest = cpu_to_le16( + min_t(int, server->max_credits - + server->credits, credit_request)); rc = adjust_credits(server, &wdata->credits, wdata->bytes); if (rc) diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c index ee4c7e55d3..8a6d1d4e7b 100644 --- a/fs/cifs/smb2transport.c +++ b/fs/cifs/smb2transport.c @@ -163,7 +163,7 @@ smb2_find_smb_ses_unlocked(struct TCP_Server_Info *server, __u64 ses_id) spin_unlock(&ses->ses_lock); continue; } - ++ses->ses_count; + cifs_smb_ses_inc_refcount(ses); spin_unlock(&ses->ses_lock); return ses; } diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 6668e7c4e3..262bc4bd3a 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -778,9 +778,13 @@ static inline bool should_fault_in_pages(struct iov_iter *i, if (!iter_is_iovec(i)) return false; + /* + * Try to fault in multiple pages initially. When that doesn't result + * in any progress, fall back to a single page. + */ size = PAGE_SIZE; offs = offset_in_page(iocb->ki_pos); - if (*prev_count != count || !*window_size) { + if (*prev_count != count) { size_t nr_dirtied; nr_dirtied = max(current->nr_dirtied_pause - @@ -862,6 +866,7 @@ static ssize_t gfs2_file_direct_write(struct kiocb *iocb, struct iov_iter *from, struct gfs2_inode *ip = GFS2_I(inode); size_t prev_count = 0, window_size = 0; size_t written = 0; + bool enough_retries; ssize_t ret; /* @@ -904,11 +909,17 @@ retry: if (ret > 0) written = ret; + enough_retries = prev_count == iov_iter_count(from) && + window_size <= PAGE_SIZE; if (should_fault_in_pages(from, iocb, &prev_count, &window_size)) { gfs2_glock_dq(gh); window_size -= fault_in_iov_iter_readable(from, window_size); - if (window_size) - goto retry; + if (window_size) { + if (!enough_retries) + goto retry; + /* fall back to buffered I/O */ + ret = 0; + } } out_unlock: if (gfs2_holder_queued(gh)) diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index c71c0e97c9..2b581c59fa 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -871,7 +871,7 @@ retry: goto fail_gunlock4; mark_inode_dirty(inode); - d_instantiate(dentry, inode); + d_instantiate_new(dentry, inode); /* After instantiate, errors should result in evict which will destroy * both inode and iopen glocks properly. */ if (file) { @@ -883,7 +883,6 @@ retry: gfs2_glock_dq_uninit(&gh); gfs2_glock_put(io_gl); gfs2_qa_put(dip); - unlock_new_inode(inode); return error; fail_gunlock4: @@ -2168,6 +2167,14 @@ static int gfs2_getattr(const struct path *path, struct kstat *stat, return 0; } +static bool fault_in_fiemap(struct fiemap_extent_info *fi) +{ + struct fiemap_extent __user *dest = fi->fi_extents_start; + size_t size = sizeof(*dest) * fi->fi_extents_max; + + return fault_in_safe_writeable((char __user *)dest, size) == 0; +} + static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len) { @@ -2177,14 +2184,22 @@ static int gfs2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, inode_lock_shared(inode); +retry: ret = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, 0, &gh); if (ret) goto out; + pagefault_disable(); ret = iomap_fiemap(inode, fieinfo, start, len, &gfs2_iomap_ops); + pagefault_enable(); gfs2_glock_dq_uninit(&gh); + if (ret == -EFAULT && fault_in_fiemap(fieinfo)) { + fieinfo->fi_extents_mapped = 0; + goto retry; + } + out: inode_unlock_shared(inode); return ret; diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 756fafdea4..07d912f3f9 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -1282,6 +1282,9 @@ static enum evict_behavior evict_should_delete(struct inode *inode, struct gfs2_sbd *sdp = sb->s_fs_info; int ret; + if (inode->i_nlink) + return EVICT_SHOULD_SKIP_DELETE; + if (gfs2_holder_initialized(&ip->i_iopen_gh) && test_bit(GLF_DEFER_DELETE, &ip->i_iopen_gh.gh_gl->gl_flags)) return EVICT_SHOULD_DEFER_DELETE; @@ -1301,11 +1304,9 @@ static enum evict_behavior evict_should_delete(struct inode *inode, if (ret) return EVICT_SHOULD_SKIP_DELETE; - if (test_bit(GLF_INSTANTIATE_NEEDED, &ip->i_gl->gl_flags)) { - ret = gfs2_instantiate(gh); - if (ret) - return EVICT_SHOULD_SKIP_DELETE; - } + ret = gfs2_instantiate(gh); + if (ret) + return EVICT_SHOULD_SKIP_DELETE; /* * The inode may have been recreated in the meantime. @@ -1322,12 +1323,18 @@ static enum evict_behavior evict_should_delete(struct inode *inode, /** * evict_unlinked_inode - delete the pieces of an unlinked evicted inode * @inode: The inode to evict + * @gh: The glock holder structure */ -static int evict_unlinked_inode(struct inode *inode) +static int evict_unlinked_inode(struct inode *inode, struct gfs2_holder *gh) { struct gfs2_inode *ip = GFS2_I(inode); + struct gfs2_glock *gl = ip->i_gl; int ret; + /* The inode glock must be held exclusively and be instantiated. */ + BUG_ON(!gfs2_holder_initialized(gh) || + test_bit(GLF_INSTANTIATE_NEEDED, &gl->gl_flags)); + if (S_ISDIR(inode->i_mode) && (ip->i_diskflags & GFS2_DIF_EXHASH)) { ret = gfs2_dir_exhash_dealloc(ip); @@ -1347,9 +1354,6 @@ static int evict_unlinked_inode(struct inode *inode) goto out; } - if (ip->i_gl) - gfs2_inode_remember_delete(ip->i_gl, ip->i_no_formal_ino); - /* * As soon as we clear the bitmap for the dinode, gfs2_create_inode() * can get called to recreate it, or even gfs2_inode_lookup() if the @@ -1363,6 +1367,9 @@ static int evict_unlinked_inode(struct inode *inode) */ ret = gfs2_dinode_dealloc(ip); + if (!ret) + gfs2_inode_remember_delete(gl, ip->i_no_formal_ino); + out: return ret; } @@ -1370,33 +1377,40 @@ out: /* * evict_linked_inode - evict an inode whose dinode has not been unlinked * @inode: The inode to evict + * @gh: The glock holder structure */ -static int evict_linked_inode(struct inode *inode) +static int evict_linked_inode(struct inode *inode, struct gfs2_holder *gh) { struct super_block *sb = inode->i_sb; struct gfs2_sbd *sdp = sb->s_fs_info; struct gfs2_inode *ip = GFS2_I(inode); - struct address_space *metamapping; + struct gfs2_glock *gl = ip->i_gl; + struct address_space *metamapping = gfs2_glock2aspace(gl); int ret; - gfs2_log_flush(sdp, ip->i_gl, GFS2_LOG_HEAD_FLUSH_NORMAL | + if (!(test_bit(GLF_DIRTY, &gl->gl_flags) || inode->i_flags & I_DIRTY)) + goto clean; + + /* The inode glock must be held exclusively and be instantiated. */ + if (!gfs2_holder_initialized(gh)) + ret = gfs2_glock_nq_init(gl, LM_ST_EXCLUSIVE, 0, gh); + else + ret = gfs2_instantiate(gh); + if (ret) + return ret; + + gfs2_log_flush(sdp, gl, GFS2_LOG_HEAD_FLUSH_NORMAL | GFS2_LFC_EVICT_INODE); - metamapping = gfs2_glock2aspace(ip->i_gl); - if (test_bit(GLF_DIRTY, &ip->i_gl->gl_flags)) { + if (test_bit(GLF_DIRTY, &gl->gl_flags)) { filemap_fdatawrite(metamapping); filemap_fdatawait(metamapping); } write_inode_now(inode, 1); - gfs2_ail_flush(ip->i_gl, 0); + gfs2_ail_flush(gl, 0); - ret = gfs2_trans_begin(sdp, 0, sdp->sd_jdesc->jd_blocks); - if (ret) - return ret; - - /* Needs to be done before glock release & also in a transaction */ +clean: truncate_inode_pages(&inode->i_data, 0); truncate_inode_pages(metamapping, 0); - gfs2_trans_end(sdp); return 0; } @@ -1431,7 +1445,7 @@ static void gfs2_evict_inode(struct inode *inode) int ret; gfs2_holder_mark_uninitialized(&gh); - if (inode->i_nlink || sb_rdonly(sb) || !ip->i_no_addr) + if (sb_rdonly(sb) || !ip->i_no_addr || !ip->i_gl) goto out; /* @@ -1455,14 +1469,14 @@ static void gfs2_evict_inode(struct inode *inode) behavior = EVICT_SHOULD_SKIP_DELETE; } if (behavior == EVICT_SHOULD_DELETE) - ret = evict_unlinked_inode(inode); + ret = evict_unlinked_inode(inode, &gh); else - ret = evict_linked_inode(inode); + ret = evict_linked_inode(inode, &gh); if (gfs2_rs_active(&ip->i_res)) gfs2_rs_deltree(&ip->i_res); - if (ret && ret != GLR_TRYFAILED && ret != -EROFS) + if (ret && ret != -EROFS) fs_warn(sdp, "gfs2_evict_inode: %d\n", ret); out: if (gfs2_holder_initialized(&gh)) diff --git a/fs/nls/nls_base.c b/fs/nls/nls_base.c index 52ccd34b1e..a026dbd359 100644 --- a/fs/nls/nls_base.c +++ b/fs/nls/nls_base.c @@ -272,7 +272,7 @@ int unregister_nls(struct nls_table * nls) return -EINVAL; } -static struct nls_table *find_nls(char *charset) +static struct nls_table *find_nls(const char *charset) { struct nls_table *nls; spin_lock(&nls_lock); @@ -288,7 +288,7 @@ static struct nls_table *find_nls(char *charset) return nls; } -struct nls_table *load_nls(char *charset) +struct nls_table *load_nls(const char *charset) { return try_then_request_module(find_nls(charset), "nls_%s", charset); } diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 972172d048..7b3cd3edac 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -3268,7 +3268,7 @@ xfs_alloc_vextent( { xfs_agblock_t agsize; /* allocation group size */ int error; - int flags; /* XFS_ALLOC_FLAG_... locking flags */ + int flags = 0; /* XFS_ALLOC_FLAG_... locking flags */ struct xfs_mount *mp; /* mount structure pointer */ xfs_agnumber_t sagno; /* starting allocation group number */ xfs_alloctype_t type; /* input allocation type */ @@ -3359,9 +3359,10 @@ xfs_alloc_vextent( /* * Start with allocation group given by bno. */ - args->agno = XFS_FSB_TO_AGNO(mp, args->fsbno); + args->agno = sagno = XFS_FSB_TO_AGNO(mp, args->fsbno); args->type = XFS_ALLOCTYPE_THIS_AG; - sagno = minimum_agno; + if (args->agno < minimum_agno) + args->agno = sagno = minimum_agno; flags = 0; } else { /* @@ -3403,15 +3404,15 @@ xfs_alloc_vextent( /* * If we are try-locking, we can't deadlock on AGF * locks, so we can wrap all the way back to the first - * AG. Otherwise, wrap back to the start AG so we can't - * deadlock, and let the end of scan handler decide what - * to do next. + * AG. Otherwise, wrap back to the minimum allowable AG + * so we can't deadlock, and let the end of scan handler + * decide what to do next. */ if (++(args->agno) == mp->m_sb.sb_agcount) { if (flags & XFS_ALLOC_FLAG_TRYLOCK) args->agno = 0; else - args->agno = sagno; + args->agno = minimum_agno; } /* @@ -3426,17 +3427,19 @@ xfs_alloc_vextent( } /* - * Blocking pass next, so we must obey minimum + * Blocking pass next. Similar to how FIRST_AG + * mode is handled above, we must obey minimum * agno constraints to avoid ABBA AGF deadlocks. */ flags = 0; - if (minimum_agno > sagno) - sagno = minimum_agno; - if (type == XFS_ALLOCTYPE_START_BNO) { args->agbno = XFS_FSB_TO_AGBNO(mp, args->fsbno); args->type = XFS_ALLOCTYPE_NEAR_BNO; + if (sagno < minimum_agno) { + args->agno = sagno = minimum_agno; + args->type = XFS_ALLOCTYPE_THIS_AG; + } } } xfs_perag_put(args->pag); diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index 55400db2b1..dd95225051 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -315,6 +315,20 @@ struct mmu_gather { RH_KABI_FILL_HOLE(unsigned int vma_pfn : 1) + /* + * Did we unshare (unmap) any shared page tables? For now only + * used for hugetlb PMD table sharing. + */ + RH_KABI_FILL_HOLE(unsigned int unshared_tables : 1) + + /* + * Did we unshare any page tables such that they are now exclusive + * and could get reused+modified by the new owner? When setting this + * flag, "unshared_tables" will be set as well. For now only used + * for hugetlb PMD table sharing. + */ + RH_KABI_FILL_HOLE(unsigned int fully_unshared_tables : 1) + unsigned int batch_count; #ifndef CONFIG_MMU_GATHER_NO_GATHER @@ -351,6 +365,7 @@ static inline void __tlb_reset_range(struct mmu_gather *tlb) tlb->cleared_pmds = 0; tlb->cleared_puds = 0; tlb->cleared_p4ds = 0; + tlb->unshared_tables = 0; /* * Do not reset mmu_gather::vma_* fields here, we do not * call into tlb_start_vma() again to set them if there is an @@ -430,7 +445,7 @@ static inline void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb) * these bits. */ if (!(tlb->freed_tables || tlb->cleared_ptes || tlb->cleared_pmds || - tlb->cleared_puds || tlb->cleared_p4ds)) + tlb->cleared_puds || tlb->cleared_p4ds || tlb->unshared_tables)) return; tlb_flush(tlb); @@ -672,6 +687,62 @@ static inline void tlb_flush_p4d_range(struct mmu_gather *tlb, #endif #endif +#ifdef CONFIG_ARCH_WANT_HUGE_PMD_SHARE +static inline void tlb_unshare_pmd_pte(struct mmu_gather *tlb, pte_t *ptep, + unsigned long addr) +{ + /* + * The caller must make sure that concurrent unsharing + exclusive + * reuse is impossible until tlb_flush_unshared_tables() was called. + */ + put_page(virt_to_page(ptep)); + + /* Clearing a PUD pointing at a PMD table with PMD leaves. */ + tlb_flush_pmd_range(tlb, addr & PUD_MASK, PUD_SIZE); + + /* + * If the page table is now exclusively owned, we fully unshared + * a page table. + */ + if (page_count(virt_to_page(ptep)) == 1) + tlb->fully_unshared_tables = true; + tlb->unshared_tables = true; +} + +static inline void tlb_flush_unshared_tables(struct mmu_gather *tlb) +{ + /* + * As soon as the caller drops locks to allow for reuse of + * previously-shared tables, these tables could get modified and + * even reused outside of hugetlb context, so we have to make sure that + * any page table walkers (incl. TLB, GUP-fast) are aware of that + * change. + * + * Even if we are not fully unsharing a PMD table, we must + * flush the TLB for the unsharer now. + */ + if (tlb->unshared_tables) + tlb_flush_mmu_tlbonly(tlb); + + /* + * Similarly, we must make sure that concurrent GUP-fast will not + * walk previously-shared page tables that are getting modified+reused + * elsewhere. So broadcast an IPI to wait for any concurrent GUP-fast. + * + * We only perform this when we are the last sharer of a page table, + * as the IPI will reach all CPUs: any GUP-fast. + * + * Note that on configs where tlb_remove_table_sync_one() is a NOP, + * the expectation is that the tlb_flush_mmu_tlbonly() would have issued + * required IPIs already for us. + */ + if (tlb->fully_unshared_tables) { + tlb_remove_table_sync_one(); + tlb->fully_unshared_tables = false; + } +} +#endif /* CONFIG_ARCH_WANT_HUGE_PMD_SHARE */ + #endif /* CONFIG_MMU */ #endif /* _ASM_GENERIC__TLB_H */ diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 810e8ba974..ac21c262a2 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -167,8 +167,9 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr, unsigned long sz); pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr, unsigned long sz); -int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma, +int huge_pmd_unshare(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long *addr, pte_t *ptep); +void huge_pmd_unshare_flush(struct mmu_gather *tlb, struct vm_area_struct *vma); void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma, unsigned long *start, unsigned long *end); struct page *follow_huge_addr(struct mm_struct *mm, unsigned long address, @@ -207,13 +208,18 @@ static inline struct address_space *hugetlb_page_mapping_lock_write( return NULL; } -static inline int huge_pmd_unshare(struct mm_struct *mm, +static inline int huge_pmd_unshare(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long *addr, pte_t *ptep) { return 0; } +static inline void huge_pmd_unshare_flush(struct mmu_gather *tlb, + struct vm_area_struct *vma) +{ +} + static inline void adjust_range_if_pmd_sharing_possible( struct vm_area_struct *vma, unsigned long *start, unsigned long *end) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 5d6b758af2..cf382f0e83 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -633,6 +633,7 @@ static inline cpumask_t *mm_cpumask(struct mm_struct *mm) struct mmu_gather; extern void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm); extern void tlb_gather_mmu_fullmm(struct mmu_gather *tlb, struct mm_struct *mm); +extern void tlb_gather_mmu_vma(struct mmu_gather *tlb, struct vm_area_struct *vma); extern void tlb_finish_mmu(struct mmu_gather *tlb); static inline void init_tlb_flush_pending(struct mm_struct *mm) diff --git a/include/linux/nls.h b/include/linux/nls.h index 499e486b37..e0bf8367b2 100644 --- a/include/linux/nls.h +++ b/include/linux/nls.h @@ -47,7 +47,7 @@ enum utf16_endian { /* nls_base.c */ extern int __register_nls(struct nls_table *, struct module *); extern int unregister_nls(struct nls_table *); -extern struct nls_table *load_nls(char *); +extern struct nls_table *load_nls(const char *charset); extern void unload_nls(struct nls_table *); extern struct nls_table *load_nls_default(void); #define register_nls(nls) __register_nls((nls), THIS_MODULE) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 197c9f9ded..48dd108a3d 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3764,6 +3764,18 @@ skb_header_pointer(const struct sk_buff *skb, int offset, int len, void *buffer) skb_headlen(skb), buffer); } +/* Variant of skb_header_pointer() where @offset is user-controlled + * and potentially negative. + */ +static inline void * __must_check +skb_header_pointer_careful(const struct sk_buff *skb, int offset, + int len, void *buffer) +{ + if (unlikely(offset < 0 && -offset > skb_headroom(skb))) + return NULL; + return skb_header_pointer(skb, offset, len, buffer); +} + /** * skb_needs_linearize - check if we need to linearize a given skb * depending on the given device features. diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h index 103a48a488..dc4160706d 100644 --- a/include/linux/uprobes.h +++ b/include/linux/uprobes.h @@ -150,6 +150,7 @@ extern bool arch_uretprobe_is_alive(struct return_instance *ret, enum rp_check c extern bool arch_uprobe_ignore(struct arch_uprobe *aup, struct pt_regs *regs); extern void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr, void *src, unsigned long len); +extern unsigned long arch_uprobe_get_xol_area(void); #else /* !CONFIG_UPROBES */ struct uprobes_state { }; diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 661348f23e..e364d2b6e7 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -70,7 +70,7 @@ struct netns_ipv4 { struct sock *mc_autojoin_sk; struct inet_peer_base *peers; - struct sock * __percpu *tcp_sk; + RH_KABI_DEPRECATE(struct sock **, tcp_sk) struct netns_frags frags; #ifdef CONFIG_NETFILTER struct xt_table *iptable_filter; diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index 74245b8a51..e3e8d504e5 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -250,7 +250,8 @@ struct scsi_device { RH_KABI_USE(4, struct sbitmap *budget_map) RH_KABI_USE_SPLIT(5, atomic_t iotmo_cnt) /* partial use */ - RH_KABI_RESERVE(6) + RH_KABI_USE_SPLIT(6, atomic_t ua_new_media_ctr, /* Counter for New Media UNIT ATTENTIONs */ + atomic_t ua_por_ctr) /* Counter for Power On / Reset UAs */ unsigned long sdev_data[0]; } __attribute__((aligned(sizeof(unsigned long)))); @@ -623,6 +624,10 @@ static inline int scsi_device_busy(struct scsi_device *sdev) return sbitmap_weight(sdev->budget_map); } +/* Macros to access the UNIT ATTENTION counters */ +#define scsi_get_ua_new_media_ctr(sdev) atomic_read(&sdev->ua_new_media_ctr) +#define scsi_get_ua_por_ctr(sdev) atomic_read(&sdev->ua_por_ctr) + #define MODULE_ALIAS_SCSI_DEVICE(type) \ MODULE_ALIAS("scsi:t-" __stringify(type) "*") #define SCSI_DEVICE_MODALIAS_FMT "scsi:t-0x%02x" diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 5e656d7e63..658825e58a 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -1439,6 +1439,12 @@ void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned lon set_bit(MMF_RECALC_UPROBES, &vma->vm_mm->flags); } +unsigned long __weak arch_uprobe_get_xol_area(void) +{ + /* Try to map as high as possible, this is only a hint. */ + return get_unmapped_area(NULL, TASK_SIZE - PAGE_SIZE, PAGE_SIZE, 0, 0); +} + /* Slot allocation for XOL */ static int xol_add_vma(struct mm_struct *mm, struct xol_area *area) { @@ -1454,9 +1460,7 @@ static int xol_add_vma(struct mm_struct *mm, struct xol_area *area) } if (!area->vaddr) { - /* Try to map as high as possible, this is only a hint. */ - area->vaddr = get_unmapped_area(NULL, TASK_SIZE - PAGE_SIZE, - PAGE_SIZE, 0, 0); + area->vaddr = arch_uprobe_get_xol_area(); if (area->vaddr & ~PAGE_MASK) { ret = area->vaddr; goto fail; diff --git a/mm/hugetlb.c b/mm/hugetlb.c index d5cc73b1c8..d858680654 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -4058,7 +4058,6 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, struct hstate *h = hstate_vma(vma); unsigned long sz = huge_page_size(h); struct mmu_notifier_range range; - bool force_flush = false; WARN_ON(!is_vm_hugetlb_page(vma)); BUG_ON(start & ~huge_page_mask(h)); @@ -4085,10 +4084,8 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, continue; ptl = huge_pte_lock(h, mm, ptep); - if (huge_pmd_unshare(mm, vma, &address, ptep)) { + if (huge_pmd_unshare(tlb, vma, &address, ptep)) { spin_unlock(ptl); - tlb_flush_pmd_range(tlb, address & PUD_MASK, PUD_SIZE); - force_flush = true; continue; } @@ -4146,21 +4143,7 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, mmu_notifier_invalidate_range_end(&range); tlb_end_vma(tlb, vma); - /* - * If we unshared PMDs, the TLB flush was not recorded in mmu_gather. We - * could defer the flush until now, since by holding i_mmap_rwsem we - * guaranteed that the last refernece would not be dropped. But we must - * do the flushing before we return, as otherwise i_mmap_rwsem will be - * dropped and the last reference to the shared PMDs page might be - * dropped as well. - * - * In theory we could defer the freeing of the PMD pages as well, but - * huge_pmd_unshare() relies on the exact page_count for the PMD page to - * detect sharing, so we cannot defer the release of the page either. - * Instead, do flush now. - */ - if (force_flush) - tlb_flush_mmu_tlbonly(tlb); + huge_pmd_unshare_flush(tlb, vma); } void __unmap_hugepage_range_final(struct mmu_gather *tlb, @@ -5233,8 +5216,8 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, pte_t pte; struct hstate *h = hstate_vma(vma); unsigned long pages = 0; - bool shared_pmd = false; struct mmu_notifier_range range; + struct mmu_gather tlb; /* * In the case of shared PMDs, the area to flush could be beyond @@ -5247,6 +5230,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, BUG_ON(address >= end); flush_cache_range(vma, range.start, range.end); + tlb_gather_mmu_vma(&tlb, vma); mmu_notifier_invalidate_range_start(&range); i_mmap_lock_write(vma->vm_file->f_mapping); @@ -5256,10 +5240,9 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, if (!ptep) continue; ptl = huge_pte_lock(h, mm, ptep); - if (huge_pmd_unshare(mm, vma, &address, ptep)) { + if (huge_pmd_unshare(&tlb, vma, &address, ptep)) { pages++; spin_unlock(ptl); - shared_pmd = true; continue; } pte = huge_ptep_get(ptep); @@ -5289,21 +5272,14 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, pte = pte_mkhuge(huge_pte_modify(old_pte, newprot)); pte = arch_make_huge_pte(pte, vma, NULL, 0); huge_ptep_modify_prot_commit(vma, address, ptep, old_pte, pte); + tlb_remove_huge_tlb_entry(h, &tlb, ptep, address); pages++; } spin_unlock(ptl); } - /* - * Must flush TLB before releasing i_mmap_rwsem: x86's huge_pmd_unshare - * may have cleared our pud entry and done put_page on the page table: - * once we release i_mmap_rwsem, another task can do the final put_page - * and that page table be reused and filled with junk. If we actually - * did unshare a page of pmds, flush the range corresponding to the pud. - */ - if (shared_pmd) - flush_hugetlb_tlb_range(vma, range.start, range.end); - else - flush_hugetlb_tlb_range(vma, start, end); + + tlb_flush_mmu_tlbonly(&tlb); + huge_pmd_unshare_flush(&tlb, vma); /* * No need to call mmu_notifier_invalidate_range() we are downgrading * page table protection not changing it to point to a new page. @@ -5312,6 +5288,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, */ i_mmap_unlock_write(vma->vm_file->f_mapping); mmu_notifier_invalidate_range_end(&range); + tlb_finish_mmu(&tlb); return pages << h->order; } @@ -5639,20 +5616,28 @@ out: } /* - * unmap huge page backed by shared pte. + * huge_pmd_unshare - Unmap a pmd table if it is shared by multiple users + * @tlb: the current mmu_gather. + * @vma: the vma covering the pmd table. + * @addr: the address we are trying to unshare. + * @ptep: pointer into the (pmd) page table. + * + * Called with page table lock held and i_mmap_rwsem held in write mode. + * + * Note: The caller must call huge_pmd_unshare_flush() before dropping the + * i_mmap_rwsem. * * Hugetlb pte page is ref counted at the time of mapping. If pte is shared * indicated by page_count > 1, unmap is achieved by clearing pud and * decrementing the ref count. If count == 1, the pte page is not shared. * - * Called with page table lock held and i_mmap_rwsem held in write mode. - * - * returns: 1 successfully unmapped a shared pte page - * 0 the underlying pte page is not shared, or it is the last user + * Returns: 1 if it was a shared PMD table and it got unmapped, or 0 if it + * was not a shared PMD table. */ -int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma, - unsigned long *addr, pte_t *ptep) +int huge_pmd_unshare(struct mmu_gather *tlb, struct vm_area_struct *vma, + unsigned long *addr, pte_t *ptep) { + struct mm_struct *mm = vma->vm_mm; pgd_t *pgd = pgd_offset(mm, *addr); p4d_t *p4d = p4d_offset(pgd, *addr); pud_t *pud = pud_offset(p4d, *addr); @@ -5663,20 +5648,8 @@ int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma, return 0; pud_clear(pud); - /* - * Once our caller drops the rmap lock, some other process might be - * using this page table as a normal, non-hugetlb page table. - * Wait for pending gup_fast() in other threads to finish before letting - * that happen. - * - * RHEL-120391: some customers reported severe interference/performance - * degradation on particular database workloads, thus we are including - * a waiving flag to allow for disabling this CVE mitigation - */ - if (likely(!is_rh_waived(CVE_2025_38085))) - tlb_remove_table_sync_one(); + tlb_unshare_pmd_pte(tlb, ptep, *addr); - put_page(virt_to_page(ptep)); mm_dec_nr_pmds(mm); /* * This update of passed address optimizes loops sequentially @@ -5688,6 +5661,30 @@ int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma, *addr |= PUD_SIZE - PMD_SIZE; return 1; } + +/* + * huge_pmd_unshare_flush - Complete a sequence of huge_pmd_unshare() calls + * @tlb: the current mmu_gather. + * @vma: the vma covering the pmd table. + * + * Perform necessary TLB flushes or IPI broadcasts to synchronize PMD table + * unsharing with concurrent page table walkers. + * + * This function must be called after a sequence of huge_pmd_unshare() + * calls while still holding the i_mmap_rwsem. + */ +void huge_pmd_unshare_flush(struct mmu_gather *tlb, struct vm_area_struct *vma) +{ + /* + * We must synchronize page table unsharing such that nobody will + * try reusing a previously-shared page table while it might still + * be in use by previous sharers (TLB, GUP_fast). + */ + i_mmap_assert_write_locked(vma->vm_file->f_mapping); + + tlb_flush_unshared_tables(tlb); +} + #define want_pmd_share() (1) #else /* !CONFIG_ARCH_WANT_HUGE_PMD_SHARE */ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) @@ -5695,12 +5692,16 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) return NULL; } -int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma, - unsigned long *addr, pte_t *ptep) +int huge_pmd_unshare(struct mmu_gather *tlb, struct vm_area_struct *vma, + unsigned long *addr, pte_t *ptep) { return 0; } +void huge_pmd_unshare_flush(struct mmu_gather *tlb, struct vm_area_struct *vma) +{ +} + void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma, unsigned long *start, unsigned long *end) { diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c index cabe7d7f14..0af960cb96 100644 --- a/mm/mmu_gather.c +++ b/mm/mmu_gather.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -277,6 +278,7 @@ static void __tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, tlb->page_size = 0; #endif + tlb->fully_unshared_tables = 0; __tlb_reset_range(tlb); inc_tlb_flush_pending(tlb->mm); } @@ -291,6 +293,30 @@ void tlb_gather_mmu_fullmm(struct mmu_gather *tlb, struct mm_struct *mm) __tlb_gather_mmu(tlb, mm, true); } +/** + * tlb_gather_mmu_vma - initialize an mmu_gather structure for operating on a + * single VMA + * @tlb: the mmu_gather structure to initialize + * @vma: the vm_area_struct + * + * Called to initialize an (on-stack) mmu_gather structure for operating on + * a single VMA. In contrast to tlb_gather_mmu(), calling this function will + * not require another call to tlb_start_vma(). In contrast to tlb_start_vma(), + * this function will *not* call flush_cache_range(). + * + * For hugetlb VMAs, this function will also initialize the mmu_gather + * page_size accordingly, not requiring a separate call to + * tlb_change_page_size(). + */ +void tlb_gather_mmu_vma(struct mmu_gather *tlb, struct vm_area_struct *vma) +{ + tlb_gather_mmu(tlb, vma->vm_mm); + tlb_update_vma_flags(tlb, vma); + if (is_vm_hugetlb_page(vma)) + /* All entries have the same size. */ + tlb_change_page_size(tlb, huge_page_size(hstate_vma(vma))); +} + /** * tlb_finish_mmu - finish an mmu_gather structure * @tlb: the mmu_gather structure to finish @@ -300,6 +326,12 @@ void tlb_gather_mmu_fullmm(struct mmu_gather *tlb, struct mm_struct *mm) */ void tlb_finish_mmu(struct mmu_gather *tlb) { + /* + * We expect an earlier huge_pmd_unshare_flush() call to sort this out, + * due to complicated locking requirements with page table unsharing. + */ + VM_WARN_ON_ONCE(tlb->fully_unshared_tables); + /* * If there are parallel threads are doing PTE changes on same range * under non-exclusive lock (e.g., mmap_lock read-side) but defer TLB diff --git a/mm/rmap.c b/mm/rmap.c index 7e77609266..c4bf10ed12 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -72,7 +72,7 @@ #include #include -#include +#include #include @@ -1519,13 +1519,17 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, address = pvmw.address; if (PageHuge(page) && !PageAnon(page)) { + struct mmu_gather tlb; + /* * To call huge_pmd_unshare, i_mmap_rwsem must be * held in write mode. Caller needs to explicitly * do this outside rmap routines. */ VM_BUG_ON(!(flags & TTU_RMAP_LOCKED)); - if (huge_pmd_unshare(mm, vma, &address, pvmw.pte)) { + + tlb_gather_mmu_vma(&tlb, vma); + if (huge_pmd_unshare(&tlb, vma, &address, pvmw.pte)) { /* * huge_pmd_unshare unmapped an entire PMD * page. There is no way of knowing exactly @@ -1534,7 +1538,8 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, * already adjusted above to cover this range. */ flush_cache_range(vma, range.start, range.end); - flush_tlb_range(vma, range.start, range.end); + huge_pmd_unshare_flush(&tlb, vma); + tlb_finish_mmu(&tlb); mmu_notifier_invalidate_range(mm, range.start, range.end); @@ -1550,6 +1555,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, page_vma_mapped_walk_done(&pvmw); break; } + tlb_finish_mmu(&tlb); } if (IS_ENABLED(CONFIG_MIGRATION) && diff --git a/net/atm/lec.c b/net/atm/lec.c index 255b37a863..f58af668b3 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -123,6 +123,7 @@ static unsigned char bus_mac[ETH_ALEN] = { 0xff, 0xff, 0xff, 0xff, 0xff, 0xff }; /* Device structures */ static struct net_device *dev_lec[MAX_LEC_ITF]; +static DEFINE_MUTEX(lec_mutex); #if IS_ENABLED(CONFIG_BRIDGE) static void lec_handle_bridge(struct sk_buff *skb, struct net_device *dev) @@ -686,6 +687,7 @@ static int lec_vcc_attach(struct atm_vcc *vcc, void __user *arg) int bytes_left; struct atmlec_ioc ioc_data; + lockdep_assert_held(&lec_mutex); /* Lecd must be up in this case */ bytes_left = copy_from_user(&ioc_data, arg, sizeof(struct atmlec_ioc)); if (bytes_left != 0) @@ -711,6 +713,7 @@ static int lec_vcc_attach(struct atm_vcc *vcc, void __user *arg) static int lec_mcast_attach(struct atm_vcc *vcc, int arg) { + lockdep_assert_held(&lec_mutex); if (arg < 0 || arg >= MAX_LEC_ITF) return -EINVAL; arg = array_index_nospec(arg, MAX_LEC_ITF); @@ -726,6 +729,7 @@ static int lecd_attach(struct atm_vcc *vcc, int arg) int i; struct lec_priv *priv; + lockdep_assert_held(&lec_mutex); if (arg < 0) i = 0; else @@ -745,6 +749,7 @@ static int lecd_attach(struct atm_vcc *vcc, int arg) snprintf(dev_lec[i]->name, IFNAMSIZ, "lec%d", i); if (register_netdev(dev_lec[i])) { free_netdev(dev_lec[i]); + dev_lec[i] = NULL; return -EINVAL; } @@ -912,7 +917,6 @@ static void *lec_itf_walk(struct lec_state *state, loff_t *l) v = (dev && netdev_priv(dev)) ? lec_priv_walk(state, l, netdev_priv(dev)) : NULL; if (!v && dev) { - dev_put(dev); /* Partial state reset for the next time we get called */ dev = NULL; } @@ -936,6 +940,7 @@ static void *lec_seq_start(struct seq_file *seq, loff_t *pos) { struct lec_state *state = seq->private; + mutex_lock(&lec_mutex); state->itf = 0; state->dev = NULL; state->locked = NULL; @@ -953,8 +958,9 @@ static void lec_seq_stop(struct seq_file *seq, void *v) if (state->dev) { spin_unlock_irqrestore(&state->locked->lec_arp_lock, state->flags); - dev_put(state->dev); + state->dev = NULL; } + mutex_unlock(&lec_mutex); } static void *lec_seq_next(struct seq_file *seq, void *v, loff_t *pos) @@ -1012,6 +1018,7 @@ static int lane_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) return -ENOIOCTLCMD; } + mutex_lock(&lec_mutex); switch (cmd) { case ATMLEC_CTRL: err = lecd_attach(vcc, (int)arg); @@ -1026,6 +1033,7 @@ static int lane_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) break; } + mutex_unlock(&lec_mutex); return err; } diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index dc110d95df..6b6f85f6e7 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -4076,6 +4076,12 @@ int tcp_abort(struct sock *sk, int err) /* Don't race with userspace socket closes such as tcp_close. */ lock_sock(sk); + /* Avoid closing the same socket twice. */ + if (sk->sk_state == TCP_CLOSE) { + release_sock(sk); + return -ENOENT; + } + if (sk->sk_state == TCP_LISTEN) { tcp_set_state(sk, TCP_CLOSE); inet_csk_listen_stop(sk); @@ -4085,19 +4091,16 @@ int tcp_abort(struct sock *sk, int err) local_bh_disable(); bh_lock_sock(sk); - if (!sock_flag(sk, SOCK_DEAD)) { - sk->sk_err = err; - /* This barrier is coupled with smp_rmb() in tcp_poll() */ - smp_wmb(); - sk_error_report(sk); - if (tcp_need_reset(sk->sk_state)) - tcp_send_active_reset(sk, GFP_ATOMIC); - tcp_done(sk); - } + sk->sk_err = err; + /* This barrier is coupled with smp_rmb() in tcp_poll() */ + smp_wmb(); + sk_error_report(sk); + if (tcp_need_reset(sk->sk_state)) + tcp_send_active_reset(sk, GFP_ATOMIC); + tcp_done(sk); bh_unlock_sock(sk); local_bh_enable(); - tcp_write_queue_purge(sk); release_sock(sk); return 0; } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index f9a3862ce8..76b0753d79 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -96,6 +96,8 @@ static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, struct inet_hashinfo tcp_hashinfo; EXPORT_SYMBOL(tcp_hashinfo); +static DEFINE_PER_CPU(struct sock *, ipv4_tcp_sk); + static u32 tcp_v4_init_seq(const struct sk_buff *skb) { return secure_tcp_seq(ip_hdr(skb)->daddr, @@ -802,7 +804,8 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) arg.tos = ip_hdr(skb)->tos; arg.uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL); local_bh_disable(); - ctl_sk = *this_cpu_ptr(net->ipv4.tcp_sk); + ctl_sk = this_cpu_read(ipv4_tcp_sk); + sock_net_set(ctl_sk, net); if (sk) { ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ? inet_twsk(sk)->tw_mark : sk->sk_mark; @@ -816,6 +819,7 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) txhash); ctl_sk->sk_mark = 0; + sock_net_set(ctl_sk, &init_net); __TCP_INC_STATS(net, TCP_MIB_OUTSEGS); __TCP_INC_STATS(net, TCP_MIB_OUTRSTS); local_bh_enable(); @@ -898,7 +902,8 @@ static void tcp_v4_send_ack(const struct sock *sk, arg.tos = tos; arg.uid = sock_net_uid(net, sk_fullsock(sk) ? sk : NULL); local_bh_disable(); - ctl_sk = *this_cpu_ptr(net->ipv4.tcp_sk); + ctl_sk = this_cpu_read(ipv4_tcp_sk); + sock_net_set(ctl_sk, net); if (sk) ctl_sk->sk_mark = (sk->sk_state == TCP_TIME_WAIT) ? inet_twsk(sk)->tw_mark : sk->sk_mark; @@ -909,6 +914,7 @@ static void tcp_v4_send_ack(const struct sock *sk, txhash); ctl_sk->sk_mark = 0; + sock_net_set(ctl_sk, &init_net); __TCP_INC_STATS(net, TCP_MIB_OUTSEGS); local_bh_enable(); } @@ -2740,41 +2746,14 @@ EXPORT_SYMBOL(tcp_prot); static void __net_exit tcp_sk_exit(struct net *net) { - int cpu; - if (net->ipv4.tcp_congestion_control) bpf_module_put(net->ipv4.tcp_congestion_control, net->ipv4.tcp_congestion_control->owner); - - for_each_possible_cpu(cpu) - inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu)); - free_percpu(net->ipv4.tcp_sk); } static int __net_init tcp_sk_init(struct net *net) { - int res, cpu, cnt; - - net->ipv4.tcp_sk = alloc_percpu(struct sock *); - if (!net->ipv4.tcp_sk) - return -ENOMEM; - - for_each_possible_cpu(cpu) { - struct sock *sk; - - res = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW, - IPPROTO_TCP, net); - if (res) - goto fail; - sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); - - /* Please enforce IP_DF and IPID==0 for RST and - * ACK sent in SYN-RECV and TIME-WAIT state. - */ - inet_sk(sk)->pmtudisc = IP_PMTUDISC_DO; - - *per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk; - } + int cnt; net->ipv4.sysctl_tcp_ecn = 2; net->ipv4.sysctl_tcp_ecn_fallback = 1; @@ -2858,10 +2837,6 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4_sysctl_tcp_shrink_window = 0; return 0; -fail: - tcp_sk_exit(net); - - return res; } static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list) @@ -2938,6 +2913,24 @@ static void __init bpf_iter_register(void) void __init tcp_v4_init(void) { + int cpu, res; + + for_each_possible_cpu(cpu) { + struct sock *sk; + + res = inet_ctl_sock_create(&sk, PF_INET, SOCK_RAW, + IPPROTO_TCP, &init_net); + if (res) + panic("Failed to create the TCP control socket.\n"); + sock_set_flag(sk, SOCK_USE_WRITE_QUEUE); + + /* Please enforce IP_DF and IPID==0 for RST and + * ACK sent in SYN-RECV and TIME-WAIT state. + */ + inet_sk(sk)->pmtudisc = IP_PMTUDISC_DO; + + per_cpu(ipv4_tcp_sk, cpu) = sk; + } if (register_pernet_subsys(&tcp_sk_ops)) panic("Failed to create the TCP control socket.\n"); diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 6b66d32dd0..2469f47ce7 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -155,10 +155,8 @@ next_knode: int toff = off + key->off + (off2 & key->offmask); __be32 *data, hdata; - if (skb_headroom(skb) + toff > INT_MAX) - goto out; - - data = skb_header_pointer(skb, toff, 4, &hdata); + data = skb_header_pointer_careful(skb, toff, 4, + &hdata); if (!data) goto out; if ((*data ^ key->val) & key->mask) { @@ -208,8 +206,9 @@ check_terminal: if (ht->divisor) { __be32 *data, hdata; - data = skb_header_pointer(skb, off + n->sel.hoff, 4, - &hdata); + data = skb_header_pointer_careful(skb, + off + n->sel.hoff, + 4, &hdata); if (!data) goto out; sel = ht->divisor & u32_hash_fold(*data, &n->sel, @@ -223,7 +222,7 @@ check_terminal: if (n->sel.flags & TC_U32_VAROFFSET) { __be16 *data, hdata; - data = skb_header_pointer(skb, + data = skb_header_pointer_careful(skb, off + n->sel.offoff, 2, &hdata); if (!data)