diff --git a/kernel.spec b/kernel.spec
index 6ee461adc..8b2384b93 100644
--- a/kernel.spec
+++ b/kernel.spec
@@ -62,7 +62,7 @@ Summary: The Linux kernel
 # For non-released -rc kernels, this will be appended after the rcX and
 # gitX tags, so a 3 here would become part of release "0.rcX.gitX.3"
 #
-%global baserelease 3
+%global baserelease 4
 %global fedora_build %{baserelease}
 
 # base_sublevel is the kernel version we're starting with and patching
@@ -713,6 +713,8 @@ Patch14000: hibernate-freeze-filesystems.patch
 
 Patch14010: lis3-improve-handling-of-null-rate.patch
 
+Patch20000: uprobes-3.5-tip.patch
+
 # ARM
 # OMAP
 Patch21000: arm-omap-3.5-fixes.patch
@@ -1413,6 +1415,8 @@ ApplyPatch efi-dont-map-boot-services-on-32bit.patch
 
 ApplyPatch lis3-improve-handling-of-null-rate.patch
 
+ApplyPatch uprobes-3.5-tip.patch
+
 ApplyPatch power-x86-destdir.patch
 ApplyPatch hfsplus-Fix-bless-ioctl-when-used-with-hardlinks.patch
 
@@ -2282,6 +2286,9 @@ fi
 # ||----w |
 # ||     ||
 %changelog
+* Fri Jun 22 2012 Josh Boyer <jwboyer@redhat.com>
+- Add uprobe backports from -tip from Anton Arapov
+
 * Wed Jun 20 2012 Josh Boyer <jwboyer@redhat.com>
 - Fix incorrect logic in irqpoll patch
 
diff --git a/uprobes-3.5-tip.patch b/uprobes-3.5-tip.patch
new file mode 100644
index 000000000..8105dc45c
--- /dev/null
+++ b/uprobes-3.5-tip.patch
@@ -0,0 +1,915 @@
+The split-out series is available in the git repository at:
+
+  git://fedorapeople.org/home/fedora/aarapov/public_git/kernel-uprobes.git tags/rawhide_exported
+
+Ananth N Mavinakayanahalli (1):
+      uprobes: Pass probed vaddr to arch_uprobe_analyze_insn()
+
+Josh Stone (1):
+      uprobes: add exports necessary for uprobes use by modules
+
+Oleg Nesterov (21):
+      uprobes: Optimize is_swbp_at_addr() for current->mm
+      uprobes: Change read_opcode() to use FOLL_FORCE
+      uprobes: Introduce find_active_uprobe() helper
+      uprobes: Teach find_active_uprobe() to provide the "is_swbp" info
+      uprobes: Change register_for_each_vma() to take mm->mmap_sem for writing
+      uprobes: Teach handle_swbp() to rely on "is_swbp" rather than uprobes_srcu
+      uprobes: Kill uprobes_srcu/uprobe_srcu_id
+      uprobes: Valid_vma() should reject VM_HUGETLB
+      uprobes: __copy_insn() should ensure a_ops->readpage != NULL
+      uprobes: Write_opcode()->__replace_page() can race with try_to_unmap()
+      uprobes: Install_breakpoint() should fail if is_swbp_insn() == T
+      uprobes: Rework register_for_each_vma() to make it O(n)
+      uprobes: Change build_map_info() to try kmalloc(GFP_NOWAIT) first
+      uprobes: Copy_insn() shouldn't depend on mm/vma/vaddr
+      uprobes: Copy_insn() should not return -ENOMEM if __copy_insn() fails
+      uprobes: No need to re-check vma_address() in write_opcode()
+      uprobes: Simplify the usage of uprobe->pending_list
+      uprobes: Don't use loff_t for the valid virtual address
+      uprobes: __copy_insn() needs "loff_t offset"
+      uprobes: Remove the unnecessary initialization in add_utask()
+      uprobes: Move BUG_ON(UPROBE_SWBP_INSN_SIZE) from write_opcode() to install_breakpoint()
+
+Peter Zijlstra (1):
+      uprobes: Document uprobe_register() vs uprobe_mmap() race
+
+Signed-off-by: Anton Arapov <anton@redhat.com>
+---
+ arch/x86/include/asm/uprobes.h |    2 +-
+ arch/x86/kernel/ptrace.c       |    6 +
+ arch/x86/kernel/uprobes.c      |    3 +-
+ include/linux/sched.h          |    1 -
+ kernel/events/uprobes.c        |  464 ++++++++++++++++++++--------------------
+ 5 files changed, 240 insertions(+), 236 deletions(-)
+
+diff --git a/arch/x86/include/asm/uprobes.h b/arch/x86/include/asm/uprobes.h
+index 1e9bed1..f3971bb 100644
+--- a/arch/x86/include/asm/uprobes.h
++++ b/arch/x86/include/asm/uprobes.h
+@@ -48,7 +48,7 @@ struct arch_uprobe_task {
+ #endif
+ };
+ 
+-extern int arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm);
++extern int arch_uprobe_analyze_insn(struct arch_uprobe *aup, struct mm_struct *mm, unsigned long addr);
+ extern int arch_uprobe_pre_xol(struct arch_uprobe *aup, struct pt_regs *regs);
+ extern int arch_uprobe_post_xol(struct arch_uprobe *aup, struct pt_regs *regs);
+ extern bool arch_uprobe_xol_was_trapped(struct task_struct *tsk);
+diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
+index c4c6a5c..a6a6871 100644
+--- a/arch/x86/kernel/ptrace.c
++++ b/arch/x86/kernel/ptrace.c
+@@ -1415,6 +1415,12 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task)
+ #endif
+ }
+ 
++/*
++ * This is declared in linux/regset.h and defined in machine-dependent
++ * code. We put the export here to ensure no machine forgets it.
++ */
++EXPORT_SYMBOL_GPL(task_user_regset_view);
++
+ static void fill_sigtrap_info(struct task_struct *tsk,
+ 				struct pt_regs *regs,
+ 				int error_code, int si_code,
+diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c
+index dc4e910..36fd420 100644
+--- a/arch/x86/kernel/uprobes.c
++++ b/arch/x86/kernel/uprobes.c
+@@ -409,9 +409,10 @@ static int validate_insn_bits(struct arch_uprobe *auprobe, struct mm_struct *mm,
+  * arch_uprobe_analyze_insn - instruction analysis including validity and fixups.
+  * @mm: the probed address space.
+  * @arch_uprobe: the probepoint information.
++ * @addr: virtual address at which to install the probepoint
+  * Return 0 on success or a -ve number on error.
+  */
+-int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm)
++int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long addr)
+ {
+ 	int ret;
+ 	struct insn insn;
+diff --git a/include/linux/sched.h b/include/linux/sched.h
+index 4059c0f..c7cfa69 100644
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -1581,7 +1581,6 @@ struct task_struct {
+ #endif
+ #ifdef CONFIG_UPROBES
+ 	struct uprobe_task *utask;
+-	int uprobe_srcu_id;
+ #endif
+ };
+ 
+diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
+index 985be4d..d9e5ba5 100644
+--- a/kernel/events/uprobes.c
++++ b/kernel/events/uprobes.c
+@@ -34,17 +34,34 @@
+ #include <linux/kdebug.h>	/* notifier mechanism */
+ 
+ #include <linux/uprobes.h>
++#include <linux/uaccess.h>
+ 
+ #define UINSNS_PER_PAGE			(PAGE_SIZE/UPROBE_XOL_SLOT_BYTES)
+ #define MAX_UPROBE_XOL_SLOTS		UINSNS_PER_PAGE
+ 
+-static struct srcu_struct uprobes_srcu;
+ static struct rb_root uprobes_tree = RB_ROOT;
+ 
+ static DEFINE_SPINLOCK(uprobes_treelock);	/* serialize rbtree access */
+ 
+ #define UPROBES_HASH_SZ	13
+ 
++/*
++ * We need separate register/unregister and mmap/munmap lock hashes because
++ * of mmap_sem nesting.
++ *
++ * uprobe_register() needs to install probes on (potentially) all processes
++ * and thus needs to acquire multiple mmap_sems (consequtively, not
++ * concurrently), whereas uprobe_mmap() is called while holding mmap_sem
++ * for the particular process doing the mmap.
++ *
++ * uprobe_register()->register_for_each_vma() needs to drop/acquire mmap_sem
++ * because of lock order against i_mmap_mutex. This means there's a hole in
++ * the register vma iteration where a mmap() can happen.
++ *
++ * Thus uprobe_register() can race with uprobe_mmap() and we can try and
++ * install a probe where one is already installed.
++ */
++
+ /* serialize (un)register */
+ static struct mutex uprobes_mutex[UPROBES_HASH_SZ];
+ 
+@@ -61,17 +78,6 @@ static struct mutex uprobes_mmap_mutex[UPROBES_HASH_SZ];
+  */
+ static atomic_t uprobe_events = ATOMIC_INIT(0);
+ 
+-/*
+- * Maintain a temporary per vma info that can be used to search if a vma
+- * has already been handled. This structure is introduced since extending
+- * vm_area_struct wasnt recommended.
+- */
+-struct vma_info {
+-	struct list_head	probe_list;
+-	struct mm_struct	*mm;
+-	loff_t			vaddr;
+-};
+-
+ struct uprobe {
+ 	struct rb_node		rb_node;	/* node in the rb tree */
+ 	atomic_t		ref;
+@@ -100,7 +106,8 @@ static bool valid_vma(struct vm_area_struct *vma, bool is_register)
+ 	if (!is_register)
+ 		return true;
+ 
+-	if ((vma->vm_flags & (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)) == (VM_READ|VM_EXEC))
++	if ((vma->vm_flags & (VM_HUGETLB|VM_READ|VM_WRITE|VM_EXEC|VM_SHARED))
++						== (VM_READ|VM_EXEC))
+ 		return true;
+ 
+ 	return false;
+@@ -129,33 +136,17 @@ static loff_t vma_address(struct vm_area_struct *vma, loff_t offset)
+ static int __replace_page(struct vm_area_struct *vma, struct page *page, struct page *kpage)
+ {
+ 	struct mm_struct *mm = vma->vm_mm;
+-	pgd_t *pgd;
+-	pud_t *pud;
+-	pmd_t *pmd;
+-	pte_t *ptep;
+-	spinlock_t *ptl;
+ 	unsigned long addr;
+-	int err = -EFAULT;
++	spinlock_t *ptl;
++	pte_t *ptep;
+ 
+ 	addr = page_address_in_vma(page, vma);
+ 	if (addr == -EFAULT)
+-		goto out;
+-
+-	pgd = pgd_offset(mm, addr);
+-	if (!pgd_present(*pgd))
+-		goto out;
+-
+-	pud = pud_offset(pgd, addr);
+-	if (!pud_present(*pud))
+-		goto out;
+-
+-	pmd = pmd_offset(pud, addr);
+-	if (!pmd_present(*pmd))
+-		goto out;
++		return -EFAULT;
+ 
+-	ptep = pte_offset_map_lock(mm, pmd, addr, &ptl);
++	ptep = page_check_address(page, mm, addr, &ptl, 0);
+ 	if (!ptep)
+-		goto out;
++		return -EAGAIN;
+ 
+ 	get_page(kpage);
+ 	page_add_new_anon_rmap(kpage, vma, addr);
+@@ -174,10 +165,8 @@ static int __replace_page(struct vm_area_struct *vma, struct page *page, struct
+ 	try_to_free_swap(page);
+ 	put_page(page);
+ 	pte_unmap_unlock(ptep, ptl);
+-	err = 0;
+ 
+-out:
+-	return err;
++	return 0;
+ }
+ 
+ /**
+@@ -222,9 +211,8 @@ static int write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm,
+ 	void *vaddr_old, *vaddr_new;
+ 	struct vm_area_struct *vma;
+ 	struct uprobe *uprobe;
+-	loff_t addr;
+ 	int ret;
+-
++retry:
+ 	/* Read the page with vaddr into memory */
+ 	ret = get_user_pages(NULL, mm, vaddr, 1, 0, 0, &old_page, &vma);
+ 	if (ret <= 0)
+@@ -246,10 +234,6 @@ static int write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm,
+ 	if (mapping != vma->vm_file->f_mapping)
+ 		goto put_out;
+ 
+-	addr = vma_address(vma, uprobe->offset);
+-	if (vaddr != (unsigned long)addr)
+-		goto put_out;
+-
+ 	ret = -ENOMEM;
+ 	new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, vaddr);
+ 	if (!new_page)
+@@ -267,11 +251,7 @@ static int write_opcode(struct arch_uprobe *auprobe, struct mm_struct *mm,
+ 	vaddr_new = kmap_atomic(new_page);
+ 
+ 	memcpy(vaddr_new, vaddr_old, PAGE_SIZE);
+-
+-	/* poke the new insn in, ASSUMES we don't cross page boundary */
+-	vaddr &= ~PAGE_MASK;
+-	BUG_ON(vaddr + UPROBE_SWBP_INSN_SIZE > PAGE_SIZE);
+-	memcpy(vaddr_new + vaddr, &opcode, UPROBE_SWBP_INSN_SIZE);
++	memcpy(vaddr_new + (vaddr & ~PAGE_MASK), &opcode, UPROBE_SWBP_INSN_SIZE);
+ 
+ 	kunmap_atomic(vaddr_new);
+ 	kunmap_atomic(vaddr_old);
+@@ -291,6 +271,8 @@ unlock_out:
+ put_out:
+ 	put_page(old_page);
+ 
++	if (unlikely(ret == -EAGAIN))
++		goto retry;
+ 	return ret;
+ }
+ 
+@@ -312,7 +294,7 @@ static int read_opcode(struct mm_struct *mm, unsigned long vaddr, uprobe_opcode_
+ 	void *vaddr_new;
+ 	int ret;
+ 
+-	ret = get_user_pages(NULL, mm, vaddr, 1, 0, 0, &page, NULL);
++	ret = get_user_pages(NULL, mm, vaddr, 1, 0, 1, &page, NULL);
+ 	if (ret <= 0)
+ 		return ret;
+ 
+@@ -333,10 +315,20 @@ static int is_swbp_at_addr(struct mm_struct *mm, unsigned long vaddr)
+ 	uprobe_opcode_t opcode;
+ 	int result;
+ 
++	if (current->mm == mm) {
++		pagefault_disable();
++		result = __copy_from_user_inatomic(&opcode, (void __user*)vaddr,
++							sizeof(opcode));
++		pagefault_enable();
++
++		if (likely(result == 0))
++			goto out;
++	}
++
+ 	result = read_opcode(mm, vaddr, &opcode);
+ 	if (result)
+ 		return result;
+-
++out:
+ 	if (is_swbp_insn(&opcode))
+ 		return 1;
+ 
+@@ -355,7 +347,9 @@ static int is_swbp_at_addr(struct mm_struct *mm, unsigned long vaddr)
+ int __weak set_swbp(struct arch_uprobe *auprobe, struct mm_struct *mm, unsigned long vaddr)
+ {
+ 	int result;
+-
++	/*
++	 * See the comment near uprobes_hash().
++	 */
+ 	result = is_swbp_at_addr(mm, vaddr);
+ 	if (result == 1)
+ 		return -EEXIST;
+@@ -520,7 +514,6 @@ static struct uprobe *alloc_uprobe(struct inode *inode, loff_t offset)
+ 	uprobe->inode = igrab(inode);
+ 	uprobe->offset = offset;
+ 	init_rwsem(&uprobe->consumer_rwsem);
+-	INIT_LIST_HEAD(&uprobe->pending_list);
+ 
+ 	/* add to uprobes_tree, sorted on inode:offset */
+ 	cur_uprobe = insert_uprobe(uprobe);
+@@ -588,20 +581,22 @@ static bool consumer_del(struct uprobe *uprobe, struct uprobe_consumer *uc)
+ }
+ 
+ static int
+-__copy_insn(struct address_space *mapping, struct vm_area_struct *vma, char *insn,
+-			unsigned long nbytes, unsigned long offset)
++__copy_insn(struct address_space *mapping, struct file *filp, char *insn,
++			unsigned long nbytes, loff_t offset)
+ {
+-	struct file *filp = vma->vm_file;
+ 	struct page *page;
+ 	void *vaddr;
+-	unsigned long off1;
+-	unsigned long idx;
++	unsigned long off;
++	pgoff_t idx;
+ 
+ 	if (!filp)
+ 		return -EINVAL;
+ 
+-	idx = (unsigned long)(offset >> PAGE_CACHE_SHIFT);
+-	off1 = offset &= ~PAGE_MASK;
++	if (!mapping->a_ops->readpage)
++		return -EIO;
++
++	idx = offset >> PAGE_CACHE_SHIFT;
++	off = offset & ~PAGE_MASK;
+ 
+ 	/*
+ 	 * Ensure that the page that has the original instruction is
+@@ -612,22 +607,20 @@ __copy_insn(struct address_space *mapping, struct vm_area_struct *vma, char *ins
+ 		return PTR_ERR(page);
+ 
+ 	vaddr = kmap_atomic(page);
+-	memcpy(insn, vaddr + off1, nbytes);
++	memcpy(insn, vaddr + off, nbytes);
+ 	kunmap_atomic(vaddr);
+ 	page_cache_release(page);
+ 
+ 	return 0;
+ }
+ 
+-static int
+-copy_insn(struct uprobe *uprobe, struct vm_area_struct *vma, unsigned long addr)
++static int copy_insn(struct uprobe *uprobe, struct file *filp)
+ {
+ 	struct address_space *mapping;
+ 	unsigned long nbytes;
+ 	int bytes;
+ 
+-	addr &= ~PAGE_MASK;
+-	nbytes = PAGE_SIZE - addr;
++	nbytes = PAGE_SIZE - (uprobe->offset & ~PAGE_MASK);
+ 	mapping = uprobe->inode->i_mapping;
+ 
+ 	/* Instruction at end of binary; copy only available bytes */
+@@ -638,13 +631,13 @@ copy_insn(struct uprobe *uprobe, struct vm_area_struct *vma, unsigned long addr)
+ 
+ 	/* Instruction at the page-boundary; copy bytes in second page */
+ 	if (nbytes < bytes) {
+-		if (__copy_insn(mapping, vma, uprobe->arch.insn + nbytes,
+-				bytes - nbytes, uprobe->offset + nbytes))
+-			return -ENOMEM;
+-
++		int err = __copy_insn(mapping, filp, uprobe->arch.insn + nbytes,
++				bytes - nbytes, uprobe->offset + nbytes);
++		if (err)
++			return err;
+ 		bytes = nbytes;
+ 	}
+-	return __copy_insn(mapping, vma, uprobe->arch.insn, bytes, uprobe->offset);
++	return __copy_insn(mapping, filp, uprobe->arch.insn, bytes, uprobe->offset);
+ }
+ 
+ /*
+@@ -672,9 +665,8 @@ copy_insn(struct uprobe *uprobe, struct vm_area_struct *vma, unsigned long addr)
+  */
+ static int
+ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,
+-			struct vm_area_struct *vma, loff_t vaddr)
++			struct vm_area_struct *vma, unsigned long vaddr)
+ {
+-	unsigned long addr;
+ 	int ret;
+ 
+ 	/*
+@@ -687,20 +679,22 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,
+ 	if (!uprobe->consumers)
+ 		return -EEXIST;
+ 
+-	addr = (unsigned long)vaddr;
+-
+ 	if (!(uprobe->flags & UPROBE_COPY_INSN)) {
+-		ret = copy_insn(uprobe, vma, addr);
++		ret = copy_insn(uprobe, vma->vm_file);
+ 		if (ret)
+ 			return ret;
+ 
+ 		if (is_swbp_insn((uprobe_opcode_t *)uprobe->arch.insn))
+-			return -EEXIST;
++			return -ENOTSUPP;
+ 
+-		ret = arch_uprobe_analyze_insn(&uprobe->arch, mm);
++		ret = arch_uprobe_analyze_insn(&uprobe->arch, mm, vaddr);
+ 		if (ret)
+ 			return ret;
+ 
++		/* write_opcode() assumes we don't cross page boundary */
++		BUG_ON((uprobe->offset & ~PAGE_MASK) +
++				UPROBE_SWBP_INSN_SIZE > PAGE_SIZE);
++
+ 		uprobe->flags |= UPROBE_COPY_INSN;
+ 	}
+ 
+@@ -713,7 +707,7 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,
+ 	 * Hence increment before and decrement on failure.
+ 	 */
+ 	atomic_inc(&mm->uprobes_state.count);
+-	ret = set_swbp(&uprobe->arch, mm, addr);
++	ret = set_swbp(&uprobe->arch, mm, vaddr);
+ 	if (ret)
+ 		atomic_dec(&mm->uprobes_state.count);
+ 
+@@ -721,27 +715,21 @@ install_breakpoint(struct uprobe *uprobe, struct mm_struct *mm,
+ }
+ 
+ static void
+-remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, loff_t vaddr)
++remove_breakpoint(struct uprobe *uprobe, struct mm_struct *mm, unsigned long vaddr)
+ {
+-	if (!set_orig_insn(&uprobe->arch, mm, (unsigned long)vaddr, true))
++	if (!set_orig_insn(&uprobe->arch, mm, vaddr, true))
+ 		atomic_dec(&mm->uprobes_state.count);
+ }
+ 
+ /*
+- * There could be threads that have hit the breakpoint and are entering the
+- * notifier code and trying to acquire the uprobes_treelock. The thread
+- * calling delete_uprobe() that is removing the uprobe from the rb_tree can
+- * race with these threads and might acquire the uprobes_treelock compared
+- * to some of the breakpoint hit threads. In such a case, the breakpoint
+- * hit threads will not find the uprobe. The current unregistering thread
+- * waits till all other threads have hit a breakpoint, to acquire the
+- * uprobes_treelock before the uprobe is removed from the rbtree.
++ * There could be threads that have already hit the breakpoint. They
++ * will recheck the current insn and restart if find_uprobe() fails.
++ * See find_active_uprobe().
+  */
+ static void delete_uprobe(struct uprobe *uprobe)
+ {
+ 	unsigned long flags;
+ 
+-	synchronize_srcu(&uprobes_srcu);
+ 	spin_lock_irqsave(&uprobes_treelock, flags);
+ 	rb_erase(&uprobe->rb_node, &uprobes_tree);
+ 	spin_unlock_irqrestore(&uprobes_treelock, flags);
+@@ -750,139 +738,135 @@ static void delete_uprobe(struct uprobe *uprobe)
+ 	atomic_dec(&uprobe_events);
+ }
+ 
+-static struct vma_info *
+-__find_next_vma_info(struct address_space *mapping, struct list_head *head,
+-			struct vma_info *vi, loff_t offset, bool is_register)
++struct map_info {
++	struct map_info *next;
++	struct mm_struct *mm;
++	unsigned long vaddr;
++};
++
++static inline struct map_info *free_map_info(struct map_info *info)
++{
++	struct map_info *next = info->next;
++	kfree(info);
++	return next;
++}
++
++static struct map_info *
++build_map_info(struct address_space *mapping, loff_t offset, bool is_register)
+ {
++	unsigned long pgoff = offset >> PAGE_SHIFT;
+ 	struct prio_tree_iter iter;
+ 	struct vm_area_struct *vma;
+-	struct vma_info *tmpvi;
+-	unsigned long pgoff;
+-	int existing_vma;
+-	loff_t vaddr;
+-
+-	pgoff = offset >> PAGE_SHIFT;
++	struct map_info *curr = NULL;
++	struct map_info *prev = NULL;
++	struct map_info *info;
++	int more = 0;
+ 
++ again:
++	mutex_lock(&mapping->i_mmap_mutex);
+ 	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
+ 		if (!valid_vma(vma, is_register))
+ 			continue;
+ 
+-		existing_vma = 0;
+-		vaddr = vma_address(vma, offset);
+-
+-		list_for_each_entry(tmpvi, head, probe_list) {
+-			if (tmpvi->mm == vma->vm_mm && tmpvi->vaddr == vaddr) {
+-				existing_vma = 1;
+-				break;
+-			}
++		if (!prev && !more) {
++			/*
++			 * Needs GFP_NOWAIT to avoid i_mmap_mutex recursion through
++			 * reclaim. This is optimistic, no harm done if it fails.
++			 */
++			prev = kmalloc(sizeof(struct map_info),
++					GFP_NOWAIT | __GFP_NOMEMALLOC | __GFP_NOWARN);
++			if (prev)
++				prev->next = NULL;
+ 		}
+-
+-		/*
+-		 * Another vma needs a probe to be installed. However skip
+-		 * installing the probe if the vma is about to be unlinked.
+-		 */
+-		if (!existing_vma && atomic_inc_not_zero(&vma->vm_mm->mm_users)) {
+-			vi->mm = vma->vm_mm;
+-			vi->vaddr = vaddr;
+-			list_add(&vi->probe_list, head);
+-
+-			return vi;
++		if (!prev) {
++			more++;
++			continue;
+ 		}
+-	}
+ 
+-	return NULL;
+-}
+-
+-/*
+- * Iterate in the rmap prio tree and find a vma where a probe has not
+- * yet been inserted.
+- */
+-static struct vma_info *
+-find_next_vma_info(struct address_space *mapping, struct list_head *head,
+-			loff_t offset, bool is_register)
+-{
+-	struct vma_info *vi, *retvi;
++		if (!atomic_inc_not_zero(&vma->vm_mm->mm_users))
++			continue;
+ 
+-	vi = kzalloc(sizeof(struct vma_info), GFP_KERNEL);
+-	if (!vi)
+-		return ERR_PTR(-ENOMEM);
++		info = prev;
++		prev = prev->next;
++		info->next = curr;
++		curr = info;
+ 
+-	mutex_lock(&mapping->i_mmap_mutex);
+-	retvi = __find_next_vma_info(mapping, head, vi, offset, is_register);
++		info->mm = vma->vm_mm;
++		info->vaddr = vma_address(vma, offset);
++	}
+ 	mutex_unlock(&mapping->i_mmap_mutex);
+ 
+-	if (!retvi)
+-		kfree(vi);
++	if (!more)
++		goto out;
++
++	prev = curr;
++	while (curr) {
++		mmput(curr->mm);
++		curr = curr->next;
++	}
+ 
+-	return retvi;
++	do {
++		info = kmalloc(sizeof(struct map_info), GFP_KERNEL);
++		if (!info) {
++			curr = ERR_PTR(-ENOMEM);
++			goto out;
++		}
++		info->next = prev;
++		prev = info;
++	} while (--more);
++
++	goto again;
++ out:
++	while (prev)
++		prev = free_map_info(prev);
++	return curr;
+ }
+ 
+ static int register_for_each_vma(struct uprobe *uprobe, bool is_register)
+ {
+-	struct list_head try_list;
+-	struct vm_area_struct *vma;
+-	struct address_space *mapping;
+-	struct vma_info *vi, *tmpvi;
+-	struct mm_struct *mm;
+-	loff_t vaddr;
+-	int ret;
++	struct map_info *info;
++	int err = 0;
+ 
+-	mapping = uprobe->inode->i_mapping;
+-	INIT_LIST_HEAD(&try_list);
++	info = build_map_info(uprobe->inode->i_mapping,
++					uprobe->offset, is_register);
++	if (IS_ERR(info))
++		return PTR_ERR(info);
+ 
+-	ret = 0;
++	while (info) {
++		struct mm_struct *mm = info->mm;
++		struct vm_area_struct *vma;
+ 
+-	for (;;) {
+-		vi = find_next_vma_info(mapping, &try_list, uprobe->offset, is_register);
+-		if (!vi)
+-			break;
++		if (err)
++			goto free;
+ 
+-		if (IS_ERR(vi)) {
+-			ret = PTR_ERR(vi);
+-			break;
+-		}
++		down_write(&mm->mmap_sem);
++		vma = find_vma(mm, (unsigned long)info->vaddr);
++		if (!vma || !valid_vma(vma, is_register))
++			goto unlock;
+ 
+-		mm = vi->mm;
+-		down_read(&mm->mmap_sem);
+-		vma = find_vma(mm, (unsigned long)vi->vaddr);
+-		if (!vma || !valid_vma(vma, is_register)) {
+-			list_del(&vi->probe_list);
+-			kfree(vi);
+-			up_read(&mm->mmap_sem);
+-			mmput(mm);
+-			continue;
+-		}
+-		vaddr = vma_address(vma, uprobe->offset);
+ 		if (vma->vm_file->f_mapping->host != uprobe->inode ||
+-						vaddr != vi->vaddr) {
+-			list_del(&vi->probe_list);
+-			kfree(vi);
+-			up_read(&mm->mmap_sem);
+-			mmput(mm);
+-			continue;
+-		}
+-
+-		if (is_register)
+-			ret = install_breakpoint(uprobe, mm, vma, vi->vaddr);
+-		else
+-			remove_breakpoint(uprobe, mm, vi->vaddr);
++		    vma_address(vma, uprobe->offset) != info->vaddr)
++			goto unlock;
+ 
+-		up_read(&mm->mmap_sem);
+-		mmput(mm);
+ 		if (is_register) {
+-			if (ret && ret == -EEXIST)
+-				ret = 0;
+-			if (ret)
+-				break;
++			err = install_breakpoint(uprobe, mm, vma, info->vaddr);
++			/*
++			 * We can race against uprobe_mmap(), see the
++			 * comment near uprobe_hash().
++			 */
++			if (err == -EEXIST)
++				err = 0;
++		} else {
++			remove_breakpoint(uprobe, mm, info->vaddr);
+ 		}
++ unlock:
++		up_write(&mm->mmap_sem);
++ free:
++		mmput(mm);
++		info = free_map_info(info);
+ 	}
+ 
+-	list_for_each_entry_safe(vi, tmpvi, &try_list, probe_list) {
+-		list_del(&vi->probe_list);
+-		kfree(vi);
+-	}
+-
+-	return ret;
++	return err;
+ }
+ 
+ static int __uprobe_register(struct uprobe *uprobe)
+@@ -945,6 +929,7 @@ int uprobe_register(struct inode *inode, loff_t offset, struct uprobe_consumer *
+ 
+ 	return ret;
+ }
++EXPORT_SYMBOL_GPL(uprobe_register);
+ 
+ /*
+  * uprobe_unregister - unregister a already registered probe.
+@@ -976,6 +961,7 @@ void uprobe_unregister(struct inode *inode, loff_t offset, struct uprobe_consume
+ 	if (uprobe)
+ 		put_uprobe(uprobe);
+ }
++EXPORT_SYMBOL_GPL(uprobe_unregister);
+ 
+ /*
+  * Of all the nodes that correspond to the given inode, return the node
+@@ -1048,7 +1034,7 @@ static void build_probe_list(struct inode *inode, struct list_head *head)
+ int uprobe_mmap(struct vm_area_struct *vma)
+ {
+ 	struct list_head tmp_list;
+-	struct uprobe *uprobe, *u;
++	struct uprobe *uprobe;
+ 	struct inode *inode;
+ 	int ret, count;
+ 
+@@ -1066,12 +1052,9 @@ int uprobe_mmap(struct vm_area_struct *vma)
+ 	ret = 0;
+ 	count = 0;
+ 
+-	list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) {
+-		loff_t vaddr;
+-
+-		list_del(&uprobe->pending_list);
++	list_for_each_entry(uprobe, &tmp_list, pending_list) {
+ 		if (!ret) {
+-			vaddr = vma_address(vma, uprobe->offset);
++			loff_t vaddr = vma_address(vma, uprobe->offset);
+ 
+ 			if (vaddr < vma->vm_start || vaddr >= vma->vm_end) {
+ 				put_uprobe(uprobe);
+@@ -1079,8 +1062,10 @@ int uprobe_mmap(struct vm_area_struct *vma)
+ 			}
+ 
+ 			ret = install_breakpoint(uprobe, vma->vm_mm, vma, vaddr);
+-
+-			/* Ignore double add: */
++			/*
++			 * We can race against uprobe_register(), see the
++			 * comment near uprobe_hash().
++			 */
+ 			if (ret == -EEXIST) {
+ 				ret = 0;
+ 
+@@ -1115,7 +1100,7 @@
+ void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned long end)
+ {
+ 	struct list_head tmp_list;
+-	struct uprobe *uprobe, *u;
++	struct uprobe *uprobe;
+ 	struct inode *inode;
+ 
+ 	if (!atomic_read(&uprobe_events) || !valid_vma(vma, false))
+@@ -1132,11 +1117,8 @@ void uprobe_munmap(struct vm_area_struct *vma, unsigned long start, unsigned lon
+ 	mutex_lock(uprobes_mmap_hash(inode));
+ 	build_probe_list(inode, &tmp_list);
+ 
+-	list_for_each_entry_safe(uprobe, u, &tmp_list, pending_list) {
+-		loff_t vaddr;
+-
+-		list_del(&uprobe->pending_list);
+-		vaddr = vma_address(vma, uprobe->offset);
++	list_for_each_entry(uprobe, &tmp_list, pending_list) {
++		loff_t vaddr = vma_address(vma, uprobe->offset);
+ 
+ 		if (vaddr >= start && vaddr < end) {
+ 			/*
+@@ -1378,9 +1360,6 @@ void uprobe_free_utask(struct task_struct *t)
+ {
+ 	struct uprobe_task *utask = t->utask;
+ 
+-	if (t->uprobe_srcu_id != -1)
+-		srcu_read_unlock_raw(&uprobes_srcu, t->uprobe_srcu_id);
+-
+ 	if (!utask)
+ 		return;
+ 
+@@ -1398,7 +1377,6 @@ void uprobe_free_utask(struct task_struct *t)
+ void uprobe_copy_process(struct task_struct *t)
+ {
+ 	t->utask = NULL;
+-	t->uprobe_srcu_id = -1;
+ }
+ 
+ /*
+@@ -1417,7 +1395,6 @@ static struct uprobe_task *add_utask(void)
+ 	if (unlikely(!utask))
+ 		return NULL;
+ 
+-	utask->active_uprobe = NULL;
+ 	current->utask = utask;
+ 	return utask;
+ }
+@@ -1479,41 +1456,64 @@ static bool can_skip_sstep(struct uprobe *uprobe, struct pt_regs *regs)
+ 	return false;
+ }
+ 
++static struct uprobe *find_active_uprobe(unsigned long bp_vaddr, int *is_swbp)
++{
++	struct mm_struct *mm = current->mm;
++	struct uprobe *uprobe = NULL;
++	struct vm_area_struct *vma;
++
++	down_read(&mm->mmap_sem);
++	vma = find_vma(mm, bp_vaddr);
++	if (vma && vma->vm_start <= bp_vaddr) {
++		if (valid_vma(vma, false)) {
++			struct inode *inode;
++			loff_t offset;
++
++			inode = vma->vm_file->f_mapping->host;
++			offset = bp_vaddr - vma->vm_start;
++			offset += (vma->vm_pgoff << PAGE_SHIFT);
++			uprobe = find_uprobe(inode, offset);
++		}
++
++		if (!uprobe)
++			*is_swbp = is_swbp_at_addr(mm, bp_vaddr);
++	} else {
++		*is_swbp = -EFAULT;
++	}
++	up_read(&mm->mmap_sem);
++
++	return uprobe;
++}
++
+ /*
+  * Run handler and ask thread to singlestep.
+  * Ensure all non-fatal signals cannot interrupt thread while it singlesteps.
+  */
+ static void handle_swbp(struct pt_regs *regs)
+ {
+-	struct vm_area_struct *vma;
+ 	struct uprobe_task *utask;
+ 	struct uprobe *uprobe;
+-	struct mm_struct *mm;
+ 	unsigned long bp_vaddr;
++	int uninitialized_var(is_swbp);
+ 
+-	uprobe = NULL;
+ 	bp_vaddr = uprobe_get_swbp_addr(regs);
+-	mm = current->mm;
+-	down_read(&mm->mmap_sem);
+-	vma = find_vma(mm, bp_vaddr);
+-
+-	if (vma && vma->vm_start <= bp_vaddr && valid_vma(vma, false)) {
+-		struct inode *inode;
+-		loff_t offset;
+-
+-		inode = vma->vm_file->f_mapping->host;
+-		offset = bp_vaddr - vma->vm_start;
+-		offset += (vma->vm_pgoff << PAGE_SHIFT);
+-		uprobe = find_uprobe(inode, offset);
+-	}
+-
+-	srcu_read_unlock_raw(&uprobes_srcu, current->uprobe_srcu_id);
+-	current->uprobe_srcu_id = -1;
+-	up_read(&mm->mmap_sem);
++	uprobe = find_active_uprobe(bp_vaddr, &is_swbp);
+ 
+ 	if (!uprobe) {
+-		/* No matching uprobe; signal SIGTRAP. */
+-		send_sig(SIGTRAP, current, 0);
++		if (is_swbp > 0) {
++			/* No matching uprobe; signal SIGTRAP. */
++			send_sig(SIGTRAP, current, 0);
++		} else {
++			/*
++			 * Either we raced with uprobe_unregister() or we can't
++			 * access this memory. The latter is only possible if
++			 * another thread plays with our ->mm. In both cases
++			 * we can simply restart. If this vma was unmapped we
++			 * can pretend this insn was not executed yet and get
++			 * the (correct) SIGSEGV after restart.
++			 */
++			instruction_pointer_set(regs, bp_vaddr);
++		}
+ 		return;
+ 	}
+ 
+@@ -1620,7 +1620,6 @@ int uprobe_pre_sstep_notifier(struct pt_regs *regs)
+ 	utask->state = UTASK_BP_HIT;
+ 
+ 	set_thread_flag(TIF_UPROBE);
+-	current->uprobe_srcu_id = srcu_read_lock_raw(&uprobes_srcu);
+ 
+ 	return 1;
+ }
+@@ -1655,7 +1654,6 @@ static int __init init_uprobes(void)
+ 		mutex_init(&uprobes_mutex[i]);
+ 		mutex_init(&uprobes_mmap_mutex[i]);
+ 	}
+-	init_srcu_struct(&uprobes_srcu);
+ 
+ 	return register_die_notifier(&uprobe_exception_nb);
+ }
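
Usage note (not part of the patch itself): the whole point of Josh Stone's export commit is
that an out-of-tree module can now call uprobe_register()/uprobe_unregister() directly. The
sketch below is illustrative only, written against the 3.5-era API visible in the hunks above;
the module name, probed path, and file offset are hypothetical values, not anything shipped
with this update.

/*
 * Illustrative sketch only -- not part of uprobes-3.5-tip.patch.
 * Registers a uprobe from a module via the symbols exported above.
 * The probed path and offset are made-up values for the example.
 */
#include <linux/module.h>
#include <linux/fs.h>
#include <linux/namei.h>
#include <linux/ptrace.h>
#include <linux/uprobes.h>

static loff_t sample_offset = 0x4710;	/* hypothetical file offset of the probed insn */
static struct inode *sample_inode;

static int sample_handler(struct uprobe_consumer *self, struct pt_regs *regs)
{
	pr_info("uprobe hit, ip=%lx\n", instruction_pointer(regs));
	return 0;
}

static struct uprobe_consumer sample_consumer = {
	.handler	= sample_handler,
};

static int __init sample_init(void)
{
	struct path path;
	int ret;

	/* Resolve the probed binary to its inode; the path is hypothetical. */
	ret = kern_path("/bin/true", LOOKUP_FOLLOW, &path);
	if (ret)
		return ret;

	sample_inode = igrab(path.dentry->d_inode);
	path_put(&path);
	if (!sample_inode)
		return -ENOENT;

	/* Possible from a module only because the patch exports this symbol. */
	ret = uprobe_register(sample_inode, sample_offset, &sample_consumer);
	if (ret)
		iput(sample_inode);
	return ret;
}

static void __exit sample_exit(void)
{
	uprobe_unregister(sample_inode, sample_offset, &sample_consumer);
	iput(sample_inode);
}

module_init(sample_init);
module_exit(sample_exit);
MODULE_LICENSE("GPL");

The offset passed to uprobe_register() is the inode-relative file offset of the probed
instruction, i.e. the same inode:offset pair used as the rbtree key in kernel/events/uprobes.c
and computed in find_active_uprobe() above.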