This is a fix for the NX emulation patch to force the brk area well outside
of the exec randomization area, to avoid future allocation or brk growth
collisions. Normally this isn't a problem, except when the text region has
been loaded from a PIE binary and the CS limit can't be put just above bss.

A test-case that will show failures without this patch can be found here:
http://bazaar.launchpad.net/~ubuntu-bugcontrol/qa-regression-testing/master/annotate/head%3A/scripts/kernel-aslr-collisions/explode-brk.c

Signed-off-by: Kees Cook <kees.cook@canonical.com>
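To make the arithmetic concrete, here is a minimal userspace sketch of the
bump applied in arch_randomize_brk() below. The TASK_SIZE value (0xC0000000,
the usual 3G/1G split) and the example brk address are assumptions for
illustration, not taken from the patch:

/* Hedged sketch, not kernel code: shows where the brk base lands once
 * the NX-emulation bump is applied. Assumes TASK_SIZE = 0xC0000000 and
 * a hypothetical PIE brk below 0x08000000. */
#include <stdio.h>

#define TASK_SIZE 0xC0000000UL

int main(void)
{
	unsigned long brk = 0x04a00000UL;	/* hypothetical PIE brk */
	unsigned long bump = 0;

	if (brk < 0x08000000UL)		/* text loaded low, as for a PIE binary */
		bump = TASK_SIZE / 6;	/* 0x20000000, clears the exec randomization area */

	printf("brk base: %#lx -> %#lx\n", brk, bump + brk);
	return 0;
}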
--- a/arch/x86/include/asm/desc.h
+++ b/arch/x86/include/asm/desc.h
@@ -5,6 +5,7 @@
 #include <asm/ldt.h>
 #include <asm/mmu.h>
 #include <linux/smp.h>
+#include <linux/mm_types.h>
 
 static inline void fill_ldt(struct desc_struct *desc,
 			    const struct user_desc *info)
@@ -93,6 +94,9 @@ static inline int desc_empty(const void *ptr)
 
 #define load_TLS(t, cpu) native_load_tls(t, cpu)
 #define set_ldt native_set_ldt
+#ifdef CONFIG_X86_32
+#define load_user_cs_desc native_load_user_cs_desc
+#endif /*CONFIG_X86_32*/
 
 #define write_ldt_entry(dt, entry, desc)	\
 	native_write_ldt_entry(dt, entry, desc)
@@ -392,4 +396,25 @@ static inline void set_system_intr_gate_ist(int n, void *addr, unsigned ist)
 	_set_gate(n, GATE_INTERRUPT, addr, 0x3, ist, __KERNEL_CS);
 }
 
+#ifdef CONFIG_X86_32
+static inline void set_user_cs(struct desc_struct *desc, unsigned long limit)
+{
+	limit = (limit - 1) / PAGE_SIZE;
+	desc->a = limit & 0xffff;
+	desc->b = (limit & 0xf0000) | 0x00c0fb00;
+}
+
+static inline void native_load_user_cs_desc(int cpu, struct mm_struct *mm)
+{
+	get_cpu_gdt_table(cpu)[GDT_ENTRY_DEFAULT_USER_CS] = (mm)->context.user_cs;
+}
+
+#define arch_add_exec_range arch_add_exec_range
+#define arch_remove_exec_range arch_remove_exec_range
+#define arch_flush_exec_range arch_flush_exec_range
+extern void arch_add_exec_range(struct mm_struct *mm, unsigned long limit);
+extern void arch_remove_exec_range(struct mm_struct *mm, unsigned long limit);
+extern void arch_flush_exec_range(struct mm_struct *mm);
+#endif /* CONFIG_X86_32 */
+
 #endif /* _ASM_X86_DESC_H */
--- a/arch/x86/include/asm/mmu.h
+++ b/arch/x86/include/asm/mmu.h
@@ -7,12 +7,19 @@
 /*
  * The x86 doesn't have a mmu context, but
  * we put the segment information here.
+ *
+ * exec_limit is used to track the range PROT_EXEC
+ * mappings span.
  */
 typedef struct {
 	void *ldt;
 	int size;
 	struct mutex lock;
 	void *vdso;
+#ifdef CONFIG_X86_32
+	struct desc_struct user_cs;
+	unsigned long exec_limit;
+#endif
 } mm_context_t;
 
 #ifdef CONFIG_SMP
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -289,6 +289,12 @@ static inline void set_ldt(const void *addr, unsigned entries)
 {
 	PVOP_VCALL2(pv_cpu_ops.set_ldt, addr, entries);
 }
+#ifdef CONFIG_X86_32
+static inline void load_user_cs_desc(unsigned int cpu, struct mm_struct *mm)
+{
+	PVOP_VCALL2(pv_cpu_ops.load_user_cs_desc, cpu, mm);
+}
+#endif /*CONFIG_X86_32*/
 static inline void store_gdt(struct desc_ptr *dtr)
 {
 	PVOP_VCALL1(pv_cpu_ops.store_gdt, dtr);
--- a/arch/x86/include/asm/paravirt_types.h
+++ b/arch/x86/include/asm/paravirt_types.h
@@ -118,6 +118,9 @@ struct pv_cpu_ops {
 	void (*store_gdt)(struct desc_ptr *);
 	void (*store_idt)(struct desc_ptr *);
 	void (*set_ldt)(const void *desc, unsigned entries);
+#ifdef CONFIG_X86_32
+	void (*load_user_cs_desc)(int cpu, struct mm_struct *mm);
+#endif
 	unsigned long (*store_tr)(void);
 	void (*load_tls)(struct thread_struct *t, unsigned int cpu);
 #ifdef CONFIG_X86_64
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -802,6 +802,22 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
 	/* Filter out anything that depends on CPUID levels we don't have */
 	filter_cpuid_features(c, true);
 
+#ifdef CONFIG_X86_32
+	/*
+	 *  emulation of NX with segment limits unfortunately means
+	 *  we have to disable the fast system calls, due to the way that
+	 *  sysexit clears the segment limits on return.
+	 *  If we have either disabled exec-shield on the boot command line,
+	 *  or we have NX, then we don't need to do this.
+	 */
+	if (!disable_nx) {
+#ifdef CONFIG_X86_PAE
+		if (!test_cpu_cap(c, X86_FEATURE_NX))
+#endif
+			clear_cpu_cap(c, X86_FEATURE_SEP);
+	}
+#endif
+
 	/* If the model name is still unset, do table lookup. */
 	if (!c->x86_model_id[0]) {
 		const char *p;
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -345,6 +345,9 @@ struct pv_cpu_ops pv_cpu_ops = {
 	.read_tscp = native_read_tscp,
 	.load_tr_desc = native_load_tr_desc,
 	.set_ldt = native_set_ldt,
+#ifdef CONFIG_X86_32
+	.load_user_cs_desc = native_load_user_cs_desc,
+#endif /*CONFIG_X86_32*/
 	.load_gdt = native_load_gdt,
 	.load_idt = native_load_idt,
 	.store_gdt = native_store_gdt,
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -243,7 +243,10 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
 void
 start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
 {
+	int cpu;
+
 	set_user_gs(regs, 0);
+
 	regs->fs		= 0;
 	set_fs(USER_DS);
 	regs->ds		= __USER_DS;
@@ -252,6 +255,11 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
 	regs->cs		= __USER_CS;
 	regs->ip		= new_ip;
 	regs->sp		= new_sp;
+
+	cpu = get_cpu();
+	load_user_cs_desc(cpu, current->mm);
+	put_cpu();
+
 	/*
 	 * Free the old FP and other extended state
 	 */
@@ -311,6 +319,9 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	if (preload_fpu)
 		prefetch(next->fpu.state);
 
+	if (next_p->mm)
+		load_user_cs_desc(cpu, next_p->mm);
+
 	/*
 	 * Reload esp0.
 	 */
@@ -404,3 +415,40 @@ unsigned long get_wchan(struct task_struct *p)
 	return 0;
 }
 
+static void modify_cs(struct mm_struct *mm, unsigned long limit)
+{
+	mm->context.exec_limit = limit;
+	set_user_cs(&mm->context.user_cs, limit);
+	if (mm == current->mm) {
+		int cpu;
+
+		cpu = get_cpu();
+		load_user_cs_desc(cpu, mm);
+		put_cpu();
+	}
+}
+
+void arch_add_exec_range(struct mm_struct *mm, unsigned long limit)
+{
+	if (limit > mm->context.exec_limit)
+		modify_cs(mm, limit);
+}
+
+void arch_remove_exec_range(struct mm_struct *mm, unsigned long old_end)
+{
+	struct vm_area_struct *vma;
+	unsigned long limit = PAGE_SIZE;
+
+	if (old_end == mm->context.exec_limit) {
+		for (vma = mm->mmap; vma; vma = vma->vm_next)
+			if ((vma->vm_flags & VM_EXEC) && (vma->vm_end > limit))
+				limit = vma->vm_end;
+		modify_cs(mm, limit);
+	}
+}
+
+void arch_flush_exec_range(struct mm_struct *mm)
+{
+	mm->context.exec_limit = 0;
+	set_user_cs(&mm->context.user_cs, 0);
+}
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -109,6 +109,78 @@ static inline void preempt_conditional_cli(struct pt_regs *regs)
 	dec_preempt_count();
 }
 
+#ifdef CONFIG_X86_32
+static inline int
+__compare_user_cs_desc(const struct desc_struct *desc1,
+	const struct desc_struct *desc2)
+{
+	return ((desc1->limit0 != desc2->limit0) ||
+		(desc1->limit != desc2->limit) ||
+		(desc1->base0 != desc2->base0) ||
+		(desc1->base1 != desc2->base1) ||
+		(desc1->base2 != desc2->base2));
+}
+
+/*
+ * lazy-check for CS validity on exec-shield binaries:
+ *
+ * the original non-exec stack patch was written by
+ * Solar Designer <solar at openwall.com>. Thanks!
+ */
+static int
+check_lazy_exec_limit(int cpu, struct pt_regs *regs, long error_code)
+{
+	struct desc_struct *desc1, *desc2;
+	struct vm_area_struct *vma;
+	unsigned long limit;
+
+	if (current->mm == NULL)
+		return 0;
+
+	limit = -1UL;
+	if (current->mm->context.exec_limit != -1UL) {
+		limit = PAGE_SIZE;
+		spin_lock(&current->mm->page_table_lock);
+		for (vma = current->mm->mmap; vma; vma = vma->vm_next)
+			if ((vma->vm_flags & VM_EXEC) && (vma->vm_end > limit))
+				limit = vma->vm_end;
+		vma = get_gate_vma(current);
+		if (vma && (vma->vm_flags & VM_EXEC) && (vma->vm_end > limit))
+			limit = vma->vm_end;
+		spin_unlock(&current->mm->page_table_lock);
+		if (limit >= TASK_SIZE)
+			limit = -1UL;
+		current->mm->context.exec_limit = limit;
+	}
+	set_user_cs(&current->mm->context.user_cs, limit);
+
+	desc1 = &current->mm->context.user_cs;
+	desc2 = get_cpu_gdt_table(cpu) + GDT_ENTRY_DEFAULT_USER_CS;
+
+	if (__compare_user_cs_desc(desc1, desc2)) {
+		/*
+		 * The CS was not in sync - reload it and retry the
+		 * instruction. If the instruction still faults then
+		 * we won't hit this branch next time around.
+		 */
+		if (print_fatal_signals >= 2) {
+			printk(KERN_ERR "#GPF fixup (%ld[seg:%lx]) at %08lx, CPU#%d.\n",
+				error_code, error_code/8, regs->ip,
+				smp_processor_id());
+			printk(KERN_ERR "exec_limit: %08lx, user_cs: %08x/%08x, CPU_cs: %08x/%08x.\n",
+				current->mm->context.exec_limit,
+				desc1->a, desc1->b, desc2->a, desc2->b);
+		}
+
+		load_user_cs_desc(cpu, current->mm);
+
+		return 1;
+	}
+
+	return 0;
+}
+#endif
+
 static void __kprobes
 do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
 	long error_code, siginfo_t *info)
@@ -265,6 +337,29 @@ do_general_protection(struct pt_regs *regs, long error_code)
 	if (!user_mode(regs))
 		goto gp_in_kernel;
 
+#ifdef CONFIG_X86_32
+{
+	int cpu;
+	int ok;
+
+	cpu = get_cpu();
+	ok = check_lazy_exec_limit(cpu, regs, error_code);
+	put_cpu();
+
+	if (ok)
+		return;
+
+	if (print_fatal_signals) {
+		printk(KERN_ERR "#GPF(%ld[seg:%lx]) at %08lx, CPU#%d.\n",
+			error_code, error_code/8, regs->ip, smp_processor_id());
+		printk(KERN_ERR "exec_limit: %08lx, user_cs: %08x/%08x.\n",
+			current->mm->context.exec_limit,
+			current->mm->context.user_cs.a,
+			current->mm->context.user_cs.b);
+	}
+}
+#endif /*CONFIG_X86_32*/
+
 	tsk->thread.error_code = error_code;
 	tsk->thread.trap_no = 13;
 
@@ -792,19 +887,37 @@ do_device_not_available(struct pt_regs *regs, long error_code)
 }
 
 #ifdef CONFIG_X86_32
+/*
+ * The fixup code for errors in iret jumps to here (iret_exc). It loses
+ * the original trap number and error code. The bogus trap 32 and error
+ * code 0 are what the vanilla kernel delivers via:
+ * DO_ERROR_INFO(32, SIGSEGV, "iret exception", iret_error, ILL_BADSTK, 0, 1)
+ *
+ * NOTE: Because of the final "1" in the macro we need to enable interrupts.
+ *
+ * In case of a general protection fault in the iret instruction, we
+ * need to check for a lazy CS update for exec-shield.
+ */
 dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
 {
-	siginfo_t info;
+	int ok;
+	int cpu;
+
 	local_irq_enable();
 
-	info.si_signo = SIGILL;
-	info.si_errno = 0;
-	info.si_code = ILL_BADSTK;
-	info.si_addr = NULL;
-	if (notify_die(DIE_TRAP, "iret exception",
-			regs, error_code, 32, SIGILL) == NOTIFY_STOP)
-		return;
-	do_trap(32, SIGILL, "iret exception", regs, error_code, &info);
+	cpu = get_cpu();
+	ok = check_lazy_exec_limit(cpu, regs, error_code);
+	put_cpu();
+
+	if (!ok && notify_die(DIE_TRAP, "iret exception", regs,
+		error_code, 32, SIGSEGV) != NOTIFY_STOP) {
+			siginfo_t info;
+			info.si_signo = SIGSEGV;
+			info.si_errno = 0;
+			info.si_code = ILL_BADSTK;
+			info.si_addr = 0;
+			do_trap(32, SIGSEGV, "iret exception", regs, error_code, &info);
+	}
 }
 #endif
 
--- a/arch/x86/mm/setup_nx.c
+++ b/arch/x86/mm/setup_nx.c
@@ -1,3 +1,4 @@
+#include <linux/sched.h>
 #include <linux/spinlock.h>
 #include <linux/errno.h>
 #include <linux/init.h>
@@ -6,7 +7,7 @@
 #include <asm/pgtable.h>
 #include <asm/proto.h>
 
-static int disable_nx __cpuinitdata;
+int disable_nx __cpuinitdata;
 
 /*
  * noexec = on|off
@@ -40,6 +42,10 @@ void __cpuinit x86_configure_nx(void)
 void __init x86_report_nx(void)
 {
 	if (!cpu_has_nx) {
+		if (disable_nx)
+			printk(KERN_INFO "Using x86 segment limits to approximate NX protection\n");
+		else
+
 		printk(KERN_NOTICE "Notice: NX (Execute Disable) protection "
 		       "missing in CPU or disabled in BIOS!\n");
 	} else {
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -6,6 +6,7 @@
 #include <linux/interrupt.h>
 #include <linux/module.h>
 
+#include <asm/desc.h>
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
 #include <asm/cache.h>
@@ -131,6 +132,12 @@ void smp_invalidate_interrupt(struct pt_regs *regs)
 	union smp_flush_state *f;
 
 	cpu = smp_processor_id();
+
+#ifdef CONFIG_X86_32
+	if (current->active_mm)
+		load_user_cs_desc(cpu, current->active_mm);
+#endif
+
 	/*
 	 * orig_rax contains the negated interrupt vector.
 	 * Use that to determine where the sender put the data.
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -335,6 +335,24 @@ static void xen_set_ldt(const void *addr, unsigned entries)
 	xen_mc_issue(PARAVIRT_LAZY_CPU);
 }
 
+#ifdef CONFIG_X86_32
+static void xen_load_user_cs_desc(int cpu, struct mm_struct *mm)
+{
+	void *gdt;
+	xmaddr_t mgdt;
+	u64 descriptor;
+	struct desc_struct user_cs;
+
+	gdt = &get_cpu_gdt_table(cpu)[GDT_ENTRY_DEFAULT_USER_CS];
+	mgdt = virt_to_machine(gdt);
+
+	user_cs = mm->context.user_cs;
+	descriptor = (u64) user_cs.a | ((u64) user_cs.b) << 32;
+
+	HYPERVISOR_update_descriptor(mgdt.maddr, descriptor);
+}
+#endif /*CONFIG_X86_32*/
+
 static void xen_load_gdt(const struct desc_ptr *dtr)
 {
 	unsigned long va = dtr->address;
@@ -961,6 +979,9 @@ static const struct pv_cpu_ops xen_cpu_ops __initdata = {
 
 	.load_tr_desc = paravirt_nop,
 	.set_ldt = xen_set_ldt,
+#ifdef CONFIG_X86_32
+	.load_user_cs_desc = xen_load_user_cs_desc,
+#endif /*CONFIG_X86_32*/
 	.load_gdt = xen_load_gdt,
 	.load_idt = xen_load_idt,
 	.load_tls = xen_load_tls,
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -717,6 +722,15 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs)
 	if (retval)
 		goto out_free_dentry;
 
+#ifdef CONFIG_X86_32
+	/*
+	 * Turn off the CS limit completely if exec-shield disabled or
+	 * NX active:
+	 */
+	if (disable_nx || executable_stack != EXSTACK_DISABLE_X || (__supported_pte_mask & _PAGE_NX))
+		arch_add_exec_range(current->mm, -1);
+#endif
+
 	/* OK, This is the point of no return */
 	current->flags &= ~PF_FORKNOEXEC;
 	current->mm->def_flags = def_flags;
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -101,6 +101,9 @@ struct bio_list;
 struct fs_struct;
 struct perf_event_context;
 
+extern int disable_nx;
+extern int print_fatal_signals;
+
 /*
  * List of flags we want to share for kernel threads,
  * if only because they are not used by them anyway.
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -44,6 +45,18 @@
 #define arch_rebalance_pgtables(addr, len)		(addr)
 #endif
 
+/* No sane architecture will #define these to anything else */
+#ifndef arch_add_exec_range
+#define arch_add_exec_range(mm, limit)	do { ; } while (0)
+#endif
+#ifndef arch_flush_exec_range
+#define arch_flush_exec_range(mm)	do { ; } while (0)
+#endif
+#ifndef arch_remove_exec_range
+#define arch_remove_exec_range(mm, limit)	do { ; } while (0)
+#endif
+
+
 static void unmap_region(struct mm_struct *mm,
 		struct vm_area_struct *vma, struct vm_area_struct *prev,
 		unsigned long start, unsigned long end);
@@ -388,6 +401,9 @@
 {
 	struct vm_area_struct *next;
 
+	if (vma->vm_flags & VM_EXEC)
+		arch_add_exec_range(mm, vma->vm_end);
+
 	vma->vm_prev = prev;
 	if (prev) {
 		next = prev->vm_next;
@@ -489,6 +504,8 @@
 	rb_erase(&vma->vm_rb, &mm->mm_rb);
 	if (mm->mmap_cache == vma)
 		mm->mmap_cache = prev;
+	if (vma->vm_flags & VM_EXEC)
+		arch_remove_exec_range(mm, vma->vm_end);
 }
 
 /*
@@ -790,6 +807,8 @@
 		} else					/* cases 2, 5, 7 */
 			err = vma_adjust(prev, prev->vm_start,
 				end, prev->vm_pgoff, NULL);
+		if (prev->vm_flags & VM_EXEC)
+			arch_add_exec_range(mm, prev->vm_end);
 		if (err)
 			return NULL;
 		return prev;
@@ -1966,10 +2075,14 @@
 	if (new->vm_ops && new->vm_ops->open)
 		new->vm_ops->open(new);
 
-	if (new_below)
+	if (new_below) {
+		unsigned long old_end = vma->vm_end;
+
 		err = vma_adjust(vma, addr, vma->vm_end, vma->vm_pgoff +
 			((addr - new->vm_start) >> PAGE_SHIFT), new);
-	else
+		if (vma->vm_flags & VM_EXEC)
+			arch_remove_exec_range(mm, old_end);
+	} else
 		err = vma_adjust(vma, vma->vm_start, addr, vma->vm_pgoff, new);
 
 	/* Success. */
@@ -2254,6 +2367,7 @@
 
 	free_pgtables(tlb, vma, FIRST_USER_ADDRESS, 0);
 	tlb_finish_mmu(tlb, 0, end);
+	arch_flush_exec_range(mm);
 
 	/*
 	 * Walk the list again, actually closing and freeing it,
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -25,9 +25,14 @@
 #include <linux/perf_event.h>
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
+#include <asm/pgalloc.h>
 #include <asm/cacheflush.h>
 #include <asm/tlbflush.h>
 
+#ifndef arch_remove_exec_range
+#define arch_remove_exec_range(mm, limit)      do { ; } while (0)
+#endif
+
 #ifndef pgprot_modify
 static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
 {
@@ -138,7 +143,7 @@ mprotect_fixup(struct vm_area_struct *vma, struct vm_area_struct **pprev,
 	struct mm_struct *mm = vma->vm_mm;
 	unsigned long oldflags = vma->vm_flags;
 	long nrpages = (end - start) >> PAGE_SHIFT;
-	unsigned long charged = 0;
+	unsigned long charged = 0, old_end = vma->vm_end;
 	pgoff_t pgoff;
 	int error;
 	int dirty_accountable = 0;
@@ -203,6 +208,9 @@ success:
 		dirty_accountable = 1;
 	}
 
+	if (oldflags & VM_EXEC)
+		arch_remove_exec_range(current->mm, old_end);
+
 	mmu_notifier_invalidate_range_start(mm, start, end);
 	if (is_vm_hugetlb_page(vma))
 		hugetlb_change_protection(vma, start, end, vma->vm_page_prot);
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 57d1868..29c0c35 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -669,6 +669,16 @@ unsigned long arch_align_stack(unsigned long sp)
 unsigned long arch_randomize_brk(struct mm_struct *mm)
 {
 	unsigned long range_end = mm->brk + 0x02000000;
-	return randomize_range(mm->brk, range_end, 0) ? : mm->brk;
+	unsigned long bump = 0;
+#ifdef CONFIG_X86_32
+	/* in the case of NX emulation, shove the brk segment way out of the
+	   way of the exec randomization area, since it can collide with
+	   future allocations if not. */
+	if ( (mm->get_unmapped_exec_area == arch_get_unmapped_exec_area) &&
+	     (mm->brk < 0x08000000) ) {
+		bump = (TASK_SIZE/6);
+	}
+#endif
+	return bump + (randomize_range(mm->brk, range_end, 0) ? : mm->brk);
 }
 
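A side note on the set_user_cs() helper added in the desc.h hunk: the sketch
below re-implements its descriptor encoding in plain userspace C so the field
layout can be sanity-checked without booting a kernel. The fake_desc struct
and the example limit are stand-ins for illustration; the real code writes a
desc_struct through its a/b word accessors.

/* Standalone sketch of set_user_cs(): encodes a page-granular CS limit
 * into the two 32-bit GDT descriptor words. 0x00c0fb00 sets G=1 (4K
 * granularity), D=1 (32-bit), P=1, DPL=3, and a read/execute code type. */
#include <stdio.h>

#define PAGE_SIZE 4096UL

struct fake_desc { unsigned int a, b; };	/* stand-in for desc_struct */

static void set_user_cs(struct fake_desc *desc, unsigned long limit)
{
	limit = (limit - 1) / PAGE_SIZE;	/* inclusive limit, in pages */
	desc->a = limit & 0xffff;		/* limit bits 15..0, base 15..0 = 0 */
	desc->b = (limit & 0xf0000) | 0x00c0fb00;	/* limit bits 19..16 + attributes */
}

int main(void)
{
	struct fake_desc d;

	set_user_cs(&d, 0x08049000UL);	/* hypothetical exec_limit */
	printf("user_cs: %08x/%08x\n", d.a, d.b);	/* -> 00008048/00c0fb00 */
	return 0;
}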