diff --git a/arch/arm/include/asm/uaccess.h b/arch/arm/include/asm/uaccess.h
index 8cc85a4ebec20a1223f966bb868e5def5f70928e..35c9db857ebe9c7d53715ec42518a6e9fbe1dc6e 100644
--- a/arch/arm/include/asm/uaccess.h
+++ b/arch/arm/include/asm/uaccess.h
@@ -510,10 +510,14 @@ __copy_to_user_std(void __user *to, const void *from, unsigned long n);
 static inline unsigned long __must_check
 __copy_to_user(void __user *to, const void *from, unsigned long n)
 {
+#ifndef CONFIG_UACCESS_WITH_MEMCPY
 	unsigned int __ua_flags = uaccess_save_and_enable();
 	n = arm_copy_to_user(to, from, n);
 	uaccess_restore(__ua_flags);
 	return n;
+#else
+	return arm_copy_to_user(to, from, n);
+#endif
 }
 
 extern unsigned long __must_check
diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c
index 7a7c4cea55231b1c793982ab63d6c5598004b14b..4adfb46e3ee93276ea506b723eb11e33c57459f4 100644
--- a/arch/arm/kernel/process.c
+++ b/arch/arm/kernel/process.c
@@ -95,6 +95,22 @@ void __show_regs(struct pt_regs *regs)
 {
 	unsigned long flags;
 	char buf[64];
+#ifndef CONFIG_CPU_V7M
+	unsigned int domain;
+#ifdef CONFIG_CPU_SW_DOMAIN_PAN
+	/*
+	 * Get the domain register for the parent context. In user
+	 * mode, we don't save the DACR, so lets use what it should
+	 * be. For other modes, we place it after the pt_regs struct.
+	 */
+	if (user_mode(regs))
+		domain = DACR_UACCESS_ENABLE;
+	else
+		domain = *(unsigned int *)(regs + 1);
+#else
+	domain = get_domain();
+#endif
+#endif
 
 	show_regs_print_info(KERN_DEFAULT);
 
@@ -123,21 +139,8 @@ void __show_regs(struct pt_regs *regs)
 
 #ifndef CONFIG_CPU_V7M
 	{
-		unsigned int domain = get_domain();
 		const char *segment;
 
-#ifdef CONFIG_CPU_SW_DOMAIN_PAN
-		/*
-		 * Get the domain register for the parent context. In user
-		 * mode, we don't save the DACR, so lets use what it should
-		 * be. For other modes, we place it after the pt_regs struct.
-		 */
-		if (user_mode(regs))
-			domain = DACR_UACCESS_ENABLE;
-		else
-			domain = *(unsigned int *)(regs + 1);
-#endif
-
 		if ((domain & domain_mask(DOMAIN_USER)) ==
 		    domain_val(DOMAIN_USER, DOMAIN_NOACCESS))
 			segment = "none";
@@ -163,11 +166,11 @@ void __show_regs(struct pt_regs *regs)
 		buf[0] = '\0';
 #ifdef CONFIG_CPU_CP15_MMU
 		{
-			unsigned int transbase, dac = get_domain();
+			unsigned int transbase;
 			asm("mrc p15, 0, %0, c2, c0\n\t"
 			    : "=r" (transbase));
 			snprintf(buf, sizeof(buf), "  Table: %08x  DAC: %08x",
-			  	transbase, dac);
+				transbase, domain);
 		}
 #endif
 		asm("mrc p15, 0, %0, c1, c0\n" : "=r" (ctrl));
diff --git a/arch/arm/kernel/swp_emulate.c b/arch/arm/kernel/swp_emulate.c
index 5b26e7efa9ea415967b63ede27ab1edf2bc8e888..c3fe769d75584c7248015b855bca904589f3e29f 100644
--- a/arch/arm/kernel/swp_emulate.c
+++ b/arch/arm/kernel/swp_emulate.c
@@ -36,10 +36,10 @@
  */
 #define __user_swpX_asm(data, addr, res, temp, B)		\
 	__asm__ __volatile__(					\
-	"	mov		%2, %1\n"			\
-	"0:	ldrex"B"	%1, [%3]\n"			\
-	"1:	strex"B"	%0, %2, [%3]\n"			\
+	"0:	ldrex"B"	%2, [%3]\n"			\
+	"1:	strex"B"	%0, %1, [%3]\n"			\
 	"	cmp		%0, #0\n"			\
+	"	moveq		%1, %2\n"			\
 	"	movne		%0, %4\n"			\
 	"2:\n"							\
 	"	.section	 .text.fixup,\"ax\"\n"		\
diff --git a/arch/arm/lib/uaccess_with_memcpy.c b/arch/arm/lib/uaccess_with_memcpy.c
index d72b90905132487257220939a255ac7ed1d3754d..588bbc288396ae52fe0d801bc537906b3e5dbfce 100644
--- a/arch/arm/lib/uaccess_with_memcpy.c
+++ b/arch/arm/lib/uaccess_with_memcpy.c
@@ -88,6 +88,7 @@ pin_page_for_write(const void __user *_addr, pte_t **ptep, spinlock_t **ptlp)
 static unsigned long noinline
 __copy_to_user_memcpy(void __user *to, const void *from, unsigned long n)
 {
+	unsigned long ua_flags;
 	int atomic;
 
 	if (unlikely(segment_eq(get_fs(), KERNEL_DS))) {
@@ -118,7 +119,9 @@ __copy_to_user_memcpy(void __user *to, const void *from, unsigned long n)
 		if (tocopy > n)
 			tocopy = n;
 
+		ua_flags = uaccess_save_and_enable();
 		memcpy((void *)to, from, tocopy);
+		uaccess_restore(ua_flags);
 		to += tocopy;
 		from += tocopy;
 		n -= tocopy;
@@ -145,14 +148,21 @@ arm_copy_to_user(void __user *to, const void *from, unsigned long n)
 	 * With frame pointer disabled, tail call optimization kicks in
 	 * as well making this test almost invisible.
 	 */
-	if (n < 64)
-		return __copy_to_user_std(to, from, n);
-	return __copy_to_user_memcpy(to, from, n);
+	if (n < 64) {
+		unsigned long ua_flags = uaccess_save_and_enable();
+		n = __copy_to_user_std(to, from, n);
+		uaccess_restore(ua_flags);
+	} else {
+		n = __copy_to_user_memcpy(to, from, n);
+	}
+	return n;
 }
 	
 static unsigned long noinline
 __clear_user_memset(void __user *addr, unsigned long n)
 {
+	unsigned long ua_flags;
+
 	if (unlikely(segment_eq(get_fs(), KERNEL_DS))) {
 		memset((void *)addr, 0, n);
 		return 0;
@@ -175,7 +185,9 @@ __clear_user_memset(void __user *addr, unsigned long n)
 		if (tocopy > n)
 			tocopy = n;
 
+		ua_flags = uaccess_save_and_enable();
 		memset((void *)addr, 0, tocopy);
+		uaccess_restore(ua_flags);
 		addr += tocopy;
 		n -= tocopy;
 
@@ -193,9 +205,14 @@ __clear_user_memset(void __user *addr, unsigned long n)
 unsigned long arm_clear_user(void __user *addr, unsigned long n)
 {
 	/* See rational for this in __copy_to_user() above. */
-	if (n < 64)
-		return __clear_user_std(addr, n);
-	return __clear_user_memset(addr, n);
+	if (n < 64) {
+		unsigned long ua_flags = uaccess_save_and_enable();
+		n = __clear_user_std(addr, n);
+		uaccess_restore(ua_flags);
+	} else {
+		n = __clear_user_memset(addr, n);
+	}
+	return n;
 }
 
 #if 0
diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c
index 845769e413323120b6d7b4afed7640746413bc98..c8c8b9ed02e09b57a79bee1ab5db9c337be0d338 100644
--- a/arch/arm/mm/context.c
+++ b/arch/arm/mm/context.c
@@ -165,13 +165,28 @@ static void flush_context(unsigned int cpu)
 		__flush_icache_all();
 }
 
-static int is_reserved_asid(u64 asid)
+static bool check_update_reserved_asid(u64 asid, u64 newasid)
 {
 	int cpu;
-	for_each_possible_cpu(cpu)
-		if (per_cpu(reserved_asids, cpu) == asid)
-			return 1;
-	return 0;
+	bool hit = false;
+
+	/*
+	 * Iterate over the set of reserved ASIDs looking for a match.
+	 * If we find one, then we can update our mm to use newasid
+	 * (i.e. the same ASID in the current generation) but we can't
+	 * exit the loop early, since we need to ensure that all copies
+	 * of the old ASID are updated to reflect the mm. Failure to do
+	 * so could result in us missing the reserved ASID in a future
+	 * generation.
+	 */
+	for_each_possible_cpu(cpu) {
+		if (per_cpu(reserved_asids, cpu) == asid) {
+			hit = true;
+			per_cpu(reserved_asids, cpu) = newasid;
+		}
+	}
+
+	return hit;
 }
 
 static u64 new_context(struct mm_struct *mm, unsigned int cpu)
@@ -181,12 +196,14 @@ static u64 new_context(struct mm_struct *mm, unsigned int cpu)
 	u64 generation = atomic64_read(&asid_generation);
 
 	if (asid != 0) {
+		u64 newasid = generation | (asid & ~ASID_MASK);
+
 		/*
 		 * If our current ASID was active during a rollover, we
 		 * can continue to use it and this was just a false alarm.
 		 */
-		if (is_reserved_asid(asid))
-			return generation | (asid & ~ASID_MASK);
+		if (check_update_reserved_asid(asid, newasid))
+			return newasid;
 
 		/*
 		 * We had a valid ASID in a previous life, so try to re-use
@@ -194,7 +211,7 @@ static u64 new_context(struct mm_struct *mm, unsigned int cpu)
 		 */
 		asid &= ~ASID_MASK;
 		if (!__test_and_set_bit(asid, asid_map))
-			goto bump_gen;
+			return newasid;
 	}
 
 	/*
@@ -216,11 +233,8 @@ static u64 new_context(struct mm_struct *mm, unsigned int cpu)
 
 	__set_bit(asid, asid_map);
 	cur_idx = asid;
-
-bump_gen:
-	asid |= generation;
 	cpumask_clear(mm_cpumask(mm));
-	return asid;
+	return asid | generation;
 }
 
 void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk)
diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c
index 8a63b4cdc0f27d0fa7cf69f0bb1ffa51b6289e39..7f8cd1b3557f071a64dafc05eba4c481ef82c79d 100644
--- a/arch/arm/mm/init.c
+++ b/arch/arm/mm/init.c
@@ -22,6 +22,7 @@
 #include <linux/memblock.h>
 #include <linux/dma-contiguous.h>
 #include <linux/sizes.h>
+#include <linux/stop_machine.h>
 
 #include <asm/cp15.h>
 #include <asm/mach-types.h>
@@ -627,12 +628,10 @@ static struct section_perm ro_perms[] = {
  * safe to be called with preemption disabled, as under stop_machine().
  */
 static inline void section_update(unsigned long addr, pmdval_t mask,
-				  pmdval_t prot)
+				  pmdval_t prot, struct mm_struct *mm)
 {
-	struct mm_struct *mm;
 	pmd_t *pmd;
 
-	mm = current->active_mm;
 	pmd = pmd_offset(pud_offset(pgd_offset(mm, addr), addr), addr);
 
 #ifdef CONFIG_ARM_LPAE
@@ -656,49 +655,82 @@ static inline bool arch_has_strict_perms(void)
 	return !!(get_cr() & CR_XP);
 }
 
-#define set_section_perms(perms, field)	{				\
-	size_t i;							\
-	unsigned long addr;						\
-									\
-	if (!arch_has_strict_perms())					\
-		return;							\
-									\
-	for (i = 0; i < ARRAY_SIZE(perms); i++) {			\
-		if (!IS_ALIGNED(perms[i].start, SECTION_SIZE) ||	\
-		    !IS_ALIGNED(perms[i].end, SECTION_SIZE)) {		\
-			pr_err("BUG: section %lx-%lx not aligned to %lx\n", \
-				perms[i].start, perms[i].end,		\
-				SECTION_SIZE);				\
-			continue;					\
-		}							\
-									\
-		for (addr = perms[i].start;				\
-		     addr < perms[i].end;				\
-		     addr += SECTION_SIZE)				\
-			section_update(addr, perms[i].mask,		\
-				       perms[i].field);			\
-	}								\
+void set_section_perms(struct section_perm *perms, int n, bool set,
+			struct mm_struct *mm)
+{
+	size_t i;
+	unsigned long addr;
+
+	if (!arch_has_strict_perms())
+		return;
+
+	for (i = 0; i < n; i++) {
+		if (!IS_ALIGNED(perms[i].start, SECTION_SIZE) ||
+		    !IS_ALIGNED(perms[i].end, SECTION_SIZE)) {
+			pr_err("BUG: section %lx-%lx not aligned to %lx\n",
+				perms[i].start, perms[i].end,
+				SECTION_SIZE);
+			continue;
+		}
+
+		for (addr = perms[i].start;
+		     addr < perms[i].end;
+		     addr += SECTION_SIZE)
+			section_update(addr, perms[i].mask,
+				set ? perms[i].prot : perms[i].clear, mm);
+	}
+
 }
 
-static inline void fix_kernmem_perms(void)
+static void update_sections_early(struct section_perm perms[], int n)
 {
-	set_section_perms(nx_perms, prot);
+	struct task_struct *t, *s;
+
+	read_lock(&tasklist_lock);
+	for_each_process(t) {
+		if (t->flags & PF_KTHREAD)
+			continue;
+		for_each_thread(t, s)
+			set_section_perms(perms, n, true, s->mm);
+	}
+	read_unlock(&tasklist_lock);
+	set_section_perms(perms, n, true, current->active_mm);
+	set_section_perms(perms, n, true, &init_mm);
+}
+
+int __fix_kernmem_perms(void *unused)
+{
+	update_sections_early(nx_perms, ARRAY_SIZE(nx_perms));
+	return 0;
+}
+
+void fix_kernmem_perms(void)
+{
+	stop_machine(__fix_kernmem_perms, NULL, NULL);
 }
 
 #ifdef CONFIG_DEBUG_RODATA
+int __mark_rodata_ro(void *unused)
+{
+	update_sections_early(ro_perms, ARRAY_SIZE(ro_perms));
+	return 0;
+}
+
 void mark_rodata_ro(void)
 {
-	set_section_perms(ro_perms, prot);
+	stop_machine(__mark_rodata_ro, NULL, NULL);
 }
 
 void set_kernel_text_rw(void)
 {
-	set_section_perms(ro_perms, clear);
+	set_section_perms(ro_perms, ARRAY_SIZE(ro_perms), false,
+				current->active_mm);
 }
 
 void set_kernel_text_ro(void)
 {
-	set_section_perms(ro_perms, prot);
+	set_section_perms(ro_perms, ARRAY_SIZE(ro_perms), true,
+				current->active_mm);
 }
 #endif /* CONFIG_DEBUG_RODATA */
 
diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S
index de2b246fed3808fce444b560a725ed4007464174..8e1ea433c3f1e321d7ed4e91fe582a5be4eb85be 100644
--- a/arch/arm/mm/proc-v7.S
+++ b/arch/arm/mm/proc-v7.S
@@ -95,7 +95,7 @@ ENDPROC(cpu_v7_dcache_clean_area)
 .equ	cpu_v7_suspend_size, 4 * 9
 #ifdef CONFIG_ARM_CPU_SUSPEND
 ENTRY(cpu_v7_do_suspend)
-	stmfd	sp!, {r4 - r10, lr}
+	stmfd	sp!, {r4 - r11, lr}
 	mrc	p15, 0, r4, c13, c0, 0	@ FCSE/PID
 	mrc	p15, 0, r5, c13, c0, 3	@ User r/o thread ID
 	stmia	r0!, {r4 - r5}
@@ -112,7 +112,7 @@ ENTRY(cpu_v7_do_suspend)
 	mrc	p15, 0, r9, c1, c0, 1	@ Auxiliary control register
 	mrc	p15, 0, r10, c1, c0, 2	@ Co-processor access control
 	stmia	r0, {r5 - r11}
-	ldmfd	sp!, {r4 - r10, pc}
+	ldmfd	sp!, {r4 - r11, pc}
 ENDPROC(cpu_v7_do_suspend)
 
 ENTRY(cpu_v7_do_resume)