diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild
index 7611b10a2d238c7b4bb73696b59e2fe8b14ceb2c..0b10ef2a43726e0188b61de96bd5a5bf1ba50067 100644
--- a/arch/arc/include/asm/Kbuild
+++ b/arch/arc/include/asm/Kbuild
@@ -48,4 +48,5 @@ generic-y += types.h
 generic-y += ucontext.h
 generic-y += user.h
 generic-y += vga.h
+generic-y += word-at-a-time.h
 generic-y += xor.h
diff --git a/arch/avr32/include/asm/Kbuild b/arch/avr32/include/asm/Kbuild
index f61f2dd67464746c728474a7b5503dd7dfbdcb67..241b9b9729d821510fb2addde4b276e73e6185a7 100644
--- a/arch/avr32/include/asm/Kbuild
+++ b/arch/avr32/include/asm/Kbuild
@@ -20,4 +20,5 @@ generic-y += sections.h
 generic-y += topology.h
 generic-y += trace_clock.h
 generic-y += vga.h
+generic-y += word-at-a-time.h
 generic-y += xor.h
diff --git a/arch/blackfin/include/asm/Kbuild b/arch/blackfin/include/asm/Kbuild
index 61cd1e786a142c440caa231a665349ed3d8f8e01..91d49c0a31185041055e627689ec09b2ab708bd1 100644
--- a/arch/blackfin/include/asm/Kbuild
+++ b/arch/blackfin/include/asm/Kbuild
@@ -46,4 +46,5 @@ generic-y += types.h
 generic-y += ucontext.h
 generic-y += unaligned.h
 generic-y += user.h
+generic-y += word-at-a-time.h
 generic-y += xor.h
diff --git a/arch/c6x/include/asm/Kbuild b/arch/c6x/include/asm/Kbuild
index f17c4dc6050c7d23ade635f749aa75eed8f0ac1d..945544ec603ee12408928c0abd4db19cfbc784d9 100644
--- a/arch/c6x/include/asm/Kbuild
+++ b/arch/c6x/include/asm/Kbuild
@@ -59,4 +59,5 @@ generic-y += types.h
 generic-y += ucontext.h
 generic-y += user.h
 generic-y += vga.h
+generic-y += word-at-a-time.h
 generic-y += xor.h
diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild
index b7f68192d15b52cb4e6c34c78eac88a02fa971d5..1778805f63809d378a5cafb6c920b517ea753c7b 100644
--- a/arch/cris/include/asm/Kbuild
+++ b/arch/cris/include/asm/Kbuild
@@ -43,4 +43,5 @@ generic-y += topology.h
 generic-y += trace_clock.h
 generic-y += types.h
 generic-y += vga.h
+generic-y += word-at-a-time.h
 generic-y += xor.h
diff --git a/arch/frv/include/asm/Kbuild b/arch/frv/include/asm/Kbuild
index 8e47b832cc7684af7a10d2a0680651cf5bf8cc0d..1fa084cf1a4398934889658b8b21f66154bf0ab6 100644
--- a/arch/frv/include/asm/Kbuild
+++ b/arch/frv/include/asm/Kbuild
@@ -7,3 +7,4 @@ generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
 generic-y += preempt.h
 generic-y += trace_clock.h
+generic-y += word-at-a-time.h
diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild
index daee37bd09991a3edf732451587e59a8db001d54..db8ddabc6bd2819ba579c4c407a569e2232daa51 100644
--- a/arch/hexagon/include/asm/Kbuild
+++ b/arch/hexagon/include/asm/Kbuild
@@ -58,4 +58,5 @@ generic-y += types.h
 generic-y += ucontext.h
 generic-y += unaligned.h
 generic-y += vga.h
+generic-y += word-at-a-time.h
 generic-y += xor.h
diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild
index 9de3ba12f6b97c0f8722e2f929c70fbae429fd34..502a91d8dbbd80df039d9a111de04c1caadf0e3c 100644
--- a/arch/ia64/include/asm/Kbuild
+++ b/arch/ia64/include/asm/Kbuild
@@ -8,3 +8,4 @@ generic-y += mm-arch-hooks.h
 generic-y += preempt.h
 generic-y += trace_clock.h
 generic-y += vtime.h
+generic-y += word-at-a-time.h
diff --git a/arch/m32r/include/asm/Kbuild b/arch/m32r/include/asm/Kbuild
index e0eb704ca1fa93755d678e82c2336c8e188a34f0..fd104bd221ced1dff2c9485bdcb1be520171df7f 100644
--- a/arch/m32r/include/asm/Kbuild
+++ b/arch/m32r/include/asm/Kbuild
@@ -9,3 +9,4 @@ generic-y += module.h
 generic-y += preempt.h
 generic-y += sections.h
 generic-y += trace_clock.h
+generic-y += word-at-a-time.h
diff --git a/arch/metag/include/asm/Kbuild b/arch/metag/include/asm/Kbuild
index df31353fd2001dc0e357feafcf975aa6e5537622..29acb89daaaa55f2b1640c3d751233ed034b43b7 100644
--- a/arch/metag/include/asm/Kbuild
+++ b/arch/metag/include/asm/Kbuild
@@ -54,4 +54,5 @@ generic-y += ucontext.h
 generic-y += unaligned.h
 generic-y += user.h
 generic-y += vga.h
+generic-y += word-at-a-time.h
 generic-y += xor.h
diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild
index 2f222f355c4bbc69842ccd62f3419b0cbd1732a4..b0ae88c9fed922a4ba95be0498f421d3927eb40e 100644
--- a/arch/microblaze/include/asm/Kbuild
+++ b/arch/microblaze/include/asm/Kbuild
@@ -10,3 +10,4 @@ generic-y += mm-arch-hooks.h
 generic-y += preempt.h
 generic-y += syscalls.h
 generic-y += trace_clock.h
+generic-y += word-at-a-time.h
diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild
index 40ec4ca3f946a9238afa6e0edd25c0440f86a36b..c7fe4d01e79c61bbaee58aa25b9b50bf5b9a380e 100644
--- a/arch/mips/include/asm/Kbuild
+++ b/arch/mips/include/asm/Kbuild
@@ -17,4 +17,5 @@ generic-y += segment.h
 generic-y += serial.h
 generic-y += trace_clock.h
 generic-y += user.h
+generic-y += word-at-a-time.h
 generic-y += xor.h
diff --git a/arch/mn10300/include/asm/Kbuild b/arch/mn10300/include/asm/Kbuild
index 6edb9ee6128ebc4d45de622afdf118f0bf5d12ee..1c8dd0f5cd5d1567126f42b67112b5b0f91962a8 100644
--- a/arch/mn10300/include/asm/Kbuild
+++ b/arch/mn10300/include/asm/Kbuild
@@ -9,3 +9,4 @@ generic-y += mm-arch-hooks.h
 generic-y += preempt.h
 generic-y += sections.h
 generic-y += trace_clock.h
+generic-y += word-at-a-time.h
diff --git a/arch/nios2/include/asm/Kbuild b/arch/nios2/include/asm/Kbuild
index 914864eb5a25daf87688f1565b33700b289be0bf..d63330e88379dcc591e29e645b5363f063122f9d 100644
--- a/arch/nios2/include/asm/Kbuild
+++ b/arch/nios2/include/asm/Kbuild
@@ -61,4 +61,5 @@ generic-y += types.h
 generic-y += unaligned.h
 generic-y += user.h
 generic-y += vga.h
+generic-y += word-at-a-time.h
 generic-y += xor.h
diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild
index ab9f4e0ed4cfcfd48a8d232fe20d0482739a22c5..ac1662956e0c4d4dfe8a0a7fff432e9f2a592f2c 100644
--- a/arch/powerpc/include/asm/Kbuild
+++ b/arch/powerpc/include/asm/Kbuild
@@ -7,3 +7,4 @@ generic-y += mcs_spinlock.h
 generic-y += preempt.h
 generic-y += rwsem.h
 generic-y += vtime.h
+generic-y += word-at-a-time.h
diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild
index 5ad26dd94d77e83fedeba5c7f71c8eba0ff2ab29..9043d2e1e2ae0b3c01a7b6588bed848f44dd92ff 100644
--- a/arch/s390/include/asm/Kbuild
+++ b/arch/s390/include/asm/Kbuild
@@ -6,3 +6,4 @@ generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
 generic-y += preempt.h
 generic-y += trace_clock.h
+generic-y += word-at-a-time.h
diff --git a/arch/score/include/asm/Kbuild b/arch/score/include/asm/Kbuild
index 92ffe397b893c553c8504f10525a5e3d9d1e9e34..a05218ff3fe465b6e4812d7655360dc1b495a519 100644
--- a/arch/score/include/asm/Kbuild
+++ b/arch/score/include/asm/Kbuild
@@ -13,3 +13,4 @@ generic-y += sections.h
 generic-y += trace_clock.h
 generic-y += xor.h
 generic-y += serial.h
+generic-y += word-at-a-time.h
diff --git a/arch/tile/gxio/mpipe.c b/arch/tile/gxio/mpipe.c
index ee186e13dfe6fde92c9127aa07dccc474d1253d2..f102048d9c0e78a31b6773715a491e85e755a89c 100644
--- a/arch/tile/gxio/mpipe.c
+++ b/arch/tile/gxio/mpipe.c
@@ -19,6 +19,7 @@
 #include <linux/errno.h>
 #include <linux/io.h>
 #include <linux/module.h>
+#include <linux/string.h>
 
 #include <gxio/iorpc_globals.h>
 #include <gxio/iorpc_mpipe.h>
@@ -29,32 +30,6 @@
 /* HACK: Avoid pointless "shadow" warnings. */
 #define link link_shadow
 
-/**
- * strscpy - Copy a C-string into a sized buffer, but only if it fits
- * @dest: Where to copy the string to
- * @src: Where to copy the string from
- * @size: size of destination buffer
- *
- * Use this routine to avoid copying too-long strings.
- * The routine returns the total number of bytes copied
- * (including the trailing NUL) or zero if the buffer wasn't
- * big enough.  To ensure that programmers pay attention
- * to the return code, the destination has a single NUL
- * written at the front (if size is non-zero) when the
- * buffer is not big enough.
- */
-static size_t strscpy(char *dest, const char *src, size_t size)
-{
-	size_t len = strnlen(src, size) + 1;
-	if (len > size) {
-		if (size)
-			dest[0] = '\0';
-		return 0;
-	}
-	memcpy(dest, src, len);
-	return len;
-}
-
 int gxio_mpipe_init(gxio_mpipe_context_t *context, unsigned int mpipe_index)
 {
 	char file[32];
@@ -540,7 +515,7 @@ int gxio_mpipe_link_instance(const char *link_name)
 	if (!context)
 		return GXIO_ERR_NO_DEVICE;
 
-	if (strscpy(name.name, link_name, sizeof(name.name)) == 0)
+	if (strscpy(name.name, link_name, sizeof(name.name)) < 0)
 		return GXIO_ERR_NO_DEVICE;
 
 	return gxio_mpipe_info_instance_aux(context, name);
@@ -559,7 +534,7 @@ int gxio_mpipe_link_enumerate_mac(int idx, char *link_name, uint8_t *link_mac)
 
 	rv = gxio_mpipe_info_enumerate_aux(context, idx, &name, &mac);
 	if (rv >= 0) {
-		if (strscpy(link_name, name.name, sizeof(name.name)) == 0)
+		if (strscpy(link_name, name.name, sizeof(name.name)) < 0)
 			return GXIO_ERR_INVAL_MEMORY_SIZE;
 		memcpy(link_mac, mac.mac, sizeof(mac.mac));
 	}
@@ -576,7 +551,7 @@ int gxio_mpipe_link_open(gxio_mpipe_link_t *link,
 	_gxio_mpipe_link_name_t name;
 	int rv;
 
-	if (strscpy(name.name, link_name, sizeof(name.name)) == 0)
+	if (strscpy(name.name, link_name, sizeof(name.name)) < 0)
 		return GXIO_ERR_NO_DEVICE;
 
 	rv = gxio_mpipe_link_open_aux(context, name, flags);
diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild
index ba35c41c71fff33b2b2fe95f566ad8b3dc192c32..0b6cacaad9333a4165bfd9447a180feadedb4db4 100644
--- a/arch/tile/include/asm/Kbuild
+++ b/arch/tile/include/asm/Kbuild
@@ -40,4 +40,5 @@ generic-y += termbits.h
 generic-y += termios.h
 generic-y += trace_clock.h
 generic-y += types.h
+generic-y += word-at-a-time.h
 generic-y += xor.h
diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild
index 149ec55f9c46abd97cbb9b69c7a55afa23e23393..904f3ebf4220153f816a1deca118381190f44ec4 100644
--- a/arch/um/include/asm/Kbuild
+++ b/arch/um/include/asm/Kbuild
@@ -25,4 +25,5 @@ generic-y += preempt.h
 generic-y += switch_to.h
 generic-y += topology.h
 generic-y += trace_clock.h
+generic-y += word-at-a-time.h
 generic-y += xor.h
diff --git a/arch/unicore32/include/asm/Kbuild b/arch/unicore32/include/asm/Kbuild
index 1fc7a286dc6f342319ec06a81b53a087b9708ef9..256c45b3ae343c983e667b01404d8fb3e3667b4a 100644
--- a/arch/unicore32/include/asm/Kbuild
+++ b/arch/unicore32/include/asm/Kbuild
@@ -62,4 +62,5 @@ generic-y += ucontext.h
 generic-y += unaligned.h
 generic-y += user.h
 generic-y += vga.h
+generic-y += word-at-a-time.h
 generic-y += xor.h
diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild
index 63c223dff5f1eebed92297d2cd641535a3aeceb9..b56855a1382a374f8c52632b9aa243a1112a1a06 100644
--- a/arch/xtensa/include/asm/Kbuild
+++ b/arch/xtensa/include/asm/Kbuild
@@ -28,4 +28,5 @@ generic-y += statfs.h
 generic-y += termios.h
 generic-y += topology.h
 generic-y += trace_clock.h
+generic-y += word-at-a-time.h
 generic-y += xor.h
diff --git a/include/asm-generic/word-at-a-time.h b/include/asm-generic/word-at-a-time.h
index 94f9ea8abcae35af8ca36560403fbd25facb7c65..011dde083f231e763e4c96fcc7fb3cb9b6ce23c7 100644
--- a/include/asm-generic/word-at-a-time.h
+++ b/include/asm-generic/word-at-a-time.h
@@ -1,15 +1,10 @@
 #ifndef _ASM_WORD_AT_A_TIME_H
 #define _ASM_WORD_AT_A_TIME_H
 
-/*
- * This says "generic", but it's actually big-endian only.
- * Little-endian can use more efficient versions of these
- * interfaces, see for example
- *	 arch/x86/include/asm/word-at-a-time.h
- * for those.
- */
-
 #include <linux/kernel.h>
+#include <asm/byteorder.h>
+
+#ifdef __BIG_ENDIAN
 
 struct word_at_a_time {
 	const unsigned long high_bits, low_bits;
@@ -53,4 +48,73 @@ static inline bool has_zero(unsigned long val, unsigned long *data, const struct
 #define zero_bytemask(mask) (~1ul << __fls(mask))
 #endif
 
+#else
+
+/*
+ * The optimal byte mask counting is probably going to be something
+ * that is architecture-specific. If you have a reliably fast
+ * bit count instruction, that might be better than the multiply
+ * and shift, for example.
+ */
+struct word_at_a_time {
+	const unsigned long one_bits, high_bits;
+};
+
+#define WORD_AT_A_TIME_CONSTANTS { REPEAT_BYTE(0x01), REPEAT_BYTE(0x80) }
+
+#ifdef CONFIG_64BIT
+
+/*
+ * Jan Achrenius on G+: microoptimized version of
+ * the simpler "(mask & ONEBYTES) * ONEBYTES >> 56"
+ * that works for the bytemasks without having to
+ * mask them first.
+ */
+static inline long count_masked_bytes(unsigned long mask)
+{
+	return mask*0x0001020304050608ul >> 56;
+}
+
+#else	/* 32-bit case */
+
+/* Carl Chatfield / Jan Achrenius G+ version for 32-bit */
+static inline long count_masked_bytes(long mask)
+{
+	/* (000000 0000ff 00ffff ffffff) -> ( 1 1 2 3 ) */
+	long a = (0x0ff0001+mask) >> 23;
+	/* Fix the 1 for 00 case */
+	return a & mask;
+}
+
+#endif
+
+/* Return nonzero if it has a zero */
+static inline unsigned long has_zero(unsigned long a, unsigned long *bits, const struct word_at_a_time *c)
+{
+	unsigned long mask = ((a - c->one_bits) & ~a) & c->high_bits;
+	*bits = mask;
+	return mask;
+}
+
+static inline unsigned long prep_zero_mask(unsigned long a, unsigned long bits, const struct word_at_a_time *c)
+{
+	return bits;
+}
+
+static inline unsigned long create_zero_mask(unsigned long bits)
+{
+	bits = (bits - 1) & ~bits;
+	return bits >> 7;
+}
+
+/* The mask we created is directly usable as a bytemask */
+#define zero_bytemask(mask) (mask)
+
+static inline unsigned long find_zero(unsigned long mask)
+{
+	return count_masked_bytes(mask);
+}
+
+#endif /* __BIG_ENDIAN */
+
 #endif /* _ASM_WORD_AT_A_TIME_H */
diff --git a/include/linux/string.h b/include/linux/string.h
index a8d90db9c4b058626b2d5ddcc84f3fa5d16561aa..9ef7795e65e40c5dfbee53726909fbcb2ce341b0 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -25,6 +25,9 @@ extern char * strncpy(char *,const char *, __kernel_size_t);
 #ifndef __HAVE_ARCH_STRLCPY
 size_t strlcpy(char *, const char *, size_t);
 #endif
+#ifndef __HAVE_ARCH_STRSCPY
+ssize_t __must_check strscpy(char *, const char *, size_t);
+#endif
 #ifndef __HAVE_ARCH_STRCAT
 extern char * strcat(char *, const char *);
 #endif
diff --git a/lib/string.c b/lib/string.c
index 13d1e84ddb80e983a325011fb8dd8b6ff9ce0600..8dbb7b1eab508712da538d0ef7cadc0e24b6c723 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -27,6 +27,10 @@
 #include <linux/bug.h>
 #include <linux/errno.h>
 
+#include <asm/byteorder.h>
+#include <asm/word-at-a-time.h>
+#include <asm/page.h>
+
 #ifndef __HAVE_ARCH_STRNCASECMP
 /**
  * strncasecmp - Case insensitive, length-limited string comparison
@@ -146,6 +150,90 @@ size_t strlcpy(char *dest, const char *src, size_t size)
 EXPORT_SYMBOL(strlcpy);
 #endif
 
+#ifndef __HAVE_ARCH_STRSCPY
+/**
+ * strscpy - Copy a C-string into a sized buffer
+ * @dest: Where to copy the string to
+ * @src: Where to copy the string from
+ * @count: Size of destination buffer
+ *
+ * Copy the string, or as much of it as fits, into the dest buffer.
+ * The routine returns the number of characters copied (not including
+ * the trailing NUL) or -E2BIG if the destination buffer wasn't big enough.
+ * The behavior is undefined if the string buffers overlap.
+ * The destination buffer is always NUL terminated, unless it's zero-sized.
+ *
+ * Preferred to strlcpy() since the API doesn't require reading memory
+ * from the src string beyond the specified "count" bytes, and since
+ * the return value is easier to error-check than strlcpy()'s.
+ * In addition, the implementation is robust to the string changing out
+ * from underneath it, unlike the current strlcpy() implementation.
+ *
+ * Preferred to strncpy() since it always returns a valid string, and
+ * doesn't unnecessarily force the tail of the destination buffer to be
+ * zeroed.  If the zeroing is desired, it's likely cleaner to use strscpy()
+ * with an overflow test, then just memset() the tail of the dest buffer.
+ */
+ssize_t strscpy(char *dest, const char *src, size_t count)
+{
+	const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS;
+	size_t max = count;
+	long res = 0;
+
+	if (count == 0)
+		return -E2BIG;
+
+#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+	/*
+	 * If src is unaligned, don't cross a page boundary,
+	 * since we don't know if the next page is mapped.
+	 */
+	if ((long)src & (sizeof(long) - 1)) {
+		size_t limit = PAGE_SIZE - ((long)src & (PAGE_SIZE - 1));
+		if (limit < max)
+			max = limit;
+	}
+#else
+	/* If src or dest is unaligned, don't do word-at-a-time. */
+	if (((long) dest | (long) src) & (sizeof(long) - 1))
+		max = 0;
+#endif
+
+	while (max >= sizeof(unsigned long)) {
+		unsigned long c, data;
+
+		c = *(unsigned long *)(src+res);
+		*(unsigned long *)(dest+res) = c;
+		if (has_zero(c, &data, &constants)) {
+			data = prep_zero_mask(c, data, &constants);
+			data = create_zero_mask(data);
+			return res + find_zero(data);
+		}
+		res += sizeof(unsigned long);
+		count -= sizeof(unsigned long);
+		max -= sizeof(unsigned long);
+	}
+
+	while (count) {
+		char c;
+
+		c = src[res];
+		dest[res] = c;
+		if (!c)
+			return res;
+		res++;
+		count--;
+	}
+
+	/* Hit buffer length without finding a NUL; force NUL-termination. */
+	if (res)
+		dest[res-1] = '\0';
+
+	return -E2BIG;
+}
+EXPORT_SYMBOL(strscpy);
+#endif
+
 #ifndef __HAVE_ARCH_STRCAT
 /**
  * strcat - Append one %NUL-terminated string to another