diff --git a/MAINTAINERS b/MAINTAINERS
index 2f595407770843ac836fbf28f7ea264d11c81d01..11b47aefcb78ae54946c7c87a2bf28d37c36b6f3 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6186,6 +6186,7 @@ F:	Documentation/staging/crc*
 F:	arch/*/lib/crc*
 F:	include/linux/crc*
 F:	lib/crc*
+F:	scripts/gen-crc-consts.py
 
 CREATIVE SB0540
 M:	Bastien Nocera <hadess@hadess.net>
diff --git a/arch/arm/configs/moxart_defconfig b/arch/arm/configs/moxart_defconfig
index 34d079e03b3c593039af8109a7fafe9c66ff3a8f..fa06d98e43fcddaf33b023c6a9a490e81474be1c 100644
--- a/arch/arm/configs/moxart_defconfig
+++ b/arch/arm/configs/moxart_defconfig
@@ -118,7 +118,6 @@ CONFIG_TMPFS=y
 CONFIG_CONFIGFS_FS=y
 CONFIG_JFFS2_FS=y
 CONFIG_KEYS=y
-CONFIG_CRC32_BIT=y
 CONFIG_DMA_API_DEBUG=y
 CONFIG_PRINTK_TIME=y
 CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
diff --git a/arch/arm/lib/crc-t10dif-glue.c b/arch/arm/lib/crc-t10dif-glue.c
index d24dee62670ec57b67966edad3c52999117efafa..f3584ba70e57bf8ba71e2891c6e5135842de2e95 100644
--- a/arch/arm/lib/crc-t10dif-glue.c
+++ b/arch/arm/lib/crc-t10dif-glue.c
@@ -69,12 +69,6 @@ static void __exit crc_t10dif_arm_exit(void)
 }
 module_exit(crc_t10dif_arm_exit);
 
-bool crc_t10dif_is_optimized(void)
-{
-	return static_key_enabled(&have_neon);
-}
-EXPORT_SYMBOL(crc_t10dif_is_optimized);
-
 MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
 MODULE_DESCRIPTION("Accelerated CRC-T10DIF using ARM NEON and Crypto Extensions");
 MODULE_LICENSE("GPL v2");
diff --git a/arch/arm/lib/crc32-glue.c b/arch/arm/lib/crc32-glue.c
index 2c30ba3d80e6a1d22c9a887dd956b9fd5daf0166..4340351dbde8cdfeecf751478257d862473b4907 100644
--- a/arch/arm/lib/crc32-glue.c
+++ b/arch/arm/lib/crc32-glue.c
@@ -59,14 +59,14 @@ u32 crc32_le_arch(u32 crc, const u8 *p, size_t len)
 }
 EXPORT_SYMBOL(crc32_le_arch);
 
-static u32 crc32c_le_scalar(u32 crc, const u8 *p, size_t len)
+static u32 crc32c_scalar(u32 crc, const u8 *p, size_t len)
 {
 	if (static_branch_likely(&have_crc32))
 		return crc32c_armv8_le(crc, p, len);
-	return crc32c_le_base(crc, p, len);
+	return crc32c_base(crc, p, len);
 }
 
-u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len)
+u32 crc32c_arch(u32 crc, const u8 *p, size_t len)
 {
 	if (len >= PMULL_MIN_LEN + 15 &&
 	    static_branch_likely(&have_pmull) && crypto_simd_usable()) {
@@ -74,7 +74,7 @@ u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len)
 
 		/* align p to 16-byte boundary */
 		if (n) {
-			crc = crc32c_le_scalar(crc, p, n);
+			crc = crc32c_scalar(crc, p, n);
 			p += n;
 			len -= n;
 		}
@@ -85,9 +85,9 @@ u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len)
 		p += n;
 		len -= n;
 	}
-	return crc32c_le_scalar(crc, p, len);
+	return crc32c_scalar(crc, p, len);
 }
-EXPORT_SYMBOL(crc32c_le_arch);
+EXPORT_SYMBOL(crc32c_arch);
 
 u32 crc32_be_arch(u32 crc, const u8 *p, size_t len)
 {
diff --git a/arch/arm64/lib/crc-t10dif-glue.c b/arch/arm64/lib/crc-t10dif-glue.c
index dab7e379623298f9d82f8a0a1a36c52b7c01c91b..a007d0c5f3fed6e93c310162a1b4d4bd0be73d27 100644
--- a/arch/arm64/lib/crc-t10dif-glue.c
+++ b/arch/arm64/lib/crc-t10dif-glue.c
@@ -70,12 +70,6 @@ static void __exit crc_t10dif_arm64_exit(void)
 }
 module_exit(crc_t10dif_arm64_exit);
 
-bool crc_t10dif_is_optimized(void)
-{
-	return static_key_enabled(&have_asimd);
-}
-EXPORT_SYMBOL(crc_t10dif_is_optimized);
-
 MODULE_AUTHOR("Ard Biesheuvel <ard.biesheuvel@linaro.org>");
 MODULE_DESCRIPTION("CRC-T10DIF using arm64 NEON and Crypto Extensions");
 MODULE_LICENSE("GPL v2");
diff --git a/arch/arm64/lib/crc32-glue.c b/arch/arm64/lib/crc32-glue.c
index 15c4c9db573ec1545350ee8f908c956fefaa669f..ed3acd71178f8d8d5b02e69dd0d32627e8d521f4 100644
--- a/arch/arm64/lib/crc32-glue.c
+++ b/arch/arm64/lib/crc32-glue.c
@@ -22,7 +22,7 @@ asmlinkage u32 crc32_le_arm64_4way(u32 crc, unsigned char const *p, size_t len);
 asmlinkage u32 crc32c_le_arm64_4way(u32 crc, unsigned char const *p, size_t len);
 asmlinkage u32 crc32_be_arm64_4way(u32 crc, unsigned char const *p, size_t len);
 
-u32 __pure crc32_le_arch(u32 crc, const u8 *p, size_t len)
+u32 crc32_le_arch(u32 crc, const u8 *p, size_t len)
 {
 	if (!alternative_has_cap_likely(ARM64_HAS_CRC32))
 		return crc32_le_base(crc, p, len);
@@ -43,10 +43,10 @@ u32 __pure crc32_le_arch(u32 crc, const u8 *p, size_t len)
 }
 EXPORT_SYMBOL(crc32_le_arch);
 
-u32 __pure crc32c_le_arch(u32 crc, const u8 *p, size_t len)
+u32 crc32c_arch(u32 crc, const u8 *p, size_t len)
 {
 	if (!alternative_has_cap_likely(ARM64_HAS_CRC32))
-		return crc32c_le_base(crc, p, len);
+		return crc32c_base(crc, p, len);
 
 	if (len >= min_len && cpu_have_named_feature(PMULL) && crypto_simd_usable()) {
 		kernel_neon_begin();
@@ -62,9 +62,9 @@ u32 __pure crc32c_le_arch(u32 crc, const u8 *p, size_t len)
 
 	return crc32c_le_arm64(crc, p, len);
 }
-EXPORT_SYMBOL(crc32c_le_arch);
+EXPORT_SYMBOL(crc32c_arch);
 
-u32 __pure crc32_be_arch(u32 crc, const u8 *p, size_t len)
+u32 crc32_be_arch(u32 crc, const u8 *p, size_t len)
 {
 	if (!alternative_has_cap_likely(ARM64_HAS_CRC32))
 		return crc32_be_base(crc, p, len);
diff --git a/arch/loongarch/lib/crc32-loongarch.c b/arch/loongarch/lib/crc32-loongarch.c
index 8af8113ecd9d31218094d3b101058e62a8d50129..c44ee4f32557819cc788f07cf33094eb50eebae2 100644
--- a/arch/loongarch/lib/crc32-loongarch.c
+++ b/arch/loongarch/lib/crc32-loongarch.c
@@ -65,10 +65,10 @@ u32 crc32_le_arch(u32 crc, const u8 *p, size_t len)
 }
 EXPORT_SYMBOL(crc32_le_arch);
 
-u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len)
+u32 crc32c_arch(u32 crc, const u8 *p, size_t len)
 {
 	if (!static_branch_likely(&have_crc32))
-		return crc32c_le_base(crc, p, len);
+		return crc32c_base(crc, p, len);
 
 	while (len >= sizeof(u64)) {
 		u64 value = get_unaligned_le64(p);
@@ -100,7 +100,7 @@ u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len)
 
 	return crc;
 }
-EXPORT_SYMBOL(crc32c_le_arch);
+EXPORT_SYMBOL(crc32c_arch);
 
 u32 crc32_be_arch(u32 crc, const u8 *p, size_t len)
 {
diff --git a/arch/mips/configs/bcm47xx_defconfig b/arch/mips/configs/bcm47xx_defconfig
index 6a68a96d13f809d8509cbd0bc3dc8844614f0d84..f56e8db5da9512661b45e6834800e36cecd51269 100644
--- a/arch/mips/configs/bcm47xx_defconfig
+++ b/arch/mips/configs/bcm47xx_defconfig
@@ -69,7 +69,6 @@ CONFIG_USB_HCD_BCMA=y
 CONFIG_USB_HCD_SSB=y
 CONFIG_LEDS_TRIGGER_TIMER=y
 CONFIG_LEDS_TRIGGER_DEFAULT_ON=y
-CONFIG_CRC32_SARWATE=y
 CONFIG_PRINTK_TIME=y
 CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
 CONFIG_DEBUG_INFO_REDUCED=y
diff --git a/arch/mips/configs/db1xxx_defconfig b/arch/mips/configs/db1xxx_defconfig
index 6eff21ff15d54194bca2440c833a72b241da03f6..281dd7d0f8059b98fd77af734c1723b277e245b5 100644
--- a/arch/mips/configs/db1xxx_defconfig
+++ b/arch/mips/configs/db1xxx_defconfig
@@ -216,7 +216,6 @@ CONFIG_CRYPTO_USER=y
 CONFIG_CRYPTO_CRYPTD=y
 CONFIG_CRYPTO_USER_API_HASH=y
 CONFIG_CRYPTO_USER_API_SKCIPHER=y
-CONFIG_CRC32_SLICEBY4=y
 CONFIG_FONTS=y
 CONFIG_FONT_8x8=y
 CONFIG_MAGIC_SYSRQ=y
diff --git a/arch/mips/configs/decstation_64_defconfig b/arch/mips/configs/decstation_64_defconfig
index da51b9731db0e80408538381aeac14ae35bb2744..9655567614aa28d80e553ff15404747dccf62f44 100644
--- a/arch/mips/configs/decstation_64_defconfig
+++ b/arch/mips/configs/decstation_64_defconfig
@@ -180,7 +180,6 @@ CONFIG_CRYPTO_XTS=m
 CONFIG_CRYPTO_CMAC=m
 CONFIG_CRYPTO_XCBC=m
 CONFIG_CRYPTO_CRC32=m
-CONFIG_CRYPTO_CRCT10DIF=m
 CONFIG_CRYPTO_MD4=m
 CONFIG_CRYPTO_MICHAEL_MIC=m
 CONFIG_CRYPTO_RMD160=m
diff --git a/arch/mips/configs/decstation_defconfig b/arch/mips/configs/decstation_defconfig
index 424e3f011fc2a615bcecd64cf239a05eef91924a..1539fe8eb34d0cae1c71281cf3ea8b54804fe2f8 100644
--- a/arch/mips/configs/decstation_defconfig
+++ b/arch/mips/configs/decstation_defconfig
@@ -175,7 +175,6 @@ CONFIG_CRYPTO_XTS=m
 CONFIG_CRYPTO_CMAC=m
 CONFIG_CRYPTO_XCBC=m
 CONFIG_CRYPTO_CRC32=m
-CONFIG_CRYPTO_CRCT10DIF=m
 CONFIG_CRYPTO_MD4=m
 CONFIG_CRYPTO_MICHAEL_MIC=m
 CONFIG_CRYPTO_RMD160=m
diff --git a/arch/mips/configs/decstation_r4k_defconfig b/arch/mips/configs/decstation_r4k_defconfig
index cfc8bf7917924a8cf55ac0152924095d8dc2e0c6..58c36720c94a4aef8b368535f3ada39b9a342379 100644
--- a/arch/mips/configs/decstation_r4k_defconfig
+++ b/arch/mips/configs/decstation_r4k_defconfig
@@ -175,7 +175,6 @@ CONFIG_CRYPTO_XTS=m
 CONFIG_CRYPTO_CMAC=m
 CONFIG_CRYPTO_XCBC=m
 CONFIG_CRYPTO_CRC32=m
-CONFIG_CRYPTO_CRCT10DIF=m
 CONFIG_CRYPTO_MD4=m
 CONFIG_CRYPTO_MICHAEL_MIC=m
 CONFIG_CRYPTO_RMD160=m
diff --git a/arch/mips/configs/rt305x_defconfig b/arch/mips/configs/rt305x_defconfig
index 332f9094e8479bacdaf971b93cf7af9f39c0b3f5..8404e0a9d8b22fd4d2e1d4ed7ac445c42f09bbf2 100644
--- a/arch/mips/configs/rt305x_defconfig
+++ b/arch/mips/configs/rt305x_defconfig
@@ -129,7 +129,6 @@ CONFIG_SQUASHFS=y
 CONFIG_SQUASHFS_XZ=y
 CONFIG_CRYPTO_ARC4=m
 CONFIG_CRC_ITU_T=m
-CONFIG_CRC32_SARWATE=y
 # CONFIG_XZ_DEC_X86 is not set
 # CONFIG_XZ_DEC_POWERPC is not set
 # CONFIG_XZ_DEC_IA64 is not set
diff --git a/arch/mips/configs/xway_defconfig b/arch/mips/configs/xway_defconfig
index 08c0aa03fd5647778f6f57738413a1e6656874e4..7b91edfe3e075384e05539a114fba0761dbd8216 100644
--- a/arch/mips/configs/xway_defconfig
+++ b/arch/mips/configs/xway_defconfig
@@ -141,7 +141,6 @@ CONFIG_SQUASHFS=y
 CONFIG_SQUASHFS_XZ=y
 CONFIG_CRYPTO_ARC4=m
 CONFIG_CRC_ITU_T=m
-CONFIG_CRC32_SARWATE=y
 CONFIG_PRINTK_TIME=y
 CONFIG_STRIP_ASM_SYMS=y
 CONFIG_DEBUG_FS=y
diff --git a/arch/mips/lib/crc32-mips.c b/arch/mips/lib/crc32-mips.c
index 083e5d693a1698ffe01a55066be4969cf5e11e7b..676a4b3e290b926c110f760df639a5c870217407 100644
--- a/arch/mips/lib/crc32-mips.c
+++ b/arch/mips/lib/crc32-mips.c
@@ -16,15 +16,6 @@
 #include <asm/mipsregs.h>
 #include <linux/unaligned.h>
 
-enum crc_op_size {
-	b, h, w, d,
-};
-
-enum crc_type {
-	crc32,
-	crc32c,
-};
-
 #ifndef TOOLCHAIN_SUPPORTS_CRC
 #define _ASM_SET_CRC(OP, SZ, TYPE)					  \
 _ASM_MACRO_3R(OP, rt, rs, rt2,						  \
@@ -117,10 +108,10 @@ u32 crc32_le_arch(u32 crc, const u8 *p, size_t len)
 }
 EXPORT_SYMBOL(crc32_le_arch);
 
-u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len)
+u32 crc32c_arch(u32 crc, const u8 *p, size_t len)
 {
 	if (!static_branch_likely(&have_crc32))
-		return crc32c_le_base(crc, p, len);
+		return crc32c_base(crc, p, len);
 
 	if (IS_ENABLED(CONFIG_64BIT)) {
 		for (; len >= sizeof(u64); p += sizeof(u64), len -= sizeof(u64)) {
@@ -158,7 +149,7 @@ u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len)
 	}
 	return crc;
 }
-EXPORT_SYMBOL(crc32c_le_arch);
+EXPORT_SYMBOL(crc32c_arch);
 
 u32 crc32_be_arch(u32 crc, const u8 *p, size_t len)
 {
diff --git a/arch/powerpc/configs/adder875_defconfig b/arch/powerpc/configs/adder875_defconfig
index 97f4d485173567bc5c03e899ce3feb6e9a0e9e70..3c6445c98a85ffc7494566e7ae50e5d0663cd03d 100644
--- a/arch/powerpc/configs/adder875_defconfig
+++ b/arch/powerpc/configs/adder875_defconfig
@@ -44,7 +44,6 @@ CONFIG_TMPFS=y
 CONFIG_CRAMFS=y
 CONFIG_NFS_FS=y
 CONFIG_ROOT_NFS=y
-CONFIG_CRC32_SLICEBY4=y
 CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
 CONFIG_DEBUG_FS=y
 CONFIG_MAGIC_SYSRQ=y
diff --git a/arch/powerpc/configs/ep88xc_defconfig b/arch/powerpc/configs/ep88xc_defconfig
index 50cc59eb36cf11443fe20232b9d537b760916bda..354180ab94bcc98f313f63d4b8aa806e25783baa 100644
--- a/arch/powerpc/configs/ep88xc_defconfig
+++ b/arch/powerpc/configs/ep88xc_defconfig
@@ -47,7 +47,6 @@ CONFIG_TMPFS=y
 CONFIG_CRAMFS=y
 CONFIG_NFS_FS=y
 CONFIG_ROOT_NFS=y
-CONFIG_CRC32_SLICEBY4=y
 CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_DETECT_HUNG_TASK=y
diff --git a/arch/powerpc/configs/mpc866_ads_defconfig b/arch/powerpc/configs/mpc866_ads_defconfig
index 6f449411abf7be116dc912aa4fd0f052532da9f5..a0d27c59ea788bd0e6ed711028bb2c13c1e59324 100644
--- a/arch/powerpc/configs/mpc866_ads_defconfig
+++ b/arch/powerpc/configs/mpc866_ads_defconfig
@@ -39,4 +39,3 @@ CONFIG_CRAMFS=y
 CONFIG_NFS_FS=y
 CONFIG_ROOT_NFS=y
 CONFIG_CRC_CCITT=y
-CONFIG_CRC32_SLICEBY4=y
diff --git a/arch/powerpc/configs/mpc885_ads_defconfig b/arch/powerpc/configs/mpc885_ads_defconfig
index 129355f87f80fced631c98ffc11468f1b1f41d62..9bc2758a6a9a5092f00fe285aa00f3a6eac7b991 100644
--- a/arch/powerpc/configs/mpc885_ads_defconfig
+++ b/arch/powerpc/configs/mpc885_ads_defconfig
@@ -70,7 +70,6 @@ CONFIG_NFS_FS=y
 CONFIG_ROOT_NFS=y
 CONFIG_CRYPTO=y
 CONFIG_CRYPTO_DEV_TALITOS=y
-CONFIG_CRC32_SLICEBY4=y
 CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_DEBUG_FS=y
diff --git a/arch/powerpc/configs/tqm8xx_defconfig b/arch/powerpc/configs/tqm8xx_defconfig
index 383c0966e92fd5e44eb0036a34fb2cf66d45e46c..425f10837a1850c6c86c88b6dab6ff6f30e48505 100644
--- a/arch/powerpc/configs/tqm8xx_defconfig
+++ b/arch/powerpc/configs/tqm8xx_defconfig
@@ -54,7 +54,6 @@ CONFIG_TMPFS=y
 CONFIG_CRAMFS=y
 CONFIG_NFS_FS=y
 CONFIG_ROOT_NFS=y
-CONFIG_CRC32_SLICEBY4=y
 CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_DETECT_HUNG_TASK=y
diff --git a/arch/powerpc/lib/crc-t10dif-glue.c b/arch/powerpc/lib/crc-t10dif-glue.c
index 730850dbc51d7aaa094a1ff4c4281da5933d81b5..f411b0120cc55a5016783a3b962a275968ba8130 100644
--- a/arch/powerpc/lib/crc-t10dif-glue.c
+++ b/arch/powerpc/lib/crc-t10dif-glue.c
@@ -78,12 +78,6 @@ static void __exit crc_t10dif_powerpc_exit(void)
 }
 module_exit(crc_t10dif_powerpc_exit);
 
-bool crc_t10dif_is_optimized(void)
-{
-	return static_key_enabled(&have_vec_crypto);
-}
-EXPORT_SYMBOL(crc_t10dif_is_optimized);
-
 MODULE_AUTHOR("Daniel Axtens <dja@axtens.net>");
 MODULE_DESCRIPTION("CRCT10DIF using vector polynomial multiply-sum instructions");
 MODULE_LICENSE("GPL");
diff --git a/arch/powerpc/lib/crc32-glue.c b/arch/powerpc/lib/crc32-glue.c
index 79cc954f499f1a4d14a2c687ea71ac6e798fc9f5..dbd10f339183dea8b9df84da45eb1ddf7da65bb4 100644
--- a/arch/powerpc/lib/crc32-glue.c
+++ b/arch/powerpc/lib/crc32-glue.c
@@ -23,18 +23,18 @@ u32 crc32_le_arch(u32 crc, const u8 *p, size_t len)
 }
 EXPORT_SYMBOL(crc32_le_arch);
 
-u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len)
+u32 crc32c_arch(u32 crc, const u8 *p, size_t len)
 {
 	unsigned int prealign;
 	unsigned int tail;
 
 	if (len < (VECTOR_BREAKPOINT + VMX_ALIGN) ||
 	    !static_branch_likely(&have_vec_crypto) || !crypto_simd_usable())
-		return crc32c_le_base(crc, p, len);
+		return crc32c_base(crc, p, len);
 
 	if ((unsigned long)p & VMX_ALIGN_MASK) {
 		prealign = VMX_ALIGN - ((unsigned long)p & VMX_ALIGN_MASK);
-		crc = crc32c_le_base(crc, p, prealign);
+		crc = crc32c_base(crc, p, prealign);
 		len -= prealign;
 		p += prealign;
 	}
@@ -52,12 +52,12 @@ u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len)
 	tail = len & VMX_ALIGN_MASK;
 	if (tail) {
 		p += len & ~VMX_ALIGN_MASK;
-		crc = crc32c_le_base(crc, p, tail);
+		crc = crc32c_base(crc, p, tail);
 	}
 
 	return crc;
 }
-EXPORT_SYMBOL(crc32c_le_arch);
+EXPORT_SYMBOL(crc32c_arch);
 
 u32 crc32_be_arch(u32 crc, const u8 *p, size_t len)
 {
diff --git a/arch/riscv/lib/crc32-riscv.c b/arch/riscv/lib/crc32-riscv.c
index 53d56ab422c722254860850f5cd2226668f50f2e..b5cb752847c40d5a5c8c66e1a0a95e145bcb4dfb 100644
--- a/arch/riscv/lib/crc32-riscv.c
+++ b/arch/riscv/lib/crc32-riscv.c
@@ -175,10 +175,9 @@ static inline u32 crc32_le_unaligned(u32 crc, unsigned char const *p,
 	return crc;
 }
 
-static inline u32 __pure crc32_le_generic(u32 crc, unsigned char const *p,
-					  size_t len, u32 poly,
-					  unsigned long poly_qt,
-					  fallback crc_fb)
+static inline u32 crc32_le_generic(u32 crc, unsigned char const *p, size_t len,
+				   u32 poly, unsigned long poly_qt,
+				   fallback crc_fb)
 {
 	size_t offset, head_len, tail_len;
 	unsigned long const *p_ul;
@@ -218,19 +217,19 @@ static inline u32 __pure crc32_le_generic(u32 crc, unsigned char const *p,
 	return crc_fb(crc, p, len);
 }
 
-u32 __pure crc32_le_arch(u32 crc, const u8 *p, size_t len)
+u32 crc32_le_arch(u32 crc, const u8 *p, size_t len)
 {
 	return crc32_le_generic(crc, p, len, CRC32_POLY_LE, CRC32_POLY_QT_LE,
 				crc32_le_base);
 }
 EXPORT_SYMBOL(crc32_le_arch);
 
-u32 __pure crc32c_le_arch(u32 crc, const u8 *p, size_t len)
+u32 crc32c_arch(u32 crc, const u8 *p, size_t len)
 {
 	return crc32_le_generic(crc, p, len, CRC32C_POLY_LE,
-				CRC32C_POLY_QT_LE, crc32c_le_base);
+				CRC32C_POLY_QT_LE, crc32c_base);
 }
-EXPORT_SYMBOL(crc32c_le_arch);
+EXPORT_SYMBOL(crc32c_arch);
 
 static inline u32 crc32_be_unaligned(u32 crc, unsigned char const *p,
 				     size_t len)
@@ -256,7 +255,7 @@ static inline u32 crc32_be_unaligned(u32 crc, unsigned char const *p,
 	return crc;
 }
 
-u32 __pure crc32_be_arch(u32 crc, const u8 *p, size_t len)
+u32 crc32_be_arch(u32 crc, const u8 *p, size_t len)
 {
 	size_t offset, head_len, tail_len;
 	unsigned long const *p_ul;
diff --git a/arch/s390/lib/crc32-glue.c b/arch/s390/lib/crc32-glue.c
index 137080e61f90172a48681afbad1e7d8c2fbe9f53..124214a273401037afbc9daa4e06d96ac5ab48e0 100644
--- a/arch/s390/lib/crc32-glue.c
+++ b/arch/s390/lib/crc32-glue.c
@@ -62,7 +62,7 @@ static DEFINE_STATIC_KEY_FALSE(have_vxrs);
 
 DEFINE_CRC32_VX(crc32_le_arch, crc32_le_vgfm_16, crc32_le_base)
 DEFINE_CRC32_VX(crc32_be_arch, crc32_be_vgfm_16, crc32_be_base)
-DEFINE_CRC32_VX(crc32c_le_arch, crc32c_le_vgfm_16, crc32c_le_base)
+DEFINE_CRC32_VX(crc32c_arch, crc32c_le_vgfm_16, crc32c_base)
 
 static int __init crc32_s390_init(void)
 {
diff --git a/arch/sparc/lib/crc32_glue.c b/arch/sparc/lib/crc32_glue.c
index 41076d2b1fd2d3b3df0053be1dbe4bfd0b260f83..a70752c729cf6983315c44682acf51fb5e9efcbe 100644
--- a/arch/sparc/lib/crc32_glue.c
+++ b/arch/sparc/lib/crc32_glue.c
@@ -27,17 +27,17 @@ EXPORT_SYMBOL(crc32_le_arch);
 
 void crc32c_sparc64(u32 *crcp, const u64 *data, size_t len);
 
-u32 crc32c_le_arch(u32 crc, const u8 *data, size_t len)
+u32 crc32c_arch(u32 crc, const u8 *data, size_t len)
 {
 	size_t n = -(uintptr_t)data & 7;
 
 	if (!static_branch_likely(&have_crc32c_opcode))
-		return crc32c_le_base(crc, data, len);
+		return crc32c_base(crc, data, len);
 
 	if (n) {
 		/* Data isn't 8-byte aligned.  Align it. */
 		n = min(n, len);
-		crc = crc32c_le_base(crc, data, n);
+		crc = crc32c_base(crc, data, n);
 		data += n;
 		len -= n;
 	}
@@ -48,10 +48,10 @@ u32 crc32c_le_arch(u32 crc, const u8 *data, size_t len)
 		len -= n;
 	}
 	if (len)
-		crc = crc32c_le_base(crc, data, len);
+		crc = crc32c_base(crc, data, len);
 	return crc;
 }
-EXPORT_SYMBOL(crc32c_le_arch);
+EXPORT_SYMBOL(crc32c_arch);
 
 u32 crc32_be_arch(u32 crc, const u8 *data, size_t len)
 {
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 01a3947a2787f86c35f77c49366044a647b05b01..847a0c499db23c600fd9558a55f7603c13d66bfc 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -78,7 +78,8 @@ config X86
 	select ARCH_HAS_CPU_FINALIZE_INIT
 	select ARCH_HAS_CPU_PASID		if IOMMU_SVA
 	select ARCH_HAS_CRC32
-	select ARCH_HAS_CRC_T10DIF		if X86_64
+	select ARCH_HAS_CRC64			if X86_64
+	select ARCH_HAS_CRC_T10DIF
 	select ARCH_HAS_CURRENT_STACK_POINTER
 	select ARCH_HAS_DEBUG_VIRTUAL
 	select ARCH_HAS_DEBUG_VM_PGTABLE	if !X86_PAE
diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c
index 82225fe50ec12d7b6d00a48a0f70484f2f915424..b59480ea97dc95621b271b3c2654c0bcb457fa68 100644
--- a/arch/x86/crypto/aesni-intel_glue.c
+++ b/arch/x86/crypto/aesni-intel_glue.c
@@ -1551,26 +1551,6 @@ DEFINE_GCM_ALGS(vaes_avx10_512, FLAG_AVX10_512,
 		AES_GCM_KEY_AVX10_SIZE, 800);
 #endif /* CONFIG_AS_VAES && CONFIG_AS_VPCLMULQDQ */
 
-/*
- * This is a list of CPU models that are known to suffer from downclocking when
- * zmm registers (512-bit vectors) are used.  On these CPUs, the AES mode
- * implementations with zmm registers won't be used by default.  Implementations
- * with ymm registers (256-bit vectors) will be used by default instead.
- */
-static const struct x86_cpu_id zmm_exclusion_list[] = {
-	X86_MATCH_VFM(INTEL_SKYLAKE_X,		0),
-	X86_MATCH_VFM(INTEL_ICELAKE_X,		0),
-	X86_MATCH_VFM(INTEL_ICELAKE_D,		0),
-	X86_MATCH_VFM(INTEL_ICELAKE,		0),
-	X86_MATCH_VFM(INTEL_ICELAKE_L,		0),
-	X86_MATCH_VFM(INTEL_ICELAKE_NNPI,	0),
-	X86_MATCH_VFM(INTEL_TIGERLAKE_L,	0),
-	X86_MATCH_VFM(INTEL_TIGERLAKE,		0),
-	/* Allow Rocket Lake and later, and Sapphire Rapids and later. */
-	/* Also allow AMD CPUs (starting with Zen 4, the first with AVX-512). */
-	{},
-};
-
 static int __init register_avx_algs(void)
 {
 	int err;
@@ -1624,7 +1604,7 @@ static int __init register_avx_algs(void)
 	if (err)
 		return err;
 
-	if (x86_match_cpu(zmm_exclusion_list)) {
+	if (boot_cpu_has(X86_FEATURE_PREFER_YMM)) {
 		int i;
 
 		for (i = 0; i < ARRAY_SIZE(skcipher_algs_vaes_avx10_512); i++)
diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h
index 911ef67b6731408952fa538843f3f9c1bab1e48c..76dd9441c8ff60440d0cc0af71b782707e4a33b8 100644
--- a/arch/x86/include/asm/cpufeatures.h
+++ b/arch/x86/include/asm/cpufeatures.h
@@ -487,6 +487,7 @@
 #define X86_FEATURE_AMD_FAST_CPPC	(21*32 + 5) /* Fast CPPC */
 #define X86_FEATURE_AMD_HETEROGENEOUS_CORES (21*32 + 6) /* Heterogeneous Core Topology */
 #define X86_FEATURE_AMD_WORKLOAD_CLASS	(21*32 + 7) /* Workload Classification */
+#define X86_FEATURE_PREFER_YMM		(21*32 + 8) /* Avoid ZMM registers due to downclocking */
 
 /*
  * BUG word(s)
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 045b439c653a5b17887ad057c73ebc38b7b1f94b..4856d99bbbac178f3bcf7cc8fff5d4a435673021 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -521,6 +521,25 @@ static void init_intel_misc_features(struct cpuinfo_x86 *c)
 	wrmsrl(MSR_MISC_FEATURES_ENABLES, msr);
 }
 
+/*
+ * This is a list of Intel CPUs that are known to suffer from downclocking when
+ * ZMM registers (512-bit vectors) are used.  On these CPUs, when the kernel
+ * executes SIMD-optimized code such as cryptography functions or CRCs, it
+ * should prefer 256-bit (YMM) code to 512-bit (ZMM) code.
+ */
+static const struct x86_cpu_id zmm_exclusion_list[] = {
+	X86_MATCH_VFM(INTEL_SKYLAKE_X,		0),
+	X86_MATCH_VFM(INTEL_ICELAKE_X,		0),
+	X86_MATCH_VFM(INTEL_ICELAKE_D,		0),
+	X86_MATCH_VFM(INTEL_ICELAKE,		0),
+	X86_MATCH_VFM(INTEL_ICELAKE_L,		0),
+	X86_MATCH_VFM(INTEL_ICELAKE_NNPI,	0),
+	X86_MATCH_VFM(INTEL_TIGERLAKE_L,	0),
+	X86_MATCH_VFM(INTEL_TIGERLAKE,		0),
+	/* Allow Rocket Lake and later, and Sapphire Rapids and later. */
+	{},
+};
+
 static void init_intel(struct cpuinfo_x86 *c)
 {
 	early_init_intel(c);
@@ -601,6 +620,9 @@ static void init_intel(struct cpuinfo_x86 *c)
 	}
 #endif
 
+	if (x86_match_cpu(zmm_exclusion_list))
+		set_cpu_cap(c, X86_FEATURE_PREFER_YMM);
+
 	/* Work around errata */
 	srat_detect_node(c);
 
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index f453507649d4854cc5c721d7bd04fbbf660e1eab..00df270374970026ed5bfad38425028547716eef 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -42,8 +42,11 @@ obj-$(CONFIG_CRC32_ARCH) += crc32-x86.o
 crc32-x86-y := crc32-glue.o crc32-pclmul.o
 crc32-x86-$(CONFIG_64BIT) += crc32c-3way.o
 
+obj-$(CONFIG_CRC64_ARCH) += crc64-x86.o
+crc64-x86-y := crc64-glue.o crc64-pclmul.o
+
 obj-$(CONFIG_CRC_T10DIF_ARCH) += crc-t10dif-x86.o
-crc-t10dif-x86-y := crc-t10dif-glue.o crct10dif-pcl-asm_64.o
+crc-t10dif-x86-y := crc-t10dif-glue.o crc16-msb-pclmul.o
 
 obj-y += msr.o msr-reg.o msr-reg-export.o hweight.o
 obj-y += iomem.o
diff --git a/arch/x86/lib/crc-pclmul-consts.h b/arch/x86/lib/crc-pclmul-consts.h
new file mode 100644
index 0000000000000000000000000000000000000000..fcc63c0643330f5acb4f99a892cffa131075b06d
--- /dev/null
+++ b/arch/x86/lib/crc-pclmul-consts.h
@@ -0,0 +1,195 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * CRC constants generated by:
+ *
+ *	./scripts/gen-crc-consts.py x86_pclmul crc16_msb_0x8bb7,crc32_lsb_0xedb88320,crc64_msb_0x42f0e1eba9ea3693,crc64_lsb_0x9a6c9329ac4bc9b5
+ *
+ * Do not edit manually.
+ */
+
+/*
+ * CRC folding constants generated for most-significant-bit-first CRC-16 using
+ * G(x) = x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x^1 + x^0
+ */
+static const struct {
+	u8 bswap_mask[16];
+	u64 fold_across_2048_bits_consts[2];
+	u64 fold_across_1024_bits_consts[2];
+	u64 fold_across_512_bits_consts[2];
+	u64 fold_across_256_bits_consts[2];
+	u64 fold_across_128_bits_consts[2];
+	u8 shuf_table[48];
+	u64 barrett_reduction_consts[2];
+} crc16_msb_0x8bb7_consts ____cacheline_aligned __maybe_unused = {
+	.bswap_mask = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0},
+	.fold_across_2048_bits_consts = {
+		0xdccf000000000000,	/* LO64_TERMS: (x^2000 mod G) * x^48 */
+		0x4b0b000000000000,	/* HI64_TERMS: (x^2064 mod G) * x^48 */
+	},
+	.fold_across_1024_bits_consts = {
+		0x9d9d000000000000,	/* LO64_TERMS: (x^976 mod G) * x^48 */
+		0x7cf5000000000000,	/* HI64_TERMS: (x^1040 mod G) * x^48 */
+	},
+	.fold_across_512_bits_consts = {
+		0x044c000000000000,	/* LO64_TERMS: (x^464 mod G) * x^48 */
+		0xe658000000000000,	/* HI64_TERMS: (x^528 mod G) * x^48 */
+	},
+	.fold_across_256_bits_consts = {
+		0x6ee3000000000000,	/* LO64_TERMS: (x^208 mod G) * x^48 */
+		0xe7b5000000000000,	/* HI64_TERMS: (x^272 mod G) * x^48 */
+	},
+	.fold_across_128_bits_consts = {
+		0x2d56000000000000,	/* LO64_TERMS: (x^80 mod G) * x^48 */
+		0x06df000000000000,	/* HI64_TERMS: (x^144 mod G) * x^48 */
+	},
+	.shuf_table = {
+		-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+		 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+		-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+	},
+	.barrett_reduction_consts = {
+		0x8bb7000000000000,	/* LO64_TERMS: (G - x^16) * x^48 */
+		0xf65a57f81d33a48a,	/* HI64_TERMS: (floor(x^79 / G) * x) - x^64 */
+	},
+};
+
+/*
+ * CRC folding constants generated for least-significant-bit-first CRC-32 using
+ * G(x) = x^32 + x^26 + x^23 + x^22 + x^16 + x^12 + x^11 + x^10 + x^8 + x^7 +
+ *        x^5 + x^4 + x^2 + x^1 + x^0
+ */
+static const struct {
+	u64 fold_across_2048_bits_consts[2];
+	u64 fold_across_1024_bits_consts[2];
+	u64 fold_across_512_bits_consts[2];
+	u64 fold_across_256_bits_consts[2];
+	u64 fold_across_128_bits_consts[2];
+	u8 shuf_table[48];
+	u64 barrett_reduction_consts[2];
+} crc32_lsb_0xedb88320_consts ____cacheline_aligned __maybe_unused = {
+	.fold_across_2048_bits_consts = {
+		0x00000000ce3371cb,	/* HI64_TERMS: (x^2079 mod G) * x^32 */
+		0x00000000e95c1271,	/* LO64_TERMS: (x^2015 mod G) * x^32 */
+	},
+	.fold_across_1024_bits_consts = {
+		0x0000000033fff533,	/* HI64_TERMS: (x^1055 mod G) * x^32 */
+		0x00000000910eeec1,	/* LO64_TERMS: (x^991 mod G) * x^32 */
+	},
+	.fold_across_512_bits_consts = {
+		0x000000008f352d95,	/* HI64_TERMS: (x^543 mod G) * x^32 */
+		0x000000001d9513d7,	/* LO64_TERMS: (x^479 mod G) * x^32 */
+	},
+	.fold_across_256_bits_consts = {
+		0x00000000f1da05aa,	/* HI64_TERMS: (x^287 mod G) * x^32 */
+		0x0000000081256527,	/* LO64_TERMS: (x^223 mod G) * x^32 */
+	},
+	.fold_across_128_bits_consts = {
+		0x00000000ae689191,	/* HI64_TERMS: (x^159 mod G) * x^32 */
+		0x00000000ccaa009e,	/* LO64_TERMS: (x^95 mod G) * x^32 */
+	},
+	.shuf_table = {
+		-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+		 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+		-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+	},
+	.barrett_reduction_consts = {
+		0xb4e5b025f7011641,	/* HI64_TERMS: floor(x^95 / G) */
+		0x00000001db710640,	/* LO64_TERMS: (G - x^32) * x^31 */
+	},
+};
+
+/*
+ * CRC folding constants generated for most-significant-bit-first CRC-64 using
+ * G(x) = x^64 + x^62 + x^57 + x^55 + x^54 + x^53 + x^52 + x^47 + x^46 + x^45 +
+ *        x^40 + x^39 + x^38 + x^37 + x^35 + x^33 + x^32 + x^31 + x^29 + x^27 +
+ *        x^24 + x^23 + x^22 + x^21 + x^19 + x^17 + x^13 + x^12 + x^10 + x^9 +
+ *        x^7 + x^4 + x^1 + x^0
+ */
+static const struct {
+	u8 bswap_mask[16];
+	u64 fold_across_2048_bits_consts[2];
+	u64 fold_across_1024_bits_consts[2];
+	u64 fold_across_512_bits_consts[2];
+	u64 fold_across_256_bits_consts[2];
+	u64 fold_across_128_bits_consts[2];
+	u8 shuf_table[48];
+	u64 barrett_reduction_consts[2];
+} crc64_msb_0x42f0e1eba9ea3693_consts ____cacheline_aligned __maybe_unused = {
+	.bswap_mask = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0},
+	.fold_across_2048_bits_consts = {
+		0x7f52691a60ddc70d,	/* LO64_TERMS: (x^2048 mod G) * x^0 */
+		0x7036b0389f6a0c82,	/* HI64_TERMS: (x^2112 mod G) * x^0 */
+	},
+	.fold_across_1024_bits_consts = {
+		0x05cf79dea9ac37d6,	/* LO64_TERMS: (x^1024 mod G) * x^0 */
+		0x001067e571d7d5c2,	/* HI64_TERMS: (x^1088 mod G) * x^0 */
+	},
+	.fold_across_512_bits_consts = {
+		0x5f6843ca540df020,	/* LO64_TERMS: (x^512 mod G) * x^0 */
+		0xddf4b6981205b83f,	/* HI64_TERMS: (x^576 mod G) * x^0 */
+	},
+	.fold_across_256_bits_consts = {
+		0x571bee0a227ef92b,	/* LO64_TERMS: (x^256 mod G) * x^0 */
+		0x44bef2a201b5200c,	/* HI64_TERMS: (x^320 mod G) * x^0 */
+	},
+	.fold_across_128_bits_consts = {
+		0x05f5c3c7eb52fab6,	/* LO64_TERMS: (x^128 mod G) * x^0 */
+		0x4eb938a7d257740e,	/* HI64_TERMS: (x^192 mod G) * x^0 */
+	},
+	.shuf_table = {
+		-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+		 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+		-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+	},
+	.barrett_reduction_consts = {
+		0x42f0e1eba9ea3693,	/* LO64_TERMS: (G - x^64) * x^0 */
+		0x578d29d06cc4f872,	/* HI64_TERMS: (floor(x^127 / G) * x) - x^64 */
+	},
+};
+
+/*
+ * CRC folding constants generated for least-significant-bit-first CRC-64 using
+ * G(x) = x^64 + x^63 + x^61 + x^59 + x^58 + x^56 + x^55 + x^52 + x^49 + x^48 +
+ *        x^47 + x^46 + x^44 + x^41 + x^37 + x^36 + x^34 + x^32 + x^31 + x^28 +
+ *        x^26 + x^23 + x^22 + x^19 + x^16 + x^13 + x^12 + x^10 + x^9 + x^6 +
+ *        x^4 + x^3 + x^0
+ */
+static const struct {
+	u64 fold_across_2048_bits_consts[2];
+	u64 fold_across_1024_bits_consts[2];
+	u64 fold_across_512_bits_consts[2];
+	u64 fold_across_256_bits_consts[2];
+	u64 fold_across_128_bits_consts[2];
+	u8 shuf_table[48];
+	u64 barrett_reduction_consts[2];
+} crc64_lsb_0x9a6c9329ac4bc9b5_consts ____cacheline_aligned __maybe_unused = {
+	.fold_across_2048_bits_consts = {
+		0x37ccd3e14069cabc,	/* HI64_TERMS: (x^2111 mod G) * x^0 */
+		0xa043808c0f782663,	/* LO64_TERMS: (x^2047 mod G) * x^0 */
+	},
+	.fold_across_1024_bits_consts = {
+		0xa1ca681e733f9c40,	/* HI64_TERMS: (x^1087 mod G) * x^0 */
+		0x5f852fb61e8d92dc,	/* LO64_TERMS: (x^1023 mod G) * x^0 */
+	},
+	.fold_across_512_bits_consts = {
+		0x0c32cdb31e18a84a,	/* HI64_TERMS: (x^575 mod G) * x^0 */
+		0x62242240ace5045a,	/* LO64_TERMS: (x^511 mod G) * x^0 */
+	},
+	.fold_across_256_bits_consts = {
+		0xb0bc2e589204f500,	/* HI64_TERMS: (x^319 mod G) * x^0 */
+		0xe1e0bb9d45d7a44c,	/* LO64_TERMS: (x^255 mod G) * x^0 */
+	},
+	.fold_across_128_bits_consts = {
+		0xeadc41fd2ba3d420,	/* HI64_TERMS: (x^191 mod G) * x^0 */
+		0x21e9761e252621ac,	/* LO64_TERMS: (x^127 mod G) * x^0 */
+	},
+	.shuf_table = {
+		-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+		 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15,
+		-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+	},
+	.barrett_reduction_consts = {
+		0x27ecfa329aef9f77,	/* HI64_TERMS: floor(x^127 / G) */
+		0x34d926535897936a,	/* LO64_TERMS: (G - x^64 - x^0) / x */
+	},
+};
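
Each 64-bit constant in this header is an instance of the "(x^k mod G) * x^shift" expressions noted in the comments. As a purely illustrative aside (a standalone Python sketch, not part of this patch and not the real scripts/gen-crc-consts.py; the helper name xpow_mod is made up here), the msb-first CRC-16 entries can be recomputed with plain GF(2) polynomial arithmetic on Python integers:

def xpow_mod(k, G, n):
    """Return x^k mod G(x) over GF(2).  G is passed with its x^n term
    included, e.g. (1 << 16) | 0x8bb7 for the CRC-16 generator above."""
    r = 1                     # the polynomial "1", i.e. x^0
    for _ in range(k):
        r <<= 1               # multiply by x
        if r & (1 << n):      # degree reached n: reduce by XORing with G
            r ^= G
    return r

# The CRC-16 table documents each constant as (x^k mod G) * x^48, i.e. the
# 16-bit remainder placed in the top 16 bits of a 64-bit word.  The k values
# below are taken from the fold_across_*_bits_consts comments above.
G16 = (1 << 16) | 0x8bb7
for k in (80, 144, 208, 272, 464, 528, 976, 1040, 2000, 2064):
    print(f"x^{k:>4} mod G, shifted: {xpow_mod(k, G16, 16) << 48:#018x}")

The output should line up with the LO64_TERMS/HI64_TERMS entries of crc16_msb_0x8bb7_consts; the lsb-first tables use the bit-reversed convention, so this particular routine does not carry over to them unchanged.
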
diff --git a/arch/x86/lib/crc-pclmul-template.S b/arch/x86/lib/crc-pclmul-template.S
new file mode 100644
index 0000000000000000000000000000000000000000..a19b730b642d36d5d0394751e218714410c12bec
--- /dev/null
+++ b/arch/x86/lib/crc-pclmul-template.S
@@ -0,0 +1,589 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+//
+// Template to generate [V]PCLMULQDQ-based CRC functions for x86
+//
+// Copyright 2025 Google LLC
+//
+// Author: Eric Biggers <ebiggers@google.com>
+
+#include <linux/linkage.h>
+#include <linux/objtool.h>
+
+// Offsets within the generated constants table
+.set OFFSETOF_BSWAP_MASK,			-5*16	// msb-first CRCs only
+.set OFFSETOF_FOLD_ACROSS_2048_BITS_CONSTS,	-4*16	// must precede next
+.set OFFSETOF_FOLD_ACROSS_1024_BITS_CONSTS,	-3*16	// must precede next
+.set OFFSETOF_FOLD_ACROSS_512_BITS_CONSTS,	-2*16	// must precede next
+.set OFFSETOF_FOLD_ACROSS_256_BITS_CONSTS,	-1*16	// must precede next
+.set OFFSETOF_FOLD_ACROSS_128_BITS_CONSTS,	0*16	// must be 0
+.set OFFSETOF_SHUF_TABLE,			1*16
+.set OFFSETOF_BARRETT_REDUCTION_CONSTS,		4*16
+
+// Emit a VEX (or EVEX) coded instruction if allowed, or emulate it using the
+// corresponding non-VEX instruction plus any needed moves.  The supported
+// instruction formats are:
+//
+//     - Two-arg [src, dst], where the non-VEX format is the same.
+//     - Three-arg [src1, src2, dst] where the non-VEX format is
+//	 [src1, src2_and_dst].  If src2 != dst, then src1 must != dst too.
+//
+// \insn gives the instruction without a "v" prefix and including any immediate
+// argument if needed to make the instruction follow one of the above formats.
+// If \unaligned_mem_tmp is given, then the emitted non-VEX code moves \arg1 to
+// it first; this is needed when \arg1 is an unaligned mem operand.
+.macro	_cond_vex	insn:req, arg1:req, arg2:req, arg3, unaligned_mem_tmp
+.if AVX_LEVEL == 0
+  // VEX not allowed.  Emulate it.
+  .ifnb \arg3 // Three-arg [src1, src2, dst]
+    .ifc "\arg2", "\arg3" // src2 == dst?
+      .ifnb \unaligned_mem_tmp
+	movdqu		\arg1, \unaligned_mem_tmp
+	\insn		\unaligned_mem_tmp, \arg3
+      .else
+	\insn		\arg1, \arg3
+      .endif
+    .else // src2 != dst
+      .ifc "\arg1", "\arg3"
+	.error "Can't have src1 == dst when src2 != dst"
+      .endif
+      .ifnb \unaligned_mem_tmp
+	movdqu		\arg1, \unaligned_mem_tmp
+	movdqa		\arg2, \arg3
+	\insn		\unaligned_mem_tmp, \arg3
+      .else
+	movdqa		\arg2, \arg3
+	\insn		\arg1, \arg3
+      .endif
+    .endif
+  .else // Two-arg [src, dst]
+    .ifnb \unaligned_mem_tmp
+	movdqu		\arg1, \unaligned_mem_tmp
+	\insn		\unaligned_mem_tmp, \arg2
+    .else
+	\insn		\arg1, \arg2
+    .endif
+  .endif
+.else
+  // VEX is allowed.  Emit the desired instruction directly.
+  .ifnb \arg3
+	v\insn		\arg1, \arg2, \arg3
+  .else
+	v\insn		\arg1, \arg2
+  .endif
+.endif
+.endm
+
+// Broadcast an aligned 128-bit mem operand to all 128-bit lanes of a vector
+// register of length VL.
+.macro	_vbroadcast	src, dst
+.if VL == 16
+	_cond_vex movdqa,	\src, \dst
+.elseif VL == 32
+	vbroadcasti128		\src, \dst
+.else
+	vbroadcasti32x4		\src, \dst
+.endif
+.endm
+
+// Load \vl bytes from the unaligned mem operand \src into \dst, and if the CRC
+// is msb-first use \bswap_mask to reflect the bytes within each 128-bit lane.
+.macro	_load_data	vl, src, bswap_mask, dst
+.if \vl < 64
+	_cond_vex movdqu,	"\src", \dst
+.else
+	vmovdqu8		\src, \dst
+.endif
+.if !LSB_CRC
+	_cond_vex pshufb,	\bswap_mask, \dst, \dst
+.endif
+.endm
+
+.macro	_prepare_v0	vl, v0, v1, bswap_mask
+.if LSB_CRC
+  .if \vl < 64
+	_cond_vex pxor,		(BUF), \v0, \v0, unaligned_mem_tmp=\v1
+  .else
+	vpxorq			(BUF), \v0, \v0
+  .endif
+.else
+	_load_data		\vl, (BUF), \bswap_mask, \v1
+  .if \vl < 64
+	_cond_vex pxor,		\v1, \v0, \v0
+  .else
+	vpxorq			\v1, \v0, \v0
+  .endif
+.endif
+.endm
+
+// The x^0..x^63 terms, i.e. poly128 mod x^64, i.e. the physically low qword for
+// msb-first order or the physically high qword for lsb-first order
+#define LO64_TERMS 0
+
+// The x^64..x^127 terms, i.e. floor(poly128 / x^64), i.e. the physically high
+// qword for msb-first order or the physically low qword for lsb-first order
+#define HI64_TERMS 1
+
+// Multiply the given \src1_terms of each 128-bit lane of \src1 by the given
+// \src2_terms of each 128-bit lane of \src2, and write the result(s) to \dst.
+.macro	_pclmulqdq	src1, src1_terms, src2, src2_terms, dst
+	_cond_vex "pclmulqdq $((\src1_terms ^ LSB_CRC) << 4) ^ (\src2_terms ^ LSB_CRC),", \
+		  \src1, \src2, \dst
+.endm
+
+// Fold \acc into \data and store the result back into \acc.  \data can be an
+// unaligned mem operand if using VEX is allowed and the CRC is lsb-first so no
+// byte-reflection is needed; otherwise it must be a vector register.  \consts
+// is a vector register containing the needed fold constants, and \tmp is a
+// temporary vector register.  All arguments must be the same length.
+.macro	_fold_vec	acc, data, consts, tmp
+	_pclmulqdq	\consts, HI64_TERMS, \acc, HI64_TERMS, \tmp
+	_pclmulqdq	\consts, LO64_TERMS, \acc, LO64_TERMS, \acc
+.if AVX_LEVEL < 10
+	_cond_vex pxor,	\data, \tmp, \tmp
+	_cond_vex pxor,	\tmp, \acc, \acc
+.else
+	vpternlogq	$0x96, \data, \tmp, \acc
+.endif
+.endm
+
+// Fold \acc into \data and store the result back into \acc.  \data is an
+// unaligned mem operand, \consts is a vector register containing the needed
+// fold constants, \bswap_mask is a vector register containing the
+// byte-reflection table if the CRC is msb-first, and \tmp1 and \tmp2 are
+// temporary vector registers.  All arguments must have length \vl.
+.macro	_fold_vec_mem	vl, acc, data, consts, bswap_mask, tmp1, tmp2
+.if AVX_LEVEL == 0 || !LSB_CRC
+	_load_data	\vl, \data, \bswap_mask, \tmp1
+	_fold_vec	\acc, \tmp1, \consts, \tmp2
+.else
+	_fold_vec	\acc, \data, \consts, \tmp1
+.endif
+.endm
+
+// Load the constants for folding across 2**i vectors of length VL at a time
+// into all 128-bit lanes of the vector register CONSTS.
+.macro	_load_vec_folding_consts	i
+	_vbroadcast OFFSETOF_FOLD_ACROSS_128_BITS_CONSTS+(4-LOG2_VL-\i)*16(CONSTS_PTR), \
+		    CONSTS
+.endm
+
+// Given vector registers \v0 and \v1 of length \vl, fold \v0 into \v1 and store
+// the result back into \v0.  If the remaining length mod \vl is nonzero, also
+// fold \vl data bytes from BUF.  For both operations the fold distance is \vl.
+// \consts must be a register of length \vl containing the fold constants.
+.macro	_fold_vec_final	vl, v0, v1, consts, bswap_mask, tmp1, tmp2
+	_fold_vec	\v0, \v1, \consts, \tmp1
+	test		$\vl, LEN8
+	jz		.Lfold_vec_final_done\@
+	_fold_vec_mem	\vl, \v0, (BUF), \consts, \bswap_mask, \tmp1, \tmp2
+	add		$\vl, BUF
+.Lfold_vec_final_done\@:
+.endm
+
+// This macro generates the body of a CRC function with the following prototype:
+//
+// crc_t crc_func(crc_t crc, const u8 *buf, size_t len, const void *consts);
+//
+// |crc| is the initial CRC, and crc_t is a data type wide enough to hold it.
+// |buf| is the data to checksum.  |len| is the data length in bytes, which must
+// be at least 16.  |consts| is a pointer to the fold_across_128_bits_consts
+// field of the constants struct that was generated for the chosen CRC variant.
+//
+// Moving on to the macro parameters, \n is the number of bits in the CRC, e.g.
+// 32 for a CRC-32.  Currently the supported values are 8, 16, 32, and 64.  If
+// the file is compiled in i386 mode, then the maximum supported value is 32.
+//
+// \lsb_crc is 1 if the CRC processes the least significant bit of each byte
+// first, i.e. maps bit0 to x^7, bit1 to x^6, ..., bit7 to x^0.  \lsb_crc is 0
+// if the CRC processes the most significant bit of each byte first, i.e. maps
+// bit0 to x^0, bit1 to x^1, ..., bit7 to x^7.
+//
+// \vl is the maximum length of vector register to use in bytes: 16, 32, or 64.
+//
+// \avx_level is the level of AVX support to use: 0 for SSE only, 2 for AVX2, or
+// 10 for AVX10 or AVX512.
+//
+// If \vl == 16 && \avx_level == 0, the generated code requires:
+// PCLMULQDQ && SSE4.1.  (Note: all known CPUs with PCLMULQDQ also have SSE4.1.)
+//
+// If \vl == 32 && \avx_level == 2, the generated code requires:
+// VPCLMULQDQ && AVX2.
+//
+// If \vl == 32 && \avx_level == 10, the generated code requires:
+// VPCLMULQDQ && (AVX10/256 || (AVX512BW && AVX512VL))
+//
+// If \vl == 64 && \avx_level == 10, the generated code requires:
+// VPCLMULQDQ && (AVX10/512 || (AVX512BW && AVX512VL))
+//
+// Other \vl and \avx_level combinations are either not supported or not useful.
+.macro	_crc_pclmul	n, lsb_crc, vl, avx_level
+	.set	LSB_CRC,	\lsb_crc
+	.set	VL,		\vl
+	.set	AVX_LEVEL,	\avx_level
+
+	// Define aliases for the xmm, ymm, or zmm registers according to VL.
+.irp i, 0,1,2,3,4,5,6,7
+  .if VL == 16
+	.set	V\i,		%xmm\i
+	.set	LOG2_VL,	4
+  .elseif VL == 32
+	.set	V\i,		%ymm\i
+	.set	LOG2_VL,	5
+  .elseif VL == 64
+	.set	V\i,		%zmm\i
+	.set	LOG2_VL,	6
+  .else
+	.error "Unsupported vector length"
+  .endif
+.endr
+	// Define aliases for the function parameters.
+	// Note: when crc_t is shorter than u32, zero-extension to 32 bits is
+	// guaranteed by the ABI.  Zero-extension to 64 bits is *not* guaranteed
+	// when crc_t is shorter than u64.
+#ifdef __x86_64__
+.if \n <= 32
+	.set	CRC,		%edi
+.else
+	.set	CRC,		%rdi
+.endif
+	.set	BUF,		%rsi
+	.set	LEN,		%rdx
+	.set	LEN32,		%edx
+	.set	LEN8,		%dl
+	.set	CONSTS_PTR,	%rcx
+#else
+	// 32-bit support, assuming -mregparm=3 and not including support for
+	// CRC-64 (which would use both eax and edx to pass the crc parameter).
+	.set	CRC,		%eax
+	.set	BUF,		%edx
+	.set	LEN,		%ecx
+	.set	LEN32,		%ecx
+	.set	LEN8,		%cl
+	.set	CONSTS_PTR,	%ebx	// Passed on stack
+#endif
+
+	// Define aliases for some local variables.  V0-V5 are used without
+	// aliases (for accumulators, data, temporary values, etc).  Staying
+	// within the first 8 vector registers keeps the code 32-bit SSE
+	// compatible and reduces the size of 64-bit SSE code slightly.
+	.set	BSWAP_MASK,	V6
+	.set	BSWAP_MASK_YMM,	%ymm6
+	.set	BSWAP_MASK_XMM,	%xmm6
+	.set	CONSTS,		V7
+	.set	CONSTS_YMM,	%ymm7
+	.set	CONSTS_XMM,	%xmm7
+
+	// Use ANNOTATE_NOENDBR to suppress an objtool warning, since the
+	// functions generated by this macro are called only by static_call.
+	ANNOTATE_NOENDBR
+
+#ifdef __i386__
+	push		CONSTS_PTR
+	mov		8(%esp), CONSTS_PTR
+#endif
+
+	// Create a 128-bit vector that contains the initial CRC in the end
+	// representing the high-order polynomial coefficients, and the rest 0.
+	// If the CRC is msb-first, also load the byte-reflection table.
+.if \n <= 32
+	_cond_vex movd,	CRC, %xmm0
+.else
+	_cond_vex movq,	CRC, %xmm0
+.endif
+.if !LSB_CRC
+	_cond_vex pslldq, $(128-\n)/8, %xmm0, %xmm0
+	_vbroadcast	OFFSETOF_BSWAP_MASK(CONSTS_PTR), BSWAP_MASK
+.endif
+
+	// Load the first vector of data and XOR the initial CRC into the
+	// appropriate end of the first 128-bit lane of data.  If LEN < VL, then
+	// use a short vector and jump ahead to the final reduction.  (LEN >= 16
+	// is guaranteed here but not necessarily LEN >= VL.)
+.if VL >= 32
+	cmp		$VL, LEN
+	jae		.Lat_least_1vec\@
+  .if VL == 64
+	cmp		$32, LEN32
+	jb		.Lless_than_32bytes\@
+	_prepare_v0	32, %ymm0, %ymm1, BSWAP_MASK_YMM
+	add		$32, BUF
+	jmp		.Lreduce_256bits_to_128bits\@
+.Lless_than_32bytes\@:
+  .endif
+	_prepare_v0	16, %xmm0, %xmm1, BSWAP_MASK_XMM
+	add		$16, BUF
+	vmovdqa		OFFSETOF_FOLD_ACROSS_128_BITS_CONSTS(CONSTS_PTR), CONSTS_XMM
+	jmp		.Lcheck_for_partial_block\@
+.Lat_least_1vec\@:
+.endif
+	_prepare_v0	VL, V0, V1, BSWAP_MASK
+
+	// Handle VL <= LEN < 4*VL.
+	cmp		$4*VL-1, LEN
+	ja		.Lat_least_4vecs\@
+	add		$VL, BUF
+	// If VL <= LEN < 2*VL, then jump ahead to the reduction from 1 vector.
+	// If VL==16 then load fold_across_128_bits_consts first, as the final
+	// reduction depends on it and it won't be loaded anywhere else.
+	cmp		$2*VL-1, LEN32
+.if VL == 16
+	_cond_vex movdqa, OFFSETOF_FOLD_ACROSS_128_BITS_CONSTS(CONSTS_PTR), CONSTS_XMM
+.endif
+	jbe		.Lreduce_1vec_to_128bits\@
+	// Otherwise 2*VL <= LEN < 4*VL.  Load one more vector and jump ahead to
+	// the reduction from 2 vectors.
+	_load_data	VL, (BUF), BSWAP_MASK, V1
+	add		$VL, BUF
+	jmp		.Lreduce_2vecs_to_1\@
+
+.Lat_least_4vecs\@:
+	// Load 3 more vectors of data.
+	_load_data	VL, 1*VL(BUF), BSWAP_MASK, V1
+	_load_data	VL, 2*VL(BUF), BSWAP_MASK, V2
+	_load_data	VL, 3*VL(BUF), BSWAP_MASK, V3
+	sub		$-4*VL, BUF	// Shorter than 'add 4*VL' when VL=32
+	add		$-4*VL, LEN	// Shorter than 'sub 4*VL' when VL=32
+
+	// Main loop: while LEN >= 4*VL, fold the 4 vectors V0-V3 into the next
+	// 4 vectors of data and write the result back to V0-V3.
+	cmp		$4*VL-1, LEN	// Shorter than 'cmp 4*VL' when VL=32
+	jbe		.Lreduce_4vecs_to_2\@
+	_load_vec_folding_consts	2
+.Lfold_4vecs_loop\@:
+	_fold_vec_mem	VL, V0, 0*VL(BUF), CONSTS, BSWAP_MASK, V4, V5
+	_fold_vec_mem	VL, V1, 1*VL(BUF), CONSTS, BSWAP_MASK, V4, V5
+	_fold_vec_mem	VL, V2, 2*VL(BUF), CONSTS, BSWAP_MASK, V4, V5
+	_fold_vec_mem	VL, V3, 3*VL(BUF), CONSTS, BSWAP_MASK, V4, V5
+	sub		$-4*VL, BUF
+	add		$-4*VL, LEN
+	cmp		$4*VL-1, LEN
+	ja		.Lfold_4vecs_loop\@
+
+	// Fold V0,V1 into V2,V3 and write the result back to V0,V1.  Then fold
+	// two more vectors of data from BUF, if at least that much remains.
+.Lreduce_4vecs_to_2\@:
+	_load_vec_folding_consts	1
+	_fold_vec	V0, V2, CONSTS, V4
+	_fold_vec	V1, V3, CONSTS, V4
+	test		$2*VL, LEN8
+	jz		.Lreduce_2vecs_to_1\@
+	_fold_vec_mem	VL, V0, 0*VL(BUF), CONSTS, BSWAP_MASK, V4, V5
+	_fold_vec_mem	VL, V1, 1*VL(BUF), CONSTS, BSWAP_MASK, V4, V5
+	sub		$-2*VL, BUF
+
+	// Fold V0 into V1 and write the result back to V0.  Then fold one more
+	// vector of data from BUF, if at least that much remains.
+.Lreduce_2vecs_to_1\@:
+	_load_vec_folding_consts	0
+	_fold_vec_final	VL, V0, V1, CONSTS, BSWAP_MASK, V4, V5
+
+.Lreduce_1vec_to_128bits\@:
+.if VL == 64
+	// Reduce 512-bit %zmm0 to 256-bit %ymm0.  Then fold 256 more bits of
+	// data from BUF, if at least that much remains.
+	vbroadcasti128	OFFSETOF_FOLD_ACROSS_256_BITS_CONSTS(CONSTS_PTR), CONSTS_YMM
+	vextracti64x4	$1, %zmm0, %ymm1
+	_fold_vec_final	32, %ymm0, %ymm1, CONSTS_YMM, BSWAP_MASK_YMM, %ymm4, %ymm5
+.Lreduce_256bits_to_128bits\@:
+.endif
+.if VL >= 32
+	// Reduce 256-bit %ymm0 to 128-bit %xmm0.  Then fold 128 more bits of
+	// data from BUF, if at least that much remains.
+	vmovdqa		OFFSETOF_FOLD_ACROSS_128_BITS_CONSTS(CONSTS_PTR), CONSTS_XMM
+	vextracti128	$1, %ymm0, %xmm1
+	_fold_vec_final	16, %xmm0, %xmm1, CONSTS_XMM, BSWAP_MASK_XMM, %xmm4, %xmm5
+.Lcheck_for_partial_block\@:
+.endif
+	and		$15, LEN32
+	jz		.Lreduce_128bits_to_crc\@
+
+	// 1 <= LEN <= 15 data bytes remain in BUF.  The polynomial is now
+	// A*(x^(8*LEN)) + B, where A is the 128-bit polynomial stored in %xmm0
+	// and B is the polynomial of the remaining LEN data bytes.  To reduce
+	// this to 128 bits without needing fold constants for each possible
+	// LEN, rearrange this expression into C1*(x^128) + C2, where
+	// C1 = floor(A / x^(128 - 8*LEN)) and C2 = (A*x^(8*LEN) + B) mod x^128.
+	// Then fold C1 into C2, which is just another fold across 128 bits.
+
+.if !LSB_CRC || AVX_LEVEL == 0
+	// Load the last 16 data bytes.  Note that originally LEN was >= 16.
+	_load_data	16, "-16(BUF,LEN)", BSWAP_MASK_XMM, %xmm2
+.endif // Else will use vpblendvb mem operand later.
+.if !LSB_CRC
+	neg		LEN	// Needed for indexing shuf_table
+.endif
+
+	// tmp = A*x^(8*LEN) mod x^128
+	// lsb: pshufb by [LEN, LEN+1, ..., 15, -1, -1, ..., -1]
+	//	i.e. right-shift by LEN bytes.
+	// msb: pshufb by [-1, -1, ..., -1, 0, 1, ..., 15-LEN]
+	//	i.e. left-shift by LEN bytes.
+	_cond_vex movdqu,	"OFFSETOF_SHUF_TABLE+16(CONSTS_PTR,LEN)", %xmm3
+	_cond_vex pshufb,	%xmm3, %xmm0, %xmm1
+
+	// C1 = floor(A / x^(128 - 8*LEN))
+	// lsb: pshufb by [-1, -1, ..., -1, 0, 1, ..., LEN-1]
+	//	i.e. left-shift by 16-LEN bytes.
+	// msb: pshufb by [16-LEN, 16-LEN+1, ..., 15, -1, -1, ..., -1]
+	//	i.e. right-shift by 16-LEN bytes.
+	_cond_vex pshufb,	"OFFSETOF_SHUF_TABLE+32*!LSB_CRC(CONSTS_PTR,LEN)", \
+				%xmm0, %xmm0, unaligned_mem_tmp=%xmm4
+
+	// C2 = tmp + B.  This is just a blend of tmp with the last 16 data
+	// bytes (reflected if msb-first).  The blend mask is the shuffle table
+	// used to create tmp: 0 selects tmp, 1 selects the last 16 data bytes.
+.if AVX_LEVEL == 0
+	movdqa		%xmm0, %xmm4
+	movdqa		%xmm3, %xmm0
+	pblendvb	%xmm2, %xmm1	// uses %xmm0 as implicit operand
+	movdqa		%xmm4, %xmm0
+.elseif LSB_CRC
+	vpblendvb	%xmm3, -16(BUF,LEN), %xmm1, %xmm1
+.else
+	vpblendvb	%xmm3, %xmm2, %xmm1, %xmm1
+.endif
+
+	// Fold C1 into C2 and store the 128-bit result in %xmm0.
+	_fold_vec	%xmm0, %xmm1, CONSTS_XMM, %xmm4
+
+.Lreduce_128bits_to_crc\@:
+	// Compute the CRC as %xmm0 * x^n mod G.  Here %xmm0 means the 128-bit
+	// polynomial stored in %xmm0 (using either lsb-first or msb-first bit
+	// order according to LSB_CRC), and G is the CRC's generator polynomial.
+
+	// First, multiply %xmm0 by x^n and reduce the result to 64+n bits:
+	//
+	//	t0 := (x^(64+n) mod G) * floor(%xmm0 / x^64) +
+	//	      x^n * (%xmm0 mod x^64)
+	//
+	// Store t0 * x^(64-n) in %xmm0.  I.e., actually do:
+	//
+	//	%xmm0 := ((x^(64+n) mod G) * x^(64-n)) * floor(%xmm0 / x^64) +
+	//		 x^64 * (%xmm0 mod x^64)
+	//
+	// The extra unreduced factor of x^(64-n) makes floor(t0 / x^n) aligned
+	// to the HI64_TERMS of %xmm0 so that the next pclmulqdq can easily
+	// select it.  The 64-bit constant (x^(64+n) mod G) * x^(64-n) in the
+	// msb-first case, or (x^(63+n) mod G) * x^(64-n) in the lsb-first case
+	// (considering the extra factor of x that gets implicitly introduced by
+	// each pclmulqdq when using lsb-first order), is identical to the
+	// constant that was used earlier for folding the LO64_TERMS across 128
+	// bits.  Thus it's already available in LO64_TERMS of CONSTS_XMM.
+	_pclmulqdq		CONSTS_XMM, LO64_TERMS, %xmm0, HI64_TERMS, %xmm1
+.if LSB_CRC
+	_cond_vex psrldq,	$8, %xmm0, %xmm0  // x^64 * (%xmm0 mod x^64)
+.else
+	_cond_vex pslldq,	$8, %xmm0, %xmm0  // x^64 * (%xmm0 mod x^64)
+.endif
+	_cond_vex pxor,		%xmm1, %xmm0, %xmm0
+	// The HI64_TERMS of %xmm0 now contain floor(t0 / x^n).
+	// The LO64_TERMS of %xmm0 now contain (t0 mod x^n) * x^(64-n).
+
+	// First step of Barrett reduction: Compute floor(t0 / G).  This is the
+	// polynomial by which G needs to be multiplied to cancel out the x^n
+	// and higher terms of t0, i.e. to reduce t0 mod G.  First do:
+	//
+	//	t1 := floor(x^(63+n) / G) * x * floor(t0 / x^n)
+	//
+	// Then the desired value floor(t0 / G) is floor(t1 / x^64).  The 63 in
+	// x^(63+n) is the maximum degree of floor(t0 / x^n) and thus the lowest
+	// value that makes enough precision be carried through the calculation.
+	//
+	// The '* x' makes it so the result is floor(t1 / x^64) rather than
+	// floor(t1 / x^63), making it qword-aligned in HI64_TERMS so that it
+	// can be extracted much more easily in the next step.  In the lsb-first
+	// case the '* x' happens implicitly.  In the msb-first case it must be
+	// done explicitly; floor(x^(63+n) / G) * x is a 65-bit constant, so the
+	// constant passed to pclmulqdq is (floor(x^(63+n) / G) * x) - x^64, and
+	// the multiplication by the x^64 term is handled using a pxor.  The
+	// pxor causes the low 64 terms of t1 to be wrong, but they are unused.
+	_cond_vex movdqa,	OFFSETOF_BARRETT_REDUCTION_CONSTS(CONSTS_PTR), CONSTS_XMM
+	_pclmulqdq		CONSTS_XMM, HI64_TERMS, %xmm0, HI64_TERMS, %xmm1
+.if !LSB_CRC
+	_cond_vex pxor,		%xmm0, %xmm1, %xmm1 // += x^64 * floor(t0 / x^n)
+.endif
+	// The HI64_TERMS of %xmm1 now contain floor(t1 / x^64) = floor(t0 / G).
+
+	// Second step of Barrett reduction: Cancel out the x^n and higher terms
+	// of t0 by subtracting the needed multiple of G.  This gives the CRC:
+	//
+	//	crc := t0 - (G * floor(t0 / G))
+	//
+	// But %xmm0 contains t0 * x^(64-n), so it's more convenient to do:
+	//
+	//	crc := ((t0 * x^(64-n)) - ((G * x^(64-n)) * floor(t0 / G))) / x^(64-n)
+	//
+	// Furthermore, since the resulting CRC is n-bit, if mod x^n is
+	// explicitly applied to it then the x^n term of G makes no difference
+	// in the result and can be omitted.  This helps keep the constant
+	// multiplier in 64 bits in most cases.  This gives the following:
+	//
+	//	%xmm0 := %xmm0 - (((G - x^n) * x^(64-n)) * floor(t0 / G))
+	//	crc := (%xmm0 / x^(64-n)) mod x^n
+	//
+	// In the lsb-first case, each pclmulqdq implicitly introduces
+	// an extra factor of x, so in that case the constant that needs to be
+	// passed to pclmulqdq is actually '(G - x^n) * x^(63-n)' when n <= 63.
+	// For lsb-first CRCs where n=64, the extra factor of x cannot be as
+	// easily avoided.  In that case, instead pass '(G - x^n - x^0) / x' to
+	// pclmulqdq and handle the x^0 term (i.e. 1) separately.  (All CRC
+	// polynomials have nonzero x^n and x^0 terms.)  It works out as: the
+	// CRC has to be XORed with the physically low qword of %xmm1, representing
+	// floor(t0 / G).  The most efficient way to do that is to move it to
+	// the physically high qword and use a ternlog to combine the two XORs.
+.if LSB_CRC && \n == 64
+	_cond_vex punpcklqdq,	%xmm1, %xmm2, %xmm2
+	_pclmulqdq		CONSTS_XMM, LO64_TERMS, %xmm1, HI64_TERMS, %xmm1
+    .if AVX_LEVEL < 10
+	_cond_vex pxor,		%xmm2, %xmm0, %xmm0
+	_cond_vex pxor,		%xmm1, %xmm0, %xmm0
+    .else
+	vpternlogq		$0x96, %xmm2, %xmm1, %xmm0
+    .endif
+	_cond_vex "pextrq $1,",	%xmm0, %rax  // (%xmm0 / x^0) mod x^64
+.else
+	_pclmulqdq		CONSTS_XMM, LO64_TERMS, %xmm1, HI64_TERMS, %xmm1
+	_cond_vex pxor,		%xmm1, %xmm0, %xmm0
+  .if \n == 8
+	_cond_vex "pextrb $7 + LSB_CRC,", %xmm0, %eax // (%xmm0 / x^56) mod x^8
+  .elseif \n == 16
+	_cond_vex "pextrw $3 + LSB_CRC,", %xmm0, %eax // (%xmm0 / x^48) mod x^16
+  .elseif \n == 32
+	_cond_vex "pextrd $1 + LSB_CRC,", %xmm0, %eax // (%xmm0 / x^32) mod x^32
+  .else // \n == 64 && !LSB_CRC
+	_cond_vex movq,		%xmm0, %rax  // (%xmm0 / x^0) mod x^64
+  .endif
+.endif
+
+.if VL > 16
+	vzeroupper	// Needed when ymm or zmm registers may have been used.
+.endif
+#ifdef __i386__
+	pop		CONSTS_PTR
+#endif
+	RET
+.endm
+
+#ifdef CONFIG_AS_VPCLMULQDQ
+#define DEFINE_CRC_PCLMUL_FUNCS(prefix, bits, lsb)			\
+SYM_FUNC_START(prefix##_pclmul_sse);					\
+	_crc_pclmul	n=bits, lsb_crc=lsb, vl=16, avx_level=0;	\
+SYM_FUNC_END(prefix##_pclmul_sse);					\
+									\
+SYM_FUNC_START(prefix##_vpclmul_avx2);					\
+	_crc_pclmul	n=bits, lsb_crc=lsb, vl=32, avx_level=2;	\
+SYM_FUNC_END(prefix##_vpclmul_avx2);					\
+									\
+SYM_FUNC_START(prefix##_vpclmul_avx10_256);				\
+	_crc_pclmul	n=bits, lsb_crc=lsb, vl=32, avx_level=10;	\
+SYM_FUNC_END(prefix##_vpclmul_avx10_256);				\
+									\
+SYM_FUNC_START(prefix##_vpclmul_avx10_512);				\
+	_crc_pclmul	n=bits, lsb_crc=lsb, vl=64, avx_level=10;	\
+SYM_FUNC_END(prefix##_vpclmul_avx10_512);
+#else
+#define DEFINE_CRC_PCLMUL_FUNCS(prefix, bits, lsb)			\
+SYM_FUNC_START(prefix##_pclmul_sse);					\
+	_crc_pclmul	n=bits, lsb_crc=lsb, vl=16, avx_level=0;	\
+SYM_FUNC_END(prefix##_pclmul_sse);
+#endif // !CONFIG_AS_VPCLMULQDQ
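
The .Lreduce_128bits_to_crc comments above state that the two-step Barrett reduction is exact for GF(2) polynomials. As a purely illustrative cross-check (again a standalone Python sketch, not part of the patch; deg/pmul/pdivmod are ad-hoc helpers, not kernel APIs), the same steps can be replayed for the msb-first CRC-16 whose generator appears in crc-pclmul-consts.h and compared against direct polynomial division:

import random

def deg(a):
    return a.bit_length() - 1

def pmul(a, b):
    """Carry-less (GF(2)) polynomial multiplication."""
    r = 0
    while b:
        if b & 1:
            r ^= a
        a <<= 1
        b >>= 1
    return r

def pdivmod(a, b):
    """GF(2) polynomial long division: return (floor(a / b), a mod b)."""
    q = 0
    while deg(a) >= deg(b):
        shift = deg(a) - deg(b)
        q |= 1 << shift
        a ^= b << shift
    return q, a

n, G = 16, (1 << 16) | 0x8bb7       # generator of the msb-first CRC-16 above
M = pdivmod(1 << (63 + n), G)[0]    # floor(x^(63+n) / G), the Barrett constant

for _ in range(10000):
    t0 = random.getrandbits(64 + n) # polynomial of degree < 64+n, like t0 above
    A = t0 >> n                     # floor(t0 / x^n), degree <= 63
    # t1 = floor(x^(63+n)/G) * x * floor(t0/x^n); floor(t0/G) = floor(t1/x^64)
    q = pmul(pmul(M, 2), A) >> 64
    crc = t0 ^ pmul(q, G)           # t0 - G*floor(t0/G), i.e. t0 mod G
    assert crc == pdivmod(t0, G)[1] # matches direct polynomial division

The assertion is expected to hold for every t0, consistent with the comment's argument that x^(63+n) is the lowest power that carries enough precision through the calculation.
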
diff --git a/arch/x86/lib/crc-pclmul-template.h b/arch/x86/lib/crc-pclmul-template.h
new file mode 100644
index 0000000000000000000000000000000000000000..7b89f0edbc179b03d6998ba1572dc9cd0ef0205e
--- /dev/null
+++ b/arch/x86/lib/crc-pclmul-template.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Macros for accessing the [V]PCLMULQDQ-based CRC functions that are
+ * instantiated by crc-pclmul-template.S
+ *
+ * Copyright 2025 Google LLC
+ *
+ * Author: Eric Biggers <ebiggers@google.com>
+ */
+#ifndef _CRC_PCLMUL_TEMPLATE_H
+#define _CRC_PCLMUL_TEMPLATE_H
+
+#include <asm/cpufeatures.h>
+#include <asm/simd.h>
+#include <crypto/internal/simd.h>
+#include <linux/static_call.h>
+#include "crc-pclmul-consts.h"
+
+#define DECLARE_CRC_PCLMUL_FUNCS(prefix, crc_t)				\
+crc_t prefix##_pclmul_sse(crc_t crc, const u8 *p, size_t len,		\
+			  const void *consts_ptr);			\
+crc_t prefix##_vpclmul_avx2(crc_t crc, const u8 *p, size_t len,		\
+			    const void *consts_ptr);			\
+crc_t prefix##_vpclmul_avx10_256(crc_t crc, const u8 *p, size_t len,	\
+				 const void *consts_ptr);		\
+crc_t prefix##_vpclmul_avx10_512(crc_t crc, const u8 *p, size_t len,	\
+				 const void *consts_ptr);		\
+DEFINE_STATIC_CALL(prefix##_pclmul, prefix##_pclmul_sse)
+
+#define INIT_CRC_PCLMUL(prefix)						\
+do {									\
+	if (IS_ENABLED(CONFIG_AS_VPCLMULQDQ) &&				\
+	    boot_cpu_has(X86_FEATURE_VPCLMULQDQ) &&			\
+	    boot_cpu_has(X86_FEATURE_AVX2) &&				\
+	    cpu_has_xfeatures(XFEATURE_MASK_YMM, NULL)) {		\
+		if (boot_cpu_has(X86_FEATURE_AVX512BW) &&		\
+		    boot_cpu_has(X86_FEATURE_AVX512VL) &&		\
+		    cpu_has_xfeatures(XFEATURE_MASK_AVX512, NULL)) {	\
+			if (boot_cpu_has(X86_FEATURE_PREFER_YMM))	\
+				static_call_update(prefix##_pclmul,	\
+						   prefix##_vpclmul_avx10_256); \
+			else						\
+				static_call_update(prefix##_pclmul,	\
+						   prefix##_vpclmul_avx10_512); \
+		} else {						\
+			static_call_update(prefix##_pclmul,		\
+					   prefix##_vpclmul_avx2);	\
+		}							\
+	}								\
+} while (0)
+
+/*
+ * Call a [V]PCLMULQDQ optimized CRC function if the data length is at least 16
+ * bytes, the CPU has PCLMULQDQ support, and the current context may use SIMD.
+ *
+ * 16 bytes is the minimum length supported by the [V]PCLMULQDQ functions.
+ * There is overhead associated with kernel_fpu_begin() and kernel_fpu_end(),
+ * varying by CPU and factors such as which parts of the "FPU" state userspace
+ * has touched, which could result in a larger cutoff being better.  Indeed, a
+ * larger cutoff is usually better for a *single* message.  However, the
+ * overhead of the FPU section gets amortized if multiple FPU sections get
+ * executed before returning to userspace, since the XSAVE and XRSTOR occur only
+ * once.  Considering that and the fact that the [V]PCLMULQDQ code is lighter on
+ * the dcache than the table-based code is, a 16-byte cutoff seems to work well.
+ */
+#define CRC_PCLMUL(crc, p, len, prefix, consts, have_pclmulqdq)		\
+do {									\
+	if ((len) >= 16 && static_branch_likely(&(have_pclmulqdq)) &&	\
+	    crypto_simd_usable()) {					\
+		const void *consts_ptr;					\
+									\
+		consts_ptr = (consts).fold_across_128_bits_consts;	\
+		kernel_fpu_begin();					\
+		crc = static_call(prefix##_pclmul)((crc), (p), (len),	\
+						   consts_ptr);		\
+		kernel_fpu_end();					\
+		return crc;						\
+	}								\
+} while (0)
+
+#endif /* _CRC_PCLMUL_TEMPLATE_H */
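INIT_CRC_PCLMUL()'s nested CPU-feature checks are easier to read flattened into a decision tree. A sketch of the static_call target chosen at boot (illustration only; the SSE variant is the default target installed by DECLARE_CRC_PCLMUL_FUNCS):

	/*
	 * CONFIG_AS_VPCLMULQDQ && VPCLMULQDQ && AVX2 && YMM xstate enabled?
	 *   no  -> prefix##_pclmul_sse (default, never updated)
	 *   yes -> AVX512BW && AVX512VL && AVX-512 xstate enabled?
	 *            no  -> prefix##_vpclmul_avx2
	 *            yes -> X86_FEATURE_PREFER_YMM?
	 *                     yes -> prefix##_vpclmul_avx10_256 (256-bit vectors)
	 *                     no  -> prefix##_vpclmul_avx10_512 (512-bit vectors)
	 */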
diff --git a/arch/x86/lib/crc-t10dif-glue.c b/arch/x86/lib/crc-t10dif-glue.c
index 13f07ddc9122cbc315164a632466f7ef4c285e26..f89c335cde3c5b0604c2bdc1f10f09921627f345 100644
--- a/arch/x86/lib/crc-t10dif-glue.c
+++ b/arch/x86/lib/crc-t10dif-glue.c
@@ -1,37 +1,32 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
 /*
- * CRC-T10DIF using PCLMULQDQ instructions
+ * CRC-T10DIF using [V]PCLMULQDQ instructions
  *
  * Copyright 2024 Google LLC
  */
 
-#include <asm/cpufeatures.h>
-#include <asm/simd.h>
-#include <crypto/internal/simd.h>
 #include <linux/crc-t10dif.h>
 #include <linux/module.h>
+#include "crc-pclmul-template.h"
 
 static DEFINE_STATIC_KEY_FALSE(have_pclmulqdq);
 
-asmlinkage u16 crc_t10dif_pcl(u16 init_crc, const u8 *buf, size_t len);
+DECLARE_CRC_PCLMUL_FUNCS(crc16_msb, u16);
 
 u16 crc_t10dif_arch(u16 crc, const u8 *p, size_t len)
 {
-	if (len >= 16 &&
-	    static_key_enabled(&have_pclmulqdq) && crypto_simd_usable()) {
-		kernel_fpu_begin();
-		crc = crc_t10dif_pcl(crc, p, len);
-		kernel_fpu_end();
-		return crc;
-	}
+	CRC_PCLMUL(crc, p, len, crc16_msb, crc16_msb_0x8bb7_consts,
+		   have_pclmulqdq);
 	return crc_t10dif_generic(crc, p, len);
 }
 EXPORT_SYMBOL(crc_t10dif_arch);
 
 static int __init crc_t10dif_x86_init(void)
 {
-	if (boot_cpu_has(X86_FEATURE_PCLMULQDQ))
+	if (boot_cpu_has(X86_FEATURE_PCLMULQDQ)) {
 		static_branch_enable(&have_pclmulqdq);
+		INIT_CRC_PCLMUL(crc16_msb);
+	}
 	return 0;
 }
 arch_initcall(crc_t10dif_x86_init);
@@ -41,11 +36,5 @@ static void __exit crc_t10dif_x86_exit(void)
 }
 module_exit(crc_t10dif_x86_exit);
 
-bool crc_t10dif_is_optimized(void)
-{
-	return static_key_enabled(&have_pclmulqdq);
-}
-EXPORT_SYMBOL(crc_t10dif_is_optimized);
-
-MODULE_DESCRIPTION("CRC-T10DIF using PCLMULQDQ instructions");
+MODULE_DESCRIPTION("CRC-T10DIF using [V]PCLMULQDQ instructions");
 MODULE_LICENSE("GPL");
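After preprocessing, the converted crc_t10dif_arch() is roughly equivalent to the open-coded version it replaces; a sketch of the CRC_PCLMUL() expansion (illustration only, not literal generated code):

	u16 crc_t10dif_arch(u16 crc, const u8 *p, size_t len)
	{
		if (len >= 16 && static_branch_likely(&have_pclmulqdq) &&
		    crypto_simd_usable()) {
			const void *consts_ptr =
				crc16_msb_0x8bb7_consts.fold_across_128_bits_consts;

			kernel_fpu_begin();
			/* Dispatches to the SSE or VPCLMULQDQ variant chosen at boot. */
			crc = static_call(crc16_msb_pclmul)(crc, p, len, consts_ptr);
			kernel_fpu_end();
			return crc;
		}
		return crc_t10dif_generic(crc, p, len);
	}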
diff --git a/arch/x86/lib/crc16-msb-pclmul.S b/arch/x86/lib/crc16-msb-pclmul.S
new file mode 100644
index 0000000000000000000000000000000000000000..e9fe248093a887cff116749afd6b5b0a0b32a35c
--- /dev/null
+++ b/arch/x86/lib/crc16-msb-pclmul.S
@@ -0,0 +1,6 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+// Copyright 2025 Google LLC
+
+#include "crc-pclmul-template.S"
+
+DEFINE_CRC_PCLMUL_FUNCS(crc16_msb, /* bits= */ 16, /* lsb= */ 0)
diff --git a/arch/x86/lib/crc32-glue.c b/arch/x86/lib/crc32-glue.c
index 2dd18a886ded8b85c03ff7290edd9b08b6669b8a..4b4721176799a9a89e9e8265f4d2f56df468be07 100644
--- a/arch/x86/lib/crc32-glue.c
+++ b/arch/x86/lib/crc32-glue.c
@@ -7,43 +7,20 @@
  * Copyright 2024 Google LLC
  */
 
-#include <asm/cpufeatures.h>
-#include <asm/simd.h>
-#include <crypto/internal/simd.h>
 #include <linux/crc32.h>
-#include <linux/linkage.h>
 #include <linux/module.h>
-
-/* minimum size of buffer for crc32_pclmul_le_16 */
-#define CRC32_PCLMUL_MIN_LEN	64
+#include "crc-pclmul-template.h"
 
 static DEFINE_STATIC_KEY_FALSE(have_crc32);
 static DEFINE_STATIC_KEY_FALSE(have_pclmulqdq);
 
-u32 crc32_pclmul_le_16(u32 crc, const u8 *buffer, size_t len);
+DECLARE_CRC_PCLMUL_FUNCS(crc32_lsb, u32);
 
 u32 crc32_le_arch(u32 crc, const u8 *p, size_t len)
 {
-	if (len >= CRC32_PCLMUL_MIN_LEN + 15 &&
-	    static_branch_likely(&have_pclmulqdq) && crypto_simd_usable()) {
-		size_t n = -(uintptr_t)p & 15;
-
-		/* align p to 16-byte boundary */
-		if (n) {
-			crc = crc32_le_base(crc, p, n);
-			p += n;
-			len -= n;
-		}
-		n = round_down(len, 16);
-		kernel_fpu_begin();
-		crc = crc32_pclmul_le_16(crc, p, n);
-		kernel_fpu_end();
-		p += n;
-		len -= n;
-	}
-	if (len)
-		crc = crc32_le_base(crc, p, len);
-	return crc;
+	CRC_PCLMUL(crc, p, len, crc32_lsb, crc32_lsb_0xedb88320_consts,
+		   have_pclmulqdq);
+	return crc32_le_base(crc, p, len);
 }
 EXPORT_SYMBOL(crc32_le_arch);
 
@@ -61,12 +38,12 @@ EXPORT_SYMBOL(crc32_le_arch);
 
 asmlinkage u32 crc32c_x86_3way(u32 crc, const u8 *buffer, size_t len);
 
-u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len)
+u32 crc32c_arch(u32 crc, const u8 *p, size_t len)
 {
 	size_t num_longs;
 
 	if (!static_branch_likely(&have_crc32))
-		return crc32c_le_base(crc, p, len);
+		return crc32c_base(crc, p, len);
 
 	if (IS_ENABLED(CONFIG_X86_64) && len >= CRC32C_PCLMUL_BREAKEVEN &&
 	    static_branch_likely(&have_pclmulqdq) && crypto_simd_usable()) {
@@ -78,14 +55,14 @@ u32 crc32c_le_arch(u32 crc, const u8 *p, size_t len)
 
 	for (num_longs = len / sizeof(unsigned long);
 	     num_longs != 0; num_longs--, p += sizeof(unsigned long))
-		asm(CRC32_INST : "+r" (crc) : "rm" (*(unsigned long *)p));
+		asm(CRC32_INST : "+r" (crc) : ASM_INPUT_RM (*(unsigned long *)p));
 
 	for (len %= sizeof(unsigned long); len; len--, p++)
-		asm("crc32b %1, %0" : "+r" (crc) : "rm" (*p));
+		asm("crc32b %1, %0" : "+r" (crc) : ASM_INPUT_RM (*p));
 
 	return crc;
 }
-EXPORT_SYMBOL(crc32c_le_arch);
+EXPORT_SYMBOL(crc32c_arch);
 
 u32 crc32_be_arch(u32 crc, const u8 *p, size_t len)
 {
@@ -97,8 +74,10 @@ static int __init crc32_x86_init(void)
 {
 	if (boot_cpu_has(X86_FEATURE_XMM4_2))
 		static_branch_enable(&have_crc32);
-	if (boot_cpu_has(X86_FEATURE_PCLMULQDQ))
+	if (boot_cpu_has(X86_FEATURE_PCLMULQDQ)) {
 		static_branch_enable(&have_pclmulqdq);
+		INIT_CRC_PCLMUL(crc32_lsb);
+	}
 	return 0;
 }
 arch_initcall(crc32_x86_init);
diff --git a/arch/x86/lib/crc32-pclmul.S b/arch/x86/lib/crc32-pclmul.S
index f9637789cac19b4702eea336c41f320cffcd4801..f20f40fb0172d7b00fc5d79b366060735013405d 100644
--- a/arch/x86/lib/crc32-pclmul.S
+++ b/arch/x86/lib/crc32-pclmul.S
@@ -1,217 +1,6 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright 2012 Xyratex Technology Limited
- *
- * Using hardware provided PCLMULQDQ instruction to accelerate the CRC32
- * calculation.
- * CRC32 polynomial:0x04c11db7(BE)/0xEDB88320(LE)
- * PCLMULQDQ is a new instruction in Intel SSE4.2, the reference can be found
- * at:
- * http://www.intel.com/products/processor/manuals/
- * Intel(R) 64 and IA-32 Architectures Software Developer's Manual
- * Volume 2B: Instruction Set Reference, N-Z
- *
- * Authors:   Gregory Prestas <Gregory_Prestas@us.xyratex.com>
- *	      Alexander Boyko <Alexander_Boyko@xyratex.com>
- */
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+// Copyright 2025 Google LLC
 
-#include <linux/linkage.h>
+#include "crc-pclmul-template.S"
 
-
-.section .rodata
-.align 16
-/*
- * [x4*128+32 mod P(x) << 32)]'  << 1   = 0x154442bd4
- * #define CONSTANT_R1  0x154442bd4LL
- *
- * [(x4*128-32 mod P(x) << 32)]' << 1   = 0x1c6e41596
- * #define CONSTANT_R2  0x1c6e41596LL
- */
-.Lconstant_R2R1:
-	.octa 0x00000001c6e415960000000154442bd4
-/*
- * [(x128+32 mod P(x) << 32)]'   << 1   = 0x1751997d0
- * #define CONSTANT_R3  0x1751997d0LL
- *
- * [(x128-32 mod P(x) << 32)]'   << 1   = 0x0ccaa009e
- * #define CONSTANT_R4  0x0ccaa009eLL
- */
-.Lconstant_R4R3:
-	.octa 0x00000000ccaa009e00000001751997d0
-/*
- * [(x64 mod P(x) << 32)]'       << 1   = 0x163cd6124
- * #define CONSTANT_R5  0x163cd6124LL
- */
-.Lconstant_R5:
-	.octa 0x00000000000000000000000163cd6124
-.Lconstant_mask32:
-	.octa 0x000000000000000000000000FFFFFFFF
-/*
- * #define CRCPOLY_TRUE_LE_FULL 0x1DB710641LL
- *
- * Barrett Reduction constant (u64`) = u` = (x**64 / P(x))` = 0x1F7011641LL
- * #define CONSTANT_RU  0x1F7011641LL
- */
-.Lconstant_RUpoly:
-	.octa 0x00000001F701164100000001DB710641
-
-#define CONSTANT %xmm0
-
-#ifdef __x86_64__
-#define CRC     %edi
-#define BUF     %rsi
-#define LEN     %rdx
-#else
-#define CRC     %eax
-#define BUF     %edx
-#define LEN     %ecx
-#endif
-
-
-
-.text
-/**
- *      Calculate crc32
- *      CRC - initial crc32
- *      BUF - buffer (16 bytes aligned)
- *      LEN - sizeof buffer (16 bytes aligned), LEN should be greater than 63
- *      return %eax crc32
- *      u32 crc32_pclmul_le_16(u32 crc, const u8 *buffer, size_t len);
- */
-
-SYM_FUNC_START(crc32_pclmul_le_16) /* buffer and buffer size are 16 bytes aligned */
-	movdqa  (BUF), %xmm1
-	movdqa  0x10(BUF), %xmm2
-	movdqa  0x20(BUF), %xmm3
-	movdqa  0x30(BUF), %xmm4
-	movd    CRC, CONSTANT
-	pxor    CONSTANT, %xmm1
-	sub     $0x40, LEN
-	add     $0x40, BUF
-	cmp     $0x40, LEN
-	jb      .Lless_64
-
-#ifdef __x86_64__
-	movdqa .Lconstant_R2R1(%rip), CONSTANT
-#else
-	movdqa .Lconstant_R2R1, CONSTANT
-#endif
-
-.Lloop_64:/*  64 bytes Full cache line folding */
-	prefetchnta    0x40(BUF)
-	movdqa  %xmm1, %xmm5
-	movdqa  %xmm2, %xmm6
-	movdqa  %xmm3, %xmm7
-#ifdef __x86_64__
-	movdqa  %xmm4, %xmm8
-#endif
-	pclmulqdq $0x00, CONSTANT, %xmm1
-	pclmulqdq $0x00, CONSTANT, %xmm2
-	pclmulqdq $0x00, CONSTANT, %xmm3
-#ifdef __x86_64__
-	pclmulqdq $0x00, CONSTANT, %xmm4
-#endif
-	pclmulqdq $0x11, CONSTANT, %xmm5
-	pclmulqdq $0x11, CONSTANT, %xmm6
-	pclmulqdq $0x11, CONSTANT, %xmm7
-#ifdef __x86_64__
-	pclmulqdq $0x11, CONSTANT, %xmm8
-#endif
-	pxor    %xmm5, %xmm1
-	pxor    %xmm6, %xmm2
-	pxor    %xmm7, %xmm3
-#ifdef __x86_64__
-	pxor    %xmm8, %xmm4
-#else
-	/* xmm8 unsupported for x32 */
-	movdqa  %xmm4, %xmm5
-	pclmulqdq $0x00, CONSTANT, %xmm4
-	pclmulqdq $0x11, CONSTANT, %xmm5
-	pxor    %xmm5, %xmm4
-#endif
-
-	pxor    (BUF), %xmm1
-	pxor    0x10(BUF), %xmm2
-	pxor    0x20(BUF), %xmm3
-	pxor    0x30(BUF), %xmm4
-
-	sub     $0x40, LEN
-	add     $0x40, BUF
-	cmp     $0x40, LEN
-	jge     .Lloop_64
-.Lless_64:/*  Folding cache line into 128bit */
-#ifdef __x86_64__
-	movdqa  .Lconstant_R4R3(%rip), CONSTANT
-#else
-	movdqa  .Lconstant_R4R3, CONSTANT
-#endif
-	prefetchnta     (BUF)
-
-	movdqa  %xmm1, %xmm5
-	pclmulqdq $0x00, CONSTANT, %xmm1
-	pclmulqdq $0x11, CONSTANT, %xmm5
-	pxor    %xmm5, %xmm1
-	pxor    %xmm2, %xmm1
-
-	movdqa  %xmm1, %xmm5
-	pclmulqdq $0x00, CONSTANT, %xmm1
-	pclmulqdq $0x11, CONSTANT, %xmm5
-	pxor    %xmm5, %xmm1
-	pxor    %xmm3, %xmm1
-
-	movdqa  %xmm1, %xmm5
-	pclmulqdq $0x00, CONSTANT, %xmm1
-	pclmulqdq $0x11, CONSTANT, %xmm5
-	pxor    %xmm5, %xmm1
-	pxor    %xmm4, %xmm1
-
-	cmp     $0x10, LEN
-	jb      .Lfold_64
-.Lloop_16:/* Folding rest buffer into 128bit */
-	movdqa  %xmm1, %xmm5
-	pclmulqdq $0x00, CONSTANT, %xmm1
-	pclmulqdq $0x11, CONSTANT, %xmm5
-	pxor    %xmm5, %xmm1
-	pxor    (BUF), %xmm1
-	sub     $0x10, LEN
-	add     $0x10, BUF
-	cmp     $0x10, LEN
-	jge     .Lloop_16
-
-.Lfold_64:
-	/* perform the last 64 bit fold, also adds 32 zeroes
-	 * to the input stream */
-	pclmulqdq $0x01, %xmm1, CONSTANT /* R4 * xmm1.low */
-	psrldq  $0x08, %xmm1
-	pxor    CONSTANT, %xmm1
-
-	/* final 32-bit fold */
-	movdqa  %xmm1, %xmm2
-#ifdef __x86_64__
-	movdqa  .Lconstant_R5(%rip), CONSTANT
-	movdqa  .Lconstant_mask32(%rip), %xmm3
-#else
-	movdqa  .Lconstant_R5, CONSTANT
-	movdqa  .Lconstant_mask32, %xmm3
-#endif
-	psrldq  $0x04, %xmm2
-	pand    %xmm3, %xmm1
-	pclmulqdq $0x00, CONSTANT, %xmm1
-	pxor    %xmm2, %xmm1
-
-	/* Finish up with the bit-reversed barrett reduction 64 ==> 32 bits */
-#ifdef __x86_64__
-	movdqa  .Lconstant_RUpoly(%rip), CONSTANT
-#else
-	movdqa  .Lconstant_RUpoly, CONSTANT
-#endif
-	movdqa  %xmm1, %xmm2
-	pand    %xmm3, %xmm1
-	pclmulqdq $0x10, CONSTANT, %xmm1
-	pand    %xmm3, %xmm1
-	pclmulqdq $0x00, CONSTANT, %xmm1
-	pxor    %xmm2, %xmm1
-	pextrd  $0x01, %xmm1, %eax
-
-	RET
-SYM_FUNC_END(crc32_pclmul_le_16)
+DEFINE_CRC_PCLMUL_FUNCS(crc32_lsb, /* bits= */ 32, /* lsb= */ 1)
diff --git a/arch/x86/lib/crc64-glue.c b/arch/x86/lib/crc64-glue.c
new file mode 100644
index 0000000000000000000000000000000000000000..b0e1b719ecbfb8ba876ec19c75aecf8bd581d2eb
--- /dev/null
+++ b/arch/x86/lib/crc64-glue.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * CRC64 using [V]PCLMULQDQ instructions
+ *
+ * Copyright 2025 Google LLC
+ */
+
+#include <linux/crc64.h>
+#include <linux/module.h>
+#include "crc-pclmul-template.h"
+
+static DEFINE_STATIC_KEY_FALSE(have_pclmulqdq);
+
+DECLARE_CRC_PCLMUL_FUNCS(crc64_msb, u64);
+DECLARE_CRC_PCLMUL_FUNCS(crc64_lsb, u64);
+
+u64 crc64_be_arch(u64 crc, const u8 *p, size_t len)
+{
+	CRC_PCLMUL(crc, p, len, crc64_msb, crc64_msb_0x42f0e1eba9ea3693_consts,
+		   have_pclmulqdq);
+	return crc64_be_generic(crc, p, len);
+}
+EXPORT_SYMBOL_GPL(crc64_be_arch);
+
+u64 crc64_nvme_arch(u64 crc, const u8 *p, size_t len)
+{
+	CRC_PCLMUL(crc, p, len, crc64_lsb, crc64_lsb_0x9a6c9329ac4bc9b5_consts,
+		   have_pclmulqdq);
+	return crc64_nvme_generic(crc, p, len);
+}
+EXPORT_SYMBOL_GPL(crc64_nvme_arch);
+
+static int __init crc64_x86_init(void)
+{
+	if (boot_cpu_has(X86_FEATURE_PCLMULQDQ)) {
+		static_branch_enable(&have_pclmulqdq);
+		INIT_CRC_PCLMUL(crc64_msb);
+		INIT_CRC_PCLMUL(crc64_lsb);
+	}
+	return 0;
+}
+arch_initcall(crc64_x86_init);
+
+static void __exit crc64_x86_exit(void)
+{
+}
+module_exit(crc64_x86_exit);
+
+MODULE_DESCRIPTION("CRC64 using [V]PCLMULQDQ instructions");
+MODULE_LICENSE("GPL");
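The <linux/crc64.h> wrapper that callers such as block/t10-pi.c go through is not part of this hunk; assuming it follows the same dispatch pattern as the other CRC library headers, the glue code above is reached roughly like this (sketch only; the config symbol name is an assumption):

	/* Hedged sketch of the assumed <linux/crc64.h> wrapper, not from this patch. */
	static inline u64 crc64_nvme(u64 crc, const void *p, size_t len)
	{
		if (IS_ENABLED(CONFIG_CRC64_ARCH))	/* assumed config symbol */
			return crc64_nvme_arch(crc, p, len);
		return crc64_nvme_generic(crc, p, len);
	}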
diff --git a/arch/x86/lib/crc64-pclmul.S b/arch/x86/lib/crc64-pclmul.S
new file mode 100644
index 0000000000000000000000000000000000000000..4173051b5197cdf9cc820c6b8bab6c0a9624d893
--- /dev/null
+++ b/arch/x86/lib/crc64-pclmul.S
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+// Copyright 2025 Google LLC
+
+#include "crc-pclmul-template.S"
+
+DEFINE_CRC_PCLMUL_FUNCS(crc64_msb, /* bits= */ 64, /* lsb= */ 0)
+DEFINE_CRC_PCLMUL_FUNCS(crc64_lsb, /* bits= */ 64, /* lsb= */ 1)
diff --git a/arch/x86/lib/crct10dif-pcl-asm_64.S b/arch/x86/lib/crct10dif-pcl-asm_64.S
deleted file mode 100644
index 5286db5b8165e63244d305304fe9286ba5dd8fc3..0000000000000000000000000000000000000000
--- a/arch/x86/lib/crct10dif-pcl-asm_64.S
+++ /dev/null
@@ -1,332 +0,0 @@
-########################################################################
-# Implement fast CRC-T10DIF computation with SSE and PCLMULQDQ instructions
-#
-# Copyright (c) 2013, Intel Corporation
-#
-# Authors:
-#     Erdinc Ozturk <erdinc.ozturk@intel.com>
-#     Vinodh Gopal <vinodh.gopal@intel.com>
-#     James Guilford <james.guilford@intel.com>
-#     Tim Chen <tim.c.chen@linux.intel.com>
-#
-# This software is available to you under a choice of one of two
-# licenses.  You may choose to be licensed under the terms of the GNU
-# General Public License (GPL) Version 2, available from the file
-# COPYING in the main directory of this source tree, or the
-# OpenIB.org BSD license below:
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions are
-# met:
-#
-# * Redistributions of source code must retain the above copyright
-#   notice, this list of conditions and the following disclaimer.
-#
-# * Redistributions in binary form must reproduce the above copyright
-#   notice, this list of conditions and the following disclaimer in the
-#   documentation and/or other materials provided with the
-#   distribution.
-#
-# * Neither the name of the Intel Corporation nor the names of its
-#   contributors may be used to endorse or promote products derived from
-#   this software without specific prior written permission.
-#
-#
-# THIS SOFTWARE IS PROVIDED BY INTEL CORPORATION ""AS IS"" AND ANY
-# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL CORPORATION OR
-# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
-# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
-# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-#
-#       Reference paper titled "Fast CRC Computation for Generic
-#	Polynomials Using PCLMULQDQ Instruction"
-#       URL: http://www.intel.com/content/dam/www/public/us/en/documents
-#  /white-papers/fast-crc-computation-generic-polynomials-pclmulqdq-paper.pdf
-#
-
-#include <linux/linkage.h>
-
-.text
-
-#define		init_crc	%edi
-#define		buf		%rsi
-#define		len		%rdx
-
-#define		FOLD_CONSTS	%xmm10
-#define		BSWAP_MASK	%xmm11
-
-# Fold reg1, reg2 into the next 32 data bytes, storing the result back into
-# reg1, reg2.
-.macro	fold_32_bytes	offset, reg1, reg2
-	movdqu	\offset(buf), %xmm9
-	movdqu	\offset+16(buf), %xmm12
-	pshufb	BSWAP_MASK, %xmm9
-	pshufb	BSWAP_MASK, %xmm12
-	movdqa	\reg1, %xmm8
-	movdqa	\reg2, %xmm13
-	pclmulqdq	$0x00, FOLD_CONSTS, \reg1
-	pclmulqdq	$0x11, FOLD_CONSTS, %xmm8
-	pclmulqdq	$0x00, FOLD_CONSTS, \reg2
-	pclmulqdq	$0x11, FOLD_CONSTS, %xmm13
-	pxor	%xmm9 , \reg1
-	xorps	%xmm8 , \reg1
-	pxor	%xmm12, \reg2
-	xorps	%xmm13, \reg2
-.endm
-
-# Fold src_reg into dst_reg.
-.macro	fold_16_bytes	src_reg, dst_reg
-	movdqa	\src_reg, %xmm8
-	pclmulqdq	$0x11, FOLD_CONSTS, \src_reg
-	pclmulqdq	$0x00, FOLD_CONSTS, %xmm8
-	pxor	%xmm8, \dst_reg
-	xorps	\src_reg, \dst_reg
-.endm
-
-#
-# u16 crc_t10dif_pcl(u16 init_crc, const *u8 buf, size_t len);
-#
-# Assumes len >= 16.
-#
-SYM_FUNC_START(crc_t10dif_pcl)
-
-	movdqa	.Lbswap_mask(%rip), BSWAP_MASK
-
-	# For sizes less than 256 bytes, we can't fold 128 bytes at a time.
-	cmp	$256, len
-	jl	.Lless_than_256_bytes
-
-	# Load the first 128 data bytes.  Byte swapping is necessary to make the
-	# bit order match the polynomial coefficient order.
-	movdqu	16*0(buf), %xmm0
-	movdqu	16*1(buf), %xmm1
-	movdqu	16*2(buf), %xmm2
-	movdqu	16*3(buf), %xmm3
-	movdqu	16*4(buf), %xmm4
-	movdqu	16*5(buf), %xmm5
-	movdqu	16*6(buf), %xmm6
-	movdqu	16*7(buf), %xmm7
-	add	$128, buf
-	pshufb	BSWAP_MASK, %xmm0
-	pshufb	BSWAP_MASK, %xmm1
-	pshufb	BSWAP_MASK, %xmm2
-	pshufb	BSWAP_MASK, %xmm3
-	pshufb	BSWAP_MASK, %xmm4
-	pshufb	BSWAP_MASK, %xmm5
-	pshufb	BSWAP_MASK, %xmm6
-	pshufb	BSWAP_MASK, %xmm7
-
-	# XOR the first 16 data *bits* with the initial CRC value.
-	pxor	%xmm8, %xmm8
-	pinsrw	$7, init_crc, %xmm8
-	pxor	%xmm8, %xmm0
-
-	movdqa	.Lfold_across_128_bytes_consts(%rip), FOLD_CONSTS
-
-	# Subtract 128 for the 128 data bytes just consumed.  Subtract another
-	# 128 to simplify the termination condition of the following loop.
-	sub	$256, len
-
-	# While >= 128 data bytes remain (not counting xmm0-7), fold the 128
-	# bytes xmm0-7 into them, storing the result back into xmm0-7.
-.Lfold_128_bytes_loop:
-	fold_32_bytes	0, %xmm0, %xmm1
-	fold_32_bytes	32, %xmm2, %xmm3
-	fold_32_bytes	64, %xmm4, %xmm5
-	fold_32_bytes	96, %xmm6, %xmm7
-	add	$128, buf
-	sub	$128, len
-	jge	.Lfold_128_bytes_loop
-
-	# Now fold the 112 bytes in xmm0-xmm6 into the 16 bytes in xmm7.
-
-	# Fold across 64 bytes.
-	movdqa	.Lfold_across_64_bytes_consts(%rip), FOLD_CONSTS
-	fold_16_bytes	%xmm0, %xmm4
-	fold_16_bytes	%xmm1, %xmm5
-	fold_16_bytes	%xmm2, %xmm6
-	fold_16_bytes	%xmm3, %xmm7
-	# Fold across 32 bytes.
-	movdqa	.Lfold_across_32_bytes_consts(%rip), FOLD_CONSTS
-	fold_16_bytes	%xmm4, %xmm6
-	fold_16_bytes	%xmm5, %xmm7
-	# Fold across 16 bytes.
-	movdqa	.Lfold_across_16_bytes_consts(%rip), FOLD_CONSTS
-	fold_16_bytes	%xmm6, %xmm7
-
-	# Add 128 to get the correct number of data bytes remaining in 0...127
-	# (not counting xmm7), following the previous extra subtraction by 128.
-	# Then subtract 16 to simplify the termination condition of the
-	# following loop.
-	add	$128-16, len
-
-	# While >= 16 data bytes remain (not counting xmm7), fold the 16 bytes
-	# xmm7 into them, storing the result back into xmm7.
-	jl	.Lfold_16_bytes_loop_done
-.Lfold_16_bytes_loop:
-	movdqa	%xmm7, %xmm8
-	pclmulqdq	$0x11, FOLD_CONSTS, %xmm7
-	pclmulqdq	$0x00, FOLD_CONSTS, %xmm8
-	pxor	%xmm8, %xmm7
-	movdqu	(buf), %xmm0
-	pshufb	BSWAP_MASK, %xmm0
-	pxor	%xmm0 , %xmm7
-	add	$16, buf
-	sub	$16, len
-	jge	.Lfold_16_bytes_loop
-
-.Lfold_16_bytes_loop_done:
-	# Add 16 to get the correct number of data bytes remaining in 0...15
-	# (not counting xmm7), following the previous extra subtraction by 16.
-	add	$16, len
-	je	.Lreduce_final_16_bytes
-
-.Lhandle_partial_segment:
-	# Reduce the last '16 + len' bytes where 1 <= len <= 15 and the first 16
-	# bytes are in xmm7 and the rest are the remaining data in 'buf'.  To do
-	# this without needing a fold constant for each possible 'len', redivide
-	# the bytes into a first chunk of 'len' bytes and a second chunk of 16
-	# bytes, then fold the first chunk into the second.
-
-	movdqa	%xmm7, %xmm2
-
-	# xmm1 = last 16 original data bytes
-	movdqu	-16(buf, len), %xmm1
-	pshufb	BSWAP_MASK, %xmm1
-
-	# xmm2 = high order part of second chunk: xmm7 left-shifted by 'len' bytes.
-	lea	.Lbyteshift_table+16(%rip), %rax
-	sub	len, %rax
-	movdqu	(%rax), %xmm0
-	pshufb	%xmm0, %xmm2
-
-	# xmm7 = first chunk: xmm7 right-shifted by '16-len' bytes.
-	pxor	.Lmask1(%rip), %xmm0
-	pshufb	%xmm0, %xmm7
-
-	# xmm1 = second chunk: 'len' bytes from xmm1 (low-order bytes),
-	# then '16-len' bytes from xmm2 (high-order bytes).
-	pblendvb	%xmm2, %xmm1	#xmm0 is implicit
-
-	# Fold the first chunk into the second chunk, storing the result in xmm7.
-	movdqa	%xmm7, %xmm8
-	pclmulqdq	$0x11, FOLD_CONSTS, %xmm7
-	pclmulqdq	$0x00, FOLD_CONSTS, %xmm8
-	pxor	%xmm8, %xmm7
-	pxor	%xmm1, %xmm7
-
-.Lreduce_final_16_bytes:
-	# Reduce the 128-bit value M(x), stored in xmm7, to the final 16-bit CRC
-
-	# Load 'x^48 * (x^48 mod G(x))' and 'x^48 * (x^80 mod G(x))'.
-	movdqa	.Lfinal_fold_consts(%rip), FOLD_CONSTS
-
-	# Fold the high 64 bits into the low 64 bits, while also multiplying by
-	# x^64.  This produces a 128-bit value congruent to x^64 * M(x) and
-	# whose low 48 bits are 0.
-	movdqa	%xmm7, %xmm0
-	pclmulqdq	$0x11, FOLD_CONSTS, %xmm7 # high bits * x^48 * (x^80 mod G(x))
-	pslldq	$8, %xmm0
-	pxor	%xmm0, %xmm7			  # + low bits * x^64
-
-	# Fold the high 32 bits into the low 96 bits.  This produces a 96-bit
-	# value congruent to x^64 * M(x) and whose low 48 bits are 0.
-	movdqa	%xmm7, %xmm0
-	pand	.Lmask2(%rip), %xmm0		  # zero high 32 bits
-	psrldq	$12, %xmm7			  # extract high 32 bits
-	pclmulqdq	$0x00, FOLD_CONSTS, %xmm7 # high 32 bits * x^48 * (x^48 mod G(x))
-	pxor	%xmm0, %xmm7			  # + low bits
-
-	# Load G(x) and floor(x^48 / G(x)).
-	movdqa	.Lbarrett_reduction_consts(%rip), FOLD_CONSTS
-
-	# Use Barrett reduction to compute the final CRC value.
-	movdqa	%xmm7, %xmm0
-	pclmulqdq	$0x11, FOLD_CONSTS, %xmm7 # high 32 bits * floor(x^48 / G(x))
-	psrlq	$32, %xmm7			  # /= x^32
-	pclmulqdq	$0x00, FOLD_CONSTS, %xmm7 # *= G(x)
-	psrlq	$48, %xmm0
-	pxor	%xmm7, %xmm0		     # + low 16 nonzero bits
-	# Final CRC value (x^16 * M(x)) mod G(x) is in low 16 bits of xmm0.
-
-	pextrw	$0, %xmm0, %eax
-	RET
-
-.align 16
-.Lless_than_256_bytes:
-	# Checksumming a buffer of length 16...255 bytes
-
-	# Load the first 16 data bytes.
-	movdqu	(buf), %xmm7
-	pshufb	BSWAP_MASK, %xmm7
-	add	$16, buf
-
-	# XOR the first 16 data *bits* with the initial CRC value.
-	pxor	%xmm0, %xmm0
-	pinsrw	$7, init_crc, %xmm0
-	pxor	%xmm0, %xmm7
-
-	movdqa	.Lfold_across_16_bytes_consts(%rip), FOLD_CONSTS
-	cmp	$16, len
-	je	.Lreduce_final_16_bytes		# len == 16
-	sub	$32, len
-	jge	.Lfold_16_bytes_loop		# 32 <= len <= 255
-	add	$16, len
-	jmp	.Lhandle_partial_segment	# 17 <= len <= 31
-SYM_FUNC_END(crc_t10dif_pcl)
-
-.section	.rodata, "a", @progbits
-.align 16
-
-# Fold constants precomputed from the polynomial 0x18bb7
-# G(x) = x^16 + x^15 + x^11 + x^9 + x^8 + x^7 + x^5 + x^4 + x^2 + x^1 + x^0
-.Lfold_across_128_bytes_consts:
-	.quad		0x0000000000006123	# x^(8*128)	mod G(x)
-	.quad		0x0000000000002295	# x^(8*128+64)	mod G(x)
-.Lfold_across_64_bytes_consts:
-	.quad		0x0000000000001069	# x^(4*128)	mod G(x)
-	.quad		0x000000000000dd31	# x^(4*128+64)	mod G(x)
-.Lfold_across_32_bytes_consts:
-	.quad		0x000000000000857d	# x^(2*128)	mod G(x)
-	.quad		0x0000000000007acc	# x^(2*128+64)	mod G(x)
-.Lfold_across_16_bytes_consts:
-	.quad		0x000000000000a010	# x^(1*128)	mod G(x)
-	.quad		0x0000000000001faa	# x^(1*128+64)	mod G(x)
-.Lfinal_fold_consts:
-	.quad		0x1368000000000000	# x^48 * (x^48 mod G(x))
-	.quad		0x2d56000000000000	# x^48 * (x^80 mod G(x))
-.Lbarrett_reduction_consts:
-	.quad		0x0000000000018bb7	# G(x)
-	.quad		0x00000001f65a57f8	# floor(x^48 / G(x))
-
-.section	.rodata.cst16.mask1, "aM", @progbits, 16
-.align 16
-.Lmask1:
-	.octa	0x80808080808080808080808080808080
-
-.section	.rodata.cst16.mask2, "aM", @progbits, 16
-.align 16
-.Lmask2:
-	.octa	0x00000000FFFFFFFFFFFFFFFFFFFFFFFF
-
-.section	.rodata.cst16.bswap_mask, "aM", @progbits, 16
-.align 16
-.Lbswap_mask:
-	.octa	0x000102030405060708090A0B0C0D0E0F
-
-.section	.rodata.cst32.byteshift_table, "aM", @progbits, 32
-.align 16
-# For 1 <= len <= 15, the 16-byte vector beginning at &byteshift_table[16 - len]
-# is the index vector to shift left by 'len' bytes, and is also {0x80, ...,
-# 0x80} XOR the index vector to shift right by '16 - len' bytes.
-.Lbyteshift_table:
-	.byte		 0x0, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87
-	.byte		0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f
-	.byte		 0x0,  0x1,  0x2,  0x3,  0x4,  0x5,  0x6,  0x7
-	.byte		 0x8,  0x9,  0xa,  0xb,  0xc,  0xd,  0xe , 0x0
diff --git a/block/Kconfig b/block/Kconfig
index 5b623b876d3b4a2c55c2a7a2f69c4dc76a5d470a..df8973bc0539118de514088d7533083bad753411 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -63,7 +63,7 @@ config BLK_DEV_BSGLIB
 config BLK_DEV_INTEGRITY
 	bool "Block layer data integrity support"
 	select CRC_T10DIF
-	select CRC64_ROCKSOFT
+	select CRC64
 	help
 	Some storage devices allow extra information to be
 	stored/retrieved to help protect the data.  The block layer
diff --git a/block/t10-pi.c b/block/t10-pi.c
index de172d56b1f3b5dd5ef963e2482eeae7afe45ab6..851db518ee5e89fed724a5c60878ee62c773e968 100644
--- a/block/t10-pi.c
+++ b/block/t10-pi.c
@@ -210,7 +210,7 @@ static void t10_pi_type1_complete(struct request *rq, unsigned int nr_bytes)
 
 static __be64 ext_pi_crc64(u64 crc, void *data, unsigned int len)
 {
-	return cpu_to_be64(crc64_rocksoft_update(crc, data, len));
+	return cpu_to_be64(crc64_nvme(crc, data, len));
 }
 
 static void ext_pi_crc64_generate(struct blk_integrity_iter *iter,
diff --git a/crypto/Kconfig b/crypto/Kconfig
index 74ae5f52b784053848c7d93acc663532a3e5cd42..b8a436edf4c3cd9584ecabef3ab1afa2d8697259 100644
--- a/crypto/Kconfig
+++ b/crypto/Kconfig
@@ -1081,26 +1081,6 @@ config CRYPTO_CRC32
 
 	  Used by RoCEv2 and f2fs.
 
-config CRYPTO_CRCT10DIF
-	tristate "CRCT10DIF"
-	select CRYPTO_HASH
-	select CRC_T10DIF
-	help
-	  CRC16 CRC algorithm used for the T10 (SCSI) Data Integrity Field (DIF)
-
-	  CRC algorithm used by the SCSI Block Commands standard.
-
-config CRYPTO_CRC64_ROCKSOFT
-	tristate "CRC64 based on Rocksoft Model algorithm"
-	depends on CRC64
-	select CRYPTO_HASH
-	help
-	  CRC64 CRC algorithm based on the Rocksoft Model CRC Algorithm
-
-	  Used by the NVMe implementation of T10 DIF (BLK_DEV_INTEGRITY)
-
-	  See https://zlib.net/crc_v3.txt
-
 endmenu
 
 menu "Compression"
diff --git a/crypto/Makefile b/crypto/Makefile
index f67e853c469021df630f94114ef4da00a292973c..2f7d64eeb0080ce3a1d0bcb002fb680581660c8d 100644
--- a/crypto/Makefile
+++ b/crypto/Makefile
@@ -155,9 +155,6 @@ obj-$(CONFIG_CRYPTO_CRC32C) += crc32c_generic.o
 obj-$(CONFIG_CRYPTO_CRC32) += crc32_generic.o
 CFLAGS_crc32c_generic.o += -DARCH=$(ARCH)
 CFLAGS_crc32_generic.o += -DARCH=$(ARCH)
-obj-$(CONFIG_CRYPTO_CRCT10DIF) += crct10dif_generic.o
-CFLAGS_crct10dif_generic.o += -DARCH=$(ARCH)
-obj-$(CONFIG_CRYPTO_CRC64_ROCKSOFT) += crc64_rocksoft_generic.o
 obj-$(CONFIG_CRYPTO_AUTHENC) += authenc.o authencesn.o
 obj-$(CONFIG_CRYPTO_LZO) += lzo.o lzo-rle.o
 obj-$(CONFIG_CRYPTO_LZ4) += lz4.o
diff --git a/crypto/crc32c_generic.c b/crypto/crc32c_generic.c
index 985da981d6e2ac96a03d2a8b17b87cba46131fa0..b1a36d32dc50cdab9511212f0cf12326ddcb19d3 100644
--- a/crypto/crc32c_generic.c
+++ b/crypto/crc32c_generic.c
@@ -85,7 +85,7 @@ static int chksum_update(struct shash_desc *desc, const u8 *data,
 {
 	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
 
-	ctx->crc = crc32c_le_base(ctx->crc, data, length);
+	ctx->crc = crc32c_base(ctx->crc, data, length);
 	return 0;
 }
 
@@ -94,7 +94,7 @@ static int chksum_update_arch(struct shash_desc *desc, const u8 *data,
 {
 	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
 
-	ctx->crc = __crc32c_le(ctx->crc, data, length);
+	ctx->crc = crc32c(ctx->crc, data, length);
 	return 0;
 }
 
@@ -108,14 +108,14 @@ static int chksum_final(struct shash_desc *desc, u8 *out)
 
 static int __chksum_finup(u32 *crcp, const u8 *data, unsigned int len, u8 *out)
 {
-	put_unaligned_le32(~crc32c_le_base(*crcp, data, len), out);
+	put_unaligned_le32(~crc32c_base(*crcp, data, len), out);
 	return 0;
 }
 
 static int __chksum_finup_arch(u32 *crcp, const u8 *data, unsigned int len,
 			       u8 *out)
 {
-	put_unaligned_le32(~__crc32c_le(*crcp, data, len), out);
+	put_unaligned_le32(~crc32c(*crcp, data, len), out);
 	return 0;
 }
 
diff --git a/crypto/crc64_rocksoft_generic.c b/crypto/crc64_rocksoft_generic.c
deleted file mode 100644
index ce0f3059b9125a99d4a11c219a076b7aa02f0c40..0000000000000000000000000000000000000000
--- a/crypto/crc64_rocksoft_generic.c
+++ /dev/null
@@ -1,89 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-
-#include <linux/crc64.h>
-#include <linux/module.h>
-#include <crypto/internal/hash.h>
-#include <linux/unaligned.h>
-
-static int chksum_init(struct shash_desc *desc)
-{
-	u64 *crc = shash_desc_ctx(desc);
-
-	*crc = 0;
-
-	return 0;
-}
-
-static int chksum_update(struct shash_desc *desc, const u8 *data,
-			 unsigned int length)
-{
-	u64 *crc = shash_desc_ctx(desc);
-
-	*crc = crc64_rocksoft_generic(*crc, data, length);
-
-	return 0;
-}
-
-static int chksum_final(struct shash_desc *desc, u8 *out)
-{
-	u64 *crc = shash_desc_ctx(desc);
-
-	put_unaligned_le64(*crc, out);
-	return 0;
-}
-
-static int __chksum_finup(u64 crc, const u8 *data, unsigned int len, u8 *out)
-{
-	crc = crc64_rocksoft_generic(crc, data, len);
-	put_unaligned_le64(crc, out);
-	return 0;
-}
-
-static int chksum_finup(struct shash_desc *desc, const u8 *data,
-			unsigned int len, u8 *out)
-{
-	u64 *crc = shash_desc_ctx(desc);
-
-	return __chksum_finup(*crc, data, len, out);
-}
-
-static int chksum_digest(struct shash_desc *desc, const u8 *data,
-			 unsigned int length, u8 *out)
-{
-	return __chksum_finup(0, data, length, out);
-}
-
-static struct shash_alg alg = {
-	.digestsize	= 	sizeof(u64),
-	.init		=	chksum_init,
-	.update		=	chksum_update,
-	.final		=	chksum_final,
-	.finup		=	chksum_finup,
-	.digest		=	chksum_digest,
-	.descsize	=	sizeof(u64),
-	.base		=	{
-		.cra_name		=	CRC64_ROCKSOFT_STRING,
-		.cra_driver_name	=	"crc64-rocksoft-generic",
-		.cra_priority		=	200,
-		.cra_blocksize		=	1,
-		.cra_module		=	THIS_MODULE,
-	}
-};
-
-static int __init crc64_rocksoft_init(void)
-{
-	return crypto_register_shash(&alg);
-}
-
-static void __exit crc64_rocksoft_exit(void)
-{
-	crypto_unregister_shash(&alg);
-}
-
-module_init(crc64_rocksoft_init);
-module_exit(crc64_rocksoft_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_DESCRIPTION("Rocksoft model CRC64 calculation.");
-MODULE_ALIAS_CRYPTO("crc64-rocksoft");
-MODULE_ALIAS_CRYPTO("crc64-rocksoft-generic");
diff --git a/crypto/crct10dif_generic.c b/crypto/crct10dif_generic.c
deleted file mode 100644
index 259cb01932cb5e970c5dd91f13d025882651d061..0000000000000000000000000000000000000000
--- a/crypto/crct10dif_generic.c
+++ /dev/null
@@ -1,168 +0,0 @@
-/*
- * Cryptographic API.
- *
- * T10 Data Integrity Field CRC16 Crypto Transform
- *
- * Copyright (c) 2007 Oracle Corporation.  All rights reserved.
- * Written by Martin K. Petersen <martin.petersen@oracle.com>
- * Copyright (C) 2013 Intel Corporation
- * Author: Tim Chen <tim.c.chen@linux.intel.com>
- *
- * This program is free software; you can redistribute it and/or modify it
- * under the terms of the GNU General Public License as published by the Free
- * Software Foundation; either version 2 of the License, or (at your option)
- * any later version.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- *
- */
-
-#include <linux/module.h>
-#include <linux/crc-t10dif.h>
-#include <crypto/internal/hash.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-
-struct chksum_desc_ctx {
-	__u16 crc;
-};
-
-/*
- * Steps through buffer one byte at a time, calculates reflected
- * crc using table.
- */
-
-static int chksum_init(struct shash_desc *desc)
-{
-	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
-	ctx->crc = 0;
-
-	return 0;
-}
-
-static int chksum_update(struct shash_desc *desc, const u8 *data,
-			 unsigned int length)
-{
-	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
-	ctx->crc = crc_t10dif_generic(ctx->crc, data, length);
-	return 0;
-}
-
-static int chksum_update_arch(struct shash_desc *desc, const u8 *data,
-			      unsigned int length)
-{
-	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
-	ctx->crc = crc_t10dif_update(ctx->crc, data, length);
-	return 0;
-}
-
-static int chksum_final(struct shash_desc *desc, u8 *out)
-{
-	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
-	*(__u16 *)out = ctx->crc;
-	return 0;
-}
-
-static int __chksum_finup(__u16 crc, const u8 *data, unsigned int len, u8 *out)
-{
-	*(__u16 *)out = crc_t10dif_generic(crc, data, len);
-	return 0;
-}
-
-static int __chksum_finup_arch(__u16 crc, const u8 *data, unsigned int len,
-			       u8 *out)
-{
-	*(__u16 *)out = crc_t10dif_update(crc, data, len);
-	return 0;
-}
-
-static int chksum_finup(struct shash_desc *desc, const u8 *data,
-			unsigned int len, u8 *out)
-{
-	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
-	return __chksum_finup(ctx->crc, data, len, out);
-}
-
-static int chksum_finup_arch(struct shash_desc *desc, const u8 *data,
-			     unsigned int len, u8 *out)
-{
-	struct chksum_desc_ctx *ctx = shash_desc_ctx(desc);
-
-	return __chksum_finup_arch(ctx->crc, data, len, out);
-}
-
-static int chksum_digest(struct shash_desc *desc, const u8 *data,
-			 unsigned int length, u8 *out)
-{
-	return __chksum_finup(0, data, length, out);
-}
-
-static int chksum_digest_arch(struct shash_desc *desc, const u8 *data,
-			      unsigned int length, u8 *out)
-{
-	return __chksum_finup_arch(0, data, length, out);
-}
-
-static struct shash_alg algs[] = {{
-	.digestsize		= CRC_T10DIF_DIGEST_SIZE,
-	.init			= chksum_init,
-	.update			= chksum_update,
-	.final			= chksum_final,
-	.finup			= chksum_finup,
-	.digest			= chksum_digest,
-	.descsize		= sizeof(struct chksum_desc_ctx),
-	.base.cra_name		= "crct10dif",
-	.base.cra_driver_name	= "crct10dif-generic",
-	.base.cra_priority	= 100,
-	.base.cra_blocksize	= CRC_T10DIF_BLOCK_SIZE,
-	.base.cra_module	= THIS_MODULE,
-}, {
-	.digestsize		= CRC_T10DIF_DIGEST_SIZE,
-	.init			= chksum_init,
-	.update			= chksum_update_arch,
-	.final			= chksum_final,
-	.finup			= chksum_finup_arch,
-	.digest			= chksum_digest_arch,
-	.descsize		= sizeof(struct chksum_desc_ctx),
-	.base.cra_name		= "crct10dif",
-	.base.cra_driver_name	= "crct10dif-" __stringify(ARCH),
-	.base.cra_priority	= 150,
-	.base.cra_blocksize	= CRC_T10DIF_BLOCK_SIZE,
-	.base.cra_module	= THIS_MODULE,
-}};
-
-static int num_algs;
-
-static int __init crct10dif_mod_init(void)
-{
-	/* register the arch flavor only if it differs from the generic one */
-	num_algs = 1 + crc_t10dif_is_optimized();
-
-	return crypto_register_shashes(algs, num_algs);
-}
-
-static void __exit crct10dif_mod_fini(void)
-{
-	crypto_unregister_shashes(algs, num_algs);
-}
-
-subsys_initcall(crct10dif_mod_init);
-module_exit(crct10dif_mod_fini);
-
-MODULE_AUTHOR("Tim Chen <tim.c.chen@linux.intel.com>");
-MODULE_DESCRIPTION("T10 DIF CRC calculation.");
-MODULE_LICENSE("GPL");
-MODULE_ALIAS_CRYPTO("crct10dif");
-MODULE_ALIAS_CRYPTO("crct10dif-generic");
diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c
index f618f61c5615c6ecf2a9f902bd2e1cc4920b7aca..96f4a66be14c7bdbf36a8783c3ea7950628703e0 100644
--- a/crypto/tcrypt.c
+++ b/crypto/tcrypt.c
@@ -1855,10 +1855,6 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
 		ret = min(ret, tcrypt_test("ghash"));
 		break;
 
-	case 47:
-		ret = min(ret, tcrypt_test("crct10dif"));
-		break;
-
 	case 48:
 		ret = min(ret, tcrypt_test("sha3-224"));
 		break;
@@ -2473,10 +2469,6 @@ static int do_test(const char *alg, u32 type, u32 mask, int m, u32 num_mb)
 		test_hash_speed("crc32c", sec, generic_hash_speed_template);
 		if (mode > 300 && mode < 400) break;
 		fallthrough;
-	case 320:
-		test_hash_speed("crct10dif", sec, generic_hash_speed_template);
-		if (mode > 300 && mode < 400) break;
-		fallthrough;
 	case 321:
 		test_hash_speed("poly1305", sec, poly1305_speed_template);
 		if (mode > 300 && mode < 400) break;
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index b69877db3f33dee9affbe2d343cd4fc900a37ffd..14171dd4ad26f4a8e3482b1a5f3ff2a887fdbdd2 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -4760,20 +4760,6 @@ static const struct alg_test_desc alg_test_descs[] = {
 		.suite = {
 			.hash = __VECS(crc32c_tv_template)
 		}
-	}, {
-		.alg = "crc64-rocksoft",
-		.test = alg_test_hash,
-		.fips_allowed = 1,
-		.suite = {
-			.hash = __VECS(crc64_rocksoft_tv_template)
-		}
-	}, {
-		.alg = "crct10dif",
-		.test = alg_test_hash,
-		.fips_allowed = 1,
-		.suite = {
-			.hash = __VECS(crct10dif_tv_template)
-		}
 	}, {
 		.alg = "ctr(aes)",
 		.test = alg_test_skcipher,
diff --git a/crypto/testmgr.h b/crypto/testmgr.h
index d754ab997186681d221d79dac6baf20fcc8f3543..d3a99d15a3e57ef122323eb77fc960de2beb0c4e 100644
--- a/crypto/testmgr.h
+++ b/crypto/testmgr.h
@@ -6017,309 +6017,6 @@ static const struct hash_testvec rmd160_tv_template[] = {
 	}
 };
 
-static const u8 zeroes[4096] = { [0 ... 4095] = 0 };
-static const u8 ones[4096] = { [0 ... 4095] = 0xff };
-
-static const struct hash_testvec crc64_rocksoft_tv_template[] = {
-	{
-		.plaintext	= zeroes,
-		.psize		= 4096,
-		.digest         = "\x4e\xb6\x22\xeb\x67\xd3\x82\x64",
-	}, {
-		.plaintext	= ones,
-		.psize		= 4096,
-		.digest         = "\xac\xa3\xec\x02\x73\xba\xdd\xc0",
-	}
-};
-
-static const struct hash_testvec crct10dif_tv_template[] = {
-	{
-		.plaintext	= "abc",
-		.psize		= 3,
-		.digest		= (u8 *)(u16 []){ 0x443b },
-	}, {
-		.plaintext 	= "1234567890123456789012345678901234567890"
-				  "123456789012345678901234567890123456789",
-		.psize		= 79,
-		.digest 	= (u8 *)(u16 []){ 0x4b70 },
-	}, {
-		.plaintext	= "abcdddddddddddddddddddddddddddddddddddddddd"
-				  "ddddddddddddd",
-		.psize		= 56,
-		.digest		= (u8 *)(u16 []){ 0x9ce3 },
-	}, {
-		.plaintext 	= "1234567890123456789012345678901234567890"
-				  "1234567890123456789012345678901234567890"
-				  "1234567890123456789012345678901234567890"
-				  "1234567890123456789012345678901234567890"
-				  "1234567890123456789012345678901234567890"
-				  "1234567890123456789012345678901234567890"
-				  "1234567890123456789012345678901234567890"
-				  "123456789012345678901234567890123456789",
-		.psize		= 319,
-		.digest		= (u8 *)(u16 []){ 0x44c6 },
-	}, {
-		.plaintext =	"\x6e\x05\x79\x10\xa7\x1b\xb2\x49"
-				"\xe0\x54\xeb\x82\x19\x8d\x24\xbb"
-				"\x2f\xc6\x5d\xf4\x68\xff\x96\x0a"
-				"\xa1\x38\xcf\x43\xda\x71\x08\x7c"
-				"\x13\xaa\x1e\xb5\x4c\xe3\x57\xee"
-				"\x85\x1c\x90\x27\xbe\x32\xc9\x60"
-				"\xf7\x6b\x02\x99\x0d\xa4\x3b\xd2"
-				"\x46\xdd\x74\x0b\x7f\x16\xad\x21"
-				"\xb8\x4f\xe6\x5a\xf1\x88\x1f\x93"
-				"\x2a\xc1\x35\xcc\x63\xfa\x6e\x05"
-				"\x9c\x10\xa7\x3e\xd5\x49\xe0\x77"
-				"\x0e\x82\x19\xb0\x24\xbb\x52\xe9"
-				"\x5d\xf4\x8b\x22\x96\x2d\xc4\x38"
-				"\xcf\x66\xfd\x71\x08\x9f\x13\xaa"
-				"\x41\xd8\x4c\xe3\x7a\x11\x85\x1c"
-				"\xb3\x27\xbe\x55\xec\x60\xf7\x8e"
-				"\x02\x99\x30\xc7\x3b\xd2\x69\x00"
-				"\x74\x0b\xa2\x16\xad\x44\xdb\x4f"
-				"\xe6\x7d\x14\x88\x1f\xb6\x2a\xc1"
-				"\x58\xef\x63\xfa\x91\x05\x9c\x33"
-				"\xca\x3e\xd5\x6c\x03\x77\x0e\xa5"
-				"\x19\xb0\x47\xde\x52\xe9\x80\x17"
-				"\x8b\x22\xb9\x2d\xc4\x5b\xf2\x66"
-				"\xfd\x94\x08\x9f\x36\xcd\x41\xd8"
-				"\x6f\x06\x7a\x11\xa8\x1c\xb3\x4a"
-				"\xe1\x55\xec\x83\x1a\x8e\x25\xbc"
-				"\x30\xc7\x5e\xf5\x69\x00\x97\x0b"
-				"\xa2\x39\xd0\x44\xdb\x72\x09\x7d"
-				"\x14\xab\x1f\xb6\x4d\xe4\x58\xef"
-				"\x86\x1d\x91\x28\xbf\x33\xca\x61"
-				"\xf8\x6c\x03\x9a\x0e\xa5\x3c\xd3"
-				"\x47\xde\x75\x0c\x80\x17\xae\x22"
-				"\xb9\x50\xe7\x5b\xf2\x89\x20\x94"
-				"\x2b\xc2\x36\xcd\x64\xfb\x6f\x06"
-				"\x9d\x11\xa8\x3f\xd6\x4a\xe1\x78"
-				"\x0f\x83\x1a\xb1\x25\xbc\x53\xea"
-				"\x5e\xf5\x8c\x00\x97\x2e\xc5\x39"
-				"\xd0\x67\xfe\x72\x09\xa0\x14\xab"
-				"\x42\xd9\x4d\xe4\x7b\x12\x86\x1d"
-				"\xb4\x28\xbf\x56\xed\x61\xf8\x8f"
-				"\x03\x9a\x31\xc8\x3c\xd3\x6a\x01"
-				"\x75\x0c\xa3\x17\xae\x45\xdc\x50"
-				"\xe7\x7e\x15\x89\x20\xb7\x2b\xc2"
-				"\x59\xf0\x64\xfb\x92\x06\x9d\x34"
-				"\xcb\x3f\xd6\x6d\x04\x78\x0f\xa6"
-				"\x1a\xb1\x48\xdf\x53\xea\x81\x18"
-				"\x8c\x23\xba\x2e\xc5\x5c\xf3\x67"
-				"\xfe\x95\x09\xa0\x37\xce\x42\xd9"
-				"\x70\x07\x7b\x12\xa9\x1d\xb4\x4b"
-				"\xe2\x56\xed\x84\x1b\x8f\x26\xbd"
-				"\x31\xc8\x5f\xf6\x6a\x01\x98\x0c"
-				"\xa3\x3a\xd1\x45\xdc\x73\x0a\x7e"
-				"\x15\xac\x20\xb7\x4e\xe5\x59\xf0"
-				"\x87\x1e\x92\x29\xc0\x34\xcb\x62"
-				"\xf9\x6d\x04\x9b\x0f\xa6\x3d\xd4"
-				"\x48\xdf\x76\x0d\x81\x18\xaf\x23"
-				"\xba\x51\xe8\x5c\xf3\x8a\x21\x95"
-				"\x2c\xc3\x37\xce\x65\xfc\x70\x07"
-				"\x9e\x12\xa9\x40\xd7\x4b\xe2\x79"
-				"\x10\x84\x1b\xb2\x26\xbd\x54\xeb"
-				"\x5f\xf6\x8d\x01\x98\x2f\xc6\x3a"
-				"\xd1\x68\xff\x73\x0a\xa1\x15\xac"
-				"\x43\xda\x4e\xe5\x7c\x13\x87\x1e"
-				"\xb5\x29\xc0\x57\xee\x62\xf9\x90"
-				"\x04\x9b\x32\xc9\x3d\xd4\x6b\x02"
-				"\x76\x0d\xa4\x18\xaf\x46\xdd\x51"
-				"\xe8\x7f\x16\x8a\x21\xb8\x2c\xc3"
-				"\x5a\xf1\x65\xfc\x93\x07\x9e\x35"
-				"\xcc\x40\xd7\x6e\x05\x79\x10\xa7"
-				"\x1b\xb2\x49\xe0\x54\xeb\x82\x19"
-				"\x8d\x24\xbb\x2f\xc6\x5d\xf4\x68"
-				"\xff\x96\x0a\xa1\x38\xcf\x43\xda"
-				"\x71\x08\x7c\x13\xaa\x1e\xb5\x4c"
-				"\xe3\x57\xee\x85\x1c\x90\x27\xbe"
-				"\x32\xc9\x60\xf7\x6b\x02\x99\x0d"
-				"\xa4\x3b\xd2\x46\xdd\x74\x0b\x7f"
-				"\x16\xad\x21\xb8\x4f\xe6\x5a\xf1"
-				"\x88\x1f\x93\x2a\xc1\x35\xcc\x63"
-				"\xfa\x6e\x05\x9c\x10\xa7\x3e\xd5"
-				"\x49\xe0\x77\x0e\x82\x19\xb0\x24"
-				"\xbb\x52\xe9\x5d\xf4\x8b\x22\x96"
-				"\x2d\xc4\x38\xcf\x66\xfd\x71\x08"
-				"\x9f\x13\xaa\x41\xd8\x4c\xe3\x7a"
-				"\x11\x85\x1c\xb3\x27\xbe\x55\xec"
-				"\x60\xf7\x8e\x02\x99\x30\xc7\x3b"
-				"\xd2\x69\x00\x74\x0b\xa2\x16\xad"
-				"\x44\xdb\x4f\xe6\x7d\x14\x88\x1f"
-				"\xb6\x2a\xc1\x58\xef\x63\xfa\x91"
-				"\x05\x9c\x33\xca\x3e\xd5\x6c\x03"
-				"\x77\x0e\xa5\x19\xb0\x47\xde\x52"
-				"\xe9\x80\x17\x8b\x22\xb9\x2d\xc4"
-				"\x5b\xf2\x66\xfd\x94\x08\x9f\x36"
-				"\xcd\x41\xd8\x6f\x06\x7a\x11\xa8"
-				"\x1c\xb3\x4a\xe1\x55\xec\x83\x1a"
-				"\x8e\x25\xbc\x30\xc7\x5e\xf5\x69"
-				"\x00\x97\x0b\xa2\x39\xd0\x44\xdb"
-				"\x72\x09\x7d\x14\xab\x1f\xb6\x4d"
-				"\xe4\x58\xef\x86\x1d\x91\x28\xbf"
-				"\x33\xca\x61\xf8\x6c\x03\x9a\x0e"
-				"\xa5\x3c\xd3\x47\xde\x75\x0c\x80"
-				"\x17\xae\x22\xb9\x50\xe7\x5b\xf2"
-				"\x89\x20\x94\x2b\xc2\x36\xcd\x64"
-				"\xfb\x6f\x06\x9d\x11\xa8\x3f\xd6"
-				"\x4a\xe1\x78\x0f\x83\x1a\xb1\x25"
-				"\xbc\x53\xea\x5e\xf5\x8c\x00\x97"
-				"\x2e\xc5\x39\xd0\x67\xfe\x72\x09"
-				"\xa0\x14\xab\x42\xd9\x4d\xe4\x7b"
-				"\x12\x86\x1d\xb4\x28\xbf\x56\xed"
-				"\x61\xf8\x8f\x03\x9a\x31\xc8\x3c"
-				"\xd3\x6a\x01\x75\x0c\xa3\x17\xae"
-				"\x45\xdc\x50\xe7\x7e\x15\x89\x20"
-				"\xb7\x2b\xc2\x59\xf0\x64\xfb\x92"
-				"\x06\x9d\x34\xcb\x3f\xd6\x6d\x04"
-				"\x78\x0f\xa6\x1a\xb1\x48\xdf\x53"
-				"\xea\x81\x18\x8c\x23\xba\x2e\xc5"
-				"\x5c\xf3\x67\xfe\x95\x09\xa0\x37"
-				"\xce\x42\xd9\x70\x07\x7b\x12\xa9"
-				"\x1d\xb4\x4b\xe2\x56\xed\x84\x1b"
-				"\x8f\x26\xbd\x31\xc8\x5f\xf6\x6a"
-				"\x01\x98\x0c\xa3\x3a\xd1\x45\xdc"
-				"\x73\x0a\x7e\x15\xac\x20\xb7\x4e"
-				"\xe5\x59\xf0\x87\x1e\x92\x29\xc0"
-				"\x34\xcb\x62\xf9\x6d\x04\x9b\x0f"
-				"\xa6\x3d\xd4\x48\xdf\x76\x0d\x81"
-				"\x18\xaf\x23\xba\x51\xe8\x5c\xf3"
-				"\x8a\x21\x95\x2c\xc3\x37\xce\x65"
-				"\xfc\x70\x07\x9e\x12\xa9\x40\xd7"
-				"\x4b\xe2\x79\x10\x84\x1b\xb2\x26"
-				"\xbd\x54\xeb\x5f\xf6\x8d\x01\x98"
-				"\x2f\xc6\x3a\xd1\x68\xff\x73\x0a"
-				"\xa1\x15\xac\x43\xda\x4e\xe5\x7c"
-				"\x13\x87\x1e\xb5\x29\xc0\x57\xee"
-				"\x62\xf9\x90\x04\x9b\x32\xc9\x3d"
-				"\xd4\x6b\x02\x76\x0d\xa4\x18\xaf"
-				"\x46\xdd\x51\xe8\x7f\x16\x8a\x21"
-				"\xb8\x2c\xc3\x5a\xf1\x65\xfc\x93"
-				"\x07\x9e\x35\xcc\x40\xd7\x6e\x05"
-				"\x79\x10\xa7\x1b\xb2\x49\xe0\x54"
-				"\xeb\x82\x19\x8d\x24\xbb\x2f\xc6"
-				"\x5d\xf4\x68\xff\x96\x0a\xa1\x38"
-				"\xcf\x43\xda\x71\x08\x7c\x13\xaa"
-				"\x1e\xb5\x4c\xe3\x57\xee\x85\x1c"
-				"\x90\x27\xbe\x32\xc9\x60\xf7\x6b"
-				"\x02\x99\x0d\xa4\x3b\xd2\x46\xdd"
-				"\x74\x0b\x7f\x16\xad\x21\xb8\x4f"
-				"\xe6\x5a\xf1\x88\x1f\x93\x2a\xc1"
-				"\x35\xcc\x63\xfa\x6e\x05\x9c\x10"
-				"\xa7\x3e\xd5\x49\xe0\x77\x0e\x82"
-				"\x19\xb0\x24\xbb\x52\xe9\x5d\xf4"
-				"\x8b\x22\x96\x2d\xc4\x38\xcf\x66"
-				"\xfd\x71\x08\x9f\x13\xaa\x41\xd8"
-				"\x4c\xe3\x7a\x11\x85\x1c\xb3\x27"
-				"\xbe\x55\xec\x60\xf7\x8e\x02\x99"
-				"\x30\xc7\x3b\xd2\x69\x00\x74\x0b"
-				"\xa2\x16\xad\x44\xdb\x4f\xe6\x7d"
-				"\x14\x88\x1f\xb6\x2a\xc1\x58\xef"
-				"\x63\xfa\x91\x05\x9c\x33\xca\x3e"
-				"\xd5\x6c\x03\x77\x0e\xa5\x19\xb0"
-				"\x47\xde\x52\xe9\x80\x17\x8b\x22"
-				"\xb9\x2d\xc4\x5b\xf2\x66\xfd\x94"
-				"\x08\x9f\x36\xcd\x41\xd8\x6f\x06"
-				"\x7a\x11\xa8\x1c\xb3\x4a\xe1\x55"
-				"\xec\x83\x1a\x8e\x25\xbc\x30\xc7"
-				"\x5e\xf5\x69\x00\x97\x0b\xa2\x39"
-				"\xd0\x44\xdb\x72\x09\x7d\x14\xab"
-				"\x1f\xb6\x4d\xe4\x58\xef\x86\x1d"
-				"\x91\x28\xbf\x33\xca\x61\xf8\x6c"
-				"\x03\x9a\x0e\xa5\x3c\xd3\x47\xde"
-				"\x75\x0c\x80\x17\xae\x22\xb9\x50"
-				"\xe7\x5b\xf2\x89\x20\x94\x2b\xc2"
-				"\x36\xcd\x64\xfb\x6f\x06\x9d\x11"
-				"\xa8\x3f\xd6\x4a\xe1\x78\x0f\x83"
-				"\x1a\xb1\x25\xbc\x53\xea\x5e\xf5"
-				"\x8c\x00\x97\x2e\xc5\x39\xd0\x67"
-				"\xfe\x72\x09\xa0\x14\xab\x42\xd9"
-				"\x4d\xe4\x7b\x12\x86\x1d\xb4\x28"
-				"\xbf\x56\xed\x61\xf8\x8f\x03\x9a"
-				"\x31\xc8\x3c\xd3\x6a\x01\x75\x0c"
-				"\xa3\x17\xae\x45\xdc\x50\xe7\x7e"
-				"\x15\x89\x20\xb7\x2b\xc2\x59\xf0"
-				"\x64\xfb\x92\x06\x9d\x34\xcb\x3f"
-				"\xd6\x6d\x04\x78\x0f\xa6\x1a\xb1"
-				"\x48\xdf\x53\xea\x81\x18\x8c\x23"
-				"\xba\x2e\xc5\x5c\xf3\x67\xfe\x95"
-				"\x09\xa0\x37\xce\x42\xd9\x70\x07"
-				"\x7b\x12\xa9\x1d\xb4\x4b\xe2\x56"
-				"\xed\x84\x1b\x8f\x26\xbd\x31\xc8"
-				"\x5f\xf6\x6a\x01\x98\x0c\xa3\x3a"
-				"\xd1\x45\xdc\x73\x0a\x7e\x15\xac"
-				"\x20\xb7\x4e\xe5\x59\xf0\x87\x1e"
-				"\x92\x29\xc0\x34\xcb\x62\xf9\x6d"
-				"\x04\x9b\x0f\xa6\x3d\xd4\x48\xdf"
-				"\x76\x0d\x81\x18\xaf\x23\xba\x51"
-				"\xe8\x5c\xf3\x8a\x21\x95\x2c\xc3"
-				"\x37\xce\x65\xfc\x70\x07\x9e\x12"
-				"\xa9\x40\xd7\x4b\xe2\x79\x10\x84"
-				"\x1b\xb2\x26\xbd\x54\xeb\x5f\xf6"
-				"\x8d\x01\x98\x2f\xc6\x3a\xd1\x68"
-				"\xff\x73\x0a\xa1\x15\xac\x43\xda"
-				"\x4e\xe5\x7c\x13\x87\x1e\xb5\x29"
-				"\xc0\x57\xee\x62\xf9\x90\x04\x9b"
-				"\x32\xc9\x3d\xd4\x6b\x02\x76\x0d"
-				"\xa4\x18\xaf\x46\xdd\x51\xe8\x7f"
-				"\x16\x8a\x21\xb8\x2c\xc3\x5a\xf1"
-				"\x65\xfc\x93\x07\x9e\x35\xcc\x40"
-				"\xd7\x6e\x05\x79\x10\xa7\x1b\xb2"
-				"\x49\xe0\x54\xeb\x82\x19\x8d\x24"
-				"\xbb\x2f\xc6\x5d\xf4\x68\xff\x96"
-				"\x0a\xa1\x38\xcf\x43\xda\x71\x08"
-				"\x7c\x13\xaa\x1e\xb5\x4c\xe3\x57"
-				"\xee\x85\x1c\x90\x27\xbe\x32\xc9"
-				"\x60\xf7\x6b\x02\x99\x0d\xa4\x3b"
-				"\xd2\x46\xdd\x74\x0b\x7f\x16\xad"
-				"\x21\xb8\x4f\xe6\x5a\xf1\x88\x1f"
-				"\x93\x2a\xc1\x35\xcc\x63\xfa\x6e"
-				"\x05\x9c\x10\xa7\x3e\xd5\x49\xe0"
-				"\x77\x0e\x82\x19\xb0\x24\xbb\x52"
-				"\xe9\x5d\xf4\x8b\x22\x96\x2d\xc4"
-				"\x38\xcf\x66\xfd\x71\x08\x9f\x13"
-				"\xaa\x41\xd8\x4c\xe3\x7a\x11\x85"
-				"\x1c\xb3\x27\xbe\x55\xec\x60\xf7"
-				"\x8e\x02\x99\x30\xc7\x3b\xd2\x69"
-				"\x00\x74\x0b\xa2\x16\xad\x44\xdb"
-				"\x4f\xe6\x7d\x14\x88\x1f\xb6\x2a"
-				"\xc1\x58\xef\x63\xfa\x91\x05\x9c"
-				"\x33\xca\x3e\xd5\x6c\x03\x77\x0e"
-				"\xa5\x19\xb0\x47\xde\x52\xe9\x80"
-				"\x17\x8b\x22\xb9\x2d\xc4\x5b\xf2"
-				"\x66\xfd\x94\x08\x9f\x36\xcd\x41"
-				"\xd8\x6f\x06\x7a\x11\xa8\x1c\xb3"
-				"\x4a\xe1\x55\xec\x83\x1a\x8e\x25"
-				"\xbc\x30\xc7\x5e\xf5\x69\x00\x97"
-				"\x0b\xa2\x39\xd0\x44\xdb\x72\x09"
-				"\x7d\x14\xab\x1f\xb6\x4d\xe4\x58"
-				"\xef\x86\x1d\x91\x28\xbf\x33\xca"
-				"\x61\xf8\x6c\x03\x9a\x0e\xa5\x3c"
-				"\xd3\x47\xde\x75\x0c\x80\x17\xae"
-				"\x22\xb9\x50\xe7\x5b\xf2\x89\x20"
-				"\x94\x2b\xc2\x36\xcd\x64\xfb\x6f"
-				"\x06\x9d\x11\xa8\x3f\xd6\x4a\xe1"
-				"\x78\x0f\x83\x1a\xb1\x25\xbc\x53"
-				"\xea\x5e\xf5\x8c\x00\x97\x2e\xc5"
-				"\x39\xd0\x67\xfe\x72\x09\xa0\x14"
-				"\xab\x42\xd9\x4d\xe4\x7b\x12\x86"
-				"\x1d\xb4\x28\xbf\x56\xed\x61\xf8"
-				"\x8f\x03\x9a\x31\xc8\x3c\xd3\x6a"
-				"\x01\x75\x0c\xa3\x17\xae\x45\xdc"
-				"\x50\xe7\x7e\x15\x89\x20\xb7\x2b"
-				"\xc2\x59\xf0\x64\xfb\x92\x06\x9d"
-				"\x34\xcb\x3f\xd6\x6d\x04\x78\x0f"
-				"\xa6\x1a\xb1\x48\xdf\x53\xea\x81"
-				"\x18\x8c\x23\xba\x2e\xc5\x5c\xf3"
-				"\x67\xfe\x95\x09\xa0\x37\xce\x42"
-				"\xd9\x70\x07\x7b\x12\xa9\x1d\xb4"
-				"\x4b\xe2\x56\xed\x84\x1b\x8f\x26"
-				"\xbd\x31\xc8\x5f\xf6\x6a\x01\x98",
-		.psize = 2048,
-		.digest		= (u8 *)(u16 []){ 0x23ca },
-	}
-};
-
 /*
  * Streebog test vectors from RFC 6986 and GOST R 34.11-2012
  */
diff --git a/drivers/crypto/stm32/stm32-crc32.c b/drivers/crypto/stm32/stm32-crc32.c
index de4d0402f13391080e5f7719c61f164a28acc4ff..fd29785a3ecf31e6d701960356e05b713d432722 100644
--- a/drivers/crypto/stm32/stm32-crc32.c
+++ b/drivers/crypto/stm32/stm32-crc32.c
@@ -162,7 +162,7 @@ static int burst_update(struct shash_desc *desc, const u8 *d8,
 		if (mctx->poly == CRC32_POLY_LE)
 			ctx->partial = crc32_le(ctx->partial, d8, length);
 		else
-			ctx->partial = __crc32c_le(ctx->partial, d8, length);
+			ctx->partial = crc32c(ctx->partial, d8, length);
 
 		goto pm_out;
 	}
diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h
index ea5eee50dc39d05886a99f8af2bd4189e0dab485..4e692de1da933186e5b821177c510f49fbf9a1ed 100644
--- a/drivers/infiniband/sw/siw/siw.h
+++ b/drivers/infiniband/sw/siw/siw.h
@@ -676,8 +676,8 @@ static inline __wsum siw_csum_update(const void *buff, int len, __wsum sum)
 static inline __wsum siw_csum_combine(__wsum csum, __wsum csum2, int offset,
 				      int len)
 {
-	return (__force __wsum)__crc32c_le_combine((__force __u32)csum,
-						   (__force __u32)csum2, len);
+	return (__force __wsum)crc32c_combine((__force __u32)csum,
+					      (__force __u32)csum2, len);
 }
 
 static inline void siw_crc_skb(struct siw_rx_stream *srx, unsigned int len)
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index e530271cb86bbcb477cef6d61c6c33dc68cc3913..ba768ca7f4229e7520ae6f91230bc3bc2d3cd826 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -714,7 +714,7 @@ static void r5l_submit_current_io(struct r5l_log *log)
 
 	block = page_address(io->meta_page);
 	block->meta_size = cpu_to_le32(io->meta_offset);
-	crc = crc32c_le(log->uuid_checksum, block, PAGE_SIZE);
+	crc = crc32c(log->uuid_checksum, block, PAGE_SIZE);
 	block->checksum = cpu_to_le32(crc);
 
 	log->current_io = NULL;
@@ -1020,8 +1020,8 @@ int r5l_write_stripe(struct r5l_log *log, struct stripe_head *sh)
 		if (test_bit(STRIPE_LOG_TRAPPED, &sh->state))
 			continue;
 		addr = kmap_local_page(sh->dev[i].page);
-		sh->dev[i].log_checksum = crc32c_le(log->uuid_checksum,
-						    addr, PAGE_SIZE);
+		sh->dev[i].log_checksum = crc32c(log->uuid_checksum,
+						 addr, PAGE_SIZE);
 		kunmap_local(addr);
 	}
 	parity_pages = 1 + !!(sh->qd_idx >= 0);
@@ -1741,7 +1741,7 @@ static int r5l_recovery_read_meta_block(struct r5l_log *log,
 	    le64_to_cpu(mb->position) != ctx->pos)
 		return -EINVAL;
 
-	crc = crc32c_le(log->uuid_checksum, mb, PAGE_SIZE);
+	crc = crc32c(log->uuid_checksum, mb, PAGE_SIZE);
 	if (stored_crc != crc)
 		return -EINVAL;
 
@@ -1780,8 +1780,7 @@ static int r5l_log_write_empty_meta_block(struct r5l_log *log, sector_t pos,
 		return -ENOMEM;
 	r5l_recovery_create_empty_meta_block(log, page, pos, seq);
 	mb = page_address(page);
-	mb->checksum = cpu_to_le32(crc32c_le(log->uuid_checksum,
-					     mb, PAGE_SIZE));
+	mb->checksum = cpu_to_le32(crc32c(log->uuid_checksum, mb, PAGE_SIZE));
 	if (!sync_page_io(log->rdev, pos, PAGE_SIZE, page, REQ_OP_WRITE |
 			  REQ_SYNC | REQ_FUA, false)) {
 		__free_page(page);
@@ -1976,7 +1975,7 @@ r5l_recovery_verify_data_checksum(struct r5l_log *log,
 
 	r5l_recovery_read_page(log, ctx, page, log_offset);
 	addr = kmap_local_page(page);
-	checksum = crc32c_le(log->uuid_checksum, addr, PAGE_SIZE);
+	checksum = crc32c(log->uuid_checksum, addr, PAGE_SIZE);
 	kunmap_local(addr);
 	return (le32_to_cpu(log_checksum) == checksum) ? 0 : -EINVAL;
 }
@@ -2379,8 +2378,8 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log,
 					raid5_compute_blocknr(sh, i, 0));
 				addr = kmap_local_page(dev->page);
 				payload->checksum[0] = cpu_to_le32(
-					crc32c_le(log->uuid_checksum, addr,
-						  PAGE_SIZE));
+					crc32c(log->uuid_checksum, addr,
+					       PAGE_SIZE));
 				kunmap_local(addr);
 				sync_page_io(log->rdev, write_pos, PAGE_SIZE,
 					     dev->page, REQ_OP_WRITE, false);
@@ -2392,8 +2391,8 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log,
 			}
 		}
 		mb->meta_size = cpu_to_le32(offset);
-		mb->checksum = cpu_to_le32(crc32c_le(log->uuid_checksum,
-						     mb, PAGE_SIZE));
+		mb->checksum = cpu_to_le32(crc32c(log->uuid_checksum,
+						  mb, PAGE_SIZE));
 		sync_page_io(log->rdev, ctx->pos, PAGE_SIZE, page,
 			     REQ_OP_WRITE | REQ_SYNC | REQ_FUA, false);
 		sh->log_start = ctx->pos;
@@ -2885,8 +2884,8 @@ int r5c_cache_data(struct r5l_log *log, struct stripe_head *sh)
 		if (!test_bit(R5_Wantwrite, &sh->dev[i].flags))
 			continue;
 		addr = kmap_local_page(sh->dev[i].page);
-		sh->dev[i].log_checksum = crc32c_le(log->uuid_checksum,
-						    addr, PAGE_SIZE);
+		sh->dev[i].log_checksum = crc32c(log->uuid_checksum,
+						 addr, PAGE_SIZE);
 		kunmap_local(addr);
 		pages++;
 	}
@@ -2969,7 +2968,7 @@ static int r5l_load_log(struct r5l_log *log)
 	}
 	stored_crc = le32_to_cpu(mb->checksum);
 	mb->checksum = 0;
-	expected_crc = crc32c_le(log->uuid_checksum, mb, PAGE_SIZE);
+	expected_crc = crc32c(log->uuid_checksum, mb, PAGE_SIZE);
 	if (stored_crc != expected_crc) {
 		create_super = true;
 		goto create;
@@ -3077,8 +3076,8 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
 		return -ENOMEM;
 	log->rdev = rdev;
 	log->need_cache_flush = bdev_write_cache(rdev->bdev);
-	log->uuid_checksum = crc32c_le(~0, rdev->mddev->uuid,
-				       sizeof(rdev->mddev->uuid));
+	log->uuid_checksum = crc32c(~0, rdev->mddev->uuid,
+				    sizeof(rdev->mddev->uuid));
 
 	mutex_init(&log->io_mutex);
 
diff --git a/drivers/md/raid5-ppl.c b/drivers/md/raid5-ppl.c
index 37c4da5311ca71aa78e762fef476744678cbb4ca..c0fb335311aa6ca6ceecbb9945d8e582923178d3 100644
--- a/drivers/md/raid5-ppl.c
+++ b/drivers/md/raid5-ppl.c
@@ -346,9 +346,9 @@ static int ppl_log_stripe(struct ppl_log *log, struct stripe_head *sh)
 	if (!test_bit(STRIPE_FULL_WRITE, &sh->state)) {
 		le32_add_cpu(&e->pp_size, PAGE_SIZE);
 		io->pp_size += PAGE_SIZE;
-		e->checksum = cpu_to_le32(crc32c_le(le32_to_cpu(e->checksum),
-						    page_address(sh->ppl_page),
-						    PAGE_SIZE));
+		e->checksum = cpu_to_le32(crc32c(le32_to_cpu(e->checksum),
+						 page_address(sh->ppl_page),
+						 PAGE_SIZE));
 	}
 
 	list_add_tail(&sh->log_list, &io->stripe_list);
@@ -454,7 +454,7 @@ static void ppl_submit_iounit(struct ppl_io_unit *io)
 	}
 
 	pplhdr->entries_count = cpu_to_le32(io->entries_count);
-	pplhdr->checksum = cpu_to_le32(~crc32c_le(~0, pplhdr, PPL_HEADER_SIZE));
+	pplhdr->checksum = cpu_to_le32(~crc32c(~0, pplhdr, PPL_HEADER_SIZE));
 
 	/* Rewind the buffer if current PPL is larger then remaining space */
 	if (log->use_multippl &&
@@ -998,7 +998,7 @@ static int ppl_recover(struct ppl_log *log, struct ppl_header *pplhdr,
 				goto out;
 			}
 
-			crc = crc32c_le(crc, page_address(page), s);
+			crc = crc32c(crc, page_address(page), s);
 
 			pp_size -= s;
 			sector += s >> 9;
@@ -1052,7 +1052,7 @@ static int ppl_write_empty_header(struct ppl_log *log)
 			    log->rdev->ppl.size, GFP_NOIO, 0);
 	memset(pplhdr->reserved, 0xff, PPL_HDR_RESERVED);
 	pplhdr->signature = cpu_to_le32(log->ppl_conf->signature);
-	pplhdr->checksum = cpu_to_le32(~crc32c_le(~0, pplhdr, PAGE_SIZE));
+	pplhdr->checksum = cpu_to_le32(~crc32c(~0, pplhdr, PAGE_SIZE));
 
 	if (!sync_page_io(rdev, rdev->ppl.sector - rdev->data_offset,
 			  PPL_HEADER_SIZE, page, REQ_OP_WRITE | REQ_SYNC |
@@ -1106,7 +1106,7 @@ static int ppl_load_distributed(struct ppl_log *log)
 		/* check header validity */
 		crc_stored = le32_to_cpu(pplhdr->checksum);
 		pplhdr->checksum = 0;
-		crc = ~crc32c_le(~0, pplhdr, PAGE_SIZE);
+		crc = ~crc32c(~0, pplhdr, PAGE_SIZE);
 
 		if (crc_stored != crc) {
 			pr_debug("%s: ppl header crc does not match: stored: 0x%x calculated: 0x%x (offset: %llu)\n",
@@ -1390,7 +1390,7 @@ int ppl_init_log(struct r5conf *conf)
 	spin_lock_init(&ppl_conf->no_mem_stripes_lock);
 
 	if (!mddev->external) {
-		ppl_conf->signature = ~crc32c_le(~0, mddev->uuid, sizeof(mddev->uuid));
+		ppl_conf->signature = ~crc32c(~0, mddev->uuid, sizeof(mddev->uuid));
 		ppl_conf->block_size = 512;
 	} else {
 		ppl_conf->block_size =
diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c
index 8e04552d2216ce261d18ac8e30e9da52c24f1237..02c8213915a5d673c65771263d4dae7ad81307c4 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.c
@@ -2593,7 +2593,7 @@ void bnx2x_init_rx_mode_obj(struct bnx2x *bp,
 /********************* Multicast verbs: SET, CLEAR ****************************/
 static inline u8 bnx2x_mcast_bin_from_mac(u8 *mac)
 {
-	return (crc32c_le(0, mac, ETH_ALEN) >> 24) & 0xff;
+	return (crc32c(0, mac, ETH_ALEN) >> 24) & 0xff;
 }
 
 struct bnx2x_mcast_mac_elem {
diff --git a/drivers/thunderbolt/ctl.c b/drivers/thunderbolt/ctl.c
index dc1f456736dc4606d6d1e2a51cedf8ffd6149182..cd15e84c47f475994e3b5d02a33d9a2c0ecd3431 100644
--- a/drivers/thunderbolt/ctl.c
+++ b/drivers/thunderbolt/ctl.c
@@ -312,7 +312,7 @@ static void tb_cfg_print_error(struct tb_ctl *ctl, enum tb_cfg_space space,
 
 static __be32 tb_crc(const void *data, size_t len)
 {
-	return cpu_to_be32(~__crc32c_le(~0, data, len));
+	return cpu_to_be32(~crc32c(~0, data, len));
 }
 
 static void tb_ctl_pkg_free(struct ctl_pkg *pkg)
diff --git a/drivers/thunderbolt/eeprom.c b/drivers/thunderbolt/eeprom.c
index 9c1d65d2655310a104e75ba18644268da5ebc0cb..e66183a72cf94e03c39be93589d52a5fc6fcf6e7 100644
--- a/drivers/thunderbolt/eeprom.c
+++ b/drivers/thunderbolt/eeprom.c
@@ -211,7 +211,7 @@ static u8 tb_crc8(u8 *data, int len)
 
 static u32 tb_crc32(void *data, size_t len)
 {
-	return ~__crc32c_le(~0, data, len);
+	return ~crc32c(~0, data, len);
 }
 
 #define TB_DROM_DATA_START		13
diff --git a/include/linux/crc-t10dif.h b/include/linux/crc-t10dif.h
index 16787c1cee21c65588dad2e7508065a400ee1b76..a559fdff3f7e201b93cad6dd867983cbb28ac094 100644
--- a/include/linux/crc-t10dif.h
+++ b/include/linux/crc-t10dif.h
@@ -4,9 +4,6 @@
 
 #include <linux/types.h>
 
-#define CRC_T10DIF_DIGEST_SIZE 2
-#define CRC_T10DIF_BLOCK_SIZE 1
-
 u16 crc_t10dif_arch(u16 crc, const u8 *p, size_t len);
 u16 crc_t10dif_generic(u16 crc, const u8 *p, size_t len);
 
@@ -22,13 +19,4 @@ static inline u16 crc_t10dif(const u8 *p, size_t len)
 	return crc_t10dif_update(0, p, len);
 }
 
-#if IS_ENABLED(CONFIG_CRC_T10DIF_ARCH)
-bool crc_t10dif_is_optimized(void);
-#else
-static inline bool crc_t10dif_is_optimized(void)
-{
-	return false;
-}
-#endif
-
 #endif
diff --git a/include/linux/crc32.h b/include/linux/crc32.h
index e9bd40056687a4efa9eabfc6e2e8526607ed2613..69c2e8bb37829915f750bb281f7ff4150e4bcff2 100644
--- a/include/linux/crc32.h
+++ b/include/linux/crc32.h
@@ -8,33 +8,32 @@
 #include <linux/types.h>
 #include <linux/bitrev.h>
 
-u32 __pure crc32_le_arch(u32 crc, const u8 *p, size_t len);
-u32 __pure crc32_le_base(u32 crc, const u8 *p, size_t len);
-u32 __pure crc32_be_arch(u32 crc, const u8 *p, size_t len);
-u32 __pure crc32_be_base(u32 crc, const u8 *p, size_t len);
-u32 __pure crc32c_le_arch(u32 crc, const u8 *p, size_t len);
-u32 __pure crc32c_le_base(u32 crc, const u8 *p, size_t len);
+u32 crc32_le_arch(u32 crc, const u8 *p, size_t len);
+u32 crc32_le_base(u32 crc, const u8 *p, size_t len);
+u32 crc32_be_arch(u32 crc, const u8 *p, size_t len);
+u32 crc32_be_base(u32 crc, const u8 *p, size_t len);
+u32 crc32c_arch(u32 crc, const u8 *p, size_t len);
+u32 crc32c_base(u32 crc, const u8 *p, size_t len);
 
-static inline u32 __pure crc32_le(u32 crc, const u8 *p, size_t len)
+static inline u32 crc32_le(u32 crc, const void *p, size_t len)
 {
 	if (IS_ENABLED(CONFIG_CRC32_ARCH))
 		return crc32_le_arch(crc, p, len);
 	return crc32_le_base(crc, p, len);
 }
 
-static inline u32 __pure crc32_be(u32 crc, const u8 *p, size_t len)
+static inline u32 crc32_be(u32 crc, const void *p, size_t len)
 {
 	if (IS_ENABLED(CONFIG_CRC32_ARCH))
 		return crc32_be_arch(crc, p, len);
 	return crc32_be_base(crc, p, len);
 }
 
-/* TODO: leading underscores should be dropped once callers have been updated */
-static inline u32 __pure __crc32c_le(u32 crc, const u8 *p, size_t len)
+static inline u32 crc32c(u32 crc, const void *p, size_t len)
 {
 	if (IS_ENABLED(CONFIG_CRC32_ARCH))
-		return crc32c_le_arch(crc, p, len);
-	return crc32c_le_base(crc, p, len);
+		return crc32c_arch(crc, p, len);
+	return crc32c_base(crc, p, len);
 }
 
 /*
@@ -45,7 +44,7 @@ static inline u32 __pure __crc32c_le(u32 crc, const u8 *p, size_t len)
  */
 #define CRC32_LE_OPTIMIZATION	BIT(0) /* crc32_le() is optimized */
 #define CRC32_BE_OPTIMIZATION	BIT(1) /* crc32_be() is optimized */
-#define CRC32C_OPTIMIZATION	BIT(2) /* __crc32c_le() is optimized */
+#define CRC32C_OPTIMIZATION	BIT(2) /* crc32c() is optimized */
 #if IS_ENABLED(CONFIG_CRC32_ARCH)
 u32 crc32_optimizations(void);
 #else
@@ -70,36 +69,34 @@ static inline u32 crc32_optimizations(void) { return 0; }
  * 	   with the same initializer as crc1, and crc2 seed was 0. See
  * 	   also crc32_combine_test().
  */
-u32 __attribute_const__ crc32_le_shift(u32 crc, size_t len);
+u32 crc32_le_shift(u32 crc, size_t len);
 
 static inline u32 crc32_le_combine(u32 crc1, u32 crc2, size_t len2)
 {
 	return crc32_le_shift(crc1, len2) ^ crc2;
 }
 
+u32 crc32c_shift(u32 crc, size_t len);
+
 /**
- * __crc32c_le_combine - Combine two crc32c check values into one. For two
- * 			 sequences of bytes, seq1 and seq2 with lengths len1
- * 			 and len2, __crc32c_le() check values were calculated
- * 			 for each, crc1 and crc2.
+ * crc32c_combine - Combine two crc32c check values into one. For two sequences
+ *		    of bytes, seq1 and seq2 with lengths len1 and len2, crc32c()
+ *		    check values were calculated for each, crc1 and crc2.
  *
  * @crc1: crc32c of the first block
  * @crc2: crc32c of the second block
  * @len2: length of the second block
  *
- * Return: The __crc32c_le() check value of seq1 and seq2 concatenated,
- * 	   requiring only crc1, crc2, and len2. Note: If seq_full denotes
- * 	   the concatenated memory area of seq1 with seq2, and crc_full
- * 	   the __crc32c_le() value of seq_full, then crc_full ==
- * 	   __crc32c_le_combine(crc1, crc2, len2) when crc_full was
- * 	   seeded with the same initializer as crc1, and crc2 seed
- * 	   was 0. See also crc32c_combine_test().
+ * Return: The crc32c() check value of seq1 and seq2 concatenated, requiring
+ *	   only crc1, crc2, and len2. Note: If seq_full denotes the concatenated
+ *	   memory area of seq1 with seq2, and crc_full the crc32c() value of
+ *	   seq_full, then crc_full == crc32c_combine(crc1, crc2, len2) when
+ *	   crc_full was seeded with the same initializer as crc1, and crc2 seed
+ *	   was 0. See also crc_combine_test().
  */
-u32 __attribute_const__ __crc32c_le_shift(u32 crc, size_t len);
-
-static inline u32 __crc32c_le_combine(u32 crc1, u32 crc2, size_t len2)
+static inline u32 crc32c_combine(u32 crc1, u32 crc2, size_t len2)
 {
-	return __crc32c_le_shift(crc1, len2) ^ crc2;
+	return crc32c_shift(crc1, len2) ^ crc2;
 }
 
 #define crc32(seed, data, length)  crc32_le(seed, (unsigned char const *)(data), length)
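
Illustrative sketch, not kernel code: a minimal pure-Python model of the conventions behind the renamed crc32c() and crc32c_combine() helpers above. The *_py names are ad hoc for this sketch; crc32c_py() is a bitwise lsb-first CRC-32C (reflected generator 0x82F63B78). The asserts demonstrate that (a) a result can be fed back as the seed of the next call, which is how callers such as md/raid5 use crc32c(), and (b) combining is "shift crc1 past len2 bytes, then XOR crc2", where the shift behaves like checksumming len2 zero bytes.

CRC32C_POLY_LE = 0x82F63B78

def crc32c_py(crc, data):
    # One bit at a time, lsb-first, no inversions (like crc32c_base()).
    for byte in data:
        crc ^= byte
        for _ in range(8):
            crc = (crc >> 1) ^ (CRC32C_POLY_LE if crc & 1 else 0)
    return crc

def crc32c_combine_py(crc1, crc2, len2):
    # Shifting crc1 past len2 bytes acts like appending len2 zero bytes.
    return crc32c_py(crc1, b"\x00" * len2) ^ crc2

seq1, seq2 = b"hello ", b"world"
full = crc32c_py(0xFFFFFFFF, seq1 + seq2)
assert full == crc32c_py(crc32c_py(0xFFFFFFFF, seq1), seq2)            # incremental update
assert full == crc32c_combine_py(crc32c_py(0xFFFFFFFF, seq1),
                                 crc32c_py(0, seq2), len(seq2))        # crc2 seeded with 0
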
diff --git a/include/linux/crc32c.h b/include/linux/crc32c.h
index 47eb78003c2654f419164dd2378015048ab787bc..b8cff2f4309a70efb6521491438dce0be19e0d74 100644
--- a/include/linux/crc32c.h
+++ b/include/linux/crc32c.h
@@ -4,12 +4,4 @@
 
 #include <linux/crc32.h>
 
-static inline u32 crc32c(u32 crc, const void *address, unsigned int length)
-{
-	return __crc32c_le(crc, address, length);
-}
-
-/* This macro exists for backwards-compatibility. */
-#define crc32c_le crc32c
-
 #endif	/* _LINUX_CRC32C_H */
diff --git a/include/linux/crc64.h b/include/linux/crc64.h
index e044c60d1e611224e19a3bcf9369cd26dcaf3a92..41de30b907dff891fec961a05b12474a234104b0 100644
--- a/include/linux/crc64.h
+++ b/include/linux/crc64.h
@@ -7,12 +7,40 @@
 
 #include <linux/types.h>
 
-#define CRC64_ROCKSOFT_STRING "crc64-rocksoft"
+u64 crc64_be_arch(u64 crc, const u8 *p, size_t len);
+u64 crc64_be_generic(u64 crc, const u8 *p, size_t len);
+u64 crc64_nvme_arch(u64 crc, const u8 *p, size_t len);
+u64 crc64_nvme_generic(u64 crc, const u8 *p, size_t len);
 
-u64 __pure crc64_be(u64 crc, const void *p, size_t len);
-u64 __pure crc64_rocksoft_generic(u64 crc, const void *p, size_t len);
+/**
+ * crc64_be - Calculate bitwise big-endian ECMA-182 CRC64
+ * @crc: seed value for computation. 0 or (u64)~0 for a new CRC calculation,
+ *       or the previous crc64 value if computing incrementally.
+ * @p: pointer to buffer over which CRC64 is run
+ * @len: length of buffer @p
+ */
+static inline u64 crc64_be(u64 crc, const void *p, size_t len)
+{
+	if (IS_ENABLED(CONFIG_CRC64_ARCH))
+		return crc64_be_arch(crc, p, len);
+	return crc64_be_generic(crc, p, len);
+}
 
-u64 crc64_rocksoft(const unsigned char *buffer, size_t len);
-u64 crc64_rocksoft_update(u64 crc, const unsigned char *buffer, size_t len);
+/**
+ * crc64_nvme - Calculate CRC64-NVME
+ * @crc: seed value for computation. 0 for a new CRC calculation, or the
+ *	 previous crc64 value if computing incrementally.
+ * @p: pointer to buffer over which CRC64 is run
+ * @len: length of buffer @p
+ *
+ * This computes the CRC64 defined in the NVME NVM Command Set Specification,
+ * *including the bitwise inversion at the beginning and end*.
+ */
+static inline u64 crc64_nvme(u64 crc, const void *p, size_t len)
+{
+	if (IS_ENABLED(CONFIG_CRC64_ARCH))
+		return ~crc64_nvme_arch(~crc, p, len);
+	return ~crc64_nvme_generic(~crc, p, len);
+}
 
 #endif /* _LINUX_CRC64_H */
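
Illustrative sketch, not kernel code: a pure-Python model of the inversion convention documented for crc64_nvme() above. The *_py names are ad hoc; crc64_nvme_raw_py() mirrors the non-inverting crc64_nvme_generic() (lsb-first, reflected generator 0x9A6C9329AC4BC9B5), and the wrapper inverts on entry and exit. The assert shows that passing the previous value back as the seed still works, because the inner inversions cancel.

CRC64_NVME_POLY_LE = 0x9A6C9329AC4BC9B5
MASK64 = (1 << 64) - 1

def crc64_nvme_raw_py(crc, data):
    # One bit at a time, lsb-first, no inversions (like crc64_nvme_generic()).
    for byte in data:
        crc ^= byte
        for _ in range(8):
            crc = (crc >> 1) ^ (CRC64_NVME_POLY_LE if crc & 1 else 0)
    return crc

def crc64_nvme_py(crc, data):
    # Invert at the beginning and end, like the crc64_nvme() inline above.
    return ~crc64_nvme_raw_py(~crc & MASK64, data) & MASK64

a, b = b"nvme ", b"data"
assert crc64_nvme_py(crc64_nvme_py(0, a), b) == crc64_nvme_py(0, a + b)
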
diff --git a/include/net/sctp/checksum.h b/include/net/sctp/checksum.h
index f514a0aa849eaa7ead9327f6689b6458f38785fb..291465c2581024faa1dc701f3050a6b3749e4676 100644
--- a/include/net/sctp/checksum.h
+++ b/include/net/sctp/checksum.h
@@ -30,17 +30,14 @@
 
 static inline __wsum sctp_csum_update(const void *buff, int len, __wsum sum)
 {
-	/* This uses the crypto implementation of crc32c, which is either
-	 * implemented w/ hardware support or resolves to __crc32c_le().
-	 */
 	return (__force __wsum)crc32c((__force __u32)sum, buff, len);
 }
 
 static inline __wsum sctp_csum_combine(__wsum csum, __wsum csum2,
 				       int offset, int len)
 {
-	return (__force __wsum)__crc32c_le_combine((__force __u32)csum,
-						   (__force __u32)csum2, len);
+	return (__force __wsum)crc32c_combine((__force __u32)csum,
+					      (__force __u32)csum2, len);
 }
 
 static const struct skb_checksum_ops sctp_csum_ops = {
diff --git a/lib/Kconfig b/lib/Kconfig
index dccb61b7d698e1474fddfa29cc4146701e2de8c1..67bbf4f64dd986054be42db6cf14498f2fd79e72 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -168,15 +168,6 @@ config CRC_T10DIF_ARCH
 	tristate
 	default CRC_T10DIF if ARCH_HAS_CRC_T10DIF && CRC_OPTIMIZATIONS
 
-config CRC64_ROCKSOFT
-	tristate "CRC calculation for the Rocksoft model CRC64"
-	select CRC64
-	select CRYPTO
-	select CRYPTO_CRC64_ROCKSOFT
-	help
-	  This option provides a CRC64 API to a registered crypto driver.
-	  This is used with the block layer's data integrity subsystem.
-
 config CRC_ITU_T
 	tristate "CRC ITU-T V.41 functions"
 	help
@@ -210,6 +201,13 @@ config CRC64
 	  the kernel tree does. Such modules that use library CRC64
 	  functions require M here.
 
+config ARCH_HAS_CRC64
+	bool
+
+config CRC64_ARCH
+	tristate
+	default CRC64 if ARCH_HAS_CRC64 && CRC_OPTIMIZATIONS
+
 config CRC4
 	tristate "CRC4 functions"
 	help
diff --git a/lib/Makefile b/lib/Makefile
index bc05750e32ca8192217080b6419df15da311a477..7bab71e59019535d0f654d9d5f998a3019c2d312 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -155,7 +155,6 @@ obj-$(CONFIG_CRC64)     += crc64.o
 obj-$(CONFIG_CRC4)	+= crc4.o
 obj-$(CONFIG_CRC7)	+= crc7.o
 obj-$(CONFIG_CRC8)	+= crc8.o
-obj-$(CONFIG_CRC64_ROCKSOFT) += crc64-rocksoft.o
 obj-$(CONFIG_XXHASH)	+= xxhash.o
 obj-$(CONFIG_GENERIC_ALLOCATOR) += genalloc.o
 
diff --git a/lib/crc32.c b/lib/crc32.c
index ede6131f66fc4f6e18ff602687c64748f46d2386..fddd424ff22451204ba84cd50665734a0eb13ba0 100644
--- a/lib/crc32.c
+++ b/lib/crc32.c
@@ -37,7 +37,7 @@ MODULE_AUTHOR("Matt Domsch <Matt_Domsch@dell.com>");
 MODULE_DESCRIPTION("Various CRC32 calculations");
 MODULE_LICENSE("GPL");
 
-u32 __pure crc32_le_base(u32 crc, const u8 *p, size_t len)
+u32 crc32_le_base(u32 crc, const u8 *p, size_t len)
 {
 	while (len--)
 		crc = (crc >> 8) ^ crc32table_le[(crc & 255) ^ *p++];
@@ -45,20 +45,20 @@ u32 __pure crc32_le_base(u32 crc, const u8 *p, size_t len)
 }
 EXPORT_SYMBOL(crc32_le_base);
 
-u32 __pure crc32c_le_base(u32 crc, const u8 *p, size_t len)
+u32 crc32c_base(u32 crc, const u8 *p, size_t len)
 {
 	while (len--)
 		crc = (crc >> 8) ^ crc32ctable_le[(crc & 255) ^ *p++];
 	return crc;
 }
-EXPORT_SYMBOL(crc32c_le_base);
+EXPORT_SYMBOL(crc32c_base);
 
 /*
  * This multiplies the polynomials x and y modulo the given modulus.
  * This follows the "little-endian" CRC convention that the lsbit
  * represents the highest power of x, and the msbit represents x^0.
  */
-static u32 __attribute_const__ gf2_multiply(u32 x, u32 y, u32 modulus)
+static u32 gf2_multiply(u32 x, u32 y, u32 modulus)
 {
 	u32 product = x & 1 ? y : 0;
 	int i;
@@ -84,8 +84,7 @@ static u32 __attribute_const__ gf2_multiply(u32 x, u32 y, u32 modulus)
  * as appending len bytes of zero to the data), in time proportional
  * to log(len).
  */
-static u32 __attribute_const__ crc32_generic_shift(u32 crc, size_t len,
-						   u32 polynomial)
+static u32 crc32_generic_shift(u32 crc, size_t len, u32 polynomial)
 {
 	u32 power = polynomial;	/* CRC of x^32 */
 	int i;
@@ -114,19 +113,19 @@ static u32 __attribute_const__ crc32_generic_shift(u32 crc, size_t len,
 	return crc;
 }
 
-u32 __attribute_const__ crc32_le_shift(u32 crc, size_t len)
+u32 crc32_le_shift(u32 crc, size_t len)
 {
 	return crc32_generic_shift(crc, len, CRC32_POLY_LE);
 }
+EXPORT_SYMBOL(crc32_le_shift);
 
-u32 __attribute_const__ __crc32c_le_shift(u32 crc, size_t len)
+u32 crc32c_shift(u32 crc, size_t len)
 {
 	return crc32_generic_shift(crc, len, CRC32C_POLY_LE);
 }
-EXPORT_SYMBOL(crc32_le_shift);
-EXPORT_SYMBOL(__crc32c_le_shift);
+EXPORT_SYMBOL(crc32c_shift);
 
-u32 __pure crc32_be_base(u32 crc, const u8 *p, size_t len)
+u32 crc32_be_base(u32 crc, const u8 *p, size_t len)
 {
 	while (len--)
 		crc = (crc << 8) ^ crc32table_be[(crc >> 24) ^ *p++];
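
Illustrative sketch, not kernel code: what the crc32_generic_shift() comment above describes. Appending len zero bytes multiplies the CRC register polynomial by x^(8*len) mod G, so the shift can be computed by square-and-multiply in time proportional to log(len) instead of feeding len zero bytes. For readability this sketch uses the msb-first (crc32_be) convention, where register bit i is the coefficient of x^i; the kernel applies the same idea to its lsb-first shifts, and crc32_be_shift() below is only a name for this sketch, not a kernel function.

CRC32_POLY_BE = 0x04C11DB7
G = (1 << 32) | CRC32_POLY_BE

def crc32_be_py(crc, data):
    # Bitwise msb-first reference (like crc32_be_base()).
    for byte in data:
        crc ^= byte << 24
        for _ in range(8):
            crc = ((crc << 1) ^ (CRC32_POLY_BE if crc & 0x80000000 else 0)) & 0xFFFFFFFF
    return crc

def clmul(a, b):
    # Carryless (GF(2)) multiplication of two polynomials.
    r = 0
    while b:
        if b & 1:
            r ^= a
        a <<= 1
        b >>= 1
    return r

def polymod(a, g):
    # Reduce the polynomial a modulo g.
    while a.bit_length() >= g.bit_length():
        a ^= g << (a.bit_length() - g.bit_length())
    return a

def crc32_be_shift(crc, length):
    # Multiply the register by x^(8*length) mod G via square-and-multiply.
    power = 1 << 8                          # x^8, the effect of one zero byte
    while length:
        if length & 1:
            crc = polymod(clmul(crc, power), G)
        power = polymod(clmul(power, power), G)
        length >>= 1
    return crc

assert crc32_be_shift(crc32_be_py(0x12345678, b"data"), 1000) == \
       crc32_be_py(0x12345678, b"data" + b"\x00" * 1000)
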
diff --git a/lib/crc64-rocksoft.c b/lib/crc64-rocksoft.c
deleted file mode 100644
index fc9ae0da5df784827b5116f67e5bfa2aebb9f137..0000000000000000000000000000000000000000
--- a/lib/crc64-rocksoft.c
+++ /dev/null
@@ -1,126 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-
-#include <linux/types.h>
-#include <linux/module.h>
-#include <linux/crc64.h>
-#include <linux/err.h>
-#include <linux/init.h>
-#include <crypto/hash.h>
-#include <crypto/algapi.h>
-#include <linux/static_key.h>
-#include <linux/notifier.h>
-
-static struct crypto_shash __rcu *crc64_rocksoft_tfm;
-static DEFINE_STATIC_KEY_TRUE(crc64_rocksoft_fallback);
-static DEFINE_MUTEX(crc64_rocksoft_mutex);
-static struct work_struct crc64_rocksoft_rehash_work;
-
-static int crc64_rocksoft_notify(struct notifier_block *self, unsigned long val, void *data)
-{
-	struct crypto_alg *alg = data;
-
-	if (val != CRYPTO_MSG_ALG_LOADED ||
-	    strcmp(alg->cra_name, CRC64_ROCKSOFT_STRING))
-		return NOTIFY_DONE;
-
-	schedule_work(&crc64_rocksoft_rehash_work);
-	return NOTIFY_OK;
-}
-
-static void crc64_rocksoft_rehash(struct work_struct *work)
-{
-	struct crypto_shash *new, *old;
-
-	mutex_lock(&crc64_rocksoft_mutex);
-	old = rcu_dereference_protected(crc64_rocksoft_tfm,
-					lockdep_is_held(&crc64_rocksoft_mutex));
-	new = crypto_alloc_shash(CRC64_ROCKSOFT_STRING, 0, 0);
-	if (IS_ERR(new)) {
-		mutex_unlock(&crc64_rocksoft_mutex);
-		return;
-	}
-	rcu_assign_pointer(crc64_rocksoft_tfm, new);
-	mutex_unlock(&crc64_rocksoft_mutex);
-
-	if (old) {
-		synchronize_rcu();
-		crypto_free_shash(old);
-	} else {
-		static_branch_disable(&crc64_rocksoft_fallback);
-	}
-}
-
-static struct notifier_block crc64_rocksoft_nb = {
-	.notifier_call = crc64_rocksoft_notify,
-};
-
-u64 crc64_rocksoft_update(u64 crc, const unsigned char *buffer, size_t len)
-{
-	struct {
-		struct shash_desc shash;
-		u64 crc;
-	} desc;
-	int err;
-
-	if (static_branch_unlikely(&crc64_rocksoft_fallback))
-		return crc64_rocksoft_generic(crc, buffer, len);
-
-	rcu_read_lock();
-	desc.shash.tfm = rcu_dereference(crc64_rocksoft_tfm);
-	desc.crc = crc;
-	err = crypto_shash_update(&desc.shash, buffer, len);
-	rcu_read_unlock();
-
-	BUG_ON(err);
-
-	return desc.crc;
-}
-EXPORT_SYMBOL_GPL(crc64_rocksoft_update);
-
-u64 crc64_rocksoft(const unsigned char *buffer, size_t len)
-{
-	return crc64_rocksoft_update(0, buffer, len);
-}
-EXPORT_SYMBOL_GPL(crc64_rocksoft);
-
-static int __init crc64_rocksoft_mod_init(void)
-{
-	INIT_WORK(&crc64_rocksoft_rehash_work, crc64_rocksoft_rehash);
-	crypto_register_notifier(&crc64_rocksoft_nb);
-	crc64_rocksoft_rehash(&crc64_rocksoft_rehash_work);
-	return 0;
-}
-
-static void __exit crc64_rocksoft_mod_fini(void)
-{
-	crypto_unregister_notifier(&crc64_rocksoft_nb);
-	cancel_work_sync(&crc64_rocksoft_rehash_work);
-	crypto_free_shash(rcu_dereference_protected(crc64_rocksoft_tfm, 1));
-}
-
-module_init(crc64_rocksoft_mod_init);
-module_exit(crc64_rocksoft_mod_fini);
-
-static int crc64_rocksoft_transform_show(char *buffer, const struct kernel_param *kp)
-{
-	struct crypto_shash *tfm;
-	int len;
-
-	if (static_branch_unlikely(&crc64_rocksoft_fallback))
-		return sprintf(buffer, "fallback\n");
-
-	rcu_read_lock();
-	tfm = rcu_dereference(crc64_rocksoft_tfm);
-	len = snprintf(buffer, PAGE_SIZE, "%s\n",
-		       crypto_shash_driver_name(tfm));
-	rcu_read_unlock();
-
-	return len;
-}
-
-module_param_call(transform, NULL, crc64_rocksoft_transform_show, NULL, 0444);
-
-MODULE_AUTHOR("Keith Busch <kbusch@kernel.org>");
-MODULE_DESCRIPTION("Rocksoft model CRC64 calculation (library API)");
-MODULE_LICENSE("GPL");
-MODULE_SOFTDEP("pre: crc64");
diff --git a/lib/crc64.c b/lib/crc64.c
index 61ae8dfb6a1c5981be03e04ef0ad43cbf67995bf..5b1b17057f0aef3dd5d2cc5afd3ab576fe82c3c9 100644
--- a/lib/crc64.c
+++ b/lib/crc64.c
@@ -22,8 +22,8 @@
  * x^24 + x^23 + x^22 + x^21 + x^19 + x^17 + x^13 + x^12 + x^10 + x^9 +
  * x^7 + x^4 + x + 1
  *
- * crc64rocksoft[256] table is from the Rocksoft specification polynomial
- * defined as,
+ * crc64nvmetable[256] uses the CRC64 polynomial from the NVME NVM Command Set
+ * Specification and uses least-significant-bit first bit order:
  *
  * x^64 + x^63 + x^61 + x^59 + x^58 + x^56 + x^55 + x^52 + x^49 + x^48 + x^47 +
  * x^46 + x^44 + x^41 + x^37 + x^36 + x^34 + x^32 + x^31 + x^28 + x^26 + x^23 +
@@ -41,45 +41,18 @@
 MODULE_DESCRIPTION("CRC64 calculations");
 MODULE_LICENSE("GPL v2");
 
-/**
- * crc64_be - Calculate bitwise big-endian ECMA-182 CRC64
- * @crc: seed value for computation. 0 or (u64)~0 for a new CRC calculation,
- *       or the previous crc64 value if computing incrementally.
- * @p: pointer to buffer over which CRC64 is run
- * @len: length of buffer @p
- */
-u64 __pure crc64_be(u64 crc, const void *p, size_t len)
+u64 crc64_be_generic(u64 crc, const u8 *p, size_t len)
 {
-	size_t i, t;
-
-	const unsigned char *_p = p;
-
-	for (i = 0; i < len; i++) {
-		t = ((crc >> 56) ^ (*_p++)) & 0xFF;
-		crc = crc64table[t] ^ (crc << 8);
-	}
-
+	while (len--)
+		crc = (crc << 8) ^ crc64table[(crc >> 56) ^ *p++];
 	return crc;
 }
-EXPORT_SYMBOL_GPL(crc64_be);
+EXPORT_SYMBOL_GPL(crc64_be_generic);
 
-/**
- * crc64_rocksoft_generic - Calculate bitwise Rocksoft CRC64
- * @crc: seed value for computation. 0 for a new CRC calculation, or the
- * 	 previous crc64 value if computing incrementally.
- * @p: pointer to buffer over which CRC64 is run
- * @len: length of buffer @p
- */
-u64 __pure crc64_rocksoft_generic(u64 crc, const void *p, size_t len)
+u64 crc64_nvme_generic(u64 crc, const u8 *p, size_t len)
 {
-	const unsigned char *_p = p;
-	size_t i;
-
-	crc = ~crc;
-
-	for (i = 0; i < len; i++)
-		crc = (crc >> 8) ^ crc64rocksofttable[(crc & 0xff) ^ *_p++];
-
-	return ~crc;
+	while (len--)
+		crc = (crc >> 8) ^ crc64nvmetable[(crc & 0xff) ^ *p++];
+	return crc;
 }
-EXPORT_SYMBOL_GPL(crc64_rocksoft_generic);
+EXPORT_SYMBOL_GPL(crc64_nvme_generic);
diff --git a/lib/gen_crc64table.c b/lib/gen_crc64table.c
index 55e222acd0b884cb9e8ab8ae09d379cd2919e4c0..e05a4230a0a0826473aa8c6ac74d38b6b4c82291 100644
--- a/lib/gen_crc64table.c
+++ b/lib/gen_crc64table.c
@@ -17,10 +17,10 @@
 #include <stdio.h>
 
 #define CRC64_ECMA182_POLY 0x42F0E1EBA9EA3693ULL
-#define CRC64_ROCKSOFT_POLY 0x9A6C9329AC4BC9B5ULL
+#define CRC64_NVME_POLY 0x9A6C9329AC4BC9B5ULL
 
 static uint64_t crc64_table[256] = {0};
-static uint64_t crc64_rocksoft_table[256] = {0};
+static uint64_t crc64_nvme_table[256] = {0};
 
 static void generate_reflected_crc64_table(uint64_t table[256], uint64_t poly)
 {
@@ -82,14 +82,14 @@ static void print_crc64_tables(void)
 	printf("static const u64 ____cacheline_aligned crc64table[256] = {\n");
 	output_table(crc64_table);
 
-	printf("\nstatic const u64 ____cacheline_aligned crc64rocksofttable[256] = {\n");
-	output_table(crc64_rocksoft_table);
+	printf("\nstatic const u64 ____cacheline_aligned crc64nvmetable[256] = {\n");
+	output_table(crc64_nvme_table);
 }
 
 int main(int argc, char *argv[])
 {
 	generate_crc64_table(crc64_table, CRC64_ECMA182_POLY);
-	generate_reflected_crc64_table(crc64_rocksoft_table, CRC64_ROCKSOFT_POLY);
+	generate_reflected_crc64_table(crc64_nvme_table, CRC64_NVME_POLY);
 	print_crc64_tables();
 	return 0;
 }
diff --git a/lib/tests/crc_kunit.c b/lib/tests/crc_kunit.c
index 6a61d4b5fd45a8cf733342605e7a603460ef5ab3..40b4b41f218474d5ae3c1dfd178242bc28e60c1e 100644
--- a/lib/tests/crc_kunit.c
+++ b/lib/tests/crc_kunit.c
@@ -32,7 +32,8 @@ static size_t test_buflen;
  * @poly: The generator polynomial with the highest-order term omitted.
  *	  Bit-reversed if @le is true.
  * @func: The function to compute a CRC.  The type signature uses u64 so that it
- *	  can fit any CRC up to CRC-64.
+ *	  can fit any CRC up to CRC-64.  The function is expected to *not*
+ *	  invert the CRC at the beginning and end.
  * @combine_func: Optional function to combine two CRCs.
  */
 struct crc_variant {
@@ -362,7 +363,7 @@ static u64 crc32c_wrapper(u64 crc, const u8 *p, size_t len)
 
 static u64 crc32c_combine_wrapper(u64 crc1, u64 crc2, size_t len2)
 {
-	return __crc32c_le_combine(crc1, crc2, len2);
+	return crc32c_combine(crc1, crc2, len2);
 }
 
 static const struct crc_variant crc_variant_crc32c = {
@@ -407,6 +408,31 @@ static void crc64_be_benchmark(struct kunit *test)
 	crc_benchmark(test, crc64_be_wrapper);
 }
 
+/* crc64_nvme */
+
+static u64 crc64_nvme_wrapper(u64 crc, const u8 *p, size_t len)
+{
+	/* The inversions that crc64_nvme() does have to be undone here. */
+	return ~crc64_nvme(~crc, p, len);
+}
+
+static const struct crc_variant crc_variant_crc64_nvme = {
+	.bits = 64,
+	.le = true,
+	.poly = 0x9a6c9329ac4bc9b5,
+	.func = crc64_nvme_wrapper,
+};
+
+static void crc64_nvme_test(struct kunit *test)
+{
+	crc_test(test, &crc_variant_crc64_nvme);
+}
+
+static void crc64_nvme_benchmark(struct kunit *test)
+{
+	crc_benchmark(test, crc64_nvme_wrapper);
+}
+
 static struct kunit_case crc_test_cases[] = {
 	KUNIT_CASE(crc16_test),
 	KUNIT_CASE(crc16_benchmark),
@@ -420,6 +446,8 @@ static struct kunit_case crc_test_cases[] = {
 	KUNIT_CASE(crc32c_benchmark),
 	KUNIT_CASE(crc64_be_test),
 	KUNIT_CASE(crc64_be_benchmark),
+	KUNIT_CASE(crc64_nvme_test),
+	KUNIT_CASE(crc64_nvme_benchmark),
 	{},
 };
 
diff --git a/scripts/gen-crc-consts.py b/scripts/gen-crc-consts.py
new file mode 100755
index 0000000000000000000000000000000000000000..aa678a50897d96d0d4aa489688844caff897f5e7
--- /dev/null
+++ b/scripts/gen-crc-consts.py
@@ -0,0 +1,238 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0-or-later
+#
+# Script that generates constants for computing the given CRC variant(s).
+#
+# Copyright 2025 Google LLC
+#
+# Author: Eric Biggers <ebiggers@google.com>
+
+import sys
+
+# XOR (add) an iterable of polynomials.
+def xor(iterable):
+    res = 0
+    for val in iterable:
+        res ^= val
+    return res
+
+# Multiply two polynomials.
+def clmul(a, b):
+    return xor(a << i for i in range(b.bit_length()) if (b & (1 << i)) != 0)
+
+# Polynomial division floor(a / b).
+def div(a, b):
+    q = 0
+    while a.bit_length() >= b.bit_length():
+        q ^= 1 << (a.bit_length() - b.bit_length())
+        a ^= b << (a.bit_length() - b.bit_length())
+    return q
+
+# Reduce the polynomial 'a' modulo the polynomial 'b'.
+def reduce(a, b):
+    return a ^ clmul(div(a, b), b)
+
+# Reflect the bits of a polynomial.
+def bitreflect(poly, num_bits):
+    assert poly.bit_length() <= num_bits
+    return xor(((poly >> i) & 1) << (num_bits - 1 - i) for i in range(num_bits))
+
+# Format a polynomial as hex.  Bit-reflect it if the CRC is lsb-first.
+def fmt_poly(variant, poly, num_bits):
+    if variant.lsb:
+        poly = bitreflect(poly, num_bits)
+    return f'0x{poly:0{2*num_bits//8}x}'
+
+# Print a pair of 64-bit polynomial multipliers.  They are always passed in the
+# order [HI64_TERMS, LO64_TERMS] but will be printed in the appropriate order.
+def print_mult_pair(variant, mults):
+    mults = list(mults if variant.lsb else reversed(mults))
+    terms = ['HI64_TERMS', 'LO64_TERMS'] if variant.lsb else ['LO64_TERMS', 'HI64_TERMS']
+    for i in range(2):
+        print(f'\t\t{fmt_poly(variant, mults[i]["val"], 64)},\t/* {terms[i]}: {mults[i]["desc"]} */')
+
+# Pretty-print a polynomial.
+def pprint_poly(prefix, poly):
+    terms = [f'x^{i}' for i in reversed(range(poly.bit_length()))
+             if (poly & (1 << i)) != 0]
+    j = 0
+    while j < len(terms):
+        s = prefix + terms[j] + (' +' if j < len(terms) - 1 else '')
+        j += 1
+        while j < len(terms) and len(s) < 73:
+            s += ' ' + terms[j] + (' +' if j < len(terms) - 1 else '')
+            j += 1
+        print(s)
+        prefix = ' * ' + (' ' * (len(prefix) - 3))
+
+# Print a comment describing constants generated for the given CRC variant.
+def print_header(variant, what):
+    print('/*')
+    s = f'{"least" if variant.lsb else "most"}-significant-bit-first CRC-{variant.bits}'
+    print(f' * {what} generated for {s} using')
+    pprint_poly(' * G(x) = ', variant.G)
+    print(' */')
+
+class CrcVariant:
+    def __init__(self, bits, generator_poly, bit_order):
+        self.bits = bits
+        if bit_order not in ['lsb', 'msb']:
+            raise ValueError('Invalid value for bit_order')
+        self.lsb = bit_order == 'lsb'
+        self.name = f'crc{bits}_{bit_order}_0x{generator_poly:0{(2*bits+7)//8}x}'
+        if self.lsb:
+            generator_poly = bitreflect(generator_poly, bits)
+        self.G = generator_poly ^ (1 << bits)
+
+# Generate tables for CRC computation using the "slice-by-N" method.
+# N=1 corresponds to the traditional byte-at-a-time table.
+def gen_slicebyN_tables(variants, n):
+    for v in variants:
+        print('')
+        print_header(v, f'Slice-by-{n} CRC table')
+        print(f'static const u{v.bits} __maybe_unused {v.name}_table[{256*n}] = {{')
+        s = ''
+        for i in range(256 * n):
+            # The i'th table entry is the CRC of the message consisting of byte
+            # i % 256 followed by i // 256 zero bytes.
+            poly = (bitreflect(i % 256, 8) if v.lsb else i % 256) << (v.bits + 8*(i//256))
+            next_entry = fmt_poly(v, reduce(poly, v.G), v.bits) + ','
+            if len(s + next_entry) > 71:
+                print(f'\t{s}')
+                s = ''
+            s += (' ' if s else '') + next_entry
+        if s:
+            print(f'\t{s}')
+        print('};')
+
+# Generate constants for carryless multiplication based CRC computation.
+def gen_x86_pclmul_consts(variants):
+    # These are the distances, in bits, to generate folding constants for.
+    FOLD_DISTANCES = [2048, 1024, 512, 256, 128]
+
+    for v in variants:
+        (G, n, lsb) = (v.G, v.bits, v.lsb)
+        print('')
+        print_header(v, 'CRC folding constants')
+        print('static const struct {')
+        if not lsb:
+            print('\tu8 bswap_mask[16];')
+        for i in FOLD_DISTANCES:
+            print(f'\tu64 fold_across_{i}_bits_consts[2];')
+        print('\tu8 shuf_table[48];')
+        print('\tu64 barrett_reduction_consts[2];')
+        print(f'}} {v.name}_consts ____cacheline_aligned __maybe_unused = {{')
+
+        # Byte-reflection mask, needed for msb-first CRCs
+        if not lsb:
+            print('\t.bswap_mask = {' + ', '.join(str(i) for i in reversed(range(16))) + '},')
+
+        # Fold constants for all distances down to 128 bits
+        for i in FOLD_DISTANCES:
+            print(f'\t.fold_across_{i}_bits_consts = {{')
+            # Given 64x64 => 128 bit carryless multiplication instructions, two
+            # 64-bit fold constants are needed per "fold distance" i: one for
+            # HI64_TERMS that is basically x^(i+64) mod G and one for LO64_TERMS
+            # that is basically x^i mod G.  The exact values however undergo a
+            # couple adjustments, described below.
+            mults = []
+            for j in [64, 0]:
+                pow_of_x = i + j
+                if lsb:
+                    # Each 64x64 => 128 bit carryless multiplication instruction
+                    # actually generates a 127-bit product in physical bits 0
+                    # through 126, which in the lsb-first case represent the
+                    # coefficients of x^1 through x^127, not x^0 through x^126.
+                    # Thus in the lsb-first case, each such instruction
+                    # implicitly adds an extra factor of x.  The below removes a
+                    # factor of x from each constant to compensate for this.
+                    # For n < 64 the x could be removed from either the reduced
+                    # part or unreduced part, but for n == 64 the reduced part
+                    # is the only option.  Just always use the reduced part.
+                    pow_of_x -= 1
+                # Make a factor of x^(64-n) be applied unreduced rather than
+                # reduced, to cause the product to use only the x^(64-n) and
+                # higher terms and always be zero in the lower terms.  Usually
+                # this makes no difference as it does not affect the product's
+                # congruence class mod G and the constant remains 64-bit, but
+                # part of the final reduction from 128 bits does rely on this
+                # property when it reuses one of the constants.
+                pow_of_x -= 64 - n
+                mults.append({ 'val': reduce(1 << pow_of_x, G) << (64 - n),
+                               'desc': f'(x^{pow_of_x} mod G) * x^{64-n}' })
+            print_mult_pair(v, mults)
+            print('\t},')
+
+        # Shuffle table for handling 1..15 bytes at end
+        print('\t.shuf_table = {')
+        print('\t\t' + (16*'-1, ').rstrip())
+        print('\t\t' + ''.join(f'{i:2}, ' for i in range(16)).rstrip())
+        print('\t\t' + (16*'-1, ').rstrip())
+        print('\t},')
+
+        # Barrett reduction constants for reducing 128 bits to the final CRC
+        print('\t.barrett_reduction_consts = {')
+        mults = []
+
+        val = div(1 << (63+n), G)
+        desc = f'floor(x^{63+n} / G)'
+        if not lsb:
+            val = (val << 1) - (1 << 64)
+            desc = f'({desc} * x) - x^64'
+        mults.append({ 'val': val, 'desc': desc })
+
+        val = G - (1 << n)
+        desc = f'G - x^{n}'
+        if lsb and n == 64:
+            assert (val & 1) != 0  # The x^0 term should always be nonzero.
+            val >>= 1
+            desc = f'({desc} - x^0) / x'
+        else:
+            pow_of_x = 64 - n - (1 if lsb else 0)
+            val <<= pow_of_x
+            desc = f'({desc}) * x^{pow_of_x}'
+        mults.append({ 'val': val, 'desc': desc })
+
+        print_mult_pair(v, mults)
+        print('\t},')
+
+        print('};')
+
+def parse_crc_variants(vars_string):
+    variants = []
+    for var_string in vars_string.split(','):
+        bits, bit_order, generator_poly = var_string.split('_')
+        assert bits.startswith('crc')
+        bits = int(bits.removeprefix('crc'))
+        assert generator_poly.startswith('0x')
+        generator_poly = generator_poly.removeprefix('0x')
+        assert len(generator_poly) % 2 == 0
+        generator_poly = int(generator_poly, 16)
+        variants.append(CrcVariant(bits, generator_poly, bit_order))
+    return variants
+
+if len(sys.argv) != 3:
+    sys.stderr.write(f'Usage: {sys.argv[0]} CONSTS_TYPE[,CONSTS_TYPE]... CRC_VARIANT[,CRC_VARIANT]...\n')
+    sys.stderr.write('  CONSTS_TYPE can be sliceby[1-8] or x86_pclmul\n')
+    sys.stderr.write('  CRC_VARIANT is crc${num_bits}_${bit_order}_${generator_poly_as_hex}\n')
+    sys.stderr.write('     E.g. crc16_msb_0x8bb7 or crc32_lsb_0xedb88320\n')
+    sys.stderr.write('     Polynomial must use the given bit_order and exclude x^{num_bits}\n')
+    sys.exit(1)
+
+print('/* SPDX-License-Identifier: GPL-2.0-or-later */')
+print('/*')
+print(' * CRC constants generated by:')
+print(' *')
+print(f' *\t{sys.argv[0]} {" ".join(sys.argv[1:])}')
+print(' *')
+print(' * Do not edit manually.')
+print(' */')
+consts_types = sys.argv[1].split(',')
+variants = parse_crc_variants(sys.argv[2])
+for consts_type in consts_types:
+    if consts_type.startswith('sliceby'):
+        gen_slicebyN_tables(variants, int(consts_type.removeprefix('sliceby')))
+    elif consts_type == 'x86_pclmul':
+        gen_x86_pclmul_consts(variants)
+    else:
+        raise ValueError(f'Unknown consts_type: {consts_type}')
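
A quick sanity check, not part of the script: it reuses the reduce(), bitreflect() and CrcVariant definitions above to show that a sliceby1 table drives the usual lsb-first byte-at-a-time loop. The variant matches the usage example, e.g. ./scripts/gen-crc-consts.py sliceby1 crc32_lsb_0xedb88320; the explicit bitreflect() on each entry reproduces what fmt_poly() does when printing lsb-first constants. The helper names below are ad hoc for this sketch.

v = CrcVariant(32, 0xedb88320, 'lsb')
table = [bitreflect(reduce(bitreflect(i, 8) << 32, v.G), 32) for i in range(256)]

def crc32_le_bytewise(crc, data):
    # Table-driven update, one byte per step.
    for byte in data:
        crc = (crc >> 8) ^ table[(crc ^ byte) & 0xff]
    return crc

def crc32_le_bitwise(crc, data):
    # Bit-at-a-time reference for the same lsb-first CRC-32.
    for byte in data:
        crc ^= byte
        for _ in range(8):
            crc = (crc >> 1) ^ (0xEDB88320 if crc & 1 else 0)
    return crc

assert crc32_le_bytewise(0xFFFFFFFF, b"123456789") == crc32_le_bitwise(0xFFFFFFFF, b"123456789")
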
diff --git a/sound/soc/codecs/aw88395/aw88395_device.c b/sound/soc/codecs/aw88395/aw88395_device.c
index 6b333d1c6e9461053457399ec186aedb7ecd0615..b7ea8be0d0cb0c412ec1cfae87a774ad47f933d3 100644
--- a/sound/soc/codecs/aw88395/aw88395_device.c
+++ b/sound/soc/codecs/aw88395/aw88395_device.c
@@ -424,7 +424,7 @@ static int aw_dev_dsp_set_crc32(struct aw_device *aw_dev)
 		return -EINVAL;
 	}
 
-	crc_value = __crc32c_le(0xFFFFFFFF, crc_dsp_cfg->data, crc_data_len) ^ 0xFFFFFFFF;
+	crc_value = crc32c(0xFFFFFFFF, crc_dsp_cfg->data, crc_data_len) ^ 0xFFFFFFFF;
 
 	return aw_dev_dsp_write(aw_dev, AW88395_DSP_REG_CRC_ADDR, crc_value,
 						AW88395_DSP_32_DATA);
diff --git a/tools/testing/selftests/arm64/fp/kernel-test.c b/tools/testing/selftests/arm64/fp/kernel-test.c
index 348e8bef62c7a0fc8225ccec6d881c7db0231626..e3cec3723ffa967c1570668a0b73085d0d9a4c8f 100644
--- a/tools/testing/selftests/arm64/fp/kernel-test.c
+++ b/tools/testing/selftests/arm64/fp/kernel-test.c
@@ -46,7 +46,6 @@ static void handle_kick_signal(int sig, siginfo_t *info, void *context)
 }
 
 static char *drivers[] = {
-	"crct10dif-arm64",
 	"sha1-ce",
 	"sha224-arm64",
 	"sha224-arm64-neon",