diff --git a/Documentation/devicetree/bindings/interrupt-controller/openrisc,ompic.txt b/Documentation/devicetree/bindings/interrupt-controller/openrisc,ompic.txt
new file mode 100644
index 0000000000000000000000000000000000000000..caec07cc71496d1b0acc8fa268039e0cecef2920
--- /dev/null
+++ b/Documentation/devicetree/bindings/interrupt-controller/openrisc,ompic.txt
@@ -0,0 +1,22 @@
+Open Multi-Processor Interrupt Controller
+
+Required properties:
+
+- compatible : This should be "openrisc,ompic"
+- reg : Specifies base physical address and size of the register space. The
+  size is based on the number of cores the controller has been configured
+  to handle; this should be set to 8 bytes per CPU core.
+- interrupt-controller : Identifies the node as an interrupt controller.
+- #interrupt-cells : This should be set to 0 as this will not be an irq
+  parent.
+- interrupts : Specifies the interrupt line to which the ompic is wired.
+
+Example:
+
+ompic: interrupt-controller@98000000 {
+	compatible = "openrisc,ompic";
+	reg = <0x98000000 16>;
+	interrupt-controller;
+	#interrupt-cells = <0>;
+	interrupts = <1>;
+};
diff --git a/Documentation/devicetree/bindings/openrisc/opencores/or1ksim.txt b/Documentation/devicetree/bindings/openrisc/opencores/or1ksim.txt
new file mode 100644
index 0000000000000000000000000000000000000000..4950c794ecbb5fea57cb39ed4de4898798c305c7
--- /dev/null
+++ b/Documentation/devicetree/bindings/openrisc/opencores/or1ksim.txt
@@ -0,0 +1,39 @@
+OpenRISC Generic SoC
+====================
+
+Boards and FPGA SoCs which support the OpenRISC standard platform.  The
+platform essentially follows the conventions of the OpenRISC architecture
+specification; however, some aspects, such as the boot protocol, have been
+defined by the Linux port.
+
+Required properties
+-------------------
+ - compatible: Must include "opencores,or1ksim"
+
+CPU nodes:
+----------
+A "cpus" node is required.  Required properties:
+ - #address-cells: Must be 1.
+ - #size-cells: Must be 0.
+A CPU sub-node is also required for at least CPU 0.  Since the topology may
+be probed via CPS, it is not necessary to specify secondary CPUs.  Required
+properties:
+ - compatible: Must be "opencores,or1200-rtlsvn481".
+ - reg: CPU number.
+ - clock-frequency: The CPU clock frequency in Hz.
+Example:
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+		cpu@0 {
+			compatible = "opencores,or1200-rtlsvn481";
+			reg = <0>;
+			clock-frequency = <20000000>;
+		};
+	};
+
+
+Boot protocol
+-------------
+The bootloader may pass the following arguments to the kernel:
+ - r3:  address of a flattened device-tree blob or 0x0.
diff --git a/Documentation/devicetree/bindings/vendor-prefixes.txt b/Documentation/devicetree/bindings/vendor-prefixes.txt
index 1afd298eddd73147ebf6a1dcbc56bf404559cff1..b1eeca851d6f55adde03c945561b56085289ee37 100644
--- a/Documentation/devicetree/bindings/vendor-prefixes.txt
+++ b/Documentation/devicetree/bindings/vendor-prefixes.txt
@@ -246,6 +246,7 @@ onion	Onion Corporation
 onnn	ON Semiconductor Corp.
 ontat	On Tat Industrial Company
 opencores	OpenCores.org
+openrisc	OpenRISC.io
 option	Option NV
 ORCL	Oracle Corporation
 ortustech	Ortus Technology Co., Ltd.
diff --git a/arch/openrisc/README.openrisc b/Documentation/openrisc/README
similarity index 56%
rename from arch/openrisc/README.openrisc
rename to Documentation/openrisc/README
index 072069ab51003bd8091c16c75fa0dd4755fb4d01..777a893d533d14cb161987085ab471645f2b4c24 100644
--- a/arch/openrisc/README.openrisc
+++ b/Documentation/openrisc/README
@@ -7,13 +7,7 @@ target architecture, specifically, is the 32-bit OpenRISC 1000 family (or1k).
 For information about OpenRISC processors and ongoing development:
 
 	website		http://openrisc.io
-
-For more information about Linux on OpenRISC, please contact South Pole AB.
-
-	email:		info@southpole.se
-
-	website:	http://southpole.se
-			http://southpoleconsulting.com
+	email		openrisc@lists.librecores.org
 
 ---------------------------------------------------------------------
 
@@ -24,37 +18,54 @@ In order to build and run Linux for OpenRISC, you'll need at least a basic
 toolchain and, perhaps, the architectural simulator.  Steps to get these bits
 in place are outlined here.
 
-1)  The toolchain can be obtained from openrisc.io.  Instructions for building
-a toolchain can be found at:
+1) Toolchain
+
+Toolchain binaries can be obtained from openrisc.io or our GitHub releases page.
+Instructions for building the different toolchains can be found on openrisc.io
+or Stafford's toolchain build and release scripts.
+
+	binaries	https://github.com/openrisc/or1k-gcc/releases
+	toolchains	https://openrisc.io/software
+	building	https://github.com/stffrdhrn/or1k-toolchain-build
 
-https://github.com/openrisc/tutorials
+2) Building
 
-2) or1ksim (optional)
+Build the Linux kernel as usual
 
-or1ksim is the architectural simulator which will allow you to actually run
-your OpenRISC Linux kernel if you don't have an OpenRISC processor at hand.
+	make ARCH=openrisc defconfig
+	make ARCH=openrisc
 
-	git clone https://github.com/openrisc/or1ksim.git
+3) Running on FPGA (optional)
 
-	cd or1ksim
-	./configure --prefix=$OPENRISC_PREFIX
-	make
-	make install
+The OpenRISC community typically uses FuseSoC to manage building and programming
+an SoC into an FPGA.  The below is an example of programming a De0 Nano
+development board with the OpenRISC SoC.  During the build, FPGA RTL code is
+downloaded from the FuseSoC IP cores repository and built using the FPGA vendor
+tools.  Binaries are loaded onto the board with openocd.
 
-3)  Linux kernel
+	git clone https://github.com/olofk/fusesoc
+	cd fusesoc
+	sudo pip install -e .
 
-Build the kernel as usual
+	fusesoc init
+	fusesoc build de0_nano
+	fusesoc pgm de0_nano
 
-	make ARCH=openrisc defconfig
-	make ARCH=openrisc
+	openocd -f interface/altera-usb-blaster.cfg \
+		-f board/or1k_generic.cfg
+
+	telnet localhost 4444
+	> init
+	> halt; load_image vmlinux ; reset
 
-4)  Run in architectural simulator
+4) Running on a Simulator (optional)
 
-Grab the or1ksim platform configuration file (from the or1ksim source) and
-together with your freshly built vmlinux, run your kernel with the following
-incantation:
+QEMU is a processor emulator which we recommend for simulating the OpenRISC
+platform.  Please follow the OpenRISC instructions on the QEMU website to get
+Linux running on QEMU.  You can build QEMU yourself, but your Linux distribution
+likely provides binary packages to support OpenRISC.
 
-	sim -f arch/openrisc/or1ksim.cfg vmlinux
+	qemu openrisc	https://wiki.qemu.org/Documentation/Platforms/OpenRISC
 
 ---------------------------------------------------------------------
 
diff --git a/arch/openrisc/TODO.openrisc b/Documentation/openrisc/TODO
similarity index 100%
rename from arch/openrisc/TODO.openrisc
rename to Documentation/openrisc/TODO
diff --git a/MAINTAINERS b/MAINTAINERS
index 082679a4bcc2a46bcf7f3f66ddc42fa80bb25c7b..e7aa8379b8900aa5f8e63220e150820c729aaa81 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10037,7 +10037,11 @@ T:	git git://github.com/openrisc/linux.git
 L:	openrisc@lists.librecores.org
 W:	http://openrisc.io
 S:	Maintained
+F:	Documentation/devicetree/bindings/openrisc/
+F:	Documentation/openrisc/
 F:	arch/openrisc/
+F:	drivers/irqchip/irq-ompic.c
+F:	drivers/irqchip/irq-or1k-*
 
 OPENVSWITCH
 M:	Pravin Shelar <pshelar@nicira.com>
diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig
index df2136ab1dcca223481ad65633368a8cf037359a..339df7324e9c2004bf0b182fc274b4f5c1497d54 100644
--- a/arch/openrisc/Kconfig
+++ b/arch/openrisc/Kconfig
@@ -22,13 +22,19 @@ config OPENRISC
 	select HAVE_UID16
 	select GENERIC_ATOMIC64
 	select GENERIC_CLOCKEVENTS
+	select GENERIC_CLOCKEVENTS_BROADCAST
 	select GENERIC_STRNCPY_FROM_USER
 	select GENERIC_STRNLEN_USER
+	select GENERIC_SMP_IDLE_THREAD
 	select MODULES_USE_ELF_RELA
 	select HAVE_DEBUG_STACKOVERFLOW
 	select OR1K_PIC
 	select CPU_NO_EFFICIENT_FFS if !OPENRISC_HAVE_INST_FF1
 	select NO_BOOTMEM
+	select ARCH_USE_QUEUED_SPINLOCKS
+	select ARCH_USE_QUEUED_RWLOCKS
+	select OMPIC if SMP
+	select ARCH_WANT_FRAME_POINTERS
 
 config CPU_BIG_ENDIAN
 	def_bool y
@@ -56,6 +62,12 @@ config TRACE_IRQFLAGS_SUPPORT
 config GENERIC_CSUM
         def_bool y
 
+config STACKTRACE_SUPPORT
+	def_bool y
+
+config LOCKDEP_SUPPORT
+	def_bool  y
+
 source "init/Kconfig"
 
 source "kernel/Kconfig.freezer"
@@ -73,6 +85,17 @@ config OR1K_1200
 
 endchoice
 
+config DCACHE_WRITETHROUGH
+	bool "Have write through data caches"
+	default n
+	help
+	  Select this if your implementation features write through data caches.
+	  Selecting 'N' here will allow the kernel to force flushing of data
+	  caches at relevant times. Most OpenRISC implementations support write-
+	  through data caches.
+
+	  If unsure say N here
+
 config OPENRISC_BUILTIN_DTB
         string "Builtin DTB"
         default ""
@@ -105,8 +128,19 @@ config OPENRISC_HAVE_INST_DIV
 endmenu
 
 config NR_CPUS
-	int
-	default "1"
+	int "Maximum number of CPUs (2-32)"
+	range 2 32
+	depends on SMP
+	default "2"
+
+config SMP
+	bool "Symmetric Multi-Processing support"
+	help
+	  This enables support for systems with more than one CPU. If you have
+	  a system with only one CPU, say N. If you have a system with more
+	  than one CPU, say Y.
+
+	  If you don't know what to do here, say N.
 
 source kernel/Kconfig.hz
 source kernel/Kconfig.preempt
@@ -125,6 +159,17 @@ config OPENRISC_NO_SPR_SR_DSX
 	  Say N here if you know that your OpenRISC processor has
 	  SPR_SR_DSX bit implemented. Say Y if you are unsure.
 
+config OPENRISC_HAVE_SHADOW_GPRS
+	bool "Support for shadow gpr files" if !SMP
+	default y if SMP
+	help
+	  Say Y here if your OpenRISC processor features shadowed
+	  register files. They will in such case be used as a
+	  scratch reg storage on exception entry.
+
+	  On SMP systems, this feature is mandatory.
+	  On a unicore system it's safe to say N here if you are unsure.
+
 config CMDLINE
         string "Default kernel command string"
         default ""
diff --git a/arch/openrisc/Makefile b/arch/openrisc/Makefile
index 89076a66eee266d79782949f93a9cbba30371b1a..cf88029628643b1120b15407eab08e50f44886fc 100644
--- a/arch/openrisc/Makefile
+++ b/arch/openrisc/Makefile
@@ -25,6 +25,7 @@ LDFLAGS_vmlinux :=
 LIBGCC 		:= $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name)
 
 KBUILD_CFLAGS	+= -pipe -ffixed-r10 -D__linux__
+CHECKFLAGS	+= -mbig-endian
 
 ifeq ($(CONFIG_OPENRISC_HAVE_INST_MUL),y)
 	KBUILD_CFLAGS += $(call cc-option,-mhard-mul)
diff --git a/arch/openrisc/boot/dts/or1ksim.dts b/arch/openrisc/boot/dts/or1ksim.dts
index 9f4b856da5805db4c01b42ba387f3c913536a426..d8aa8309c9d30981c9129b8610fd4b306e884f8d 100644
--- a/arch/openrisc/boot/dts/or1ksim.dts
+++ b/arch/openrisc/boot/dts/or1ksim.dts
@@ -6,8 +6,13 @@
 	#size-cells = <1>;
 	interrupt-parent = <&pic>;
 
+	aliases {
+		uart0 = &serial0;
+	};
+
 	chosen {
-		bootargs = "console=uart,mmio,0x90000000,115200";
+		bootargs = "earlycon";
+		stdout-path = "uart0:115200";
 	};
 
 	memory@0 {
diff --git a/arch/openrisc/boot/dts/simple_smp.dts b/arch/openrisc/boot/dts/simple_smp.dts
new file mode 100644
index 0000000000000000000000000000000000000000..defbb92714ec1746e55799d97f6746dfbd478246
--- /dev/null
+++ b/arch/openrisc/boot/dts/simple_smp.dts
@@ -0,0 +1,63 @@
+/dts-v1/;
+/ {
+	compatible = "opencores,or1ksim";
+	#address-cells = <1>;
+	#size-cells = <1>;
+	interrupt-parent = <&pic>;
+
+	aliases {
+		uart0 = &serial0;
+	};
+
+	chosen {
+		bootargs = "earlycon";
+		stdout-path = "uart0:115200";
+	};
+
+	memory@0 {
+		device_type = "memory";
+		reg = <0x00000000 0x02000000>;
+	};
+
+	cpus {
+		#address-cells = <1>;
+		#size-cells = <0>;
+		cpu@0 {
+			compatible = "opencores,or1200-rtlsvn481";
+			reg = <0>;
+			clock-frequency = <20000000>;
+		};
+		cpu@1 {
+			compatible = "opencores,or1200-rtlsvn481";
+			reg = <1>;
+			clock-frequency = <20000000>;
+		};
+	};
+
+	ompic: ompic@98000000 {
+		compatible = "openrisc,ompic";
+		reg = <0x98000000 16>;
+		interrupt-controller;
+		#interrupt-cells = <0>;
+		interrupts = <1>;
+	};
+
+	/*
+	 * OR1K PIC is built into CPU and accessed via special purpose
+	 * registers.  It is not addressable and, hence, has no 'reg'
+	 * property.
+	 */
+	pic: pic {
+		compatible = "opencores,or1k-pic-level";
+		#interrupt-cells = <1>;
+		interrupt-controller;
+	};
+
+	serial0: serial@90000000 {
+		compatible = "opencores,uart16550-rtlsvn105", "ns16550a";
+		reg = <0x90000000 0x100>;
+		interrupts = <2>;
+		clock-frequency = <20000000>;
+	};
+
+};
diff --git a/arch/openrisc/configs/simple_smp_defconfig b/arch/openrisc/configs/simple_smp_defconfig
new file mode 100644
index 0000000000000000000000000000000000000000..b6e3c7e158e7e51ef6b6de10acd1205f7b938f86
--- /dev/null
+++ b/arch/openrisc/configs/simple_smp_defconfig
@@ -0,0 +1,66 @@
+CONFIG_CROSS_COMPILE="or1k-linux-"
+CONFIG_LOCALVERSION="-simple-smp"
+CONFIG_NO_HZ=y
+CONFIG_LOG_BUF_SHIFT=14
+CONFIG_BLK_DEV_INITRD=y
+# CONFIG_RD_GZIP is not set
+# CONFIG_RD_BZIP2 is not set
+# CONFIG_RD_LZMA is not set
+# CONFIG_RD_XZ is not set
+# CONFIG_RD_LZO is not set
+# CONFIG_RD_LZ4 is not set
+CONFIG_EXPERT=y
+# CONFIG_KALLSYMS is not set
+# CONFIG_EPOLL is not set
+# CONFIG_TIMERFD is not set
+# CONFIG_EVENTFD is not set
+# CONFIG_AIO is not set
+# CONFIG_VM_EVENT_COUNTERS is not set
+# CONFIG_COMPAT_BRK is not set
+CONFIG_SLOB=y
+CONFIG_MODULES=y
+# CONFIG_BLOCK is not set
+CONFIG_OPENRISC_BUILTIN_DTB="simple_smp"
+CONFIG_SMP=y
+CONFIG_HZ_100=y
+CONFIG_OPENRISC_HAVE_SHADOW_GPRS=y
+CONFIG_NET=y
+CONFIG_PACKET=y
+CONFIG_UNIX=y
+CONFIG_INET=y
+# CONFIG_INET_XFRM_MODE_TRANSPORT is not set
+# CONFIG_INET_XFRM_MODE_TUNNEL is not set
+# CONFIG_INET_XFRM_MODE_BEET is not set
+# CONFIG_INET_DIAG is not set
+CONFIG_TCP_CONG_ADVANCED=y
+# CONFIG_TCP_CONG_BIC is not set
+# CONFIG_TCP_CONG_CUBIC is not set
+# CONFIG_TCP_CONG_WESTWOOD is not set
+# CONFIG_TCP_CONG_HTCP is not set
+# CONFIG_IPV6 is not set
+# CONFIG_WIRELESS is not set
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+# CONFIG_PREVENT_FIRMWARE_BUILD is not set
+# CONFIG_FW_LOADER is not set
+CONFIG_NETDEVICES=y
+CONFIG_ETHOC=y
+CONFIG_MICREL_PHY=y
+# CONFIG_WLAN is not set
+# CONFIG_INPUT is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+# CONFIG_LEGACY_PTYS is not set
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_OF_PLATFORM=y
+# CONFIG_HW_RANDOM is not set
+# CONFIG_HWMON is not set
+# CONFIG_USB_SUPPORT is not set
+# CONFIG_DNOTIFY is not set
+CONFIG_TMPFS=y
+CONFIG_NFS_FS=y
+CONFIG_XZ_DEC=y
+# CONFIG_ENABLE_WARN_DEPRECATED is not set
+# CONFIG_ENABLE_MUST_CHECK is not set
+# CONFIG_RCU_TRACE is not set
diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild
index 5bea416a779270713ea614d3260e8f16fb0c766a..6eb16719549e49eaecc9febe46b071081d50cc98 100644
--- a/arch/openrisc/include/asm/Kbuild
+++ b/arch/openrisc/include/asm/Kbuild
@@ -1,7 +1,6 @@
 generic-y += barrier.h
 generic-y += bug.h
 generic-y += bugs.h
-generic-y += cacheflush.h
 generic-y += checksum.h
 generic-y += clkdev.h
 generic-y += current.h
@@ -28,6 +27,10 @@ generic-y += module.h
 generic-y += pci.h
 generic-y += percpu.h
 generic-y += preempt.h
+generic-y += qspinlock_types.h
+generic-y += qspinlock.h
+generic-y += qrwlock_types.h
+generic-y += qrwlock.h
 generic-y += sections.h
 generic-y += segment.h
 generic-y += string.h
diff --git a/arch/openrisc/include/asm/cacheflush.h b/arch/openrisc/include/asm/cacheflush.h
new file mode 100644
index 0000000000000000000000000000000000000000..70f46fd7a074c67dd27af5a2e2c6b82e53a9d3e1
--- /dev/null
+++ b/arch/openrisc/include/asm/cacheflush.h
@@ -0,0 +1,96 @@
+/*
+ * OpenRISC Linux
+ *
+ * Linux architectural port borrowing liberally from similar works of
+ * others.  All original copyrights apply as per the original source
+ * declaration.
+ *
+ * OpenRISC implementation:
+ * Copyright (C) Jan Henrik Weinstock <jan.weinstock@rwth-aachen.de>
+ * et al.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ */
+
+#ifndef __ASM_CACHEFLUSH_H
+#define __ASM_CACHEFLUSH_H
+
+#include <linux/mm.h>
+
+/*
+ * Helper functions for flushing or invalidating entire pages from data
+ * and instruction caches. SMP needs a little extra work, since we need
+ * to flush the pages on all cpus.
+ */
+extern void local_dcache_page_flush(struct page *page);
+extern void local_icache_page_inv(struct page *page);
+
+/*
+ * Data cache flushing always happens on the local cpu. Instruction cache
+ * invalidations need to be broadcast to all other cpus in the system in
+ * case of SMP configurations.
+ */
+#ifndef CONFIG_SMP
+#define dcache_page_flush(page)      local_dcache_page_flush(page)
+#define icache_page_inv(page)        local_icache_page_inv(page)
+#else  /* CONFIG_SMP */
+#define dcache_page_flush(page)      local_dcache_page_flush(page)
+#define icache_page_inv(page)        smp_icache_page_inv(page)
+extern void smp_icache_page_inv(struct page *page);
+#endif /* CONFIG_SMP */
+
+/*
+ * Synchronizes caches. Whenever a cpu writes executable code to memory, this
+ * should be called to make sure the processor sees the newly written code.
+ */
+static inline void sync_icache_dcache(struct page *page)
+{
+	if (!IS_ENABLED(CONFIG_DCACHE_WRITETHROUGH))
+		dcache_page_flush(page);
+	icache_page_inv(page);
+}
+
+/*
+ * Pages with this bit set need not be flushed/invalidated, since
+ * they have not changed since last flush. New pages start with
+ * PG_arch_1 not set and are therefore dirty by default.
+ */
+#define PG_dc_clean                  PG_arch_1
+
+#define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE 1
+static inline void flush_dcache_page(struct page *page)
+{
+	clear_bit(PG_dc_clean, &page->flags);
+}
+
+/*
+ * Other interfaces are not required since we do not have virtually
+ * indexed or tagged caches. So we can use the default here.
+ */
+#define flush_cache_all()				do { } while (0)
+#define flush_cache_mm(mm)				do { } while (0)
+#define flush_cache_dup_mm(mm)				do { } while (0)
+#define flush_cache_range(vma, start, end)		do { } while (0)
+#define flush_cache_page(vma, vmaddr, pfn)		do { } while (0)
+#define flush_dcache_mmap_lock(mapping)			do { } while (0)
+#define flush_dcache_mmap_unlock(mapping)		do { } while (0)
+#define flush_icache_range(start, end)			do { } while (0)
+#define flush_icache_page(vma, pg)			do { } while (0)
+#define flush_icache_user_range(vma, pg, adr, len)	do { } while (0)
+#define flush_cache_vmap(start, end)			do { } while (0)
+#define flush_cache_vunmap(start, end)			do { } while (0)
+
+#define copy_to_user_page(vma, page, vaddr, dst, src, len)           \
+	do {                                                         \
+		memcpy(dst, src, len);                               \
+		if ((vma)->vm_flags & VM_EXEC)                       \
+			sync_icache_dcache(page);                    \
+	} while (0)
+
+#define copy_from_user_page(vma, page, vaddr, dst, src, len)         \
+	memcpy(dst, src, len)
+
+#endif /* __ASM_CACHEFLUSH_H */
diff --git a/arch/openrisc/include/asm/cmpxchg.h b/arch/openrisc/include/asm/cmpxchg.h
index f0a5d8b844d6b85b16eb6c170f8af86f73ad8440..d29f7db53906b77d19dd920ad98bb37e41b45e4a 100644
--- a/arch/openrisc/include/asm/cmpxchg.h
+++ b/arch/openrisc/include/asm/cmpxchg.h
@@ -1,32 +1,29 @@
 /*
+ * 1, 2 and 4 byte cmpxchg and xchg implementations for OpenRISC.
+ *
  * Copyright (C) 2014 Stefan Kristiansson <stefan.kristiansson@saunalahti.fi>
+ * Copyright (C) 2017 Stafford Horne <shorne@gmail.com>
  *
  * This file is licensed under the terms of the GNU General Public License
  * version 2.  This program is licensed "as is" without any warranty of any
  * kind, whether express or implied.
+ *
+ * Note:
+ * The portable implementations of 1 and 2 byte xchg and cmpxchg using a 4
+ * byte cmpxchg are sourced heavily from the sh and mips implementations.
  */
 
 #ifndef __ASM_OPENRISC_CMPXCHG_H
 #define __ASM_OPENRISC_CMPXCHG_H
 
 #include  <linux/types.h>
-
-/*
- * This function doesn't exist, so you'll get a linker error
- * if something tries to do an invalid cmpxchg().
- */
-extern void __cmpxchg_called_with_bad_pointer(void);
+#include  <linux/bitops.h>
 
 #define __HAVE_ARCH_CMPXCHG 1
 
-static inline unsigned long
-__cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, int size)
+static inline unsigned long cmpxchg_u32(volatile void *ptr,
+		unsigned long old, unsigned long new)
 {
-	if (size != 4) {
-		__cmpxchg_called_with_bad_pointer();
-		return old;
-	}
-
 	__asm__ __volatile__(
 		"1:	l.lwa %0, 0(%1)		\n"
 		"	l.sfeq %0, %2		\n"
@@ -43,6 +40,97 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, int size)
 	return old;
 }
 
+static inline unsigned long xchg_u32(volatile void *ptr,
+		unsigned long val)
+{
+	__asm__ __volatile__(
+		"1:	l.lwa %0, 0(%1)		\n"
+		"	l.swa 0(%1), %2		\n"
+		"	l.bnf 1b		\n"
+		"	 l.nop			\n"
+		: "=&r"(val)
+		: "r"(ptr), "r"(val)
+		: "cc", "memory");
+
+	return val;
+}
+
+/* 1/2 byte cmpxchg emulated by a 32 bit cmpxchg on the enclosing word */
+static inline u32 cmpxchg_small(volatile void *ptr, u32 old, u32 new,
+				int size)
+{
+	int off = (unsigned long)ptr % sizeof(u32);
+	volatile u32 *p = ptr - off;
+#ifdef __BIG_ENDIAN
+	int bitoff = (sizeof(u32) - size - off) * BITS_PER_BYTE;
+#else
+	int bitoff = off * BITS_PER_BYTE;
+#endif
+	u32 valmask = (0x1 << size * BITS_PER_BYTE) - 1;
+	u32 bitmask = valmask << bitoff;
+	u32 load32, old32, new32, ret;
+
+	/* Keep old/new inside the field; never clobber neighbouring bytes */
+	old &= valmask;
+	new &= valmask;
+	load32 = READ_ONCE(*p);
+	while (true) {
+		ret = (load32 & bitmask) >> bitoff;
+		if (old != ret)
+			return ret;
+		old32 = (load32 & ~bitmask) | (old << bitoff);
+		new32 = (load32 & ~bitmask) | (new << bitoff);
+		load32 = cmpxchg_u32(p, old32, new32);
+		if (load32 == old32)
+			return old;
+	}
+}
+
+/* xchg */
+
+/* 1/2 byte xchg emulated by a cmpxchg loop on the enclosing 32 bit word */
+static inline u32 xchg_small(volatile void *ptr, u32 x, int size)
+{
+	int off = (unsigned long)ptr % sizeof(u32);
+	volatile u32 *p = ptr - off;
+#ifdef __BIG_ENDIAN
+	int bitoff = (sizeof(u32) - size - off) * BITS_PER_BYTE;
+#else
+	int bitoff = off * BITS_PER_BYTE;
+#endif
+	u32 bitmask = ((0x1 << size * BITS_PER_BYTE) - 1) << bitoff;
+	u32 oldv, newv, ret;
+
+	do {
+		oldv = READ_ONCE(*p);
+		ret = (oldv & bitmask) >> bitoff;
+		/* Keep x inside its field; never clobber neighbours */
+		newv = (oldv & ~bitmask) | ((x << bitoff) & bitmask);
+	} while (cmpxchg_u32(p, oldv, newv) != oldv);
+	return ret;
+}
+
+/*
+ * This function doesn't exist, so you'll get a linker error
+ * if something tries to do an invalid cmpxchg().
+ */
+extern unsigned long __cmpxchg_called_with_bad_pointer(void)
+	__compiletime_error("Bad argument size for cmpxchg");
+
+static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
+		unsigned long new, int size)
+{
+	switch (size) {
+	case 1:
+	case 2:
+		return cmpxchg_small(ptr, old, new, size);
+	case 4:
+		return cmpxchg_u32(ptr, old, new);
+	default:
+		return __cmpxchg_called_with_bad_pointer();
+	}
+}
+
 #define cmpxchg(ptr, o, n)						\
 	({								\
 		(__typeof__(*(ptr))) __cmpxchg((ptr),			\
@@ -55,32 +143,27 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, int size)
  * This function doesn't exist, so you'll get a linker error if
  * something tries to do an invalidly-sized xchg().
  */
-extern void __xchg_called_with_bad_pointer(void);
+extern unsigned long __xchg_called_with_bad_pointer(void)
+	__compiletime_error("Bad argument size for xchg");
 
-static inline unsigned long __xchg(unsigned long val, volatile void *ptr,
-				   int size)
+static inline unsigned long __xchg(volatile void *ptr, unsigned long with,
+		int size)
 {
-	if (size != 4) {
-		__xchg_called_with_bad_pointer();
-		return val;
+	switch (size) {
+	case 1:
+	case 2:
+		return xchg_small(ptr, with, size);
+	case 4:
+		return xchg_u32(ptr, with);
+	default:
+		return __xchg_called_with_bad_pointer();
 	}
-
-	__asm__ __volatile__(
-		"1:	l.lwa %0, 0(%1)		\n"
-		"	l.swa 0(%1), %2		\n"
-		"	l.bnf 1b		\n"
-		"	 l.nop			\n"
-		: "=&r"(val)
-		: "r"(ptr), "r"(val)
-		: "cc", "memory");
-
-	return val;
 }
 
 #define xchg(ptr, with) 						\
 	({								\
-		(__typeof__(*(ptr))) __xchg((unsigned long)(with),	\
-					    (ptr),			\
+		(__typeof__(*(ptr))) __xchg((ptr),			\
+					    (unsigned long)(with),	\
 					    sizeof(*(ptr)));		\
 	})
 
diff --git a/arch/openrisc/include/asm/cpuinfo.h b/arch/openrisc/include/asm/cpuinfo.h
index ec10679d6429d5f67bf43f77e990c6eecf0a004f..4ea0a33eba6c3f493e4a94f45565c9068dfd1be7 100644
--- a/arch/openrisc/include/asm/cpuinfo.h
+++ b/arch/openrisc/include/asm/cpuinfo.h
@@ -19,7 +19,7 @@
 #ifndef __ASM_OPENRISC_CPUINFO_H
 #define __ASM_OPENRISC_CPUINFO_H
 
-struct cpuinfo {
+struct cpuinfo_or1k {
 	u32 clock_frequency;
 
 	u32 icache_size;
@@ -29,8 +29,11 @@ struct cpuinfo {
 	u32 dcache_size;
 	u32 dcache_block_size;
 	u32 dcache_ways;
+
+	u16 coreid;
 };
 
-extern struct cpuinfo cpuinfo;
+extern struct cpuinfo_or1k cpuinfo_or1k[NR_CPUS];
+extern void setup_cpuinfo(void);
 
 #endif /* __ASM_OPENRISC_CPUINFO_H */
diff --git a/arch/openrisc/include/asm/mmu_context.h b/arch/openrisc/include/asm/mmu_context.h
index e94b814d2e3c4181561404550215e23d38e42147..c380d8caf84f6f1149b0925817fb0a7f27062562 100644
--- a/arch/openrisc/include/asm/mmu_context.h
+++ b/arch/openrisc/include/asm/mmu_context.h
@@ -34,7 +34,7 @@ extern void switch_mm(struct mm_struct *prev, struct mm_struct *next,
  * registers like cr3 on the i386
  */
 
-extern volatile pgd_t *current_pgd;   /* defined in arch/openrisc/mm/fault.c */
+extern volatile pgd_t *current_pgd[]; /* defined in arch/openrisc/mm/fault.c */
 
 static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
 {
diff --git a/arch/openrisc/include/asm/pgtable.h b/arch/openrisc/include/asm/pgtable.h
index 71a6f08de8f21a896f955f7a95792b7e62054a3a..21c71303012f7a17796a0f7961459da4453c59fb 100644
--- a/arch/openrisc/include/asm/pgtable.h
+++ b/arch/openrisc/include/asm/pgtable.h
@@ -94,7 +94,7 @@ extern void paging_init(void);
  * 64 MB of vmalloc area is comparable to what's available on other arches.
  */
 
-#define VMALLOC_START	(PAGE_OFFSET-0x04000000)
+#define VMALLOC_START	(PAGE_OFFSET-0x04000000UL)
 #define VMALLOC_END	(PAGE_OFFSET)
 #define VMALLOC_VMADDR(x) ((unsigned long)(x))
 
@@ -416,15 +416,19 @@ extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; /* defined in head.S */
 
 struct vm_area_struct;
 
-/*
- * or32 doesn't have any external MMU info: the kernel page
- * tables contain all the necessary information.
- *
- * Actually I am not sure on what this could be used for.
- */
+static inline void update_tlb(struct vm_area_struct *vma,
+	unsigned long address, pte_t *pte)
+{
+}
+
+extern void update_cache(struct vm_area_struct *vma,
+	unsigned long address, pte_t *pte);
+
 static inline void update_mmu_cache(struct vm_area_struct *vma,
 	unsigned long address, pte_t *pte)
 {
+	update_tlb(vma, address, pte);
+	update_cache(vma, address, pte);
 }
 
 /* __PHX__ FIXME, SWAP, this probably doesn't work */
diff --git a/arch/openrisc/include/asm/serial.h b/arch/openrisc/include/asm/serial.h
index 270a45241639c6899ec43d5e52a577d93241ffa7..cb5932f5447a2cbeb08ba59bf399e92491171172 100644
--- a/arch/openrisc/include/asm/serial.h
+++ b/arch/openrisc/include/asm/serial.h
@@ -29,7 +29,7 @@
  * it needs to be correct to get the early console working.
  */
 
-#define BASE_BAUD (cpuinfo.clock_frequency/16)
+#define BASE_BAUD (cpuinfo_or1k[smp_processor_id()].clock_frequency/16)
 
 #endif /* __KERNEL__ */
 
diff --git a/arch/openrisc/include/asm/smp.h b/arch/openrisc/include/asm/smp.h
new file mode 100644
index 0000000000000000000000000000000000000000..e21d2f12b5b67a8937474e6d6097703ceb3a1ae9
--- /dev/null
+++ b/arch/openrisc/include/asm/smp.h
@@ -0,0 +1,26 @@
+/*
+ * Copyright (C) 2014 Stefan Kristiansson <stefan.kristiansson@saunalahti.fi>
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2.  This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#ifndef __ASM_OPENRISC_SMP_H
+#define __ASM_OPENRISC_SMP_H
+
+#include <asm/spr.h>
+#include <asm/spr_defs.h>
+
+#define raw_smp_processor_id()	(current_thread_info()->cpu)
+#define hard_smp_processor_id()	mfspr(SPR_COREID)
+
+extern void smp_init_cpus(void);
+
+extern void arch_send_call_function_single_ipi(int cpu);
+extern void arch_send_call_function_ipi_mask(const struct cpumask *mask);
+
+extern void set_smp_cross_call(void (*)(const struct cpumask *, unsigned int));
+extern void handle_IPI(unsigned int ipi_msg);
+
+#endif /* __ASM_OPENRISC_SMP_H */
diff --git a/arch/openrisc/include/asm/spinlock.h b/arch/openrisc/include/asm/spinlock.h
index fd00a3a24123e7efd7359beed04ae7ff2894af95..9b761e0e22c358230b7d6a305281988a142cb6a8 100644
--- a/arch/openrisc/include/asm/spinlock.h
+++ b/arch/openrisc/include/asm/spinlock.h
@@ -19,6 +19,16 @@
 #ifndef __ASM_OPENRISC_SPINLOCK_H
 #define __ASM_OPENRISC_SPINLOCK_H
 
-#error "or32 doesn't do SMP yet"
+#include <asm/qspinlock.h>
+
+#include <asm/qrwlock.h>
+
+#define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
+#define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
+
+#define arch_spin_relax(lock)	cpu_relax()
+#define arch_read_relax(lock)	cpu_relax()
+#define arch_write_relax(lock)	cpu_relax()
+
 
 #endif
diff --git a/arch/openrisc/include/asm/spinlock_types.h b/arch/openrisc/include/asm/spinlock_types.h
new file mode 100644
index 0000000000000000000000000000000000000000..7c6fb1208c888ab1c00dc328039fb9d3b064bb76
--- /dev/null
+++ b/arch/openrisc/include/asm/spinlock_types.h
@@ -0,0 +1,7 @@
+#ifndef _ASM_OPENRISC_SPINLOCK_TYPES_H
+#define _ASM_OPENRISC_SPINLOCK_TYPES_H
+
+#include <asm/qspinlock_types.h>
+#include <asm/qrwlock_types.h>
+
+#endif /* _ASM_OPENRISC_SPINLOCK_TYPES_H */
diff --git a/arch/openrisc/include/asm/spr_defs.h b/arch/openrisc/include/asm/spr_defs.h
index 367dac70326af01731e8d841fa8fd5ce49c36222..154b5a1ee579ae891a8bd4d92707a153def0b56b 100644
--- a/arch/openrisc/include/asm/spr_defs.h
+++ b/arch/openrisc/include/asm/spr_defs.h
@@ -51,6 +51,11 @@
 #define SPR_ICCFGR	(SPRGROUP_SYS + 6)
 #define SPR_DCFGR	(SPRGROUP_SYS + 7)
 #define SPR_PCCFGR	(SPRGROUP_SYS + 8)
+#define SPR_VR2		(SPRGROUP_SYS + 9)
+#define SPR_AVR		(SPRGROUP_SYS + 10)
+#define SPR_EVBAR	(SPRGROUP_SYS + 11)
+#define SPR_AECR	(SPRGROUP_SYS + 12)
+#define SPR_AESR	(SPRGROUP_SYS + 13)
 #define SPR_NPC         (SPRGROUP_SYS + 16)  /* CZ 21/06/01 */
 #define SPR_SR		(SPRGROUP_SYS + 17)  /* CZ 21/06/01 */
 #define SPR_PPC         (SPRGROUP_SYS + 18)  /* CZ 21/06/01 */
@@ -61,6 +66,8 @@
 #define SPR_EEAR_LAST	(SPRGROUP_SYS + 63)
 #define SPR_ESR_BASE	(SPRGROUP_SYS + 64)
 #define SPR_ESR_LAST	(SPRGROUP_SYS + 79)
+#define SPR_COREID	(SPRGROUP_SYS + 128)
+#define SPR_NUMCORES	(SPRGROUP_SYS + 129)
 #define SPR_GPR_BASE	(SPRGROUP_SYS + 1024)
 
 /* Data MMU group */
@@ -135,11 +142,18 @@
 #define SPR_VR_CFG	0x00ff0000  /* Processor configuration */
 #define SPR_VR_RES	0x0000ffc0  /* Reserved */
 #define SPR_VR_REV	0x0000003f  /* Processor revision */
+#define SPR_VR_UVRP	0x00000040  /* Updated Version Registers Present */
 
 #define SPR_VR_VER_OFF	24
 #define SPR_VR_CFG_OFF	16
 #define SPR_VR_REV_OFF	0
 
+/*
+ * Bit definitions for the Version Register 2
+ */
+#define SPR_VR2_CPUID	0xff000000  /* Processor ID */
+#define SPR_VR2_VER	0x00ffffff  /* Processor version */
+
 /*
  * Bit definitions for the Unit Present Register
  *
diff --git a/arch/openrisc/include/asm/thread_info.h b/arch/openrisc/include/asm/thread_info.h
index 6e619a79a4010bf63767205c289d841a567ae124..c229aa6bb502b744e748d2e37aeff7625a19ce47 100644
--- a/arch/openrisc/include/asm/thread_info.h
+++ b/arch/openrisc/include/asm/thread_info.h
@@ -74,7 +74,7 @@ struct thread_info {
 	.task		= &tsk,				\
 	.flags		= 0,				\
 	.cpu		= 0,				\
-	.preempt_count	= 1,				\
+	.preempt_count	= INIT_PREEMPT_COUNT,		\
 	.addr_limit	= KERNEL_DS,			\
 	.ksp            = 0,                            \
 }
diff --git a/arch/openrisc/include/asm/time.h b/arch/openrisc/include/asm/time.h
new file mode 100644
index 0000000000000000000000000000000000000000..313ee975774bb12d29b0538012f3872280ae8db0
--- /dev/null
+++ b/arch/openrisc/include/asm/time.h
@@ -0,0 +1,23 @@
+/*
+ * OpenRISC timer API
+ *
+ * Copyright (C) 2017 by Stafford Horne (shorne@gmail.com)
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ */
+#ifndef __ASM_OR1K_TIME_H
+#define __ASM_OR1K_TIME_H
+
+extern void openrisc_clockevent_init(void);
+
+extern void openrisc_timer_set(unsigned long count);
+extern void openrisc_timer_set_next(unsigned long delta);
+
+#ifdef CONFIG_SMP
+extern void synchronise_count_master(int cpu);
+extern void synchronise_count_slave(int cpu);
+#endif
+
+#endif /* __ASM_OR1K_TIME_H */
diff --git a/arch/openrisc/include/asm/tlbflush.h b/arch/openrisc/include/asm/tlbflush.h
index 6a2accd6cb6735da35b084078ee0c4a4383436a3..94227f0eaf6da5c0aa85b8f49548984831c0a1bd 100644
--- a/arch/openrisc/include/asm/tlbflush.h
+++ b/arch/openrisc/include/asm/tlbflush.h
@@ -33,13 +33,26 @@
  *  - flush_tlb_page(vma, vmaddr) flushes one page
  *  - flush_tlb_range(mm, start, end) flushes a range of pages
  */
+extern void local_flush_tlb_all(void);
+extern void local_flush_tlb_mm(struct mm_struct *mm);
+extern void local_flush_tlb_page(struct vm_area_struct *vma,
+				 unsigned long addr);
+extern void local_flush_tlb_range(struct vm_area_struct *vma,
+				  unsigned long start,
+				  unsigned long end);
 
-void flush_tlb_all(void);
-void flush_tlb_mm(struct mm_struct *mm);
-void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr);
-void flush_tlb_range(struct vm_area_struct *vma,
-		     unsigned long start,
-		     unsigned long end);
+#ifndef CONFIG_SMP
+#define flush_tlb_all	local_flush_tlb_all
+#define flush_tlb_mm	local_flush_tlb_mm
+#define flush_tlb_page	local_flush_tlb_page
+#define flush_tlb_range	local_flush_tlb_range
+#else
+extern void flush_tlb_all(void);
+extern void flush_tlb_mm(struct mm_struct *mm);
+extern void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr);
+extern void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
+			    unsigned long end);
+#endif
 
 static inline void flush_tlb(void)
 {
diff --git a/arch/openrisc/include/asm/unwinder.h b/arch/openrisc/include/asm/unwinder.h
new file mode 100644
index 0000000000000000000000000000000000000000..165ec6f02ab8cfccf001c063d5a88dea18b24a2c
--- /dev/null
+++ b/arch/openrisc/include/asm/unwinder.h
@@ -0,0 +1,20 @@
+/*
+ * OpenRISC unwinder.h
+ *
+ * Architecture API for unwinding stacks.
+ *
+ * Copyright (C) 2017 Stafford Horne <shorne@gmail.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2.  This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#ifndef __ASM_OPENRISC_UNWINDER_H
+#define __ASM_OPENRISC_UNWINDER_H
+
+void unwind_stack(void *data, unsigned long *stack,
+		  void (*trace)(void *data, unsigned long addr,
+				int reliable));
+
+#endif /* __ASM_OPENRISC_UNWINDER_H */
diff --git a/arch/openrisc/kernel/Makefile b/arch/openrisc/kernel/Makefile
index c4ea6cabad4623fc3e65f78121ecefaa49f61edb..2d172e79f58d014f344446d8849bb7645dfabd7a 100644
--- a/arch/openrisc/kernel/Makefile
+++ b/arch/openrisc/kernel/Makefile
@@ -7,8 +7,10 @@ extra-y	:= head.o vmlinux.lds
 
 obj-y	:= setup.o or32_ksyms.o process.o dma.o \
 	   traps.o time.o irq.o entry.o ptrace.o signal.o \
-	   sys_call_table.o
+	   sys_call_table.o unwinder.o
 
+obj-$(CONFIG_SMP)		+= smp.o sync-timer.o
+obj-$(CONFIG_STACKTRACE)	+= stacktrace.o
 obj-$(CONFIG_MODULES)		+= module.o
 obj-$(CONFIG_OF)		+= prom.o
 
diff --git a/arch/openrisc/kernel/dma.c b/arch/openrisc/kernel/dma.c
index b10369b7e31b287796482fe65d83e89ed97e1d33..a945f00011b426e501bf2ac1e2bce398a78a7487 100644
--- a/arch/openrisc/kernel/dma.c
+++ b/arch/openrisc/kernel/dma.c
@@ -32,6 +32,7 @@ page_set_nocache(pte_t *pte, unsigned long addr,
 		 unsigned long next, struct mm_walk *walk)
 {
 	unsigned long cl;
+	struct cpuinfo_or1k *cpuinfo = &cpuinfo_or1k[smp_processor_id()];
 
 	pte_val(*pte) |= _PAGE_CI;
 
@@ -42,7 +43,7 @@ page_set_nocache(pte_t *pte, unsigned long addr,
 	flush_tlb_page(NULL, addr);
 
 	/* Flush page out of dcache */
-	for (cl = __pa(addr); cl < __pa(next); cl += cpuinfo.dcache_block_size)
+	for (cl = __pa(addr); cl < __pa(next); cl += cpuinfo->dcache_block_size)
 		mtspr(SPR_DCBFR, cl);
 
 	return 0;
@@ -140,6 +141,7 @@ or1k_map_page(struct device *dev, struct page *page,
 {
 	unsigned long cl;
 	dma_addr_t addr = page_to_phys(page) + offset;
+	struct cpuinfo_or1k *cpuinfo = &cpuinfo_or1k[smp_processor_id()];
 
 	if (attrs & DMA_ATTR_SKIP_CPU_SYNC)
 		return addr;
@@ -148,13 +150,13 @@ or1k_map_page(struct device *dev, struct page *page,
 	case DMA_TO_DEVICE:
 		/* Flush the dcache for the requested range */
 		for (cl = addr; cl < addr + size;
-		     cl += cpuinfo.dcache_block_size)
+		     cl += cpuinfo->dcache_block_size)
 			mtspr(SPR_DCBFR, cl);
 		break;
 	case DMA_FROM_DEVICE:
 		/* Invalidate the dcache for the requested range */
 		for (cl = addr; cl < addr + size;
-		     cl += cpuinfo.dcache_block_size)
+		     cl += cpuinfo->dcache_block_size)
 			mtspr(SPR_DCBIR, cl);
 		break;
 	default:
@@ -213,9 +215,10 @@ or1k_sync_single_for_cpu(struct device *dev,
 {
 	unsigned long cl;
 	dma_addr_t addr = dma_handle;
+	struct cpuinfo_or1k *cpuinfo = &cpuinfo_or1k[smp_processor_id()];
 
 	/* Invalidate the dcache for the requested range */
-	for (cl = addr; cl < addr + size; cl += cpuinfo.dcache_block_size)
+	for (cl = addr; cl < addr + size; cl += cpuinfo->dcache_block_size)
 		mtspr(SPR_DCBIR, cl);
 }
 
@@ -226,9 +229,10 @@ or1k_sync_single_for_device(struct device *dev,
 {
 	unsigned long cl;
 	dma_addr_t addr = dma_handle;
+	struct cpuinfo_or1k *cpuinfo = &cpuinfo_or1k[smp_processor_id()];
 
 	/* Flush the dcache for the requested range */
-	for (cl = addr; cl < addr + size; cl += cpuinfo.dcache_block_size)
+	for (cl = addr; cl < addr + size; cl += cpuinfo->dcache_block_size)
 		mtspr(SPR_DCBFR, cl);
 }
 
diff --git a/arch/openrisc/kernel/entry.S b/arch/openrisc/kernel/entry.S
index 1b7160c79646be4e136d0f813b4b55c0bfd63827..690d55272ba688a2adc88bca00e66cc61903c711 100644
--- a/arch/openrisc/kernel/entry.S
+++ b/arch/openrisc/kernel/entry.S
@@ -42,6 +42,61 @@
 
 /* =========================================================[ macros ]=== */
 
+#ifdef CONFIG_TRACE_IRQFLAGS
+/*
+ * Trace irq on/off creating a stack frame.
+ */
+#define TRACE_IRQS_OP(trace_op)					\
+	l.sw 	-8(r1),r2	/* store frame pointer */		;\
+	l.sw	-4(r1),r9	/* store return address */		;\
+	l.addi	r2,r1,0		/* move sp to fp */			;\
+	l.jal	trace_op	/* call the tracing hook */		;\
+	 l.addi	r1,r1,-8	/* delay slot: claim the 8 bytes */	;\
+	l.ori	r1,r2,0		/* restore sp */			;\
+	l.lwz	r9,-4(r1)	/* restore return address */		;\
+	l.lwz	r2,-8(r1)	/* restore fp, last insn of the macro */
+/*
+ * Trace irq on/off and save registers we need that would otherwise be
+ * clobbered.
+ */
+#define TRACE_IRQS_SAVE(t1,trace_op)					\
+	l.sw	-12(r1),t1	/* save extra reg */			;\
+	l.sw 	-8(r1),r2	/* store frame pointer */		;\
+	l.sw	-4(r1),r9	/* store return address */		;\
+	l.addi	r2,r1,0		/* move sp to fp */			;\
+	l.jal	trace_op						;\
+	 l.addi	r1,r1,-12						;\
+	l.ori	r1,r2,0		/* restore sp */			;\
+	l.lwz	r9,-4(r1)	/* restore return address */		;\
+	l.lwz	r2,-8(r1)	/* restore fp */			;\
+	l.lwz	t1,-12(r1)	/* restore extra reg */
+
+#define TRACE_IRQS_OFF	TRACE_IRQS_OP(trace_hardirqs_off)
+#define TRACE_IRQS_ON	TRACE_IRQS_OP(trace_hardirqs_on)
+#define TRACE_IRQS_ON_SYSCALL						\
+	TRACE_IRQS_SAVE(r10,trace_hardirqs_on)				;\
+	l.lwz	r3,PT_GPR3(r1)						;\
+	l.lwz	r4,PT_GPR4(r1)						;\
+	l.lwz	r5,PT_GPR5(r1)						;\
+	l.lwz	r6,PT_GPR6(r1)						;\
+	l.lwz	r7,PT_GPR7(r1)						;\
+	l.lwz	r8,PT_GPR8(r1)						;\
+	l.lwz	r11,PT_GPR11(r1)
+#define TRACE_IRQS_OFF_ENTRY						\
+	l.lwz	r5,PT_SR(r1)	/* SR saved in the exception frame */	;\
+	l.andi	r3,r5,(SPR_SR_IEE|SPR_SR_TEE)				;\
+	l.sfeq	r3,r0		/* skip trace if irqs were already off */;\
+	l.bf	1f							;\
+	 l.nop								;\
+	TRACE_IRQS_SAVE(r4,trace_hardirqs_off)				;\
+1:
+#else
+#define TRACE_IRQS_OFF
+#define TRACE_IRQS_ON
+#define TRACE_IRQS_OFF_ENTRY
+#define TRACE_IRQS_ON_SYSCALL
+#endif
+
 /*
  * We need to disable interrupts at beginning of RESTORE_ALL
  * since interrupt might come in after we've loaded EPC return address
@@ -124,6 +179,7 @@ handler:							;\
 	/* r30 already save */					;\
 /*        l.sw    PT_GPR30(r1),r30*/					;\
 	l.sw    PT_GPR31(r1),r31					;\
+	TRACE_IRQS_OFF_ENTRY						;\
 	/* Store -1 in orig_gpr11 for non-syscall exceptions */	;\
 	l.addi	r30,r0,-1					;\
 	l.sw	PT_ORIG_GPR11(r1),r30
@@ -557,9 +613,6 @@ _string_syscall_return:
 	.align 4
 
 ENTRY(_sys_call_handler)
-	/* syscalls run with interrupts enabled */
-	ENABLE_INTERRUPTS(r29)		// enable interrupts, r29 is temp
-
 	/* r1, EPCR, ESR a already saved */
 	l.sw	PT_GPR2(r1),r2
 	/* r3-r8 must be saved because syscall restart relies
@@ -597,6 +650,10 @@ ENTRY(_sys_call_handler)
 /*	l.sw    PT_GPR30(r1),r30 */
 
 _syscall_check_trace_enter:
+	/* syscalls run with interrupts enabled */
+	TRACE_IRQS_ON_SYSCALL
+	ENABLE_INTERRUPTS(r29)		// enable interrupts, r29 is temp
+
 	/* If TIF_SYSCALL_TRACE is set, then we want to do syscall tracing */
 	l.lwz	r30,TI_FLAGS(r10)
 	l.andi	r30,r30,_TIF_SYSCALL_TRACE
@@ -657,6 +714,7 @@ _syscall_check_trace_leave:
 _syscall_check_work:
 	/* Here we need to disable interrupts */
 	DISABLE_INTERRUPTS(r27,r29)
+	TRACE_IRQS_OFF
 	l.lwz	r30,TI_FLAGS(r10)
 	l.andi	r30,r30,_TIF_WORK_MASK
 	l.sfne	r30,r0
@@ -871,6 +929,7 @@ UNHANDLED_EXCEPTION(_vector_0x1f00,0x1f00)
 
 _resume_userspace:
 	DISABLE_INTERRUPTS(r3,r4)
+	TRACE_IRQS_OFF
 	l.lwz	r4,TI_FLAGS(r10)
 	l.andi	r13,r4,_TIF_WORK_MASK
 	l.sfeqi	r13,0
@@ -909,6 +968,15 @@ _work_pending:
 	 l.lwz	r8,PT_GPR8(r1)
 
 _restore_all:
+#ifdef CONFIG_TRACE_IRQFLAGS
+	l.lwz	r4,PT_SR(r1)
+	l.andi	r3,r4,(SPR_SR_IEE|SPR_SR_TEE)
+	l.sfeq	r3,r0		/* skip trace if irqs were off */
+	l.bf	skip_hardirqs_on
+	 l.nop
+	TRACE_IRQS_ON
+skip_hardirqs_on:
+#endif
 	RESTORE_ALL
 	/* This returns to userspace code */
 
diff --git a/arch/openrisc/kernel/head.S b/arch/openrisc/kernel/head.S
index 1e87913576e304e47ea7da5518436d3e0fe3f94e..fb02b2a1d6f2d875372b125cf837feb119d0164e 100644
--- a/arch/openrisc/kernel/head.S
+++ b/arch/openrisc/kernel/head.S
@@ -49,9 +49,31 @@
 
 /* ============================================[ tmp store locations ]=== */
 
+#define SPR_SHADOW_GPR(x)	((x) + SPR_GPR_BASE + 32)
+
 /*
  * emergency_print temporary stores
  */
+#ifdef CONFIG_OPENRISC_HAVE_SHADOW_GPRS
+#define EMERGENCY_PRINT_STORE_GPR4	l.mtspr r0,r4,SPR_SHADOW_GPR(14)
+#define EMERGENCY_PRINT_LOAD_GPR4	l.mfspr r4,r0,SPR_SHADOW_GPR(14)
+
+#define EMERGENCY_PRINT_STORE_GPR5	l.mtspr r0,r5,SPR_SHADOW_GPR(15)
+#define EMERGENCY_PRINT_LOAD_GPR5	l.mfspr r5,r0,SPR_SHADOW_GPR(15)
+
+#define EMERGENCY_PRINT_STORE_GPR6	l.mtspr r0,r6,SPR_SHADOW_GPR(16)
+#define EMERGENCY_PRINT_LOAD_GPR6	l.mfspr r6,r0,SPR_SHADOW_GPR(16)
+
+#define EMERGENCY_PRINT_STORE_GPR7	l.mtspr r0,r7,SPR_SHADOW_GPR(7)
+#define EMERGENCY_PRINT_LOAD_GPR7	l.mfspr r7,r0,SPR_SHADOW_GPR(7)
+
+#define EMERGENCY_PRINT_STORE_GPR8	l.mtspr r0,r8,SPR_SHADOW_GPR(8)
+#define EMERGENCY_PRINT_LOAD_GPR8	l.mfspr r8,r0,SPR_SHADOW_GPR(8)
+
+#define EMERGENCY_PRINT_STORE_GPR9	l.mtspr r0,r9,SPR_SHADOW_GPR(9)
+#define EMERGENCY_PRINT_LOAD_GPR9	l.mfspr r9,r0,SPR_SHADOW_GPR(9)
+
+#else /* !CONFIG_OPENRISC_HAVE_SHADOW_GPRS */
 #define EMERGENCY_PRINT_STORE_GPR4	l.sw    0x20(r0),r4
 #define EMERGENCY_PRINT_LOAD_GPR4	l.lwz   r4,0x20(r0)
 
@@ -70,13 +92,28 @@
 #define EMERGENCY_PRINT_STORE_GPR9	l.sw    0x34(r0),r9
 #define EMERGENCY_PRINT_LOAD_GPR9	l.lwz   r9,0x34(r0)
 
+#endif
 
 /*
  * TLB miss handlers temorary stores
  */
-#define EXCEPTION_STORE_GPR9		l.sw    0x10(r0),r9
-#define EXCEPTION_LOAD_GPR9		l.lwz   r9,0x10(r0)
+#ifdef CONFIG_OPENRISC_HAVE_SHADOW_GPRS
+#define EXCEPTION_STORE_GPR2		l.mtspr r0,r2,SPR_SHADOW_GPR(2)
+#define EXCEPTION_LOAD_GPR2		l.mfspr r2,r0,SPR_SHADOW_GPR(2)
+
+#define EXCEPTION_STORE_GPR3		l.mtspr r0,r3,SPR_SHADOW_GPR(3)
+#define EXCEPTION_LOAD_GPR3		l.mfspr r3,r0,SPR_SHADOW_GPR(3)
+
+#define EXCEPTION_STORE_GPR4		l.mtspr r0,r4,SPR_SHADOW_GPR(4)
+#define EXCEPTION_LOAD_GPR4		l.mfspr r4,r0,SPR_SHADOW_GPR(4)
+
+#define EXCEPTION_STORE_GPR5		l.mtspr r0,r5,SPR_SHADOW_GPR(5)
+#define EXCEPTION_LOAD_GPR5		l.mfspr r5,r0,SPR_SHADOW_GPR(5)
+
+#define EXCEPTION_STORE_GPR6		l.mtspr r0,r6,SPR_SHADOW_GPR(6)
+#define EXCEPTION_LOAD_GPR6		l.mfspr r6,r0,SPR_SHADOW_GPR(6)
 
+#else /* !CONFIG_OPENRISC_HAVE_SHADOW_GPRS */
 #define EXCEPTION_STORE_GPR2		l.sw    0x64(r0),r2
 #define EXCEPTION_LOAD_GPR2		l.lwz   r2,0x64(r0)
 
@@ -92,35 +129,67 @@
 #define EXCEPTION_STORE_GPR6		l.sw    0x74(r0),r6
 #define EXCEPTION_LOAD_GPR6		l.lwz   r6,0x74(r0)
 
+#endif
 
 /*
  * EXCEPTION_HANDLE temporary stores
  */
 
+#ifdef CONFIG_OPENRISC_HAVE_SHADOW_GPRS
+#define EXCEPTION_T_STORE_GPR30		l.mtspr r0,r30,SPR_SHADOW_GPR(30)
+#define EXCEPTION_T_LOAD_GPR30(reg)	l.mfspr reg,r0,SPR_SHADOW_GPR(30)
+
+#define EXCEPTION_T_STORE_GPR10		l.mtspr r0,r10,SPR_SHADOW_GPR(10)
+#define EXCEPTION_T_LOAD_GPR10(reg)	l.mfspr reg,r0,SPR_SHADOW_GPR(10)
+
+#define EXCEPTION_T_STORE_SP		l.mtspr r0,r1,SPR_SHADOW_GPR(1)
+#define EXCEPTION_T_LOAD_SP(reg)	l.mfspr reg,r0,SPR_SHADOW_GPR(1)
+
+#else /* !CONFIG_OPENRISC_HAVE_SHADOW_GPRS */
 #define EXCEPTION_T_STORE_GPR30		l.sw    0x78(r0),r30
 #define EXCEPTION_T_LOAD_GPR30(reg)	l.lwz   reg,0x78(r0)
 
 #define EXCEPTION_T_STORE_GPR10		l.sw    0x7c(r0),r10
 #define EXCEPTION_T_LOAD_GPR10(reg)	l.lwz   reg,0x7c(r0)
 
-#define EXCEPTION_T_STORE_SP		l.sw	0x80(r0),r1
+#define EXCEPTION_T_STORE_SP		l.sw    0x80(r0),r1
 #define EXCEPTION_T_LOAD_SP(reg)	l.lwz   reg,0x80(r0)
-
-/*
- * For UNHANLDED_EXCEPTION
- */
-
-#define EXCEPTION_T_STORE_GPR31		l.sw    0x84(r0),r31
-#define EXCEPTION_T_LOAD_GPR31(reg)	l.lwz   reg,0x84(r0)
+#endif
 
 /* =========================================================[ macros ]=== */
 
-
+#ifdef CONFIG_SMP
+#define GET_CURRENT_PGD(reg,t1)					\
+	LOAD_SYMBOL_2_GPR(reg,current_pgd)			;\
+	l.mfspr	t1,r0,SPR_COREID				;\
+	l.slli	t1,t1,2						;\
+	l.add	reg,reg,t1					;\
+	tophys  (t1,reg)					;\
+	l.lwz   reg,0(t1)
+#else
 #define GET_CURRENT_PGD(reg,t1)					\
 	LOAD_SYMBOL_2_GPR(reg,current_pgd)			;\
 	tophys  (t1,reg)					;\
 	l.lwz   reg,0(t1)
+#endif
 
+/* Load r10 from current_thread_info_set - clobbers r1 and r30 */
+#ifdef CONFIG_SMP
+#define GET_CURRENT_THREAD_INFO					\
+	LOAD_SYMBOL_2_GPR(r1,current_thread_info_set)		;\
+	tophys  (r30,r1)					;\
+	l.mfspr	r10,r0,SPR_COREID				;\
+	l.slli	r10,r10,2					;\
+	l.add	r30,r30,r10					;\
+	/* r10: current_thread_info  */				;\
+	l.lwz   r10,0(r30)
+#else
+#define GET_CURRENT_THREAD_INFO					\
+	LOAD_SYMBOL_2_GPR(r1,current_thread_info_set)		;\
+	tophys  (r30,r1)					;\
+	/* r10: current_thread_info  */				;\
+	l.lwz   r10,0(r30)
+#endif
 
 /*
  * DSCR: this is a common hook for handling exceptions. it will save
@@ -163,10 +232,7 @@
 	l.bnf   2f                            /* kernel_mode */	;\
 	 EXCEPTION_T_STORE_SP                 /* delay slot */	;\
 1: /* user_mode:   */						;\
-	LOAD_SYMBOL_2_GPR(r1,current_thread_info_set)		;\
-	tophys  (r30,r1)					;\
-	/* r10: current_thread_info  */				;\
-	l.lwz   r10,0(r30)					;\
+	GET_CURRENT_THREAD_INFO	 				;\
 	tophys  (r30,r10)					;\
 	l.lwz   r1,(TI_KSP)(r30)				;\
 	/* fall through */					;\
@@ -226,7 +292,7 @@
  *
  */
 #define UNHANDLED_EXCEPTION(handler)				\
-	EXCEPTION_T_STORE_GPR31					;\
+	EXCEPTION_T_STORE_GPR30					;\
 	EXCEPTION_T_STORE_GPR10					;\
 	EXCEPTION_T_STORE_SP					;\
 	/* temporary store r3, r9 into r1, r10 */		;\
@@ -255,35 +321,35 @@
 	/* r1: KSP, r10: current, r31: __pa(KSP) */		;\
 	/* r12:	temp, syscall indicator, r13 temp */		;\
 	l.addi  r1,r1,-(INT_FRAME_SIZE)				;\
-	/* r1 is KSP, r31 is __pa(KSP) */			;\
-	tophys  (r31,r1)					;\
-	l.sw    PT_GPR12(r31),r12					;\
+	/* r1 is KSP, r30 is __pa(KSP) */			;\
+	tophys  (r30,r1)					;\
+	l.sw    PT_GPR12(r30),r12					;\
 	l.mfspr r12,r0,SPR_EPCR_BASE				;\
-	l.sw    PT_PC(r31),r12					;\
+	l.sw    PT_PC(r30),r12					;\
 	l.mfspr r12,r0,SPR_ESR_BASE				;\
-	l.sw    PT_SR(r31),r12					;\
+	l.sw    PT_SR(r30),r12					;\
 	/* save r31 */						;\
-	EXCEPTION_T_LOAD_GPR31(r12)				;\
-	l.sw	PT_GPR31(r31),r12					;\
+	EXCEPTION_T_LOAD_GPR30(r12)				;\
+	l.sw	PT_GPR30(r30),r12					;\
 	/* save r10 as was prior to exception */		;\
 	EXCEPTION_T_LOAD_GPR10(r12)				;\
-	l.sw	PT_GPR10(r31),r12					;\
+	l.sw	PT_GPR10(r30),r12					;\
 	/* save PT_SP as was prior to exception */			;\
 	EXCEPTION_T_LOAD_SP(r12)				;\
-	l.sw	PT_SP(r31),r12					;\
-	l.sw    PT_GPR13(r31),r13					;\
+	l.sw	PT_SP(r30),r12					;\
+	l.sw    PT_GPR13(r30),r13					;\
 	/* --> */						;\
 	/* save exception r4, set r4 = EA */			;\
-	l.sw	PT_GPR4(r31),r4					;\
+	l.sw	PT_GPR4(r30),r4					;\
 	l.mfspr r4,r0,SPR_EEAR_BASE				;\
 	/* r12 == 1 if we come from syscall */			;\
 	CLEAR_GPR(r12)						;\
 	/* ----- play a MMU trick ----- */			;\
-	l.ori	r31,r0,(EXCEPTION_SR)				;\
-	l.mtspr	r0,r31,SPR_ESR_BASE				;\
+	l.ori	r30,r0,(EXCEPTION_SR)				;\
+	l.mtspr	r0,r30,SPR_ESR_BASE				;\
 	/* r31:	EA address of handler */			;\
-	LOAD_SYMBOL_2_GPR(r31,handler)				;\
-	l.mtspr r0,r31,SPR_EPCR_BASE				;\
+	LOAD_SYMBOL_2_GPR(r30,handler)				;\
+	l.mtspr r0,r30,SPR_EPCR_BASE				;\
 	l.rfe
 
 /* =====================================================[ exceptions] === */
@@ -487,6 +553,12 @@ _start:
 	CLEAR_GPR(r30)
 	CLEAR_GPR(r31)
 
+#ifdef CONFIG_SMP
+	l.mfspr	r26,r0,SPR_COREID
+	l.sfeq	r26,r0
+	l.bnf	secondary_wait
+	 l.nop
+#endif
 	/*
 	 * set up initial ksp and current
 	 */
@@ -638,6 +710,100 @@ _flush_tlb:
 	l.jr	r9
 	 l.nop
 
+#ifdef CONFIG_SMP
+secondary_wait:
+	/* Doze the cpu until we are asked to run */
+	/* If the power management unit is not present, skip the doze */
+	l.mfspr r25,r0,SPR_UPR
+	l.andi  r25,r25,SPR_UPR_PMP
+	l.sfeq  r25,r0
+	l.bf	secondary_check_release
+	 l.nop
+
+	/* Point EVBAR at the special secondary exception handler */
+	LOAD_SYMBOL_2_GPR(r3, _secondary_evbar)
+	tophys(r25,r3)
+	l.mtspr	r0,r25,SPR_EVBAR
+
+	/* Enable interrupts */
+	l.mfspr	r25,r0,SPR_SR
+	l.ori	r25,r25,SPR_SR_IEE
+	l.mtspr	r0,r25,SPR_SR
+
+	/* Unmask interrupts: set the low 16 bits of PICMR */
+	l.mfspr r25,r0,SPR_PICMR
+	l.ori   r25,r25,0xffff
+	l.mtspr	r0,r25,SPR_PICMR
+
+	/* Doze: set the DME bit in PMR */
+	l.mfspr r25,r0,SPR_PMR
+	LOAD_SYMBOL_2_GPR(r3, SPR_PMR_DME)
+	l.or    r25,r25,r3
+	l.mtspr r0,r25,SPR_PMR
+
+	/* Woken up - write zero back to EVBAR to drop the special handler */
+	l.mtspr	r0,r0,SPR_EVBAR
+
+secondary_check_release:
+	/*
+	 * Check if we actually got the release signal, if not go-back to
+	 * sleep.
+	 */
+	l.mfspr	r25,r0,SPR_COREID
+	LOAD_SYMBOL_2_GPR(r3, secondary_release)
+	tophys(r4, r3)
+	l.lwz	r3,0(r4)
+	l.sfeq	r25,r3
+	l.bnf	secondary_wait
+	 l.nop
+	/* fall through to secondary_init */
+
+secondary_init:
+	/*
+	 * set up initial ksp and current
+	 */
+	LOAD_SYMBOL_2_GPR(r10, secondary_thread_info)
+	tophys	(r30,r10)
+	l.lwz	r10,0(r30)
+	l.addi	r1,r10,THREAD_SIZE
+	tophys	(r30,r10)
+	l.sw	TI_KSP(r30),r1
+
+	l.jal	_ic_enable
+	 l.nop
+
+	l.jal	_dc_enable
+	 l.nop
+
+	l.jal	_flush_tlb
+	 l.nop
+
+	/*
+	 * enable dmmu & immu
+	 */
+	l.mfspr	r30,r0,SPR_SR
+	l.movhi	r28,hi(SPR_SR_DME | SPR_SR_IME)
+	l.ori	r28,r28,lo(SPR_SR_DME | SPR_SR_IME)
+	l.or	r30,r30,r28
+	/*
+	 * This is a bit tricky, we need to switch over from physical addresses
+	 * to virtual addresses on the fly.
+	 * To do that, we first set up ESR with the IME and DME bits set.
+	 * Then EPCR is set to secondary_start and then a l.rfe is issued to
+	 * "jump" to that.
+	 */
+	l.mtspr	r0,r30,SPR_ESR_BASE
+	LOAD_SYMBOL_2_GPR(r30, secondary_start)
+	l.mtspr	r0,r30,SPR_EPCR_BASE
+	l.rfe
+
+secondary_start:
+	LOAD_SYMBOL_2_GPR(r30, secondary_start_kernel)
+	l.jr    r30
+	 l.nop
+
+#endif
+
 /* ========================================[ cache ]=== */
 
 	/* alignment here so we don't change memory offsets with
@@ -1533,6 +1699,17 @@ ENTRY(_early_uart_init)
 	l.jr	r9
 	l.nop
 
+	.align	0x1000
+	.global _secondary_evbar
+_secondary_evbar:
+
+	.space 0x800
+	/* Just disable interrupts and Return */
+	l.ori	r3,r0,SPR_SR_SM
+	l.mtspr	r0,r3,SPR_ESR_BASE
+	l.rfe
+
+
 	.section .rodata
 _string_unhandled_exception:
 	.string "\n\rRunarunaround: Unhandled exception 0x\0"
diff --git a/arch/openrisc/kernel/setup.c b/arch/openrisc/kernel/setup.c
index dbf5ee95a0d5f2ba8e2e6f453234afa954e04f45..9d28ab14d139cb4b6ed842f7f56c444858e17dc0 100644
--- a/arch/openrisc/kernel/setup.c
+++ b/arch/openrisc/kernel/setup.c
@@ -93,7 +93,7 @@ static void __init setup_memory(void)
 	memblock_dump_all();
 }
 
-struct cpuinfo cpuinfo;
+struct cpuinfo_or1k cpuinfo_or1k[NR_CPUS];
 
 static void print_cpuinfo(void)
 {
@@ -101,12 +101,13 @@ static void print_cpuinfo(void)
 	unsigned long vr = mfspr(SPR_VR);
 	unsigned int version;
 	unsigned int revision;
+	struct cpuinfo_or1k *cpuinfo = &cpuinfo_or1k[smp_processor_id()];
 
 	version = (vr & SPR_VR_VER) >> 24;
 	revision = (vr & SPR_VR_REV);
 
 	printk(KERN_INFO "CPU: OpenRISC-%x (revision %d) @%d MHz\n",
-	       version, revision, cpuinfo.clock_frequency / 1000000);
+	       version, revision, cpuinfo->clock_frequency / 1000000);
 
 	if (!(upr & SPR_UPR_UP)) {
 		printk(KERN_INFO
@@ -117,15 +118,15 @@ static void print_cpuinfo(void)
 	if (upr & SPR_UPR_DCP)
 		printk(KERN_INFO
 		       "-- dcache: %4d bytes total, %2d bytes/line, %d way(s)\n",
-		       cpuinfo.dcache_size, cpuinfo.dcache_block_size,
-		       cpuinfo.dcache_ways);
+		       cpuinfo->dcache_size, cpuinfo->dcache_block_size,
+		       cpuinfo->dcache_ways);
 	else
 		printk(KERN_INFO "-- dcache disabled\n");
 	if (upr & SPR_UPR_ICP)
 		printk(KERN_INFO
 		       "-- icache: %4d bytes total, %2d bytes/line, %d way(s)\n",
-		       cpuinfo.icache_size, cpuinfo.icache_block_size,
-		       cpuinfo.icache_ways);
+		       cpuinfo->icache_size, cpuinfo->icache_block_size,
+		       cpuinfo->icache_ways);
 	else
 		printk(KERN_INFO "-- icache disabled\n");
 
@@ -153,38 +154,58 @@ static void print_cpuinfo(void)
 		printk(KERN_INFO "-- custom unit(s)\n");
 }
 
+/* Find the /cpus child whose "reg" equals @cpu; caller must of_node_put(). */
+static struct device_node *setup_find_cpu_node(int cpu)
+{
+	u32 hwid;
+	struct device_node *cpun;
+	struct device_node *cpus = of_find_node_by_path("/cpus");
+
+	for_each_available_child_of_node(cpus, cpun) {
+		if (!of_property_read_u32(cpun, "reg", &hwid) && hwid == cpu)
+			break;	/* keep the reference held on the match */
+	}
+	of_node_put(cpus);	/* the "/cpus" reference is no longer needed */
+
+	return cpun;	/* NULL when no child matched */
+}
+
 void __init setup_cpuinfo(void)
 {
 	struct device_node *cpu;
 	unsigned long iccfgr, dccfgr;
 	unsigned long cache_set_size;
+	int cpu_id = smp_processor_id();
+	struct cpuinfo_or1k *cpuinfo = &cpuinfo_or1k[cpu_id];
 
-	cpu = of_find_compatible_node(NULL, NULL, "opencores,or1200-rtlsvn481");
+	cpu = setup_find_cpu_node(cpu_id);
 	if (!cpu)
-		panic("No compatible CPU found in device tree...\n");
+		panic("Couldn't find CPU%d in device tree...\n", cpu_id);
 
 	iccfgr = mfspr(SPR_ICCFGR);
-	cpuinfo.icache_ways = 1 << (iccfgr & SPR_ICCFGR_NCW);
+	cpuinfo->icache_ways = 1 << (iccfgr & SPR_ICCFGR_NCW);
 	cache_set_size = 1 << ((iccfgr & SPR_ICCFGR_NCS) >> 3);
-	cpuinfo.icache_block_size = 16 << ((iccfgr & SPR_ICCFGR_CBS) >> 7);
-	cpuinfo.icache_size =
-	    cache_set_size * cpuinfo.icache_ways * cpuinfo.icache_block_size;
+	cpuinfo->icache_block_size = 16 << ((iccfgr & SPR_ICCFGR_CBS) >> 7);
+	cpuinfo->icache_size =
+	    cache_set_size * cpuinfo->icache_ways * cpuinfo->icache_block_size;
 
 	dccfgr = mfspr(SPR_DCCFGR);
-	cpuinfo.dcache_ways = 1 << (dccfgr & SPR_DCCFGR_NCW);
+	cpuinfo->dcache_ways = 1 << (dccfgr & SPR_DCCFGR_NCW);
 	cache_set_size = 1 << ((dccfgr & SPR_DCCFGR_NCS) >> 3);
-	cpuinfo.dcache_block_size = 16 << ((dccfgr & SPR_DCCFGR_CBS) >> 7);
-	cpuinfo.dcache_size =
-	    cache_set_size * cpuinfo.dcache_ways * cpuinfo.dcache_block_size;
+	cpuinfo->dcache_block_size = 16 << ((dccfgr & SPR_DCCFGR_CBS) >> 7);
+	cpuinfo->dcache_size =
+	    cache_set_size * cpuinfo->dcache_ways * cpuinfo->dcache_block_size;
 
 	if (of_property_read_u32(cpu, "clock-frequency",
-				 &cpuinfo.clock_frequency)) {
+				 &cpuinfo->clock_frequency)) {
 		printk(KERN_WARNING
 		       "Device tree missing CPU 'clock-frequency' parameter."
 		       "Assuming frequency 25MHZ"
 		       "This is probably not what you want.");
 	}
 
+	cpuinfo->coreid = mfspr(SPR_COREID);
+
 	of_node_put(cpu);
 
 	print_cpuinfo();
@@ -251,8 +272,8 @@ void __init detect_unit_config(unsigned long upr, unsigned long mask,
 void calibrate_delay(void)
 {
 	const int *val;
-	struct device_node *cpu = NULL;
-	cpu = of_find_compatible_node(NULL, NULL, "opencores,or1200-rtlsvn481");
+	struct device_node *cpu = setup_find_cpu_node(smp_processor_id());
+
 	val = of_get_property(cpu, "clock-frequency", NULL);
 	if (!val)
 		panic("no cpu 'clock-frequency' parameter in device tree");
@@ -268,6 +289,10 @@ void __init setup_arch(char **cmdline_p)
 
 	setup_cpuinfo();
 
+#ifdef CONFIG_SMP
+	smp_init_cpus();
+#endif
+
 	/* process 1's initial memory region is the kernel code/data */
 	init_mm.start_code = (unsigned long)_stext;
 	init_mm.end_code = (unsigned long)_etext;
@@ -302,54 +327,78 @@ void __init setup_arch(char **cmdline_p)
 
 static int show_cpuinfo(struct seq_file *m, void *v)
 {
-	unsigned long vr;
-	int version, revision;
+	unsigned int vr, cpucfgr;
+	unsigned int avr;
+	unsigned int version;
+	struct cpuinfo_or1k *cpuinfo = v;	/* entry picked by c_start() */
 
 	vr = mfspr(SPR_VR);
-	version = (vr & SPR_VR_VER) >> 24;
-	revision = vr & SPR_VR_REV;
-
-	seq_printf(m,
-		  "cpu\t\t: OpenRISC-%x\n"
-		  "revision\t: %d\n"
-		  "frequency\t: %ld\n"
-		  "dcache size\t: %d bytes\n"
-		  "dcache block size\t: %d bytes\n"
-		  "dcache ways\t: %d\n"
-		  "icache size\t: %d bytes\n"
-		  "icache block size\t: %d bytes\n"
-		  "icache ways\t: %d\n"
-		  "immu\t\t: %d entries, %lu ways\n"
-		  "dmmu\t\t: %d entries, %lu ways\n"
-		  "bogomips\t: %lu.%02lu\n",
-		  version,
-		  revision,
-		  loops_per_jiffy * HZ,
-		  cpuinfo.dcache_size,
-		  cpuinfo.dcache_block_size,
-		  cpuinfo.dcache_ways,
-		  cpuinfo.icache_size,
-		  cpuinfo.icache_block_size,
-		  cpuinfo.icache_ways,
-		  1 << ((mfspr(SPR_DMMUCFGR) & SPR_DMMUCFGR_NTS) >> 2),
-		  1 + (mfspr(SPR_DMMUCFGR) & SPR_DMMUCFGR_NTW),
-		  1 << ((mfspr(SPR_IMMUCFGR) & SPR_IMMUCFGR_NTS) >> 2),
-		  1 + (mfspr(SPR_IMMUCFGR) & SPR_IMMUCFGR_NTW),
-		  (loops_per_jiffy * HZ) / 500000,
-		  ((loops_per_jiffy * HZ) / 5000) % 100);
+	cpucfgr = mfspr(SPR_CPUCFGR);	/* NOTE(review): read on the executing CPU */
+
+#ifdef CONFIG_SMP
+	seq_printf(m, "processor\t\t: %d\n", cpuinfo->coreid);
+#endif
+	if (vr & SPR_VR_UVRP) {
+		/* Updated version registers present: report VR2 and AVR */
+		vr = mfspr(SPR_VR2);
+		version = vr & SPR_VR2_VER;
+		avr = mfspr(SPR_AVR);
+		seq_printf(m, "cpu architecture\t: "
+			   "OpenRISC 1000 (%d.%d-rev%d)\n",
+			   (avr >> 24) & 0xff,
+			   (avr >> 16) & 0xff,
+			   (avr >> 8) & 0xff);
+		seq_printf(m, "cpu implementation id\t: 0x%x\n",
+			   (vr & SPR_VR2_CPUID) >> 24);
+		seq_printf(m, "cpu version\t\t: 0x%x\n", version);
+	} else {
+		version = (vr & SPR_VR_VER) >> 24;
+		seq_printf(m, "cpu\t\t\t: OpenRISC-%x\n", version);
+		seq_printf(m, "revision\t\t: %d\n", vr & SPR_VR_REV);
+	}
+	seq_printf(m, "frequency\t\t: %ld\n", loops_per_jiffy * HZ);
+	seq_printf(m, "dcache size\t\t: %d bytes\n", cpuinfo->dcache_size);
+	seq_printf(m, "dcache block size\t: %d bytes\n",
+		   cpuinfo->dcache_block_size);
+	seq_printf(m, "dcache ways\t\t: %d\n", cpuinfo->dcache_ways);
+	seq_printf(m, "icache size\t\t: %d bytes\n", cpuinfo->icache_size);
+	seq_printf(m, "icache block size\t: %d bytes\n",
+		   cpuinfo->icache_block_size);
+	seq_printf(m, "icache ways\t\t: %d\n", cpuinfo->icache_ways);
+	seq_printf(m, "immu\t\t\t: %d entries, %lu ways\n",
+		   1 << ((mfspr(SPR_IMMUCFGR) & SPR_IMMUCFGR_NTS) >> 2),
+		   1 + (mfspr(SPR_IMMUCFGR) & SPR_IMMUCFGR_NTW));
+	seq_printf(m, "dmmu\t\t\t: %d entries, %lu ways\n",
+		   1 << ((mfspr(SPR_DMMUCFGR) & SPR_DMMUCFGR_NTS) >> 2),
+		   1 + (mfspr(SPR_DMMUCFGR) & SPR_DMMUCFGR_NTW));
+	seq_printf(m, "bogomips\t\t: %lu.%02lu\n",
+		   (loops_per_jiffy * HZ) / 500000,
+		   ((loops_per_jiffy * HZ) / 5000) % 100);
+
+	seq_puts(m, "features\t\t: ");
+	seq_printf(m, "%s ", cpucfgr & SPR_CPUCFGR_OB32S ? "orbis32" : "");
+	seq_printf(m, "%s ", cpucfgr & SPR_CPUCFGR_OB64S ? "orbis64" : "");
+	seq_printf(m, "%s ", cpucfgr & SPR_CPUCFGR_OF32S ? "orfpx32" : "");
+	seq_printf(m, "%s ", cpucfgr & SPR_CPUCFGR_OF64S ? "orfpx64" : "");
+	seq_printf(m, "%s ", cpucfgr & SPR_CPUCFGR_OV64S ? "orvdx64" : "");
+	seq_puts(m, "\n");
+
+	seq_puts(m, "\n");	/* blank line closes this CPU's record */
+
 	return 0;
 }
 
-static void *c_start(struct seq_file *m, loff_t * pos)
+static void *c_start(struct seq_file *m, loff_t *pos)
 {
-	/* We only have one CPU... */
-	return *pos < 1 ? (void *)1 : NULL;
+	/* Snap *pos forward to the first online CPU numbered *pos or higher. */
+	*pos = cpumask_next(*pos - 1, cpu_online_mask);
+
+	return *pos < nr_cpu_ids ? &cpuinfo_or1k[*pos] : NULL;
 }
 
-static void *c_next(struct seq_file *m, void *v, loff_t * pos)
+static void *c_next(struct seq_file *m, void *v, loff_t *pos)
 {
-	++*pos;
-	return NULL;
+	*pos += 1;	/* step past the entry just shown, then reuse the lookup */
+	return c_start(m, pos);
 }
 
 static void c_stop(struct seq_file *m, void *v)
diff --git a/arch/openrisc/kernel/smp.c b/arch/openrisc/kernel/smp.c
new file mode 100644
index 0000000000000000000000000000000000000000..7d518ee8bddc154a5867e04fb0455ba89f4b7f1d
--- /dev/null
+++ b/arch/openrisc/kernel/smp.c
@@ -0,0 +1,259 @@
+/*
+ * Copyright (C) 2014 Stefan Kristiansson <stefan.kristiansson@saunalahti.fi>
+ * Copyright (C) 2017 Stafford Horne <shorne@gmail.com>
+ *
+ * Based on arm64 and arc implementations
+ * Copyright (C) 2013 ARM Ltd.
+ * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2.  This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#include <linux/smp.h>
+#include <linux/cpu.h>
+#include <linux/sched.h>
+#include <linux/irq.h>
+#include <asm/cpuinfo.h>
+#include <asm/mmu_context.h>
+#include <asm/tlbflush.h>
+#include <asm/cacheflush.h>
+#include <asm/time.h>
+
+static void (*smp_cross_call)(const struct cpumask *, unsigned int);
+
+unsigned long secondary_release = -1;
+struct thread_info *secondary_thread_info;
+
+enum ipi_msg_type {
+	IPI_WAKEUP,
+	IPI_RESCHEDULE,
+	IPI_CALL_FUNC,
+	IPI_CALL_FUNC_SINGLE,
+};
+
+static DEFINE_SPINLOCK(boot_lock);
+
+static void boot_secondary(unsigned int cpu, struct task_struct *idle)
+{
+	/*
+	 * Under boot_lock, publish which CPU is being released and
+	 * kick it with a wakeup IPI.
+	 */
+	spin_lock(&boot_lock);
+
+	secondary_release = cpu;
+	smp_cross_call(cpumask_of(cpu), IPI_WAKEUP);
+
+	/*
+	 * The secondary core is starting up now.  Nothing is waited for
+	 * here; __cpu_up() blocks on cpu_running after this returns.
+	 */
+	spin_unlock(&boot_lock);
+}
+
+void __init smp_prepare_boot_cpu(void)
+{
+}
+
+void __init smp_init_cpus(void)
+{
+	unsigned int cpu;
+
+	for (cpu = 0; cpu < NR_CPUS; cpu++)
+		set_cpu_possible(cpu, true);	/* every id is possible */
+}
+
+void __init smp_prepare_cpus(unsigned int max_cpus)
+{
+	unsigned int cpu;
+
+	/*
+	 * Fill in the present map: the first max_cpus CPU ids are
+	 * treated as populated.
+	 */
+	for (cpu = 0; cpu < max_cpus; cpu++)
+		set_cpu_present(cpu, true);
+}
+
+void __init smp_cpus_done(unsigned int max_cpus)
+{
+}
+
+static DECLARE_COMPLETION(cpu_running);
+
+int __cpu_up(unsigned int cpu, struct task_struct *idle)
+{
+	if (smp_cross_call == NULL) {
+		pr_warn("CPU%u: failed to start, IPI controller missing\n",
+			cpu);
+		return -EIO;	/* cannot wake a CPU without an IPI sender */
+	}
+	/* Publish the idle thread_info and init_mm's pgd for this CPU. */
+	secondary_thread_info = task_thread_info(idle);
+	current_pgd[cpu] = init_mm.pgd;
+	/* Release the CPU, then give it up to 1s to signal cpu_running. */
+	boot_secondary(cpu, idle);
+	if (!wait_for_completion_timeout(&cpu_running,
+					msecs_to_jiffies(1000))) {
+		pr_crit("CPU%u: failed to start\n", cpu);
+		return -EIO;
+	}
+	synchronise_count_master(cpu);
+
+	return 0;
+}
+
+asmlinkage __init void secondary_start_kernel(void)
+{
+	struct mm_struct *mm = &init_mm;
+	unsigned int cpu = smp_processor_id();
+	/*
+	 * All kernel threads share the same mm context; grab a
+	 * reference and switch to it.
+	 */
+	atomic_inc(&mm->mm_count);
+	current->active_mm = mm;
+	cpumask_set_cpu(cpu, mm_cpumask(mm));
+
+	pr_info("CPU%u: Booted secondary processor\n", cpu);
+	/* Per-CPU setup: cpuinfo and this CPU's tick timer clockevent. */
+	setup_cpuinfo();
+	openrisc_clockevent_init();
+
+	notify_cpu_starting(cpu);
+
+	/*
+	 * Let the boot CPU continue: __cpu_up() is blocked on cpu_running.
+	 */
+	complete(&cpu_running);
+	/* Pairs with synchronise_count_master() called by __cpu_up(). */
+	synchronise_count_slave(cpu);
+	set_cpu_online(cpu, true);
+
+	local_irq_enable();
+
+	preempt_disable();
+	/*
+	 * OK, it's off to the idle thread for us
+	 */
+	cpu_startup_entry(CPUHP_AP_ONLINE_IDLE);
+}
+
+void handle_IPI(unsigned int ipi_msg)
+{
+	unsigned int cpu = smp_processor_id();	/* only used in the warning */
+
+	switch (ipi_msg) {
+	case IPI_WAKEUP:
+		break;	/* no handler work for a wakeup message */
+
+	case IPI_RESCHEDULE:
+		scheduler_ipi();
+		break;
+
+	case IPI_CALL_FUNC:
+		generic_smp_call_function_interrupt();
+		break;
+
+	case IPI_CALL_FUNC_SINGLE:
+		generic_smp_call_function_single_interrupt();
+		break;
+
+	default:
+		WARN(1, "CPU%u: Unknown IPI message 0x%x\n", cpu, ipi_msg);
+		break;
+	}
+}
+
+void smp_send_reschedule(int cpu)
+{
+	smp_cross_call(cpumask_of(cpu), IPI_RESCHEDULE);
+}
+
+static void stop_this_cpu(void *dummy)
+{
+	/* Remove this CPU from the online mask */
+	set_cpu_online(smp_processor_id(), false);
+
+	local_irq_disable();
+	/* Doze: set DME in PMR, but only when UPR reports the PMP bit */
+	if (mfspr(SPR_UPR) & SPR_UPR_PMP)
+		mtspr(SPR_PMR, mfspr(SPR_PMR) | SPR_PMR_DME);
+	/* If that didn't work, spin forever with interrupts off */
+	while (1)
+		;
+}
+
+void smp_send_stop(void)
+{
+	smp_call_function(stop_this_cpu, NULL, 0);
+}
+
+/* not supported, yet */
+int setup_profiling_timer(unsigned int multiplier)
+{
+	return -EINVAL;
+}
+
+void __init set_smp_cross_call(void (*fn)(const struct cpumask *, unsigned int))
+{
+	smp_cross_call = fn;
+}
+
+void arch_send_call_function_single_ipi(int cpu)
+{
+	smp_cross_call(cpumask_of(cpu), IPI_CALL_FUNC_SINGLE);
+}
+
+void arch_send_call_function_ipi_mask(const struct cpumask *mask)
+{
+	smp_cross_call(mask, IPI_CALL_FUNC);
+}
+
+/* TLB flush operations - Performed on each CPU */
+static inline void ipi_flush_tlb_all(void *ignored)
+{
+	local_flush_tlb_all();
+}
+
+void flush_tlb_all(void)
+{
+	on_each_cpu(ipi_flush_tlb_all, NULL, 1);
+}
+
+/*
+ * FIXME: implement proper functionality instead of flush_tlb_all.
+ * *But*, as things currently stand, the local_flush_tlb_* functions will
+ * all boil down to local_flush_tlb_all anyway.
+ */
+void flush_tlb_mm(struct mm_struct *mm)
+{
+	on_each_cpu(ipi_flush_tlb_all, NULL, 1);
+}
+
+void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr)
+{
+	on_each_cpu(ipi_flush_tlb_all, NULL, 1);
+}
+
+void flush_tlb_range(struct vm_area_struct *vma,
+		     unsigned long start, unsigned long end)
+{
+	on_each_cpu(ipi_flush_tlb_all, NULL, 1);
+}
+
+/* Instruction cache invalidate - performed on each cpu */
+static void ipi_icache_page_inv(void *arg)
+{
+	struct page *page = arg;
+
+	local_icache_page_inv(page);
+}
+
+void smp_icache_page_inv(struct page *page)
+{
+	on_each_cpu(ipi_icache_page_inv, page, 1);
+}
+EXPORT_SYMBOL(smp_icache_page_inv);
diff --git a/arch/openrisc/kernel/stacktrace.c b/arch/openrisc/kernel/stacktrace.c
new file mode 100644
index 0000000000000000000000000000000000000000..43f140a28bc7257ddbc20a713fe10e900dd371e0
--- /dev/null
+++ b/arch/openrisc/kernel/stacktrace.c
@@ -0,0 +1,86 @@
+/*
+ * Stack trace utility for OpenRISC
+ *
+ * Copyright (C) 2017 Stafford Horne <shorne@gmail.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2.  This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ *
+ * Loosely based on work from sh and powerpc.
+ */
+
+#include <linux/export.h>
+#include <linux/sched.h>
+#include <linux/sched/debug.h>
+#include <linux/stacktrace.h>
+
+#include <asm/processor.h>
+#include <asm/unwinder.h>
+
+/*
+ * Save stack-backtrace addresses into a stack_trace buffer.
+ */
+static void
+save_stack_address(void *data, unsigned long addr, int reliable)
+{
+	struct stack_trace *trace = data;
+
+	if (!reliable)
+		return;
+
+	if (trace->skip > 0) {
+		trace->skip--;
+		return;
+	}
+
+	if (trace->nr_entries < trace->max_entries)
+		trace->entries[trace->nr_entries++] = addr;
+}
+
+void save_stack_trace(struct stack_trace *trace)
+{
+	unwind_stack(trace, (unsigned long *) &trace, save_stack_address);
+}
+EXPORT_SYMBOL_GPL(save_stack_trace);
+
+static void
+save_stack_address_nosched(void *data, unsigned long addr, int reliable)
+{
+	struct stack_trace *trace = (struct stack_trace *)data;
+
+	if (!reliable)
+		return;
+
+	if (in_sched_functions(addr))
+		return;
+
+	if (trace->skip > 0) {
+		trace->skip--;
+		return;
+	}
+
+	if (trace->nr_entries < trace->max_entries)
+		trace->entries[trace->nr_entries++] = addr;
+}
+
+void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
+{
+	unsigned long *start;
+
+	if (tsk != current)
+		start = (unsigned long *) KSTK_ESP(tsk);
+	else
+		start = (unsigned long *) &start; /* begin at our own frame */
+
+	unwind_stack(trace, start, save_stack_address_nosched);
+}
+EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
+
+void
+save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace)
+{
+	unwind_stack(trace, (unsigned long *) regs->sp,
+		     save_stack_address_nosched);
+}
+EXPORT_SYMBOL_GPL(save_stack_trace_regs);
diff --git a/arch/openrisc/kernel/sync-timer.c b/arch/openrisc/kernel/sync-timer.c
new file mode 100644
index 0000000000000000000000000000000000000000..ed8d835caca1996d7c5caa7cd7ed73bc72444526
--- /dev/null
+++ b/arch/openrisc/kernel/sync-timer.c
@@ -0,0 +1,120 @@
+/*
+ * OR1K timer synchronisation
+ *
+ * Based on work from MIPS implementation.
+ *
+ * All CPUs will have their count registers synchronised to the CPU0 next time
+ * value. This can cause a small timewarp for CPU0. All other CPU's should
+ * not have done anything significant (but they may have had interrupts
+ * enabled briefly - prom_smp_finish() should not be responsible for enabling
+ * interrupts...)
+ */
+
+#include <linux/kernel.h>
+#include <linux/irqflags.h>
+#include <linux/cpumask.h>
+
+#include <asm/time.h>
+#include <asm/timex.h>
+#include <linux/atomic.h>
+#include <asm/barrier.h>
+
+#include <asm/spr.h>
+
+static unsigned int initcount;
+static atomic_t count_count_start = ATOMIC_INIT(0);
+static atomic_t count_count_stop = ATOMIC_INIT(0);
+
+#define COUNTON 100
+#define NR_LOOPS 3
+
+void synchronise_count_master(int cpu)
+{
+	int i;
+	unsigned long flags;
+
+	pr_info("Synchronize counters for CPU %u: ", cpu);
+
+	local_irq_save(flags);
+
+	/*
+	 * We loop a few times to get a primed instruction cache,
+	 * then the last pass is more or less synchronised and
+	 * the master and slaves each set their cycle counters to a known
+	 * value all at once. This reduces the chance of having random offsets
+	 * between the processors, and guarantees that the maximum
+	 * delay between the cycle counters is never bigger than
+	 * the latency of information-passing (cachelines) between
+	 * two CPUs.
+	 */
+
+	for (i = 0; i < NR_LOOPS; i++) {
+		/* wait for the slave to check in; slaves loop on '!= 2' */
+		while (atomic_read(&count_count_start) != 1)
+			mb();
+		atomic_set(&count_count_stop, 0);
+		smp_wmb();
+
+		/* Let the slave write its count register */
+		atomic_inc(&count_count_start);
+
+		/* initcount, loaded by everyone later, is sampled on pass 1 */
+		if (i == 1)
+			initcount = get_cycles();
+
+		/*
+		 * Everyone initialises count in the last loop:
+		 */
+		if (i == NR_LOOPS-1)
+			openrisc_timer_set(initcount);
+
+		/*
+		 * Wait for slave to leave the synchronization point:
+		 */
+		while (atomic_read(&count_count_stop) != 1)
+			mb();
+		atomic_set(&count_count_start, 0);
+		smp_wmb();
+		atomic_inc(&count_count_stop);
+	}
+	/* Arrange for an interrupt in a short while (COUNTON cycles) */
+	openrisc_timer_set_next(COUNTON);
+
+	local_irq_restore(flags);
+
+	/*
+	 * i386 code reported the skew here, but the
+	 * count registers were almost certainly out of sync
+	 * so no point in alarming people
+	 */
+	pr_cont("done.\n");
+}
+
+void synchronise_count_slave(int cpu)
+{
+	int i;
+
+	/*
+	 * Not every cpu is online at the time this gets called,
+	 * so we first wait for the master to say everyone is ready
+	 */
+
+	for (i = 0; i < NR_LOOPS; i++) {
+		atomic_inc(&count_count_start);	/* tell the master we arrived */
+		while (atomic_read(&count_count_start) != 2)
+			mb();
+
+		/*
+		 * Everyone initialises count in the last loop:
+		 */
+		if (i == NR_LOOPS-1)
+			openrisc_timer_set(initcount);
+
+		atomic_inc(&count_count_stop);	/* tell the master we are done */
+		while (atomic_read(&count_count_stop) != 2)
+			mb();
+	}
+	/* Arrange for an interrupt in a short while (COUNTON cycles) */
+	openrisc_timer_set_next(COUNTON);
+}
+#undef NR_LOOPS
diff --git a/arch/openrisc/kernel/time.c b/arch/openrisc/kernel/time.c
index 687c11d048d7c2493c7d302e3c0cd053f99ef360..6baecea270801ae56ab1ef39383442f846d1999a 100644
--- a/arch/openrisc/kernel/time.c
+++ b/arch/openrisc/kernel/time.c
@@ -27,8 +27,14 @@
 
 #include <asm/cpuinfo.h>
 
-static int openrisc_timer_set_next_event(unsigned long delta,
-					 struct clock_event_device *dev)
+/* Set the tick timer count register (TTCR) to count; used by timer sync */
+inline void openrisc_timer_set(unsigned long count)
+{
+	mtspr(SPR_TTCR, count);
+}
+
+/* Set the timer to trigger in delta cycles */
+inline void openrisc_timer_set_next(unsigned long delta)
 {
 	u32 c;
 
@@ -44,7 +50,12 @@ static int openrisc_timer_set_next_event(unsigned long delta,
 	 * Keep timer in continuous mode always.
 	 */
 	mtspr(SPR_TTMR, SPR_TTMR_CR | SPR_TTMR_IE | c);
+}
 
+static int openrisc_timer_set_next_event(unsigned long delta,
+					 struct clock_event_device *dev)
+{
+	openrisc_timer_set_next(delta);
 	return 0;
 }
 
@@ -53,13 +64,32 @@ static int openrisc_timer_set_next_event(unsigned long delta,
  * timers) we cannot enable the PERIODIC feature.  The tick timer can run using
  * one-shot events, so no problem.
  */
+DEFINE_PER_CPU(struct clock_event_device, clockevent_openrisc_timer);
 
-static struct clock_event_device clockevent_openrisc_timer = {
-	.name = "openrisc_timer_clockevent",
-	.features = CLOCK_EVT_FEAT_ONESHOT,
-	.rating = 300,
-	.set_next_event = openrisc_timer_set_next_event,
-};
+/* Register the calling CPU's tick timer as a one-shot clockevent device. */
+void openrisc_clockevent_init(void)
+{
+	unsigned int cpu = smp_processor_id();
+	struct clock_event_device *evt =
+		&per_cpu(clockevent_openrisc_timer, cpu);
+	struct cpuinfo_or1k *cpuinfo = &cpuinfo_or1k[cpu];
+
+	/* Continuous-run mode, timer interrupt not enabled yet */
+	mtspr(SPR_TTMR, SPR_TTMR_CR);
+
+#ifdef CONFIG_SMP
+	evt->broadcast = tick_broadcast;
+#endif
+	evt->name = "openrisc_timer_clockevent";
+	evt->features = CLOCK_EVT_FEAT_ONESHOT;
+	evt->rating = 300;
+	evt->set_next_event = openrisc_timer_set_next_event;
+	evt->cpumask = cpumask_of(cpu);
+
+	/* We only have 28 bits: cap the max delta at 0x0fffffff */
+	clockevents_config_and_register(evt, cpuinfo->clock_frequency,
+					100, 0x0fffffff);
+}
 
 static inline void timer_ack(void)
 {
@@ -83,7 +113,9 @@ static inline void timer_ack(void)
 irqreturn_t __irq_entry timer_interrupt(struct pt_regs *regs)
 {
 	struct pt_regs *old_regs = set_irq_regs(regs);
-	struct clock_event_device *evt = &clockevent_openrisc_timer;
+	unsigned int cpu = smp_processor_id();
+	struct clock_event_device *evt =
+		&per_cpu(clockevent_openrisc_timer, cpu);
 
 	timer_ack();
 
@@ -99,24 +131,12 @@ irqreturn_t __irq_entry timer_interrupt(struct pt_regs *regs)
 	return IRQ_HANDLED;
 }
 
-static __init void openrisc_clockevent_init(void)
-{
-	clockevent_openrisc_timer.cpumask = cpumask_of(0);
-
-	/* We only have 28 bits */
-	clockevents_config_and_register(&clockevent_openrisc_timer,
-					cpuinfo.clock_frequency,
-					100, 0x0fffffff);
-
-}
-
 /**
  * Clocksource: Based on OpenRISC timer/counter
  *
  * This sets up the OpenRISC Tick Timer as a clock source.  The tick timer
  * is 32 bits wide and runs at the CPU clock frequency.
  */
-
 static u64 openrisc_timer_read(struct clocksource *cs)
 {
 	return (u64) mfspr(SPR_TTCR);
@@ -132,7 +152,9 @@ static struct clocksource openrisc_timer = {
 
 static int __init openrisc_timer_init(void)
 {
-	if (clocksource_register_hz(&openrisc_timer, cpuinfo.clock_frequency))
+	struct cpuinfo_or1k *cpuinfo = &cpuinfo_or1k[smp_processor_id()];
+
+	if (clocksource_register_hz(&openrisc_timer, cpuinfo->clock_frequency))
 		panic("failed to register clocksource");
 
 	/* Enable the incrementer: 'continuous' mode with interrupt disabled */
diff --git a/arch/openrisc/kernel/traps.c b/arch/openrisc/kernel/traps.c
index 803e9e756f7785a9250c95ccd93f4cacc18f70d2..4085d72fa5ae8a30bc3011f3e8cf44ca331568d5 100644
--- a/arch/openrisc/kernel/traps.c
+++ b/arch/openrisc/kernel/traps.c
@@ -38,6 +38,7 @@
 #include <asm/segment.h>
 #include <asm/io.h>
 #include <asm/pgtable.h>
+#include <asm/unwinder.h>
 
 extern char _etext, _stext;
 
@@ -45,61 +46,20 @@ int kstack_depth_to_print = 0x180;
 int lwa_flag;
 unsigned long __user *lwa_addr;
 
-static inline int valid_stack_ptr(struct thread_info *tinfo, void *p)
+void print_trace(void *data, unsigned long addr, int reliable)
 {
-	return p > (void *)tinfo && p < (void *)tinfo + THREAD_SIZE - 3;
-}
-
-void show_trace(struct task_struct *task, unsigned long *stack)
-{
-	struct thread_info *context;
-	unsigned long addr;
-
-	context = (struct thread_info *)
-	    ((unsigned long)stack & (~(THREAD_SIZE - 1)));
-
-	while (valid_stack_ptr(context, stack)) {
-		addr = *stack++;
-		if (__kernel_text_address(addr)) {
-			printk(" [<%08lx>]", addr);
-			print_symbol(" %s", addr);
-			printk("\n");
-		}
-	}
-	printk(" =======================\n");
+	pr_emerg("[<%p>] %s%pS\n", (void *) addr, reliable ? "" : "? ",
+	       (void *) addr);
 }
 
 /* displays a short stack trace */
 void show_stack(struct task_struct *task, unsigned long *esp)
 {
-	unsigned long addr, *stack;
-	int i;
-
 	if (esp == NULL)
 		esp = (unsigned long *)&esp;
 
-	stack = esp;
-
-	printk("Stack dump [0x%08lx]:\n", (unsigned long)esp);
-	for (i = 0; i < kstack_depth_to_print; i++) {
-		if (kstack_end(stack))
-			break;
-		if (__get_user(addr, stack)) {
-			/* This message matches "failing address" marked
-			   s390 in ksymoops, so lines containing it will
-			   not be filtered out by ksymoops.  */
-			printk("Failing address 0x%lx\n", (unsigned long)stack);
-			break;
-		}
-		stack++;
-
-		printk("sp + %02d: 0x%08lx\n", i * 4, addr);
-	}
-	printk("\n");
-
-	show_trace(task, esp);
-
-	return;
+	pr_emerg("Call trace:\n");
+	unwind_stack(NULL, esp, print_trace);
 }
 
 void show_trace_task(struct task_struct *tsk)
@@ -115,7 +75,7 @@ void show_registers(struct pt_regs *regs)
 	int in_kernel = 1;
 	unsigned long esp;
 
-	esp = (unsigned long)(&regs->sp);
+	esp = (unsigned long)(regs->sp);
 	if (user_mode(regs))
 		in_kernel = 0;
 
diff --git a/arch/openrisc/kernel/unwinder.c b/arch/openrisc/kernel/unwinder.c
new file mode 100644
index 0000000000000000000000000000000000000000..8ae15c2c18459eb08541998a07eaaabf4df7ea81
--- /dev/null
+++ b/arch/openrisc/kernel/unwinder.c
@@ -0,0 +1,105 @@
+/*
+ * OpenRISC unwinder.c
+ *
+ * Reusable arch specific api for unwinding stacks.
+ *
+ * Copyright (C) 2017 Stafford Horne <shorne@gmail.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2.  This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#include <linux/sched/task_stack.h>
+#include <linux/kernel.h>
+
+#include <asm/unwinder.h>
+
+#ifdef CONFIG_FRAME_POINTER
+struct or1k_frameinfo {
+	unsigned long *fp;
+	unsigned long ra;
+	unsigned long top;
+};
+
+/*
+ * Verify a frameinfo structure.  The return address should be a valid text
+ * address.  The frame pointer may be null if it's the last frame, otherwise
+ * the frame pointer should point to a location in the stack after the
+ * top of the next frame up.
+ */
+static inline int or1k_frameinfo_valid(struct or1k_frameinfo *frameinfo)
+{
+	return (frameinfo->fp == NULL ||
+		(!kstack_end(frameinfo->fp) &&
+		 frameinfo->fp > &frameinfo->top)) &&
+	       __kernel_text_address(frameinfo->ra);
+}
+
+/*
+ * Create a stack trace doing scanning which is frame pointer aware. We can
+ * get reliable stack traces by matching the previously found frame
+ * pointer with the top of the stack address every time we find a valid
+ * or1k_frameinfo.
+ *
+ * Ideally the stack parameter will be passed as FP, but it can not be
+ * guaranteed.  Therefore we scan each address looking for the first sign
+ * of a return address.
+ *
+ * The OpenRISC stack frame looks something like the following.  The
+ * location SP is held in r1 and location FP is held in r2 when frame pointers
+ * are enabled.
+ *
+ * SP   -> (top of stack)
+ *      -  (callee saved registers)
+ *      -  (local variables)
+ * FP-8 -> previous FP             \
+ * FP-4 -> return address          |- or1k_frameinfo
+ * FP   -> (previous top of stack) /
+ */
+void unwind_stack(void *data, unsigned long *stack,
+		  void (*trace)(void *data, unsigned long addr, int reliable))
+{
+	unsigned long *expected_top = NULL;
+	struct or1k_frameinfo *fi;
+	int chained;
+
+	for (; !kstack_end(stack); stack++) {
+		/* Treat this slot as the 'top' member of a candidate frame. */
+		fi = container_of(stack, struct or1k_frameinfo, top);
+		if (!__kernel_text_address(fi->ra))
+			continue;
+
+		/*
+		 * Reliable when the frame checks out and either no FP is
+		 * pending or it sits where the last reliable FP pointed.
+		 */
+		chained = or1k_frameinfo_valid(fi) &&
+			  (expected_top == NULL ||
+			   expected_top == &fi->top);
+		if (chained)
+			expected_top = fi->fp;
+
+		trace(data, fi->ra, chained);
+	}
+}
+
+#else /* CONFIG_FRAME_POINTER */
+
+/*
+ * Create a stack trace by doing a simple scan treating all text addresses
+ * as return addresses.
+ */
+void unwind_stack(void *data, unsigned long *stack,
+		   void (*trace)(void *data, unsigned long addr, int reliable))
+{
+	unsigned long addr;
+
+	while (!kstack_end(stack)) {
+		addr = *stack++;
+		if (__kernel_text_address(addr))
+			trace(data, addr, 0);
+	}
+}
+#endif /* CONFIG_FRAME_POINTER */
+
diff --git a/arch/openrisc/lib/delay.c b/arch/openrisc/lib/delay.c
index 8b13fdf43ec61592713fa7d2e8b640adb2ed3a06..a92bd621aa1f6e5e64a7ade5ad392c5d5c46e918 100644
--- a/arch/openrisc/lib/delay.c
+++ b/arch/openrisc/lib/delay.c
@@ -25,7 +25,7 @@
 
 int read_current_timer(unsigned long *timer_value)
 {
-	*timer_value = mfspr(SPR_TTCR);
+	*timer_value = get_cycles();
 	return 0;
 }
 
diff --git a/arch/openrisc/mm/Makefile b/arch/openrisc/mm/Makefile
index 324ba26345291a27caafef1edfd0a8884fe7c7f9..a31b2a42e966f1604d67db63fa645738dfc2964e 100644
--- a/arch/openrisc/mm/Makefile
+++ b/arch/openrisc/mm/Makefile
@@ -2,4 +2,4 @@
 # Makefile for the linux openrisc-specific parts of the memory manager.
 #
 
-obj-y	:= fault.o tlb.o init.o ioremap.o
+obj-y	:= fault.o cache.o tlb.o init.o ioremap.o
diff --git a/arch/openrisc/mm/cache.c b/arch/openrisc/mm/cache.c
new file mode 100644
index 0000000000000000000000000000000000000000..b747bf1fc1b637e18c3a8beeaeab3417f9ace37b
--- /dev/null
+++ b/arch/openrisc/mm/cache.c
@@ -0,0 +1,61 @@
+/*
+ * OpenRISC cache.c
+ *
+ * Linux architectural port borrowing liberally from similar works of
+ * others.  All original copyrights apply as per the original source
+ * declaration.
+ *
+ * Modifications for the OpenRISC architecture:
+ * Copyright (C) 2015 Jan Henrik Weinstock <jan.weinstock@rwth-aachen.de>
+ *
+ *      This program is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU General Public License
+ *      as published by the Free Software Foundation; either version
+ *      2 of the License, or (at your option) any later version.
+ */
+
+#include <asm/spr.h>
+#include <asm/spr_defs.h>
+#include <asm/cache.h>
+#include <asm/cacheflush.h>
+#include <asm/tlbflush.h>
+
+static void cache_loop(struct page *page, const unsigned int reg)
+{
+	unsigned long paddr = page_to_pfn(page) << PAGE_SHIFT;
+	unsigned long end = paddr + PAGE_SIZE;
+	unsigned long line;
+
+	for (line = paddr & ~(L1_CACHE_BYTES - 1); line < end;
+	     line += L1_CACHE_BYTES)
+		mtspr(reg, line);	/* one SPR write per cache line */
+}
+
+void local_dcache_page_flush(struct page *page)
+{
+	cache_loop(page, SPR_DCBFR);
+}
+EXPORT_SYMBOL(local_dcache_page_flush);
+
+void local_icache_page_inv(struct page *page)
+{
+	cache_loop(page, SPR_ICBIR);
+}
+EXPORT_SYMBOL(local_icache_page_inv);
+
+void update_cache(struct vm_area_struct *vma, unsigned long address,
+	pte_t *pte)
+{
+	unsigned long pfn = pte_val(*pte) >> PAGE_SHIFT;
+	struct page *page = pfn_to_page(pfn);
+	int dirty = !test_and_set_bit(PG_dc_clean, &page->flags);
+
+	/*
+	 * Since icaches do not snoop for updated data on OpenRISC, we
+	 * must write back and invalidate any dirty pages manually. We
+	 * can skip data pages, since they will not end up in icaches.
+	 */
+	if ((vma->vm_flags & VM_EXEC) && dirty)
+		sync_icache_dcache(page);
+}
+
diff --git a/arch/openrisc/mm/fault.c b/arch/openrisc/mm/fault.c
index e310ab499385c5432a8d18c07a8081a0b9e5a3a5..d0021dfae20ad24649f20eab77f234ea2c598a22 100644
--- a/arch/openrisc/mm/fault.c
+++ b/arch/openrisc/mm/fault.c
@@ -33,7 +33,7 @@ unsigned long pte_errors;	/* updated by do_page_fault() */
 /* __PHX__ :: - check the vmalloc_fault in do_page_fault()
  *            - also look into include/asm-or32/mmu_context.h
  */
-volatile pgd_t *current_pgd;
+volatile pgd_t *current_pgd[NR_CPUS];
 
 extern void die(char *, struct pt_regs *, long);
 
@@ -319,7 +319,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long address,
 
 		phx_mmu("vmalloc_fault");
 */
-		pgd = (pgd_t *)current_pgd + offset;
+		pgd = (pgd_t *)current_pgd[smp_processor_id()] + offset;
 		pgd_k = init_mm.pgd + offset;
 
 		/* Since we're two-level, we don't need to do both
diff --git a/arch/openrisc/mm/init.c b/arch/openrisc/mm/init.c
index f67d82b9d22fb10ee719882c2389dc7a40ca2843..6972d5d6f23f7343306c750945a22862e1f9e46e 100644
--- a/arch/openrisc/mm/init.c
+++ b/arch/openrisc/mm/init.c
@@ -147,7 +147,7 @@ void __init paging_init(void)
 	 * (even if it is most probably not used until the next
 	 *  switch_mm)
 	 */
-	current_pgd = init_mm.pgd;
+	current_pgd[smp_processor_id()] = init_mm.pgd;
 
 	end = (unsigned long)__va(max_low_pfn * PAGE_SIZE);
 
diff --git a/arch/openrisc/mm/tlb.c b/arch/openrisc/mm/tlb.c
index 683bd4d31c7cb021080c30c73f81ecd1e878b078..6c253a2e86bc4e1a2cba5e509f09fdbe5c148635 100644
--- a/arch/openrisc/mm/tlb.c
+++ b/arch/openrisc/mm/tlb.c
@@ -49,7 +49,7 @@
  *
  */
 
-void flush_tlb_all(void)
+void local_flush_tlb_all(void)
 {
 	int i;
 	unsigned long num_tlb_sets;
@@ -86,7 +86,7 @@ void flush_tlb_all(void)
 #define flush_itlb_page_no_eir(addr) \
 	mtspr_off(SPR_ITLBMR_BASE(0), ITLB_OFFSET(addr), 0);
 
-void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr)
+void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long addr)
 {
 	if (have_dtlbeir)
 		flush_dtlb_page_eir(addr);
@@ -99,8 +99,8 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr)
 		flush_itlb_page_no_eir(addr);
 }
 
-void flush_tlb_range(struct vm_area_struct *vma,
-		     unsigned long start, unsigned long end)
+void local_flush_tlb_range(struct vm_area_struct *vma,
+			   unsigned long start, unsigned long end)
 {
 	int addr;
 	bool dtlbeir;
@@ -129,13 +129,13 @@ void flush_tlb_range(struct vm_area_struct *vma,
  * This should be changed to loop over over mm and call flush_tlb_range.
  */
 
-void flush_tlb_mm(struct mm_struct *mm)
+void local_flush_tlb_mm(struct mm_struct *mm)
 {
 
 	/* Was seeing bugs with the mm struct passed to us. Scrapped most of
 	   this function. */
 	/* Several architctures do this */
-	flush_tlb_all();
+	local_flush_tlb_all();
 }
 
 /* called in schedule() just before actually doing the switch_to */
@@ -149,14 +149,14 @@ void switch_mm(struct mm_struct *prev, struct mm_struct *next,
 	 * might be invalid at points where we still need to derefer
 	 * the pgd.
 	 */
-	current_pgd = next->pgd;
+	current_pgd[smp_processor_id()] = next->pgd;
 
 	/* We don't have context support implemented, so flush all
 	 * entries belonging to previous map
 	 */
 
 	if (prev != next)
-		flush_tlb_mm(prev);
+		local_flush_tlb_mm(prev);
 
 }
 
diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig
index 9d8a1dd2e2c25a8f3bca727f795a8b1fc7de858d..a2ca82f6c2ddd3d9c60017b83b988a943ece510b 100644
--- a/drivers/irqchip/Kconfig
+++ b/drivers/irqchip/Kconfig
@@ -151,6 +151,9 @@ config CLPS711X_IRQCHIP
 	select SPARSE_IRQ
 	default y
 
+config OMPIC
+	bool
+
 config OR1K_PIC
 	bool
 	select IRQ_DOMAIN
diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile
index b842dfdc903f1f9649a27a907157a049e1cde4a2..046df81c402ad1fd729217a7c5f3e0b23064882b 100644
--- a/drivers/irqchip/Makefile
+++ b/drivers/irqchip/Makefile
@@ -18,6 +18,7 @@ obj-$(CONFIG_DW_APB_ICTL)		+= irq-dw-apb-ictl.o
 obj-$(CONFIG_METAG)			+= irq-metag-ext.o
 obj-$(CONFIG_METAG_PERFCOUNTER_IRQS)	+= irq-metag.o
 obj-$(CONFIG_CLPS711X_IRQCHIP)		+= irq-clps711x.o
+obj-$(CONFIG_OMPIC)			+= irq-ompic.o
 obj-$(CONFIG_OR1K_PIC)			+= irq-or1k-pic.o
 obj-$(CONFIG_ORION_IRQCHIP)		+= irq-orion.o
 obj-$(CONFIG_OMAP_IRQCHIP)		+= irq-omap-intc.o
diff --git a/drivers/irqchip/irq-ompic.c b/drivers/irqchip/irq-ompic.c
new file mode 100644
index 0000000000000000000000000000000000000000..cf6d0c4555188b7f43e367f1623316d85b553cf8
--- /dev/null
+++ b/drivers/irqchip/irq-ompic.c
@@ -0,0 +1,202 @@
+/*
+ * Open Multi-Processor Interrupt Controller driver
+ *
+ * Copyright (C) 2014 Stefan Kristiansson <stefan.kristiansson@saunalahti.fi>
+ * Copyright (C) 2017 Stafford Horne <shorne@gmail.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2.  This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ *
+ * The ompic device handles IPI communication between cores in multi-core
+ * OpenRISC systems.
+ *
+ * Registers
+ *
+ * For each CPU the ompic has 2 registers. The control register for sending
+ * and acking IPIs and the status register for receiving IPIs. The register
+ * layouts are as follows:
+ *
+ *  Control register
+ *  +---------+---------+----------+---------+
+ *  | 31      | 30      | 29 .. 16 | 15 .. 0 |
+ *  +---------+---------+----------+---------+
+ *  | IRQ ACK | IRQ GEN | DST CORE | DATA    |
+ *  +---------+---------+----------+---------+
+ *
+ *  Status register
+ *  +----------+-------------+----------+---------+
+ *  | 31       | 30          | 29 .. 16 | 15 .. 0 |
+ *  +----------+-------------+----------+---------+
+ *  | Reserved | IRQ Pending | SRC CORE | DATA    |
+ *  +----------+-------------+----------+---------+
+ *
+ * Architecture
+ *
+ * - The ompic generates a level interrupt to the CPU PIC when a message is
+ *   ready.  Messages are delivered via the memory bus.
+ * - The ompic does not have any interrupt input lines.
+ * - The ompic is wired to the same irq line on each core.
+ * - Devices are wired to the same irq line on each core.
+ *
+ *   +---------+                         +---------+
+ *   | CPU     |                         | CPU     |
+ *   |  Core 0 |<==\ (memory access) /==>|  Core 1 |
+ *   |  [ PIC ]|   |                 |   |  [ PIC ]|
+ *   +----^-^--+   |                 |   +----^-^--+
+ *        | |      v                 v        | |
+ *   <====|=|=================================|=|==> (memory bus)
+ *        | |      ^                  ^       | |
+ *  (ipi  | +------|---------+--------|-------|-+ (device irq)
+ *   irq  |        |         |        |       |
+ *  core0)| +------|---------|--------|-------+ (ipi irq core1)
+ *        | |      |         |        |
+ *   +----o-o-+    |    +--------+    |
+ *   | ompic  |<===/    | Device |<===/
+ *   |  IPI   |         +--------+
+ *   +--------+
+ *
+ */
+
+#include <linux/io.h>
+#include <linux/ioport.h>
+#include <linux/interrupt.h>
+#include <linux/smp.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+#include <linux/of_address.h>
+
+#include <linux/irqchip.h>
+
+#define OMPIC_CPUBYTES		8	/* register bytes per cpu */
+#define OMPIC_CTRL(cpu)		(0x0 + ((cpu) * OMPIC_CPUBYTES))
+#define OMPIC_STAT(cpu)		(0x4 + ((cpu) * OMPIC_CPUBYTES))
+
+#define OMPIC_CTRL_IRQ_ACK	(1U << 31)
+#define OMPIC_CTRL_IRQ_GEN	(1U << 30)
+#define OMPIC_CTRL_DST(cpu)	(((cpu) & 0x3fff) << 16)
+
+#define OMPIC_STAT_IRQ_PENDING	(1U << 30)
+
+#define OMPIC_DATA(x)		((x) & 0xffff)
+
+static DEFINE_PER_CPU(unsigned long, ops);	/* pending IPI message bits */
+
+static void __iomem *ompic_base;
+
+static inline u32 ompic_readreg(void __iomem *base, loff_t offset)
+{
+	return ioread32be(base + offset);	/* 32-bit big-endian read */
+}
+
+static void ompic_writereg(void __iomem *base, loff_t offset, u32 data)
+{
+	iowrite32be(data, base + offset);	/* 32-bit big-endian write */
+}
+
+/* Send IPI @ipi_msg to each cpu in @mask via the sender's control register. */
+static void ompic_raise_softirq(const struct cpumask *mask,
+				unsigned int ipi_msg)
+{
+	unsigned int dst_cpu;
+	unsigned int src_cpu = smp_processor_id();
+
+	for_each_cpu(dst_cpu, mask) {
+		set_bit(ipi_msg, &per_cpu(ops, dst_cpu));
+
+		/*
+		 * On OpenRISC the atomic set_bit() call implies a memory
+		 * barrier.  Otherwise we would need: smp_wmb(); paired
+		 * with the read in ompic_ipi_handler.
+		 */
+		ompic_writereg(ompic_base, OMPIC_CTRL(src_cpu),
+			       OMPIC_CTRL_IRQ_GEN |
+			       OMPIC_CTRL_DST(dst_cpu) |
+			       OMPIC_DATA(1));
+	}
+}
+
+/* IPI irq handler: ack the ompic, then dispatch every pending message. */
+static irqreturn_t ompic_ipi_handler(int irq, void *dev_id)
+{
+	unsigned int cpu = smp_processor_id();
+	unsigned long *pending_ops = &per_cpu(ops, cpu);
+	unsigned long msgs;
+
+	ompic_writereg(ompic_base, OMPIC_CTRL(cpu), OMPIC_CTRL_IRQ_ACK);
+	while ((msgs = xchg(pending_ops, 0)) != 0) {
+		/*
+		 * On OpenRISC the atomic xchg() call implies a memory
+		 * barrier.  Otherwise we may need an smp_rmb(); paired
+		 * with the write in ompic_raise_softirq.
+		 */
+		do {
+			unsigned long ipi_msg;
+
+			/* Take the lowest set bit as the next message. */
+			ipi_msg = __ffs(msgs);
+			msgs &= ~(1UL << ipi_msg);
+
+			handle_IPI(ipi_msg);
+		} while (msgs);
+	}
+
+	return IRQ_HANDLED;
+}
+
+/* Probe the ompic DT node: map registers, hook the IPI irq, set IPI sender. */
+static int __init ompic_of_init(struct device_node *node,
+				struct device_node *parent)
+{
+	struct resource res;
+	int irq, ret;
+
+	/* Validate the DT */
+	if (ompic_base) {
+		pr_err("ompic: duplicate ompic's are not supported\n");
+		return -EEXIST;
+	}
+
+	if (of_address_to_resource(node, 0, &res)) {
+		pr_err("ompic: reg property requires an address and size\n");
+		return -EINVAL;
+	}
+
+	if (resource_size(&res) < (num_possible_cpus() * OMPIC_CPUBYTES)) {
+		pr_err("ompic: reg size, currently %llu must be at least %u\n",
+			(unsigned long long)resource_size(&res),
+			(num_possible_cpus() * OMPIC_CPUBYTES));
+		return -EINVAL;
+	}
+
+	/* Setup the device; ioremap() reports failure as NULL, not ERR_PTR */
+	ompic_base = ioremap(res.start, resource_size(&res));
+	if (!ompic_base) {
+		pr_err("ompic: unable to map registers\n");
+		return -ENOMEM;
+	}
+
+	irq = irq_of_parse_and_map(node, 0);
+	if (irq <= 0) {
+		pr_err("ompic: unable to parse device irq\n");
+		ret = -EINVAL;
+		goto out_unmap;
+	}
+
+	ret = request_irq(irq, ompic_ipi_handler, IRQF_PERCPU,
+				"ompic_ipi", NULL);
+	if (ret)
+		goto out_irq_disp;
+
+	set_smp_cross_call(ompic_raise_softirq);
+
+	return 0;
+
+out_irq_disp:
+	irq_dispose_mapping(irq);
+out_unmap:
+	iounmap(ompic_base);
+	ompic_base = NULL;
+	return ret;
+}
+IRQCHIP_DECLARE(ompic, "openrisc,ompic", ompic_of_init);