diff --git a/arch/arm/cpu/armv7/at91/cpu.c b/arch/arm/cpu/armv7/at91/cpu.c
index 2fbf60d5427bdf3145930de0a070e506fda240bb..8d86f97e3dab36be1b26af44ec9999a2c4d3a477 100644
--- a/arch/arm/cpu/armv7/at91/cpu.c
+++ b/arch/arm/cpu/armv7/at91/cpu.c
@@ -61,6 +61,8 @@ int print_cpuinfo(void)
 
 void enable_caches(void)
 {
+	icache_enable();
+	dcache_enable();
 }
 
 unsigned int get_chip_id(void)
diff --git a/arch/avr32/cpu/cache.c b/arch/avr32/cpu/cache.c
index ab0374e587b4ad478fc97d5ace0f426ad26c7745..b3ffc3348b514e90805336b4e1a308b37b84bf92 100644
--- a/arch/avr32/cpu/cache.c
+++ b/arch/avr32/cpu/cache.c
@@ -24,31 +24,31 @@ void dcache_clean_range(volatile void *start, size_t size)
 	sync_write_buffer();
 }
 
-void dcache_invalidate_range(volatile void *start, size_t size)
+void invalidate_dcache_range(unsigned long start, unsigned long stop)
 {
-	unsigned long v, begin, end, linesz;
+	unsigned long v, linesz;
 
 	linesz = CONFIG_SYS_DCACHE_LINESZ;
 
 	/* You asked for it, you got it */
-	begin = (unsigned long)start & ~(linesz - 1);
-	end = ((unsigned long)start + size + linesz - 1) & ~(linesz - 1);
+	start = start & ~(linesz - 1);
+	stop = (stop + linesz - 1) & ~(linesz - 1);
 
-	for (v = begin; v < end; v += linesz)
+	for (v = start; v < stop; v += linesz)
 		dcache_invalidate_line((void *)v);
 }
 
-void dcache_flush_range(volatile void *start, size_t size)
+void flush_dcache_range(unsigned long start, unsigned long stop)
 {
-	unsigned long v, begin, end, linesz;
+	unsigned long v, linesz;
 
 	linesz = CONFIG_SYS_DCACHE_LINESZ;
 
 	/* You asked for it, you got it */
-	begin = (unsigned long)start & ~(linesz - 1);
-	end = ((unsigned long)start + size + linesz - 1) & ~(linesz - 1);
+	start = start & ~(linesz - 1);
+	stop = (stop + linesz - 1) & ~(linesz - 1);
 
-	for (v = begin; v < end; v += linesz)
+	for (v = start; v < stop; v += linesz)
 		dcache_flush_line((void *)v);
 
 	sync_write_buffer();
diff --git a/arch/avr32/include/asm/arch-at32ap700x/cacheflush.h b/arch/avr32/include/asm/arch-at32ap700x/cacheflush.h
index 13d6d3aed366d875357f37e171ae315d63b52bd7..e08cd9de6d22687d22d1cde3872efb74f3f07271 100644
--- a/arch/avr32/include/asm/arch-at32ap700x/cacheflush.h
+++ b/arch/avr32/include/asm/arch-at32ap700x/cacheflush.h
@@ -49,9 +49,7 @@ static inline void icache_invalidate_line(volatile void *vaddr)
  * Applies the above functions on all lines that are touched by the
  * specified virtual address range.
  */
-void dcache_invalidate_range(volatile void *start, size_t len);
 void dcache_clean_range(volatile void *start, size_t len);
-void dcache_flush_range(volatile void *start, size_t len);
 void icache_invalidate_range(volatile void *start, size_t len);
 
 static inline void dcache_flush_unlocked(void)
diff --git a/arch/avr32/include/asm/dma-mapping.h b/arch/avr32/include/asm/dma-mapping.h
index 95ea81ff5e624f7b2d11edf9293afe8b164b0518..dbdd2fee3806c32e7c8a48597d0b603882cd7564 100644
--- a/arch/avr32/include/asm/dma-mapping.h
+++ b/arch/avr32/include/asm/dma-mapping.h
@@ -23,13 +23,15 @@ static inline unsigned long dma_map_single(volatile void *vaddr, size_t len,
 
 	switch (dir) {
 	case DMA_BIDIRECTIONAL:
-		dcache_flush_range(vaddr, len);
+		flush_dcache_range((unsigned long)vaddr,
+				   (unsigned long)vaddr + len);
 		break;
 	case DMA_TO_DEVICE:
 		dcache_clean_range(vaddr, len);
 		break;
 	case DMA_FROM_DEVICE:
-		dcache_invalidate_range(vaddr, len);
+		invalidate_dcache_range((unsigned long)vaddr,
+					(unsigned long)vaddr + len);
 		break;
 	default:
 		/* This will cause a linker error */
diff --git a/arch/avr32/lib/board.c b/arch/avr32/lib/board.c
index 7680102f523f2f8ba5ea090d08d7fad4235a0bd1..bf0997f98dc3802626418d190bb3cec2e165992c 100644
--- a/arch/avr32/lib/board.c
+++ b/arch/avr32/lib/board.c
@@ -65,8 +65,8 @@ static void dma_alloc_init(void)
 	printf("DMA: Using memory from 0x%08lx to 0x%08lx\n",
 	       dma_alloc_start, dma_alloc_end);
 
-	dcache_invalidate_range(cached(dma_alloc_start),
-				dma_alloc_end - dma_alloc_start);
+	invalidate_dcache_range((unsigned long)cached(dma_alloc_start),
+				dma_alloc_end);
 }
 
 void *dma_alloc_coherent(size_t len, unsigned long *handle)
diff --git a/drivers/net/macb.c b/drivers/net/macb.c
index 781a272cff2edec4650025c5c3b94515186e0f41..01a94a4c4d117795fc6b2f42b8da701974b7e184 100644
--- a/drivers/net/macb.c
+++ b/drivers/net/macb.c
@@ -40,17 +40,21 @@
 
 #include "macb.h"
 
-#define CONFIG_SYS_MACB_RX_BUFFER_SIZE		4096
-#define CONFIG_SYS_MACB_RX_RING_SIZE		(CONFIG_SYS_MACB_RX_BUFFER_SIZE / 128)
-#define CONFIG_SYS_MACB_TX_RING_SIZE		16
-#define CONFIG_SYS_MACB_TX_TIMEOUT		1000
-#define CONFIG_SYS_MACB_AUTONEG_TIMEOUT	5000000
+#define MACB_RX_BUFFER_SIZE		4096
+#define MACB_RX_RING_SIZE		(MACB_RX_BUFFER_SIZE / 128)
+#define MACB_TX_RING_SIZE		16
+#define MACB_TX_TIMEOUT		1000
+#define MACB_AUTONEG_TIMEOUT	5000000
 
 struct macb_dma_desc {
 	u32	addr;
 	u32	ctrl;
 };
 
+#define DMA_DESC_BYTES(n)	(n * sizeof(struct macb_dma_desc))
+#define MACB_TX_DMA_DESC_SIZE	(DMA_DESC_BYTES(MACB_TX_RING_SIZE))
+#define MACB_RX_DMA_DESC_SIZE	(DMA_DESC_BYTES(MACB_RX_RING_SIZE))
+
 #define RXADDR_USED		0x00000001
 #define RXADDR_WRAP		0x00000002
 
@@ -170,7 +174,7 @@ int macb_miiphy_read(const char *devname, u8 phy_adr, u8 reg, u16 *value)
 	struct eth_device *dev = eth_get_dev_by_name(devname);
 	struct macb_device *macb = to_macb(dev);
 
-	if ( macb->phy_addr != phy_adr )
+	if (macb->phy_addr != phy_adr)
 		return -1;
 
 	arch_get_mdio_control(devname);
@@ -184,7 +188,7 @@ int macb_miiphy_write(const char *devname, u8 phy_adr, u8 reg, u16 value)
 	struct eth_device *dev = eth_get_dev_by_name(devname);
 	struct macb_device *macb = to_macb(dev);
 
-	if ( macb->phy_addr != phy_adr )
+	if (macb->phy_addr != phy_adr)
 		return -1;
 
 	arch_get_mdio_control(devname);
@@ -194,6 +198,39 @@ int macb_miiphy_write(const char *devname, u8 phy_adr, u8 reg, u16 value)
 }
 #endif
 
+#define RX	1
+#define TX	0
+static inline void macb_invalidate_ring_desc(struct macb_device *macb, bool rx)
+{
+	if (rx)
+		invalidate_dcache_range(macb->rx_ring_dma, macb->rx_ring_dma +
+			MACB_RX_DMA_DESC_SIZE);
+	else
+		invalidate_dcache_range(macb->tx_ring_dma, macb->tx_ring_dma +
+			MACB_TX_DMA_DESC_SIZE);
+}
+
+static inline void macb_flush_ring_desc(struct macb_device *macb, bool rx)
+{
+	if (rx)
+		flush_dcache_range(macb->rx_ring_dma, macb->rx_ring_dma +
+			MACB_RX_DMA_DESC_SIZE);
+	else
+		flush_dcache_range(macb->tx_ring_dma, macb->tx_ring_dma +
+			MACB_TX_DMA_DESC_SIZE);
+}
+
+static inline void macb_flush_rx_buffer(struct macb_device *macb)
+{
+	flush_dcache_range(macb->rx_buffer_dma, macb->rx_buffer_dma +
+				MACB_RX_BUFFER_SIZE);
+}
+
+static inline void macb_invalidate_rx_buffer(struct macb_device *macb)
+{
+	invalidate_dcache_range(macb->rx_buffer_dma, macb->rx_buffer_dma +
+				MACB_RX_BUFFER_SIZE);
+}
 
 #if defined(CONFIG_CMD_NET)
 
@@ -208,23 +245,28 @@ static int macb_send(struct eth_device *netdev, void *packet, int length)
 
 	ctrl = length & TXBUF_FRMLEN_MASK;
 	ctrl |= TXBUF_FRAME_END;
-	if (tx_head == (CONFIG_SYS_MACB_TX_RING_SIZE - 1)) {
+	if (tx_head == (MACB_TX_RING_SIZE - 1)) {
 		ctrl |= TXBUF_WRAP;
 		macb->tx_head = 0;
-	} else
+	} else {
 		macb->tx_head++;
+	}
 
 	macb->tx_ring[tx_head].ctrl = ctrl;
 	macb->tx_ring[tx_head].addr = paddr;
 	barrier();
+	macb_flush_ring_desc(macb, TX);
+	/* Do we need check paddr and length is dcache line aligned? */
+	flush_dcache_range(paddr, paddr + length);
 	macb_writel(macb, NCR, MACB_BIT(TE) | MACB_BIT(RE) | MACB_BIT(TSTART));
 
 	/*
 	 * I guess this is necessary because the networking core may
 	 * re-use the transmit buffer as soon as we return...
 	 */
-	for (i = 0; i <= CONFIG_SYS_MACB_TX_TIMEOUT; i++) {
+	for (i = 0; i <= MACB_TX_TIMEOUT; i++) {
 		barrier();
+		macb_invalidate_ring_desc(macb, TX);
 		ctrl = macb->tx_ring[tx_head].ctrl;
 		if (ctrl & TXBUF_USED)
 			break;
@@ -233,7 +275,7 @@ static int macb_send(struct eth_device *netdev, void *packet, int length)
 
 	dma_unmap_single(packet, length, paddr);
 
-	if (i <= CONFIG_SYS_MACB_TX_TIMEOUT) {
+	if (i <= MACB_TX_TIMEOUT) {
 		if (ctrl & TXBUF_UNDERRUN)
 			printf("%s: TX underrun\n", netdev->name);
 		if (ctrl & TXBUF_EXHAUSTED)
@@ -253,10 +295,12 @@ static void reclaim_rx_buffers(struct macb_device *macb,
 	unsigned int i;
 
 	i = macb->rx_tail;
+
+	macb_invalidate_ring_desc(macb, RX);
 	while (i > new_tail) {
 		macb->rx_ring[i].addr &= ~RXADDR_USED;
 		i++;
-		if (i > CONFIG_SYS_MACB_RX_RING_SIZE)
+		if (i > MACB_RX_RING_SIZE)
 			i = 0;
 	}
 
@@ -266,6 +310,7 @@ static void reclaim_rx_buffers(struct macb_device *macb,
 	}
 
 	barrier();
+	macb_flush_ring_desc(macb, RX);
 	macb->rx_tail = new_tail;
 }
 
@@ -279,6 +324,8 @@ static int macb_recv(struct eth_device *netdev)
 	u32 status;
 
 	for (;;) {
+		macb_invalidate_ring_desc(macb, RX);
+
 		if (!(macb->rx_ring[rx_tail].addr & RXADDR_USED))
 			return -1;
 
@@ -292,10 +339,12 @@ static int macb_recv(struct eth_device *netdev)
 		if (status & RXBUF_FRAME_END) {
 			buffer = macb->rx_buffer + 128 * macb->rx_tail;
 			length = status & RXBUF_FRMLEN_MASK;
+
+			macb_invalidate_rx_buffer(macb);
 			if (wrapped) {
 				unsigned int headlen, taillen;
 
-				headlen = 128 * (CONFIG_SYS_MACB_RX_RING_SIZE
+				headlen = 128 * (MACB_RX_RING_SIZE
 						 - macb->rx_tail);
 				taillen = length - headlen;
 				memcpy((void *)NetRxPackets[0],
@@ -306,11 +355,11 @@ static int macb_recv(struct eth_device *netdev)
 			}
 
 			NetReceive(buffer, length);
-			if (++rx_tail >= CONFIG_SYS_MACB_RX_RING_SIZE)
+			if (++rx_tail >= MACB_RX_RING_SIZE)
 				rx_tail = 0;
 			reclaim_rx_buffers(macb, rx_tail);
 		} else {
-			if (++rx_tail >= CONFIG_SYS_MACB_RX_RING_SIZE) {
+			if (++rx_tail >= MACB_RX_RING_SIZE) {
 				wrapped = 1;
 				rx_tail = 0;
 			}
@@ -333,7 +382,7 @@ static void macb_phy_reset(struct macb_device *macb)
 	macb_mdio_write(macb, MII_BMCR, (BMCR_ANENABLE
 					 | BMCR_ANRESTART));
 
-	for (i = 0; i < CONFIG_SYS_MACB_AUTONEG_TIMEOUT / 100; i++) {
+	for (i = 0; i < MACB_AUTONEG_TIMEOUT / 100; i++) {
 		status = macb_mdio_read(macb, MII_BMSR);
 		if (status & BMSR_ANEGCOMPLETE)
 			break;
@@ -385,9 +434,8 @@ static int macb_phy_init(struct macb_device *macb)
 	arch_get_mdio_control(netdev->name);
 #ifdef CONFIG_MACB_SEARCH_PHY
 	/* Auto-detect phy_addr */
-	if (!macb_phy_find(macb)) {
+	if (!macb_phy_find(macb))
 		return 0;
-	}
 #endif /* CONFIG_MACB_SEARCH_PHY */
 
 	/* Check if the PHY is up to snuff... */
@@ -414,7 +462,7 @@ static int macb_phy_init(struct macb_device *macb)
 		/* Try to re-negotiate if we don't have link already. */
 		macb_phy_reset(macb);
 
-		for (i = 0; i < CONFIG_SYS_MACB_AUTONEG_TIMEOUT / 100; i++) {
+		for (i = 0; i < MACB_AUTONEG_TIMEOUT / 100; i++) {
 			status = macb_mdio_read(macb, MII_BMSR);
 			if (status & BMSR_LSTATUS)
 				break;
@@ -499,21 +547,28 @@ static int macb_init(struct eth_device *netdev, bd_t *bd)
 
 	/* initialize DMA descriptors */
 	paddr = macb->rx_buffer_dma;
-	for (i = 0; i < CONFIG_SYS_MACB_RX_RING_SIZE; i++) {
-		if (i == (CONFIG_SYS_MACB_RX_RING_SIZE - 1))
+	for (i = 0; i < MACB_RX_RING_SIZE; i++) {
+		if (i == (MACB_RX_RING_SIZE - 1))
 			paddr |= RXADDR_WRAP;
 		macb->rx_ring[i].addr = paddr;
 		macb->rx_ring[i].ctrl = 0;
 		paddr += 128;
 	}
-	for (i = 0; i < CONFIG_SYS_MACB_TX_RING_SIZE; i++) {
+	macb_flush_ring_desc(macb, RX);
+	macb_flush_rx_buffer(macb);
+
+	for (i = 0; i < MACB_TX_RING_SIZE; i++) {
 		macb->tx_ring[i].addr = 0;
-		if (i == (CONFIG_SYS_MACB_TX_RING_SIZE - 1))
+		if (i == (MACB_TX_RING_SIZE - 1))
 			macb->tx_ring[i].ctrl = TXBUF_USED | TXBUF_WRAP;
 		else
 			macb->tx_ring[i].ctrl = TXBUF_USED;
 	}
-	macb->rx_tail = macb->tx_head = macb->tx_tail = 0;
+	macb_flush_ring_desc(macb, TX);
+
+	macb->rx_tail = 0;
+	macb->tx_head = 0;
+	macb->tx_tail = 0;
 
 	macb_writel(macb, RBQP, macb->rx_ring_dma);
 	macb_writel(macb, TBQP, macb->tx_ring_dma);
@@ -654,15 +709,15 @@ int macb_eth_initialize(int id, void *regs, unsigned int phy_addr)
 
 	netdev = &macb->netdev;
 
-	macb->rx_buffer = dma_alloc_coherent(CONFIG_SYS_MACB_RX_BUFFER_SIZE,
+	macb->rx_buffer = dma_alloc_coherent(MACB_RX_BUFFER_SIZE,
 					     &macb->rx_buffer_dma);
-	macb->rx_ring = dma_alloc_coherent(CONFIG_SYS_MACB_RX_RING_SIZE
-					   * sizeof(struct macb_dma_desc),
+	macb->rx_ring = dma_alloc_coherent(MACB_RX_DMA_DESC_SIZE,
 					   &macb->rx_ring_dma);
-	macb->tx_ring = dma_alloc_coherent(CONFIG_SYS_MACB_TX_RING_SIZE
-					   * sizeof(struct macb_dma_desc),
+	macb->tx_ring = dma_alloc_coherent(MACB_TX_DMA_DESC_SIZE,
 					   &macb->tx_ring_dma);
 
+	/* TODO: we need check the rx/tx_ring_dma is dcache line aligned */
+
 	macb->regs = regs;
 	macb->phy_addr = phy_addr;
 
diff --git a/drivers/video/atmel_hlcdfb.c b/drivers/video/atmel_hlcdfb.c
index bb4d7d8c1471ad9ca79b203ad4cc7cc03c673c01..935ae42a9c873904ff8e7a174332438fe5d7d34c 100644
--- a/drivers/video/atmel_hlcdfb.c
+++ b/drivers/video/atmel_hlcdfb.c
@@ -171,6 +171,9 @@ void lcd_ctrl_init(void *lcdbase)
 			| LCDC_BASECTRL_DMAIEN | LCDC_BASECTRL_DFETCH;
 	desc->next = (u32)desc;
 
+	/* Flush the DMA descriptor if we enabled dcache */
+	flush_dcache_range((u32)desc, (u32)desc + sizeof(*desc));
+
 	lcdc_writel(&regs->lcdc_baseaddr, desc->address);
 	lcdc_writel(&regs->lcdc_basectrl, desc->control);
 	lcdc_writel(&regs->lcdc_basenext, desc->next);
@@ -194,4 +197,7 @@ void lcd_ctrl_init(void *lcdbase)
 	lcdc_writel(&regs->lcdc_lcden, value | LCDC_LCDEN_PWMEN);
 	while (!(lcdc_readl(&regs->lcdc_lcdsr) & LCDC_LCDSR_PWMSTS))
 		udelay(1);
+
+	/* Enable flushing if we enabled dcache */
+	lcd_set_flush_dcache(1);
 }
diff --git a/include/configs/ethernut5.h b/include/configs/ethernut5.h
index c81fc44b121c5e28db83dea0642b5892a7799b18..4c69af6af3f30da2d60994a3559e95426b8e3468 100644
--- a/include/configs/ethernut5.h
+++ b/include/configs/ethernut5.h
@@ -12,6 +12,8 @@
 
 #include <asm/hardware.h>
 
+#define CONFIG_SYS_GENERIC_BOARD
+
 /* The first stage boot loader expects u-boot running at this address. */
 #define CONFIG_SYS_TEXT_BASE	0x27000000	/* 16MB available */