diff --git a/libv4l-rockchip/libvpu/vp8_enc/encasiccontroller_v2.c b/libv4l-rockchip/libvpu/vp8_enc/encasiccontroller_v2.c
index a5a090afb1c864e18b0ec4ed183847f2650d876a..74468eec08841cba2bf7169c26a612d3ee9c5152 100644
--- a/libv4l-rockchip/libvpu/vp8_enc/encasiccontroller_v2.c
+++ b/libv4l-rockchip/libvpu/vp8_enc/encasiccontroller_v2.c
@@ -65,7 +65,9 @@ int32_t VP8_EncAsicMemAlloc_V2(asicData_s* asic, uint32_t width, uint32_t height
 
   memset(asic->segmentMap.vir_addr, 0, asic->segmentMap.size);
 
-  asic->frmhdr = (uint8_t*)(((int)asic->hdr + 7) & (~7));
+  /* Round hdr up to an 8-byte boundary; unsigned long holds a pointer on both ILP32 and LP64. */
+  asic->frmhdr = (uint8_t*)ALIGN((unsigned long)asic->hdr, 8);
+
   asic->frmHdrBufLen = FRAME_HEADER_SIZE;
 
   return ENCHW_OK;
diff --git a/libv4l-rockchip/libvpu/vp8_enc/enccommon.h b/libv4l-rockchip/libvpu/vp8_enc/enccommon.h
index dc9eeb52285d4aa3562e1533a82837bea4310665..aec74d4697d84a3a422cf72ca1908535b96b9c7d 100644
--- a/libv4l-rockchip/libvpu/vp8_enc/enccommon.h
+++ b/libv4l-rockchip/libvpu/vp8_enc/enccommon.h
@@ -40,4 +40,6 @@ typedef enum
 #define SIGN(a)         ((a) < (0) ? (-1) : (1))
 #define CLIP3(v, min, max)  ((v) < (min) ? (min) : ((v) > (max) ? (max) : (v)))
 
+#define ALIGN(x, a)      (((x) + (a) - 1) & ~((a) - 1))  /* round x up to a multiple of a; only valid when a is a power of two */
+
 #endif
diff --git a/libv4l-rockchip/libvpu/vp8_enc/vp8codeframe.c b/libv4l-rockchip/libvpu/vp8_enc/vp8codeframe.c
index 7b62b357f3e03ba138e1641cad685e53fc39396e..e649d6c0c3d768eb8422704e77ae49e41cf0bd48 100644
--- a/libv4l-rockchip/libvpu/vp8_enc/vp8codeframe.c
+++ b/libv4l-rockchip/libvpu/vp8_enc/vp8codeframe.c
@@ -128,25 +128,28 @@ void VP8SetNewFrame(vp8Instance_s* inst) {
 
   /* header remainder is byte aligned, max 7 bytes = 56 bits */
   if (regs->firstFreeBit != 0) {
-    /* 64-bit aligned stream pointer */
-    uint8_t* pTmp = (uint8_t*)((size_t)(inst->buffer[1].data) & (uint32_t)(~0x07));
     uint32_t val;
 
-    /* Clear remaining bits */
-    for (val = 6; val >= regs->firstFreeBit / 8; val--)
-      pTmp[val] = 0;
+    /* firstFreeBit is a bit count spanning at most 7 bytes, so an 8-byte buffer is enough */
+    uint8_t buf[8];
 
-    val = pTmp[0] << 24;
-    val |= pTmp[1] << 16;
-    val |= pTmp[2] << 8;
-    val |= pTmp[3];
+    ASSERT(regs->firstFreeBit / 8 <= sizeof(buf));
+
+    memset(buf, 0, sizeof(buf));
+    memcpy(buf, inst->asic.frmhdr + regs->outputStrmBase,
+		    regs->firstFreeBit / 8);
+
+    val = buf[0] << 24;
+    val |= buf[1] << 16;
+    val |= buf[2] << 8;
+    val |= buf[3];
 
     regs->strmStartMSB = val;  /* 32 bits to MSB */
 
     if (regs->firstFreeBit > 32) {
-      val = pTmp[4] << 24;
-      val |= pTmp[5] << 16;
-      val |= pTmp[6] << 8;
+      val = buf[4] << 24;
+      val |= buf[5] << 16;
+      val |= buf[6] << 8;
 
       regs->strmStartLSB = val;
     } else
diff --git a/libv4l-rockchip/libvpu/vp8_enc/vp8encapi.c b/libv4l-rockchip/libvpu/vp8_enc/vp8encapi.c
index a967d05fb4a14511d24a39f30092be2cb43c455a..27e104982fbeee3837422d6fac63ce834fc53638 100644
--- a/libv4l-rockchip/libvpu/vp8_enc/vp8encapi.c
+++ b/libv4l-rockchip/libvpu/vp8_enc/vp8encapi.c
@@ -579,21 +579,20 @@ VP8EncRet VP8EncStrmEncode(VP8EncInst inst, const VP8EncIn* pEncIn,
 
   /* Divide stream buffer for every partition */
   {
-    uint8_t* pStart = (uint8_t*)pEncInst->asic.frmhdr;
+    uint8_t* pBuf = (uint8_t*)pEncInst->asic.frmhdr;
     uint32_t bufSize = pEncInst->asic.frmHdrBufLen;
-    uint8_t* pEnd;
+    uint32_t tagSize = 0;
     int32_t status = ENCHW_OK;
 
     /* Frame tag 10 bytes (I-frame) or 3 bytes (P-frame),
      * written by SW at end of frame */
-    pEnd = pStart + 3;
-    if (ct == VP8ENC_INTRA_FRAME) pEnd += 7;
-    if (VP8SetBuffer(&pEncInst->buffer[0], pStart, pEnd - pStart) == ENCHW_NOK)
+    tagSize = (ct == VP8ENC_INTRA_FRAME) ? 10 : 3;
+
+    if (VP8SetBuffer(&pEncInst->buffer[0], pBuf, tagSize) == ENCHW_NOK)
       status = ENCHW_NOK;
 
-    pStart = pEnd;
-    pEnd = pStart + bufSize;
-    if (VP8SetBuffer(&pEncInst->buffer[1], pStart, pEnd - pStart) == ENCHW_NOK)
+    if (VP8SetBuffer(&pEncInst->buffer[1], pBuf + tagSize,
+        bufSize - tagSize) == ENCHW_NOK)
       status = ENCHW_NOK;
 
     if (status == ENCHW_NOK) {