diff --git a/libv4l-rockchip_v2/Makefile.am b/libv4l-rockchip_v2/Makefile.am index 69f02390c4f12a330cbe714d9722bf0b26948854..335e41d227a72e1c667525318f1a4560b7cfeafb 100644 --- a/libv4l-rockchip_v2/Makefile.am +++ b/libv4l-rockchip_v2/Makefile.am @@ -10,7 +10,9 @@ libv4l_encplugin_la_SOURCES = \ libv4l-encplugin-rockchip.c \ libvepu/rk_vepu.c \ libvepu/rk_vepu_debug.c \ + libvepu/common/rk_venc_rate_control.c \ libvepu/h264e/h264e.c \ + libvepu/h264e/h264e_rate_control.c \ libvepu/vp8e/vp8e.c \ libvepu/vp8e/boolhuff.c \ libvepu/vp8e/vp8e_bitstream.c \ diff --git a/libv4l-rockchip_v2/libvepu/common/rk_venc.h b/libv4l-rockchip_v2/libvepu/common/rk_venc.h index ddd329ed2c268e61016a8dd7bfb3cbbf7779f46a..caaaf829047e3c16045715f46085a1e201764c73 100644 --- a/libv4l-rockchip_v2/libvepu/common/rk_venc.h +++ b/libv4l-rockchip_v2/libvepu/common/rk_venc.h @@ -23,6 +23,8 @@ #include "../rk_vepu_interface.h" +#include "rk_venc_rate_control.h" + typedef uint8_t u8; typedef uint16_t u16; typedef uint32_t u32; @@ -67,6 +69,7 @@ struct rk_venc { struct rk_venc_ops *ops; struct rk_vepu_runtime_param runtime_param; + struct v4l2_plugin_rate_control rc; enum ENC_FORMAT fmt; }; diff --git a/libv4l-rockchip_v2/libvepu/common/rk_venc_rate_control.c b/libv4l-rockchip_v2/libvepu/common/rk_venc_rate_control.c new file mode 100644 index 0000000000000000000000000000000000000000..8d856a84de42b603a62c67e9efa9de5778f7451d --- /dev/null +++ b/libv4l-rockchip_v2/libvepu/common/rk_venc_rate_control.c @@ -0,0 +1,601 @@ +/* + * Copyright 2016 Rockchip Electronics Co. LTD + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "rk_venc_rate_control.h" + +#include <assert.h> +#include <memory.h> +#include <stdio.h> + +#include "rk_venc.h" +#include "libvepu/rk_vepu_debug.h" + +#define I32_MAX 2147483647 /* 2 ^ 31 - 1 */ +#define QP_DELTA 4 /* usual cap on the QP change the linear model may pick per picture */ +#define QP_DELTA_LIMIT 10 /* wanted QP rise above which the cap becomes QP_DELTA * 2 */ +#define DRIFT_MAX 0x1FFFFFFF /* upper saturation bound used by get_drift_bits() */ +#define DRIFT_MIN -0x1FFFFFFF /* lower saturation bound used by get_drift_bits() */ +/* Quantizer step per QP (52 entries); rk_venc_init_pic_rc() selects it for ENC_FORMAT_H264. */ +static const int32_t h264_q_step[] = { + 3, 3, 3, 4, 4, 5, 5, 6, 7, 7, + 8, 9, 10, 11, 13, 14, 16, 18, 20, 23, + 25, 28, 32, 36, 40, 45, 51, 57, 64, 72, + 80, 90, 101, 114, 128, 144, 160, 180, 203, 228, + 256, 288, 320, 360, 405, 456, 513, 577, 640, 720, + 810, 896 +}; +/* Step per quantizer index (QINDEX_RANGE entries); rk_venc_init_pic_rc() selects it for ENC_FORMAT_VP8. */ +#define QINDEX_RANGE 128 +static const int32_t vp8_ac_lookup[QINDEX_RANGE] = { + 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, + 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, + 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, + 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, + 54, 55, 56, 57, 58, 60, 62, 64, 66, 68, + 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, + 90, 92, 94, 96, 98, 100, 102, 104, 106, 108, + 110, 112, 114, 116, 119, 122, 125, 128, 131, 134, + 137, 140, 143, 146, 149, 152, 155, 158, 161, 164, + 167, 170, 173, 177, 181, 185, 189, 193, 197, 201, + 205, 209, 213, 217, 221, 225, 229, 234, 239, 245, + 249, 254, 259, 264, 269, 274, 279, 284 +}; +/* Return acc_inter_qp / acc_inter_cnt (via DIV) when both are non-zero, else rc->qp_last, and clear both accumulators. */ +static int32_t get_gop_avg_qp(struct v4l2_plugin_rate_control *rc) +{ + int32_t qp_aver = rc->qp_last; /* fallback when no inter QP was accumulated */ + + if (rc->acc_inter_qp && rc->acc_inter_cnt) + qp_aver = DIV(rc->acc_inter_qp, rc->acc_inter_cnt); + + rc->acc_inter_qp = 0; + rc->acc_inter_cnt = 0; + + return qp_aver; +} + +static int32_t 
get_avg_bits(struct bits_statistic *p, int32_t n) /* Average (via DIV) of the n newest entries of p->bits; n == -1 or n > p->len selects all p->len entries, and an empty table yields 0. */ +{ + int32_t i; + int32_t sum = 0; + int32_t pos = p->pos; + + if (!p->len) + return 0; + + if (n == -1 || n > p->len) + n = p->len; + + i = n; + while (i--) { + if (pos) /* step backwards from the write position, wrapping at index 0 */ + pos--; + else + pos = p->len - 1; + sum += p->bits[pos]; + if (sum < 0) { /* a negative running sum is treated as overflow */ + return I32_MAX / (n - i); + } + } + return DIV(sum, n); +} +/* Return a * b / c. The exact quotient is used when a * b fits in 32 bits; otherwise the operands are pre-shifted and the result is approximate. Yields 0 when a or b is 0, and 0x7FFFFFFF carrying the result's sign when c is 0 or the result would not fit. */ +static int32_t axb_div_c(int32_t a, int32_t b, int32_t c) +{ + uint32_t left = 32; + uint32_t right = 0; + uint32_t shift; + int32_t sign = 1; + int32_t tmp; + + if (a == 0 || b == 0) + return 0; + else if ((int64_t)a * b >= INT32_MIN && (int64_t)a * b <= INT32_MAX && c != 0) /* range test on the 64-bit product, so the test itself cannot overflow */ + return (int32_t)((int64_t)a * b / c); + + if (a < 0) { + sign = -1; + a = -a; + } + if (b < 0) { + sign *= -1; + b = -b; + } + if (c < 0) { + sign *= -1; + c = -c; + } + + if (c == 0) + return 0x7FFFFFFF * sign; + + if (b > a) { /* keep the larger magnitude in a */ + tmp = b; + b = a; + a = tmp; + } + + for (--left; (((uint32_t)a << left) >> left) != (uint32_t)a; --left) /* largest shift that keeps every bit of a */ + ; + + left--; + + while (((uint32_t)b >> right) > (uint32_t)c) /* smallest shift that brings b down to c or below */ + right++; + + if (right > left) { + return 0x7FFFFFFF * sign; + } else { + shift = left - right; + return (int32_t)((((uint32_t)a << shift) / + (uint32_t)c * (uint32_t)b) >> shift) * sign; + } +} +/* Zero a bits_statistic table. */ +static inline void reset_statistic(struct bits_statistic *p) +{ + memset(p, 0, sizeof(*p)); +} +/* Zero a linear model and seed its last QP with qp. */ +static inline void reset_linear_model(struct linear_model *p, int32_t qp) +{ + memset(p, 0, sizeof(*p)); + p->qp_last = qp; +} +/* Append bits to the ring in p: write at p->pos, wrap pos at STATISTIC_TABLE_LENGTH and grow len up to that length. */ +static void update_statitistic(struct bits_statistic *p, int32_t bits) +{ + const int32_t clen = STATISTIC_TABLE_LENGTH; + + p->bits[p->pos] = bits; + + if (++p->pos >= clen) { + p->pos = 0; + } + if (p->len < clen) { + p->len++; + } +} + +/* + * Store in rc->gop the percentage (capped at 99) of acc_bits_cnt spent on the latest + * intra frame, then clear acc_bits_cnt; the ratio budgets the next intra frame. + */ +static void save_intra_frm_ratio(struct v4l2_plugin_rate_control *rc) +{ + if (rc->acc_bits_cnt) { + int32_t intra_frm_ratio = + axb_div_c(get_avg_bits(&rc->intra, 1), + rc->mb_per_pic, 256) * 100; + intra_frm_ratio = 
DIV(intra_frm_ratio, rc->acc_bits_cnt); + intra_frm_ratio = MIN(99, intra_frm_ratio); + + update_statitistic(&rc->gop, intra_frm_ratio); + } + rc->acc_bits_cnt = 0; +} + +static void update_pid_ctrl(struct bits_statistic *p, int32_t bits) +{ + p->len = 3; + + p->bits[0] = bits - p->bits[2]; /* Derivative */ + if ((bits > 0) && (bits + p->bits[1] > p->bits[1])) + p->bits[1] = bits + p->bits[1]; /* Integral */ + if ((bits < 0) && (bits + p->bits[1] < p->bits[1])) + p->bits[1] = bits + p->bits[1]; /* Integral */ + p->bits[2] = bits; /* Proportional */ + VPU_PLG_DBG("P %d I %d D %d\n", p->bits[2], p->bits[1], p->bits[0]); +} + +static inline int32_t get_pid_ctrl_value(struct bits_statistic *p) +{ + return DIV(p->bits[2] * 40 + p->bits[1] * 60 + p->bits[0] * 1, 1000); +} + +/* + * according to linear formula 'R * Q * Q = b * Q + a' + * now give the target R, calculate a Qp value using + * approximation + */ +static int32_t calculate_qp_using_linear_model( + struct v4l2_plugin_rate_control *rc, + struct linear_model *model, + int64_t r) +{ + int32_t qp = model->qp_last; + int64_t estimate_r = 0; + int64_t diff = 0; + int64_t diff_min = I32_MAX; + int64_t qp_best = qp; + int32_t tmp; + + VPU_PLG_DBG("a %lld b %lld\n", model->a, model->b); + + if (model->b == 0 && model->a == 0) { + return qp_best; + } + + if (r <= 0) { + qp = CLIP3(qp_best + QP_DELTA, rc->qp_min, rc->qp_max); + return qp; + } + + do { + int64_t qstep = rc->qstep[qp]; + estimate_r = + DIV(model->b, qstep) + DIV(model->a, qstep * qstep); + diff = estimate_r - r; + if (ABS(diff) < diff_min) { + diff_min = ABS(diff); + qp_best = qp; + if (diff > 0) { + qp++; + } else { + qp--; + } + } else { + break; + } + } while (qp <= rc->qp_max && qp >= rc->qp_min); + + tmp = qp_best - model->qp_last; + if (tmp > QP_DELTA) { + qp_best = model->qp_last + QP_DELTA; + /* + * when there is a bit gap between requirement and actual bits, + * delta qp cannot quickly catch the requirement. 
+ */ + if (tmp > QP_DELTA_LIMIT) + qp_best = model->qp_last + QP_DELTA * 2; + } else if (tmp < -QP_DELTA) { + qp_best = model->qp_last - QP_DELTA; + } + + model->qp_last = qp_best; + + return qp_best; +} + +/* Choose rc->qp for the current picture: qp_fixed when picture rate control is off, otherwise a model-based QP for the PID-corrected target bits. */ +void calculate_pic_qp(struct v4l2_plugin_rate_control *rc) +{ + int32_t target_bits; + int32_t norm_bits; + + if (rc->pic_rc_en != true) { + rc->qp = rc->qp_fixed; + return; + } + + if (rc->cur_frmtype == INTRA_FRAME) { + /* + * without intra statistics, start from the average QP of the + * inter frames of the previous GOP. + */ + rc->qp = get_gop_avg_qp(rc); + save_intra_frm_ratio(rc); + /* + * if the previous frame was intra as well (all-intra coding), + * use the intra frame R-Q model instead. + */ + if (rc->pre_frmtype == INTRA_FRAME) { + target_bits = rc->target_bits - + get_pid_ctrl_value(&rc->pid_intra); + + norm_bits = axb_div_c(target_bits, 256, rc->mb_per_pic); /* target scaled by 256 / mb_per_pic */ + rc->qp = calculate_qp_using_linear_model(rc, + &rc->intra_frames, + norm_bits); + } + } else { + /* + * calculate qp by matching to previous + * inter frames R-Q curve + */ + target_bits = rc->target_bits - + get_pid_ctrl_value(&rc->pid_inter); + + norm_bits = axb_div_c(target_bits, 256, rc->mb_per_pic); /* target scaled by 256 / mb_per_pic */ + rc->qp = calculate_qp_using_linear_model(rc, &rc->inter_frames, + norm_bits); + } +} +/* Record one sample (x = qstep, y = r * qstep * qstep) in the model's ring of LINEAR_MODEL_STATISTIC_COUNT entries. */ +static void store_linear_x_y(struct linear_model *model, int32_t r, int32_t qstep) +{ + model->qp[model->i] = qstep; + model->r[model->i] = r; + model->y[model->i] = (int64_t)r * qstep * qstep; /* widen first: the int32_t product can overflow */ + + model->n++; + model->n = MIN(model->n, LINEAR_MODEL_STATISTIC_COUNT); + + model->i++; + model->i %= LINEAR_MODEL_STATISTIC_COUNT; +} + +/* + * Calculate the coefficients 'b' and 'a' using ordinary + * least squares. 
+ * y = b * x + a, with x = model->qp[] and y = model->y[] + * b_num = n * accumulate(x * y) - accumulate(x) * accumulate(y) + * denom = n * accumulate(x * x) - accumulate(x) * accumulate(x) + * b = b_num / denom, or 0 when denom is 0 (n == 1 or all x equal) + * a = average(y) - b * average(x) + * All divisions go through the DIV() macro. + */ +static void calculate_linear_coefficient(struct linear_model *model) +{ + int i = 0; + int n; + int64_t acc_xy = 0; + int64_t acc_x = 0; + int64_t acc_y = 0; + int64_t acc_sq_x = 0; + + int64_t b_num = 0; + int64_t denom = 0; + + int64_t *x = model->qp; + int64_t *y = model->y; + + n = model->n; /* callers store a sample first, so n is at least 1 here */ + i = n; + + while (i--) { + acc_xy += x[i] * y[i]; + acc_x += x[i]; + acc_y += y[i]; + acc_sq_x += x[i] * x[i]; + } + + b_num = n * acc_xy - acc_x * acc_y; + denom = n * acc_sq_x - acc_x * acc_x; + + model->b = denom ? DIV(b_num, denom) : 0; /* denom is 0 for a single sample or identical x values: use slope 0 */ + model->a = DIV(acc_y, n) - DIV(acc_x * model->b, n); +} + +/* + * At the start of rate control, estimate a QP from the per-picture bit budget + * and picture size using the empirical table qp_tbl (row 0 thresholds, row 1 QPs). + */ +static int32_t caluate_qp_by_bits_est(int32_t bits, int32_t pels, + const int32_t qp_tbl[2][11]) +{ + const int32_t upscale = 8000; + int32_t i = -1; + + /* prevents overflow */ + if (bits > 1000000) + return qp_tbl[1][10]; + + /* make room for multiplication */ + pels >>= 8; + bits >>= 5; + + /* adjust the bits value for the current resolution */ + bits *= pels + 250; + assert(pels > 0); + assert(bits > 0); + bits /= 350 + (3 * pels) / 4; + bits = axb_div_c(bits, upscale, pels << 6); + + while (qp_tbl[0][++i] < bits); /* relies on a final threshold that is never below bits (vp8e_qp_tbl ends with 0x7FFFFFFF) */ + + return qp_tbl[1][i]; +} +/* Advance the virtual buffer by time_inc ticks and return the saturated gap between the bits allowed so far and the bits actually produced. */ +static int32_t get_drift_bits(struct virt_buffer *vb, + int32_t time_inc) +{ + int32_t drift, target; + + /* + * saturate actual_bits, this is to prevent overflows caused by much + * greater bitrate setting than is really possible to reach. 
+ */ + vb->actual_bits = CLIP3(vb->actual_bits, DRIFT_MIN, DRIFT_MAX); + + vb->pic_time_inc += time_inc; + vb->virt_bits_cnt += axb_div_c(vb->bit_rate, time_inc, vb->time_scale); /* bits the target rate allows for time_inc ticks */ + target = vb->virt_bits_cnt - vb->actual_bits; + + /* saturate target, prevents rc going totally out of control. + This situation should never happen. */ + target = CLIP3(target, DRIFT_MIN, DRIFT_MAX); + + /* picture time inc must be in range of [0, time_scale) */ + while (vb->pic_time_inc >= vb->time_scale) { /* each full time_scale removes one bit_rate from both counters */ + vb->pic_time_inc -= vb->time_scale; + vb->virt_bits_cnt -= vb->bit_rate; + vb->actual_bits -= vb->bit_rate; + } + + drift = axb_div_c(vb->bit_rate, vb->pic_time_inc, vb->time_scale); + drift -= vb->virt_bits_cnt; + vb->virt_bits_cnt += drift; /* virt_bits_cnt now equals bit_rate * pic_time_inc / time_scale */ + + return target; +} +/* Recompute the per-picture bit budget (bit_rate * fps_denom / fps_num) after a bit-rate or frame-rate change. */ +void rk_venc_recalc_parameter(struct v4l2_plugin_rate_control *rc) +{ + rc->vb.bits_per_pic = axb_div_c(rc->vb.bit_rate, + rc->fps_denom, rc->fps_num); +} +/* Initialise picture-level rate control: select the step table for the encoder format, estimate the initial QP when rc->qp is -1 and reset all statistics. Returns false for an unsupported format. */ +bool rk_venc_init_pic_rc(struct v4l2_plugin_rate_control *rc, + const int32_t qp_tbl[2][11]) +{ + struct rk_venc *enc = container_of(rc, struct rk_venc, rc); + struct virt_buffer *vb = &rc->vb; + + switch (enc->fmt) { + case ENC_FORMAT_H264: + rc->qstep = h264_q_step; + rc->qstep_size = sizeof(h264_q_step) / sizeof(int32_t); + break; + case ENC_FORMAT_VP8: + rc->qstep = vp8_ac_lookup; + rc->qstep_size = sizeof(vp8_ac_lookup) / sizeof(int32_t); + break; + default: + VPU_PLG_ERR("unsupport encoder format %d\n", (int)enc->fmt); + return false; + } + + if (rc->qp == -1) { /* -1 requests an estimate from the bit budget and the picture area */ + int32_t tmp = axb_div_c(vb->bit_rate, rc->fps_denom, rc->fps_num); + rc->qp = caluate_qp_by_bits_est(tmp, rc->mb_per_pic * 16 * 16, qp_tbl); + } + + rc->qp = CLIP3(rc->qp, rc->qp_min, rc->qp_max); + + rc->cur_frmtype = INTRA_FRAME; + rc->pre_frmtype = INTER_FRAME; + + rc->qp_last = rc->qp; + rc->qp_fixed = rc->qp; /* QP used by calculate_pic_qp() when pic_rc_en is false */ + + vb->bits_per_pic = axb_div_c(vb->bit_rate, rc->fps_denom, rc->fps_num); + + reset_statistic(&rc->pid_inter); + reset_statistic(&rc->pid_intra); + reset_statistic(&rc->intra); + 
reset_statistic(&rc->gop); + + reset_linear_model(&rc->intra_frames, rc->qp); + reset_linear_model(&rc->inter_frames, rc->qp); + + rc->acc_inter_qp = 0; + rc->acc_inter_cnt = 0; + rc->acc_bits_cnt = 0; + + rc->window_len = rc->gop_len; + vb->window_rem = rc->gop_len; + rc->intra_interval_ctrl = rc->intra_interval = rc->gop_len; + rc->target_bits = 0; + + return true; +} +/* Feed back the size in bytes of the picture just encoded: update the PID error, the R-Q model samples and the virtual-buffer counters. */ +void rk_venc_after_pic_rc(struct v4l2_plugin_rate_control *rc, + uint32_t bytes) +{ + struct virt_buffer *vb = &rc->vb; + int32_t bits = (int32_t)bytes * 8; + int32_t norm_bits = 0; + + VPU_PLG_INF("get actual bits %d\n", bits); + + rc->acc_bits_cnt += bits; + + /* store the error between target and actual frame size */ + if (rc->cur_frmtype != INTRA_FRAME) { + /* saturate the error to avoid inter frames with + * mostly intra MBs to affect too much */ + update_pid_ctrl(&rc->pid_inter, + MIN(bits - rc->target_bits, 2 * rc->target_bits)); + } else { + update_pid_ctrl(&rc->pid_intra, bits - rc->target_bits); + } + + norm_bits = axb_div_c(bits, 256, rc->mb_per_pic); /* bits scaled by 256 / mb_per_pic */ + + /* update number of bits used for residual, inter or intra */ + if (rc->cur_frmtype != INTRA_FRAME) { + store_linear_x_y(&rc->inter_frames, norm_bits, rc->qstep[rc->qp]); + calculate_linear_coefficient(&rc->inter_frames); + } else { + update_statitistic(&rc->intra, norm_bits); /* read back by save_intra_frm_ratio() */ + + store_linear_x_y(&rc->intra_frames, norm_bits, rc->qstep[rc->qp]); + calculate_linear_coefficient(&rc->intra_frames); + } + + vb->bucket_fullness += bits; + vb->actual_bits += bits; +} +/* Compute rc->target_bits and rc->qp for the next picture of type frmtype; timeInc is the time step in vb.time_scale ticks (the callers in this patch pass 0 or 1). */ +void rk_venc_before_pic_rc(struct v4l2_plugin_rate_control *rc, + uint32_t timeInc, enum FRAME_TYPE frmtype) +{ + struct virt_buffer *vb = &rc->vb; + int32_t rcWindow, intraBits = 0, tmp = 0; + + rc->cur_frmtype = frmtype; + + tmp = get_drift_bits(&rc->vb, (int32_t)timeInc); /* surplus (or deficit) of allowed over produced bits */ + + if (vb->window_rem == 0) { /* window finished: start a new one and clear the PID state */ + vb->window_rem = rc->window_len - 1; + reset_statistic(&rc->pid_inter); + if (rc->cur_frmtype != rc->pre_frmtype) + reset_statistic(&rc->pid_intra); + } else { + 
vb->window_rem--; + } + + if (rc->cur_frmtype != INTRA_FRAME && + rc->intra_interval > 1) { + intraBits = (int64_t)vb->bits_per_pic * rc->intra_interval * /* 64-bit product: the three-factor int32_t product overflows at higher bit rates */ + get_avg_bits(&rc->gop, 10) / 100; + intraBits -= vb->bits_per_pic; + intraBits /= (rc->intra_interval - 1); /* spread the intra surplus over the inter frames of the interval */ + intraBits = MAX(0, intraBits); + } + + /* Compensate for intra "stealing" bits from inters. */ + tmp += intraBits * (rc->intra_interval - rc->intra_interval_ctrl); + + rcWindow = MAX(1, rc->window_len); + rc->target_bits = vb->bits_per_pic - intraBits + DIV(tmp, rcWindow); + rc->target_bits = MAX(0, rc->target_bits); + + VPU_PLG_INF("require target bits %d\n", rc->target_bits); + calculate_pic_qp(rc); + + rc->qp = CLIP3(rc->qp, rc->qp_min, rc->qp_max); + rc->qp_last = rc->qp; + + if (rc->cur_frmtype == INTRA_FRAME) { + /* + * unless every frame is intra, apply intra_qp_delta so that the + * intra frame gets a better quality (psnr). + */ + if (rc->pre_frmtype != INTRA_FRAME) + rc->qp += rc->intra_qp_delta; + + rc->qp = CLIP3(rc->qp, rc->qp_min, rc->qp_max); + if (rc->intra_interval_ctrl > 1) + rc->intra_interval = rc->intra_interval_ctrl; /* remember the measured distance between intra frames */ + rc->intra_interval_ctrl = 1; + } else { + rc->acc_inter_qp += rc->qp; /* accumulated for get_gop_avg_qp() */ + rc->acc_inter_cnt++; + rc->intra_interval_ctrl++; + + if (rc->intra_interval_ctrl > rc->intra_interval) + rc->intra_interval = rc->intra_interval_ctrl; + } + + rc->pre_frmtype = rc->cur_frmtype; + + VPU_PLG_INF("get qp %d\n", rc->qp); +} diff --git a/libv4l-rockchip_v2/libvepu/common/rk_venc_rate_control.h b/libv4l-rockchip_v2/libvepu/common/rk_venc_rate_control.h new file mode 100644 index 0000000000000000000000000000000000000000..2ee5bf26e82628a158cc78c3dc86fe93c24ba5ca --- /dev/null +++ b/libv4l-rockchip_v2/libvepu/common/rk_venc_rate_control.h @@ -0,0 +1,114 @@ +/* + * Copyright 2016 Rockchip Electronics Co. LTD + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef _V4L2_PLUGIN_RK_VENC_RATE_CONTROL_H_ +#define _V4L2_PLUGIN_RK_VENC_RATE_CONTROL_H_ + +#include <inttypes.h> +#include <stdbool.h> + +#define STATISTIC_TABLE_LENGTH 10 /* capacity of bits_statistic.bits */ + +enum FRAME_TYPE { + INTRA_FRAME = 0, + INTER_FRAME = 1 +}; + +struct bits_statistic { + int32_t bits[STATISTIC_TABLE_LENGTH]; /* ring of samples; the pid_* instances keep D, I, P in [0], [1], [2] */ + int32_t pos; /* next write index */ + int32_t len; /* number of valid entries */ +}; + +#define LINEAR_MODEL_STATISTIC_COUNT 15 /* samples kept per linear model */ + +struct linear_model { + int32_t n; /* elements count */ + int32_t i; /* elements index for store */ + + int64_t b; /* coefficient */ + int64_t a; /* constant term of y = b * x + a */ + + int64_t qp[LINEAR_MODEL_STATISTIC_COUNT]; /* x: the quantizer step stored per sample */ + int64_t r[LINEAR_MODEL_STATISTIC_COUNT]; /* normalised bits stored per sample */ + int64_t y[LINEAR_MODEL_STATISTIC_COUNT]; /* y = qp*qp*r */ + + int32_t qp_last; /* qp value in last calculate */ +}; + +/* Virtual buffer */ +struct virt_buffer { + int32_t bit_rate; /* target bits per time_scale ticks */ + int32_t bits_per_pic; /* bit_rate * fps_denom / fps_num */ + int32_t pic_time_inc; /* elapsed ticks, kept in [0, time_scale) */ + int32_t time_scale; /* ticks per bit_rate period; set to fps_num by the encoders */ + int32_t virt_bits_cnt; /* bits the target rate allows so far */ + int32_t actual_bits; /* bits actually produced so far */ + int32_t bucket_fullness; /* running sum of produced bits */ + int32_t gop_rem; + int32_t window_rem; /* pictures left in the current rate-control window */ +}; + +struct v4l2_plugin_rate_control { + bool pic_rc_en; /* false: always use qp_fixed */ + int32_t mb_per_pic; /* macroblocks per picture */ + enum FRAME_TYPE cur_frmtype; + enum FRAME_TYPE pre_frmtype; + int32_t qp_fixed; + int32_t qp; /* QP for the current picture; -1 asks rk_venc_init_pic_rc() for an estimate */ + int32_t qp_min; + int32_t qp_max; + int32_t qp_last; + int32_t fps_num; + int32_t fps_denom; + struct virt_buffer vb; + struct bits_statistic pid_inter; /* PID state for inter frames */ + int32_t target_bits; /* bit target of the current picture */ + int32_t acc_inter_qp; /* sum of inter-frame QPs since the last intra frame */ + int32_t acc_inter_cnt; /* number of QPs summed in acc_inter_qp */ + int32_t gop_len; + int32_t intra_qp_delta; /* added to the QP of an intra frame that follows an inter frame */ + + struct bits_statistic intra; /* normalised bits of recent intra frames */ + struct bits_statistic pid_intra; /* PID state for intra frames */ + struct bits_statistic gop; /* recent intra-to-GOP bit ratios in percent */ + + struct 
linear_model intra_frames; + struct linear_model inter_frames; + + /* accumulate bits count for current gop */ + int32_t acc_bits_cnt; + + /* bitrate window which tries to match target */ + int32_t window_len; + int32_t intra_interval; /* longest observed distance between intra frames */ + int32_t intra_interval_ctrl; /* pictures counted since the last intra frame */ + + const int32_t *qstep; /* step table selected by rk_venc_init_pic_rc() */ + int32_t qstep_size; /* number of entries in qstep */ + + bool initiated; /* set once h264e_apply_param() has re-run rk_venc_init_pic_rc() */ +}; + +void rk_venc_recalc_parameter(struct v4l2_plugin_rate_control *rc); /* refresh vb.bits_per_pic */ +bool rk_venc_init_pic_rc(struct v4l2_plugin_rate_control *rc, + const int32_t qp_tbl[2][11]); /* false for an unsupported encoder format */ +void rk_venc_after_pic_rc(struct v4l2_plugin_rate_control *rc, + uint32_t byteCnt); /* byteCnt: size of the picture just encoded */ +void rk_venc_before_pic_rc(struct v4l2_plugin_rate_control *rc, + uint32_t timeInc, enum FRAME_TYPE frmtype); /* sets rc->target_bits and rc->qp */ + +#endif diff --git a/libv4l-rockchip_v2/libvepu/h264e/h264e.c b/libv4l-rockchip_v2/libvepu/h264e/h264e.c index 775098d4feba13d09ddbd991c83abb42f4b3b27d..d21ec9606674a620ec3070bf6af12e5b263ec7a8 100644 --- a/libv4l-rockchip_v2/libvepu/h264e/h264e.c +++ b/libv4l-rockchip_v2/libvepu/h264e/h264e.c @@ -20,6 +20,8 @@ #include <stdio.h> #include "h264e.h" +#include "h264e_rate_control.h" +#include "../common/rk_venc_rate_control.h" #include "../rk_vepu_debug.h" const int32_t h264e_qp_tbl[2][11] = { @@ -107,6 +109,40 @@ static void h264e_init_slice(struct v4l2_plugin_h264_slice_param *slice) slice->slice_beta_offset_div2 = 0; } +static void h264e_init_rc(struct rk_venc *ictx) /* Install default macroblock-level and picture-level rate-control state for the H.264 encoder. */ +{ + struct rk_h264_encoder *ctx = (struct rk_h264_encoder *)ictx; + struct mb_qpctrl *qpCtrl = &ctx->mbrc.qp_ctrl; + struct v4l2_plugin_rate_control *rc = &ictx->rc; + + memset(qpCtrl, 0, sizeof(*qpCtrl)); + + ctx->mbrc.mb_rc_en = true; + qpCtrl->check_points = MIN(ctx->sps.pic_height_in_map_units - 1, + CHECK_POINTS_MAX); + qpCtrl->chkptr_distance = + MB_PER_PIC(ctx) / (qpCtrl->check_points + 1); /* macroblocks between consecutive check points */ + + rc->pic_rc_en = true; + rc->fps_num = 30; /* defaults; h264e_apply_param() replaces them with runtime parameters */ + rc->fps_denom = 1; + rc->vb.bit_rate = 1000000; + rc->vb.actual_bits = 0; + rc->vb.time_scale = rc->fps_num; + rc->vb.virt_bits_cnt = 0; + rc->vb.bucket_fullness = 0; + 
rc->vb.pic_time_inc = 0; + rc->gop_len = 150; + rc->qp_min = 10; + rc->qp_max = 51; + rc->mb_per_pic = MB_PER_PIC(ctx); + rc->intra_qp_delta = -3; + rc->qp = -1; /* -1 makes rk_venc_init_pic_rc() estimate the initial QP */ + rc->initiated = false; + + rk_venc_init_pic_rc(&ctx->venc.rc, h264e_qp_tbl); /* NOTE(review): the bool result is ignored here */ +} + static int h264e_init(struct rk_venc *ictx, struct rk_vepu_init_param *param) { @@ -135,6 +171,8 @@ static int h264e_init(struct rk_venc *ictx, ctx->sps.pic_height_in_map_units * 16 - ctx->height; } + h264e_init_rc(ictx); /* install the default rate-control state */ + if (ctx->sps.level_idc >= H264ENC_LEVEL_3_1) ctx->h264_inter4x4_disabled = 1; else @@ -156,6 +194,9 @@ static int h264e_begin_picture(struct rk_venc *ictx) { struct rk_h264_encoder *ctx = (struct rk_h264_encoder *)ictx; + int i; + enum FRAME_TYPE frmtype = INTER_FRAME; /* becomes INTRA_FRAME where frame_coding_type is set to intra */ + int timeInc = 1; struct rk3288_h264e_reg_params *hw_info = &ctx->hw_info; if (ictx->runtime_param.keyframe_request) { @@ -170,8 +211,13 @@ static int h264e_begin_picture(struct rk_venc *ictx) ctx->slice.idr_pic_id %= 16; ctx->slice.frame_num = 0; hw_info->frame_coding_type = FRAME_CODING_TYPE_INTRA; + frmtype = INTRA_FRAME; + timeInc = 0; /* intra pictures advance the rate-control clock by 0 ticks */ } + rk_venc_before_pic_rc(&ctx->venc.rc, timeInc, frmtype); /* picks rc.target_bits and rc.qp */ + h264e_before_mb_rate_control(&ctx->mbrc); /* fills the check-point targets from rc.target_bits */ + hw_info->pic_init_qp = ctx->pps.pic_init_qp_minus26 + 26; hw_info->transform8x8_mode = ctx->pps.transform_8x8_mode_flag; hw_info->enable_cabac = ctx->pps.entropy_coding_mode_flag != 0; @@ -187,13 +233,22 @@ static int h264e_begin_picture(struct rk_venc *ictx) hw_info->slice_size_mb_rows = ctx->slice_size_mb_rows; hw_info->cabac_init_idc = ctx->slice.cabac_init_idc; - hw_info->qp = 30; + hw_info->qp = ctx->venc.rc.qp; /* QP chosen by rk_venc_before_pic_rc() */ hw_info->mad_qp_delta = 0; hw_info->mad_threshold = 0; - hw_info->qp_min = 10; - hw_info->qp_max = 51; - hw_info->cp_distance_mbs = 0; + hw_info->qp_min = ctx->venc.rc.qp_min; + hw_info->qp_max = ctx->venc.rc.qp_max; + hw_info->cp_distance_mbs = ctx->mbrc.qp_ctrl.chkptr_distance; + + for (i = 0; i < ctx->mbrc.qp_ctrl.check_points; i++) { /* check_points never exceeds CHECK_POINTS_MAX (see h264e_init_rc) */ + 
hw_info->cp_target[i] = ctx->mbrc.qp_ctrl.word_cnt_target[i]; + } + + for (i = 0; i < CTRL_LEVELS; i++) { /* copy the error ladder and its QP deltas to the register parameters */ + hw_info->target_error[i] = ctx->mbrc.qp_ctrl.word_error[i]; + hw_info->delta_qp[i] = ctx->mbrc.qp_ctrl.qp_delta[i]; + } hw_info->h264_inter4x4_disabled = ctx->h264_inter4x4_disabled; @@ -207,9 +262,17 @@ static int h264e_end_picture(struct rk_venc *ictx, uint32_t outputStreamSize) { struct rk_h264_encoder *ctx = (struct rk_h264_encoder *)ictx; + struct v4l2_plugin_h264_feedback *feedback = &ctx->feedback; + int i; + + for (i = 0; i < CHECK_POINTS_MAX; i++) /* keep the per-check-point counts reported for this picture */ + ctx->mbrc.qp_ctrl.word_cnt_prev[i] = feedback->cp[i]; + h264e_after_mb_rate_control(&ctx->mbrc, outputStreamSize, + feedback->rlcCount, feedback->qpSum); /* rlcCount is passed as non_zero_cnt */ + rk_venc_after_pic_rc(&ctx->venc.rc, outputStreamSize); ctx->frm_in_gop++; - ctx->frm_in_gop %= 150; + ctx->frm_in_gop %= ctx->venc.rc.gop_len; /* gop_len is set to 150 by h264e_init_rc() */ ctx->slice.frame_num++; ctx->slice.frame_num %= @@ -233,10 +296,32 @@ static int h264e_update_priv(struct rk_venc *ictx, void *config, uint32_t cfglen static void h264e_apply_param(struct rk_venc *ictx) { struct rk_h264_encoder *ctx = (struct rk_h264_encoder *)ictx; + struct rk_vepu_runtime_param *param = &ictx->runtime_param; + bool reinit = false; /* set when a rate-control input changed */ assert(ctx); - /* todo, apply parameters to rate control */ + if (param->bitrate != 0) { /* 0 is treated as no bitrate request */ + ctx->venc.rc.vb.bit_rate = param->bitrate; + reinit = true; + } + + if (param->framerate_numer != 0 && param->framerate_denom != 0) { + ctx->venc.rc.fps_num = param->framerate_numer; + ctx->venc.rc.fps_denom = param->framerate_denom; + ictx->rc.vb.time_scale = param->framerate_numer; /* keep the time base equal to fps_num, as in h264e_init_rc() */ + reinit = true; + } + + if (reinit) { + if (!ictx->rc.initiated) { /* first runtime update: full re-init with a fresh QP estimate */ + ictx->rc.qp = -1; + rk_venc_init_pic_rc(&ictx->rc, h264e_qp_tbl); + ictx->rc.initiated = true; + } else { /* later updates only refresh bits_per_pic */ + rk_venc_recalc_parameter(&ictx->rc); + } + } } static void h264e_get_payloads(struct rk_venc *ictx, size_t *num, uint32_t **ids, diff --git a/libv4l-rockchip_v2/libvepu/h264e/h264e.h 
b/libv4l-rockchip_v2/libvepu/h264e/h264e.h index d62443e04a11cd968745f153b3c99348b47a0b9b..817bb4ec3e3ea3f8e69595712693e33d869a9020 100644 --- a/libv4l-rockchip_v2/libvepu/h264e/h264e.h +++ b/libv4l-rockchip_v2/libvepu/h264e/h264e.h @@ -23,6 +23,8 @@ #include "../rk_vepu_interface.h" #include "h264e_common.h" +#include "h264e_rate_control.h" + #include "../common/rk_venc.h" #define H264E_NUM_CTRLS 1 @@ -35,6 +37,8 @@ struct rk_h264_encoder { struct rk3288_h264e_reg_params hw_info; struct v4l2_plugin_h264_feedback feedback; + struct h264_mb_rate_control mbrc; + int width; int height; diff --git a/libv4l-rockchip_v2/libvepu/h264e/h264e_rate_control.c b/libv4l-rockchip_v2/libvepu/h264e/h264e_rate_control.c new file mode 100644 index 0000000000000000000000000000000000000000..48908d8c8cf6122bd996c34544b35a5e1b1bfe9e --- /dev/null +++ b/libv4l-rockchip_v2/libvepu/h264e/h264e_rate_control.c @@ -0,0 +1,196 @@ +/* + * Copyright 2015 Rockchip Electronics Co. LTD + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include <memory.h> +#include <assert.h> + +#include "h264e.h" +#include "h264e_common.h" +#include "h264e_rate_control.h" +#include "../rk_vepu_debug.h" + +#define WORD_CNT_MAX 65535 /* upper bound of a check-point target value */ +/* Spread non_zero_target evenly over the picture: each check point gets a target proportional to the macroblocks coded up to it. */ +static void calculate_mb_model_using_linear_model( + struct h264_mb_rate_control *rc, + int32_t non_zero_target) +{ + struct rk_h264_encoder *enc = + container_of(rc, struct rk_h264_encoder, mbrc); + const int32_t sscale = 256; + struct mb_qpctrl *qc = &rc->qp_ctrl; + int32_t scaler; + int32_t i; + int32_t tmp; + int32_t mb_per_pic = MB_COUNT(enc->width) * MB_COUNT(enc->height); + int32_t chk_ptr_cnt = MIN(MB_COUNT(enc->height), CHECK_POINTS_MAX); /* NOTE(review): h264e_init_rc() clamps (pic_height_in_map_units - 1) instead; confirm which is intended */ + int32_t chk_ptr_distance = mb_per_pic / (chk_ptr_cnt + 1); + int32_t bits_per_pic = enc->venc.rc.vb.bits_per_pic; + + assert(non_zero_target < (0x7FFFFFFF / sscale)); + + if(non_zero_target > 0) { + /* scaler is the non-zero coefficient count per macro-block + times 256 (sscale) */ + scaler = DIV(non_zero_target * sscale, mb_per_pic); + } else { + return; + } + + for(i = 0; i < chk_ptr_cnt; i++) { + /* tmp is non-zero coefficient count target for i-th + check point */ + tmp = (scaler * (chk_ptr_distance * (i + 1) + 1)) / sscale; + tmp = MIN(WORD_CNT_MAX, tmp / 32 + 1); + if (tmp < 0) tmp = WORD_CNT_MAX; /* Detect overflow */ + qc->word_cnt_target[i] = tmp; /* div32 for regs */ + } + + /* calculate nz count for avg. 
bits per frame */ + /* tmp is target non-zero coefficient count for average size pic */ + tmp = DIV(bits_per_pic * 256, rc->bits_per_non_zero_coef); + + /* ladder of thresholds on 'non-zero coefficient count target' - 'non-zero coefficient + actual' at a check point, each paired with a qp delta */ + qc->word_error[0] = -tmp * 3; + qc->qp_delta[0] = -3; + qc->word_error[1] = -tmp * 2; + qc->qp_delta[1] = -2; + qc->word_error[2] = -tmp * 1; + qc->qp_delta[2] = -1; + qc->word_error[3] = tmp * 1; + qc->qp_delta[3] = 0; + qc->word_error[4] = tmp * 2; + qc->qp_delta[4] = 1; + qc->word_error[5] = tmp * 3; + qc->qp_delta[5] = 2; + qc->word_error[6] = tmp * 4; + qc->qp_delta[6] = 3; + + for(i = 0; i < CTRL_LEVELS; i++) + qc->word_error[i] = CLIP3(qc->word_error[i] / 4, -32768, 32767); /* quarter the value and clamp it to the signed 16-bit range */ +} +/* Scale the per-check-point counts of the previous picture (word_cnt_prev) by non_zero_target / rc->non_zero_cnt to get the new targets. */ +static void calculate_mb_model_using_adaptive_model( + struct h264_mb_rate_control *rc, + int32_t non_zero_target) +{ + struct rk_h264_encoder *enc = + container_of(rc, struct rk_h264_encoder, mbrc); + const int32_t sscale = 256; + struct mb_qpctrl *qc = &rc->qp_ctrl; + int32_t i; + int32_t tmp; + int32_t scaler; + int32_t chk_ptr_cnt = MIN(MB_COUNT(enc->height), CHECK_POINTS_MAX); + int32_t bits_per_pic = enc->venc.rc.vb.bits_per_pic; + + assert(non_zero_target < (0x7FFFFFFF / sscale)); + + if((non_zero_target > 0) && (rc->non_zero_cnt > 0)) + scaler = DIV(non_zero_target * sscale, rc->non_zero_cnt); /* target-to-previous ratio times 256 (sscale) */ + else + return; + + for(i = 0; i < chk_ptr_cnt; i++) { + tmp = (int32_t)(qc->word_cnt_prev[i] * scaler) / sscale; + tmp = MIN(WORD_CNT_MAX, tmp / 32 + 1); + if (tmp < 0) tmp = WORD_CNT_MAX; /* Detect overflow */ + qc->word_cnt_target[i] = tmp; /* div32 for regs */ + } + + /* calculate nz count for avg. 
bits per frame */ + tmp = DIV(bits_per_pic * 256, (rc->bits_per_non_zero_coef * 3)); + + qc->word_error[0] = -tmp * 3; + qc->qp_delta[0] = -3; + qc->word_error[1] = -tmp * 2; + qc->qp_delta[1] = -2; + qc->word_error[2] = -tmp * 1; + qc->qp_delta[2] = -1; + qc->word_error[3] = tmp * 1; + qc->qp_delta[3] = 0; + qc->word_error[4] = tmp * 2; + qc->qp_delta[4] = 1; + qc->word_error[5] = tmp * 3; + qc->qp_delta[5] = 2; + qc->word_error[6] = tmp * 4; + qc->qp_delta[6] = 3; + + for(i = 0; i < CTRL_LEVELS; i++) + qc->word_error[i] = + CLIP3(qc->word_error[i] / 4, -32768, 32767); /* quarter the value and clamp it to the signed 16-bit range */ +} +/* Convert target_bits into a non-zero coefficient target and build the check-point model for the next picture. */ +static void calculate_mb_model(struct h264_mb_rate_control *rc, + int32_t target_bits) +{ + struct rk_h264_encoder *enc = + container_of(rc, struct rk_h264_encoder, mbrc); + int32_t non_zero_target; + int32_t mb_per_pic = MB_COUNT(enc->width) * MB_COUNT(enc->height); + int32_t coeff_cnt_max = mb_per_pic * 24 * 16; + + /* Disable macroblock rate control for intra frame, + because coefficient target will be wrong */ + if(enc->frm_in_gop == 0 || rc->bits_per_non_zero_coef == 0) + return; + + /* Required non-zero coefficient count */ + non_zero_target = DIV(target_bits * 256, rc->bits_per_non_zero_coef); + non_zero_target = CLIP3(non_zero_target, 0, coeff_cnt_max); + + non_zero_target = MIN(0x7FFFFFFFU / 1024U, (uint32_t)non_zero_target); + + VPU_PLG_INF("mb rc target non-zero coefficient count %d\n", + non_zero_target); + + /* Use linear model when previous frame can't be used for prediction */ + if (enc->frm_in_gop != 0 || rc->non_zero_cnt == 0) /* NOTE(review): frm_in_gop == 0 already returned above, so this test is always true and the adaptive branch is unreachable; confirm intent */ + calculate_mb_model_using_linear_model(rc, non_zero_target); + else + calculate_mb_model_using_adaptive_model(rc, non_zero_target); +} +/* Clear the check-point targets and rebuild them when this and the previous picture are both inter frames and macroblock rate control is enabled. */ +void h264e_before_mb_rate_control(struct h264_mb_rate_control *rc) +{ + struct rk_h264_encoder *enc = + container_of(rc, struct rk_h264_encoder, mbrc); + + memset(rc->qp_ctrl.word_cnt_target, 0, + sizeof(rc->qp_ctrl.word_cnt_target)); + + if (enc->venc.rc.cur_frmtype == INTER_FRAME && + enc->venc.rc.pre_frmtype == INTER_FRAME && + 
rc->mb_rc_en) + calculate_mb_model(rc, enc->venc.rc.target_bits); +} +/* Update the bits-per-non-zero-coefficient statistic from the picture just encoded; qp_sum is currently unused. */ +void h264e_after_mb_rate_control(struct h264_mb_rate_control *rc, + uint32_t bytes, uint32_t non_zero_cnt, uint32_t qp_sum) +{ + struct rk_h264_encoder *enc = container_of(rc, struct rk_h264_encoder, + mbrc); + int32_t bits = bytes * 8; + + VPU_PLG_INF("mb rc get actual non-zero coefficient count %u\n", + non_zero_cnt); + + if (enc->frm_in_gop != 0 && non_zero_cnt != 0) { /* skip the first picture of a GOP, and avoid dividing by a zero count */ + rc->bits_per_non_zero_coef = DIV(bits * 256, non_zero_cnt); /* kept scaled by 256 */ + rc->non_zero_cnt = non_zero_cnt; + } +} diff --git a/libv4l-rockchip_v2/libvepu/h264e/h264e_rate_control.h b/libv4l-rockchip_v2/libvepu/h264e/h264e_rate_control.h new file mode 100644 index 0000000000000000000000000000000000000000..2b6b0ac40c78629416545faf45d91dde55e3f706 --- /dev/null +++ b/libv4l-rockchip_v2/libvepu/h264e/h264e_rate_control.h @@ -0,0 +1,48 @@ +/* + * Copyright 2015 Rockchip Electronics Co. LTD + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#ifndef H264_RATE_CONTROL_H +#define H264_RATE_CONTROL_H + +#include <stdbool.h> +#include <stdint.h> + +#include "../common/rk_venc_rate_control.h" + +#define CTRL_LEVELS 7 /* number of word_error / qp_delta entries */ +#define CHECK_POINTS_MAX 10 /* capacity of the word_cnt_target / word_cnt_prev tables */ +/* Check-point tables for macroblock-level QP control. word_error[i] and qp_delta[i] form pairs: an error threshold and the QP adjustment associated with it. */ +struct mb_qpctrl { + int32_t word_error[CTRL_LEVELS]; /* Check point error bit */ + int32_t qp_delta[CTRL_LEVELS]; /* Check point qp difference */ + int32_t word_cnt_target[CHECK_POINTS_MAX]; /* Required bit count */ + int32_t word_cnt_prev[CHECK_POINTS_MAX]; /* Real bit count */ + int32_t chkptr_distance; /* presumably the distance between check points -- TODO confirm unit */ + int32_t check_points; /* presumably the number of check points in use -- TODO confirm */ +}; +/* Per-encoder state of the H.264 macroblock rate control. */ +struct h264_mb_rate_control { + bool mb_rc_en; /* the model is only built while this is set */ + int32_t bits_per_non_zero_coef; /* measured bits per non-zero coefficient, scaled by 256; 0 means no measurement yet */ + int32_t non_zero_cnt; /* non-zero coefficient count stored by h264e_after_mb_rate_control() */ + struct mb_qpctrl qp_ctrl; +}; +/* Call h264e_before_mb_rate_control() before a frame is encoded and h264e_after_mb_rate_control() with the frame's results afterwards. */ +void h264e_before_mb_rate_control(struct h264_mb_rate_control *rc); +void h264e_after_mb_rate_control(struct h264_mb_rate_control *rc, uint32_t coded_bytes, + uint32_t non_zero_cnt, uint32_t qp_sum); + +#endif diff --git a/libv4l-rockchip_v2/libvepu/vp8e/vp8e.c b/libv4l-rockchip_v2/libvepu/vp8e/vp8e.c index c0d836016baa4ad080b89244af9c46c40081d6b1..80d7a4d2f2a714ab3de97e2306d157c605e9a295 100644 --- a/libv4l-rockchip_v2/libvepu/vp8e/vp8e.c +++ b/libv4l-rockchip_v2/libvepu/vp8e/vp8e.c @@ -23,6 +23,35 @@ #include "vp8e_bitstream.h" #include "../rk_vepu_debug.h" +const int32_t vp8e_qp_tbl[2][11] = { /* passed to rk_venc_init_pic_rc(); presumably row 0 holds ascending thresholds and row 1 the matching initial QP -- TODO confirm */ + { 47, 57, 73, 93, 122, 155, 214, 294, 373, 506, 0x7FFFFFFF }, + { 120, 110, 100, 90, 80, 70, 60, 50, 40, 30, 20}}; +/* Load the default rate-control settings for VP8 (30/1 fps, bit_rate 1000000, GOP length 150, QP range 30..127, intra QP delta -3) and initialise picture rate control with vp8e_qp_tbl. Reads ctx->width and ctx->height to derive mb_per_pic. */ +static void vp8e_init_rc(struct rk_venc *ictx) +{ + struct rk_vp8_encoder *ctx = (struct rk_vp8_encoder *)ictx; + struct v4l2_plugin_rate_control *rc = &ictx->rc; + + rc->pic_rc_en = true; + rc->fps_num = 30; + rc->fps_denom = 1; + rc->vb.bit_rate = 1000000; + rc->vb.actual_bits = 0; + rc->vb.time_scale = rc->fps_num; + rc->vb.virt_bits_cnt = 0; + rc->vb.bucket_fullness = 0; + rc->vb.pic_time_inc = 0; + rc->gop_len = 150; + rc->qp_min = 30; + rc->qp_max = 127; + rc->mb_per_pic = MB_COUNT(ctx->width) * MB_COUNT(ctx->height); + rc->intra_qp_delta = -3; + rc->qp = -1; + 
rc->initiated = false; + /* Hand the defaults above to the common picture rate control together with the VP8 QP table. */ + rk_venc_init_pic_rc(&ctx->venc.rc, vp8e_qp_tbl); +} + static int vp8e_init(struct rk_venc *ictx, struct rk_vepu_init_param *param) { @@ -51,6 +80,8 @@ static int vp8e_init(struct rk_venc *ictx, ctx->hw_info.filter_sharpness = 0; ctx->hw_info.filter_level = 26; /* 0 ~ 63 */ + vp8e_init_rc(ictx); /* NOTE(review): reads ctx->width/height, presumably set earlier in vp8e_init -- confirm */ + ctx->priv_data = (struct vp8_hw_privdata *)calloc(1, sizeof(*ctx->priv_data)); if (ctx->priv_data == NULL) { VPU_PLG_ERR("allocate private data buffer failed\n"); @@ -75,6 +106,8 @@ static int vp8e_begin_picture(struct rk_venc *ictx) { struct rk_vp8_encoder *ctx = (struct rk_vp8_encoder *)ictx; struct rk3399_vp8e_reg_params *hw_info = &ctx->hw_info; + int time_inc = 1; /* default for an inter frame; set to 0 below for the first frame of a GOP */ + enum FRAME_TYPE frmtype = INTER_FRAME; /* switched to INTRA_FRAME below when frm_in_gop == 0 */ VPU_PLG_ENTER(); @@ -89,9 +122,13 @@ static int vp8e_begin_picture(struct rk_venc *ictx) if (ctx->frm_in_gop == 0) { hw_info->is_intra = 1; hw_info->filter_level = 48; + time_inc = 0; + frmtype = INTRA_FRAME; } - hw_info->qp = 80; + rk_venc_before_pic_rc(&ctx->venc.rc, time_inc, frmtype); + /* Program the QP held in the rate-control state; presumably just updated by rk_venc_before_pic_rc() -- confirm */ + hw_info->qp = ictx->rc.qp; prepare_prob(&ctx->probs); @@ -125,10 +162,12 @@ static int vp8e_end_picture(struct rk_venc *ictx, VPU_PLG_ENTER(); + rk_venc_after_pic_rc(&ctx->venc.rc, outputStreamSize); /* report the produced stream size back to rate control */ + ctx->last_frm_intra = ctx->hw_info.is_intra; ctx->frm_in_gop++; - ctx->frm_in_gop %= 150; + ctx->frm_in_gop %= ictx->rc.gop_len; /* wrap at the configured GOP length; frm_in_gop == 0 makes the next frame intra */ ctx->frame_cnt++; @@ -152,10 +191,33 @@ static int vp8e_update_priv(struct rk_venc *ictx, void *config, uint32_t cfglen) static void vp8e_apply_param(struct rk_venc *ictx) { struct rk_vp8_encoder *ctx = (struct rk_vp8_encoder *)ictx; + struct rk_vepu_runtime_param *param = &ictx->runtime_param; + bool reinit = false; /* set when a bitrate or frame-rate change has to be pushed into rate control */ assert(ctx); - /* todo, apply parameters to rate control */ + if (param->bitrate != 0) { /* a zero bitrate leaves the current setting untouched */ + ctx->venc.rc.vb.bit_rate = param->bitrate; + reinit = true; + } + /* The frame rate is applied only when both numerator and denominator are non-zero. */ + if (param->framerate_numer != 0 && param->framerate_denom != 0) { + ctx->venc.rc.fps_num = param->framerate_numer; + ctx->venc.rc.fps_denom = 
param->framerate_denom; + ictx->rc.vb.time_scale = param->framerate_numer; /* time_scale follows the frame-rate numerator, as in vp8e_init_rc() */ + + reinit = true; + } + /* The first parameter update runs the full rate-control init; later updates only recalculate the derived parameters. */ + if (reinit) { + if (!ictx->rc.initiated) { + ictx->rc.qp = -1; /* same value vp8e_init_rc() uses; presumably asks rk_venc_init_pic_rc() to pick the initial QP -- confirm */ + rk_venc_init_pic_rc(&ictx->rc, vp8e_qp_tbl); + ictx->rc.initiated = true; + } else { + rk_venc_recalc_parameter(&ictx->rc); + } + } } static void vp8e_get_payloads(struct rk_venc *ictx, size_t *num, uint32_t **ids,