aboutsummaryrefslogtreecommitdiff
path: root/third_party/libaom/source/libaom/av1/encoder/ethread.c
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/libaom/source/libaom/av1/encoder/ethread.c')
-rw-r--r--third_party/libaom/source/libaom/av1/encoder/ethread.c528
1 files changed, 465 insertions, 63 deletions
diff --git a/third_party/libaom/source/libaom/av1/encoder/ethread.c b/third_party/libaom/source/libaom/av1/encoder/ethread.c
index 3735ca3c8b..d274b6b84f 100644
--- a/third_party/libaom/source/libaom/av1/encoder/ethread.c
+++ b/third_party/libaom/source/libaom/av1/encoder/ethread.c
@@ -11,9 +11,11 @@
#include "av1/common/warped_motion.h"
+#include "av1/encoder/bitstream.h"
#include "av1/encoder/encodeframe.h"
#include "av1/encoder/encoder.h"
#include "av1/encoder/encoder_alloc.h"
+#include "av1/encoder/encodeframe_utils.h"
#include "av1/encoder/ethread.h"
#if !CONFIG_REALTIME_ONLY
#include "av1/encoder/firstpass.h"
@@ -52,7 +54,7 @@ static AOM_INLINE void accumulate_rd_opt(ThreadData *td, ThreadData *td_t) {
static AOM_INLINE void update_delta_lf_for_row_mt(AV1_COMP *cpi) {
AV1_COMMON *cm = &cpi->common;
MACROBLOCKD *xd = &cpi->td.mb.e_mbd;
- const int mib_size = cm->seq_params.mib_size;
+ const int mib_size = cm->seq_params->mib_size;
const int frame_lf_count =
av1_num_planes(cm) > 1 ? FRAME_LF_COUNT : FRAME_LF_COUNT - 2;
for (int row = 0; row < cm->tiles.rows; row++) {
@@ -68,7 +70,8 @@ static AOM_INLINE void update_delta_lf_for_row_mt(AV1_COMP *cpi) {
const int idx_str = cm->mi_params.mi_stride * mi_row + mi_col;
MB_MODE_INFO **mi = cm->mi_params.mi_grid_base + idx_str;
MB_MODE_INFO *mbmi = mi[0];
- if (mbmi->skip_txfm == 1 && (mbmi->bsize == cm->seq_params.sb_size)) {
+ if (mbmi->skip_txfm == 1 &&
+ (mbmi->bsize == cm->seq_params->sb_size)) {
for (int lf_id = 0; lf_id < frame_lf_count; ++lf_id)
mbmi->delta_lf[lf_id] = xd->delta_lf[lf_id];
mbmi->delta_lf_from_base = xd->delta_lf_from_base;
@@ -362,7 +365,7 @@ static AOM_INLINE void switch_tile_and_get_next_job(
*cur_tile_id = tile_id;
const int unit_height = mi_size_high[fp_block_size];
get_next_job(&tile_data[tile_id], current_mi_row,
- is_firstpass ? unit_height : cm->seq_params.mib_size);
+ is_firstpass ? unit_height : cm->seq_params->mib_size);
}
}
@@ -441,13 +444,20 @@ static int enc_row_mt_worker_hook(void *arg1, void *unused) {
const BLOCK_SIZE fp_block_size = cpi->fp_block_size;
int end_of_frame = 0;
+
+ // When the master thread does not have a valid job to process, xd->tile_ctx
+ // is not set and contains a NULL pointer. This can result in a NULL pointer
+ // access violation if it is accessed beyond the encode stage. Hence,
+ // thread_data->td->mb.e_mbd.tile_ctx is initialized with the common frame
+ // context to avoid NULL pointer access in subsequent stages.
+ thread_data->td->mb.e_mbd.tile_ctx = cm->fc;
while (1) {
int current_mi_row = -1;
#if CONFIG_MULTITHREAD
pthread_mutex_lock(enc_row_mt_mutex_);
#endif
if (!get_next_job(&cpi->tile_data[cur_tile_id], &current_mi_row,
- cm->seq_params.mib_size)) {
+ cm->seq_params->mib_size)) {
// No jobs are available for the current tile. Query for the status of
// other tiles and get the next job if available
switch_tile_and_get_next_job(cm, cpi->tile_data, &cur_tile_id,
@@ -470,6 +480,7 @@ static int enc_row_mt_worker_hook(void *arg1, void *unused) {
td->mb.e_mbd.tile_ctx = td->tctx;
td->mb.tile_pb_ctx = &this_tile->tctx;
+ td->abs_sum_level = 0;
if (this_tile->allow_update_cdf) {
td->mb.row_ctx = this_tile->row_ctx;
@@ -482,7 +493,7 @@ static int enc_row_mt_worker_hook(void *arg1, void *unused) {
av1_init_above_context(&cm->above_contexts, av1_num_planes(cm), tile_row,
&td->mb.e_mbd);
- cfl_init(&td->mb.e_mbd.cfl, &cm->seq_params);
+ cfl_init(&td->mb.e_mbd.cfl, cm->seq_params);
if (td->mb.txfm_search_info.txb_rd_records != NULL) {
av1_crc32c_calculator_init(
&td->mb.txfm_search_info.txb_rd_records->mb_rd_record.crc_calculator);
@@ -492,6 +503,7 @@ static int enc_row_mt_worker_hook(void *arg1, void *unused) {
#if CONFIG_MULTITHREAD
pthread_mutex_lock(enc_row_mt_mutex_);
#endif
+ this_tile->abs_sum_level += td->abs_sum_level;
row_mt_sync->num_threads_working--;
#if CONFIG_MULTITHREAD
pthread_mutex_unlock(enc_row_mt_mutex_);
@@ -526,16 +538,12 @@ static int enc_worker_hook(void *arg1, void *unused) {
return 1;
}
-void av1_create_second_pass_workers(AV1_COMP *cpi, int num_workers) {
+#if CONFIG_MULTITHREAD
+void av1_init_mt_sync(AV1_COMP *cpi, int is_first_pass) {
AV1_COMMON *const cm = &cpi->common;
- const AVxWorkerInterface *const winterface = aom_get_worker_interface();
MultiThreadInfo *const mt_info = &cpi->mt_info;
- assert(mt_info->workers != NULL);
- assert(mt_info->tile_thr_data != NULL);
-
-#if CONFIG_MULTITHREAD
- if (cpi->oxcf.row_mt == 1) {
+ if (is_first_pass || cpi->oxcf.row_mt == 1) {
AV1EncRowMultiThreadInfo *enc_row_mt = &mt_info->enc_row_mt;
if (enc_row_mt->mutex_ == NULL) {
CHECK_MEM_ERROR(cm, enc_row_mt->mutex_,
@@ -543,24 +551,39 @@ void av1_create_second_pass_workers(AV1_COMP *cpi, int num_workers) {
if (enc_row_mt->mutex_) pthread_mutex_init(enc_row_mt->mutex_, NULL);
}
}
- AV1GlobalMotionSync *gm_sync = &mt_info->gm_sync;
- if (gm_sync->mutex_ == NULL) {
- CHECK_MEM_ERROR(cm, gm_sync->mutex_,
- aom_malloc(sizeof(*(gm_sync->mutex_))));
- if (gm_sync->mutex_) pthread_mutex_init(gm_sync->mutex_, NULL);
- }
- AV1TemporalFilterSync *tf_sync = &mt_info->tf_sync;
- if (tf_sync->mutex_ == NULL) {
- CHECK_MEM_ERROR(cm, tf_sync->mutex_, aom_malloc(sizeof(*tf_sync->mutex_)));
- if (tf_sync->mutex_) pthread_mutex_init(tf_sync->mutex_, NULL);
- }
- AV1CdefSync *cdef_sync = &mt_info->cdef_sync;
- if (cdef_sync->mutex_ == NULL) {
- CHECK_MEM_ERROR(cm, cdef_sync->mutex_,
- aom_malloc(sizeof(*(cdef_sync->mutex_))));
- if (cdef_sync->mutex_) pthread_mutex_init(cdef_sync->mutex_, NULL);
+
+ if (!is_first_pass) {
+ AV1GlobalMotionSync *gm_sync = &mt_info->gm_sync;
+ if (gm_sync->mutex_ == NULL) {
+ CHECK_MEM_ERROR(cm, gm_sync->mutex_,
+ aom_malloc(sizeof(*(gm_sync->mutex_))));
+ if (gm_sync->mutex_) pthread_mutex_init(gm_sync->mutex_, NULL);
+ }
+#if !CONFIG_REALTIME_ONLY
+ AV1TemporalFilterSync *tf_sync = &mt_info->tf_sync;
+ if (tf_sync->mutex_ == NULL) {
+ CHECK_MEM_ERROR(cm, tf_sync->mutex_,
+ aom_malloc(sizeof(*tf_sync->mutex_)));
+ if (tf_sync->mutex_) pthread_mutex_init(tf_sync->mutex_, NULL);
+ }
+#endif // !CONFIG_REALTIME_ONLY
+ AV1CdefSync *cdef_sync = &mt_info->cdef_sync;
+ if (cdef_sync->mutex_ == NULL) {
+ CHECK_MEM_ERROR(cm, cdef_sync->mutex_,
+ aom_malloc(sizeof(*(cdef_sync->mutex_))));
+ if (cdef_sync->mutex_) pthread_mutex_init(cdef_sync->mutex_, NULL);
+ }
}
-#endif
+}
+#endif // CONFIG_MULTITHREAD
+
+void av1_create_second_pass_workers(AV1_COMP *cpi, int num_workers) {
+ AV1_COMMON *const cm = &cpi->common;
+ const AVxWorkerInterface *const winterface = aom_get_worker_interface();
+ MultiThreadInfo *const mt_info = &cpi->mt_info;
+
+ assert(mt_info->workers != NULL);
+ assert(mt_info->tile_thr_data != NULL);
for (int i = num_workers - 1; i >= 0; i--) {
AVxWorker *const worker = &mt_info->workers[i];
@@ -576,7 +599,7 @@ void av1_create_second_pass_workers(AV1_COMP *cpi, int num_workers) {
// Create threads
if (!winterface->reset(worker))
- aom_internal_error(&cm->error, AOM_CODEC_ERROR,
+ aom_internal_error(cm->error, AOM_CODEC_ERROR,
"Tile encoder thread creation failed");
} else {
// Main thread acts as a worker and uses the thread data in cpi.
@@ -625,10 +648,6 @@ static AOM_INLINE void create_enc_workers(AV1_COMP *cpi, int num_workers) {
alloc_compound_type_rd_buffers(cm, &thread_data->td->comp_rd_buffer);
- CHECK_MEM_ERROR(
- cm, thread_data->td->tmp_conv_dst,
- aom_memalign(32, MAX_SB_SIZE * MAX_SB_SIZE *
- sizeof(*thread_data->td->tmp_conv_dst)));
for (int j = 0; j < 2; ++j) {
CHECK_MEM_ERROR(
cm, thread_data->td->tmp_pred_bufs[j],
@@ -636,9 +655,14 @@ static AOM_INLINE void create_enc_workers(AV1_COMP *cpi, int num_workers) {
sizeof(*thread_data->td->tmp_pred_bufs[j])));
}
+ const int plane_types = PLANE_TYPES >> cm->seq_params->monochrome;
+ CHECK_MEM_ERROR(cm, thread_data->td->pixel_gradient_info,
+ aom_malloc(sizeof(*thread_data->td->pixel_gradient_info) *
+ plane_types * MAX_SB_SQUARE));
+
if (cpi->sf.part_sf.partition_search_type == VAR_BASED_PARTITION) {
const int num_64x64_blocks =
- (cm->seq_params.sb_size == BLOCK_64X64) ? 1 : 4;
+ (cm->seq_params->sb_size == BLOCK_64X64) ? 1 : 4;
CHECK_MEM_ERROR(
cm, thread_data->td->vt64x64,
aom_malloc(sizeof(*thread_data->td->vt64x64) * num_64x64_blocks));
@@ -680,6 +704,10 @@ void av1_create_workers(AV1_COMP *cpi, int num_workers) {
// Set up shared coeff buffers.
av1_setup_shared_coeff_buffer(cm, &thread_data->td->shared_coeff_buf);
+ CHECK_MEM_ERROR(
+ cm, thread_data->td->tmp_conv_dst,
+ aom_memalign(32, MAX_SB_SIZE * MAX_SB_SIZE *
+ sizeof(*thread_data->td->tmp_conv_dst)));
}
++mt_info->num_workers;
}
@@ -724,7 +752,7 @@ static AOM_INLINE void fp_create_enc_workers(AV1_COMP *cpi, int num_workers) {
if (create_workers) {
// Create threads
if (!winterface->reset(worker))
- aom_internal_error(&cm->error, AOM_CODEC_ERROR,
+ aom_internal_error(cm->error, AOM_CODEC_ERROR,
"Tile encoder thread creation failed");
}
} else {
@@ -764,7 +792,7 @@ static AOM_INLINE void sync_enc_workers(MultiThreadInfo *const mt_info,
}
if (had_error)
- aom_internal_error(&cm->error, AOM_CODEC_ERROR,
+ aom_internal_error(cm->error, AOM_CODEC_ERROR,
"Failed to encode tile data");
}
@@ -780,14 +808,15 @@ static AOM_INLINE void accumulate_counters_enc_workers(AV1_COMP *cpi,
!frame_is_intra_only(&cpi->common))
av1_accumulate_cyclic_refresh_counters(cpi->cyclic_refresh,
&thread_data->td->mb);
- if (thread_data->td->mb.txfm_search_info.txb_rd_records) {
- aom_free(thread_data->td->mb.txfm_search_info.txb_rd_records);
- thread_data->td->mb.txfm_search_info.txb_rd_records = NULL;
- }
- if (thread_data->td != &cpi->td &&
- cpi->oxcf.cost_upd_freq.mv < COST_UPD_OFF) {
- aom_free(thread_data->td->mb.mv_costs);
+ if (thread_data->td != &cpi->td) {
+ if (cpi->oxcf.cost_upd_freq.mv < COST_UPD_OFF) {
+ aom_free(thread_data->td->mb.mv_costs);
+ }
+ if (cpi->oxcf.cost_upd_freq.dv < COST_UPD_OFF) {
+ aom_free(thread_data->td->mb.dv_costs);
+ }
}
+ av1_dealloc_mb_data(&cpi->common, &thread_data->td->mb);
// Accumulate counters.
if (i > 0) {
@@ -822,6 +851,7 @@ static AOM_INLINE void prepare_enc_workers(AV1_COMP *cpi, AVxWorkerHook hook,
thread_data->td->intrabc_used = 0;
thread_data->td->deltaq_used = 0;
+ thread_data->td->abs_sum_level = 0;
// Before encoding a frame, copy the thread data from cpi.
if (thread_data->td != &cpi->td) {
@@ -846,15 +876,19 @@ static AOM_INLINE void prepare_enc_workers(AV1_COMP *cpi, AVxWorkerHook hook,
memcpy(thread_data->td->mb.mv_costs, cpi->td.mb.mv_costs,
sizeof(MvCosts));
}
+ if (cpi->oxcf.cost_upd_freq.dv < COST_UPD_OFF) {
+ CHECK_MEM_ERROR(cm, thread_data->td->mb.dv_costs,
+ (IntraBCMVCosts *)aom_malloc(sizeof(IntraBCMVCosts)));
+ memcpy(thread_data->td->mb.dv_costs, cpi->td.mb.dv_costs,
+ sizeof(IntraBCMVCosts));
+ }
}
+ av1_alloc_mb_data(cm, &thread_data->td->mb,
+ cpi->sf.rt_sf.use_nonrd_pick_mode);
+
// Reset cyclic refresh counters.
av1_init_cyclic_refresh_counters(&thread_data->td->mb);
- if (!cpi->sf.rt_sf.use_nonrd_pick_mode) {
- CHECK_MEM_ERROR(cm, thread_data->td->mb.txfm_search_info.txb_rd_records,
- (TxbRdRecords *)aom_malloc(sizeof(TxbRdRecords)));
- }
-
if (thread_data->td->counts != &cpi->counts) {
memcpy(thread_data->td->counts, &cpi->counts, sizeof(cpi->counts));
}
@@ -867,6 +901,8 @@ static AOM_INLINE void prepare_enc_workers(AV1_COMP *cpi, AVxWorkerHook hook,
thread_data->td->mb.tmp_pred_bufs[j] =
thread_data->td->tmp_pred_bufs[j];
}
+ thread_data->td->mb.pixel_gradient_info =
+ thread_data->td->pixel_gradient_info;
thread_data->td->mb.e_mbd.tmp_conv_dst = thread_data->td->mb.tmp_conv_dst;
for (int j = 0; j < 2; ++j) {
@@ -904,11 +940,16 @@ static AOM_INLINE void fp_prepare_enc_workers(AV1_COMP *cpi, AVxWorkerHook hook,
memcpy(thread_data->td->mb.mv_costs, cpi->td.mb.mv_costs,
sizeof(MvCosts));
}
+ if (cpi->oxcf.cost_upd_freq.dv < COST_UPD_OFF) {
+ CHECK_MEM_ERROR(cm, thread_data->td->mb.dv_costs,
+ (IntraBCMVCosts *)aom_malloc(sizeof(IntraBCMVCosts)));
+ memcpy(thread_data->td->mb.dv_costs, cpi->td.mb.dv_costs,
+ sizeof(IntraBCMVCosts));
+ }
}
- if (!cpi->sf.rt_sf.use_nonrd_pick_mode) {
- CHECK_MEM_ERROR(cm, thread_data->td->mb.txfm_search_info.txb_rd_records,
- (TxbRdRecords *)aom_malloc(sizeof(TxbRdRecords)));
- }
+
+ av1_alloc_mb_data(cm, &thread_data->td->mb,
+ cpi->sf.rt_sf.use_nonrd_pick_mode);
}
}
#endif
@@ -1191,13 +1232,15 @@ void av1_fp_encode_tiles_row_mt(AV1_COMP *cpi) {
sync_enc_workers(&cpi->mt_info, cm, num_workers);
for (int i = num_workers - 1; i >= 0; i--) {
EncWorkerData *const thread_data = &cpi->mt_info.tile_thr_data[i];
- if (thread_data->td != &cpi->td &&
- cpi->oxcf.cost_upd_freq.mv < COST_UPD_OFF) {
- aom_free(thread_data->td->mb.mv_costs);
- }
- if (thread_data->td->mb.txfm_search_info.txb_rd_records) {
- aom_free(thread_data->td->mb.txfm_search_info.txb_rd_records);
+ if (thread_data->td != &cpi->td) {
+ if (cpi->oxcf.cost_upd_freq.mv < COST_UPD_OFF) {
+ aom_free(thread_data->td->mb.mv_costs);
+ }
+ if (cpi->oxcf.cost_upd_freq.dv < COST_UPD_OFF) {
+ aom_free(thread_data->td->mb.dv_costs);
+ }
}
+ av1_dealloc_mb_data(cm, &thread_data->td->mb);
}
}
@@ -1277,11 +1320,15 @@ static int tpl_worker_hook(void *arg1, void *unused) {
AV1_COMMON *cm = &cpi->common;
MACROBLOCK *x = &thread_data->td->mb;
MACROBLOCKD *xd = &x->e_mbd;
+ TplTxfmStats *tpl_txfm_stats = &thread_data->td->tpl_txfm_stats;
CommonModeInfoParams *mi_params = &cm->mi_params;
- BLOCK_SIZE bsize = convert_length_to_bsize(cpi->tpl_data.tpl_bsize_1d);
+ BLOCK_SIZE bsize = convert_length_to_bsize(cpi->ppi->tpl_data.tpl_bsize_1d);
TX_SIZE tx_size = max_txsize_lookup[bsize];
int mi_height = mi_size_high[bsize];
- int num_active_workers = cpi->tpl_data.tpl_mt_sync.num_threads_working;
+ int num_active_workers = cpi->ppi->tpl_data.tpl_mt_sync.num_threads_working;
+
+ av1_init_tpl_txfm_stats(tpl_txfm_stats);
+
for (int mi_row = thread_data->start * mi_height; mi_row < mi_params->mi_rows;
mi_row += num_active_workers * mi_height) {
// Motion estimation row boundary
@@ -1290,7 +1337,7 @@ static int tpl_worker_hook(void *arg1, void *unused) {
xd->mb_to_top_edge = -GET_MV_SUBPEL(mi_row * MI_SIZE);
xd->mb_to_bottom_edge =
GET_MV_SUBPEL((mi_params->mi_rows - mi_height - mi_row) * MI_SIZE);
- av1_mc_flow_dispenser_row(cpi, x, mi_row, bsize, tx_size);
+ av1_mc_flow_dispenser_row(cpi, tpl_txfm_stats, x, mi_row, bsize, tx_size);
}
return 1;
}
@@ -1370,6 +1417,24 @@ static AOM_INLINE void prepare_tpl_workers(AV1_COMP *cpi, AVxWorkerHook hook,
// OBMC buffers are used only to init MS params and remain unused when
// called from tpl, hence set the buffers to defaults.
av1_init_obmc_buffer(&thread_data->td->mb.obmc_buffer);
+ thread_data->td->mb.tmp_conv_dst = thread_data->td->tmp_conv_dst;
+ thread_data->td->mb.e_mbd.tmp_conv_dst = thread_data->td->mb.tmp_conv_dst;
+ }
+ }
+}
+
+// Accumulates the tpl transform stats of all workers into main_td->tpl_txfm_stats.
+static void tpl_accumulate_txfm_stats(ThreadData *main_td,
+ const MultiThreadInfo *mt_info,
+ int num_workers) {
+ TplTxfmStats *accumulated_stats = &main_td->tpl_txfm_stats;
+ for (int i = num_workers - 1; i >= 0; i--) {
+ AVxWorker *const worker = &mt_info->workers[i];
+ EncWorkerData *const thread_data = (EncWorkerData *)worker->data1;
+ ThreadData *td = thread_data->td;
+ if (td != main_td) { // main_td owns the accumulator itself, so skip it
+ const TplTxfmStats *tpl_txfm_stats = &td->tpl_txfm_stats;
+ av1_accumulate_tpl_txfm_stats(tpl_txfm_stats, accumulated_stats);
}
}
}
@@ -1379,7 +1444,7 @@ void av1_mc_flow_dispenser_mt(AV1_COMP *cpi) {
AV1_COMMON *cm = &cpi->common;
CommonModeInfoParams *mi_params = &cm->mi_params;
MultiThreadInfo *mt_info = &cpi->mt_info;
- TplParams *tpl_data = &cpi->tpl_data;
+ TplParams *tpl_data = &cpi->ppi->tpl_data;
AV1TplRowMultiThreadSync *tpl_sync = &tpl_data->tpl_mt_sync;
int mb_rows = mi_params->mb_rows;
int num_workers =
@@ -1398,6 +1463,7 @@ void av1_mc_flow_dispenser_mt(AV1_COMP *cpi) {
prepare_tpl_workers(cpi, tpl_worker_hook, num_workers);
launch_workers(&cpi->mt_info, num_workers);
sync_enc_workers(&cpi->mt_info, cm, num_workers);
+ tpl_accumulate_txfm_stats(&cpi->td, &cpi->mt_info, num_workers);
}
// Deallocate memory for temporal filter multi-thread synchronization.
@@ -1752,6 +1818,331 @@ void av1_global_motion_estimation_mt(AV1_COMP *cpi) {
}
#endif // !CONFIG_REALTIME_ONLY
+// qsort() comparator: larger abs_sum_level first, ties by ascending tile_idx.
+static int compare_tile_order(const void *a, const void *b) {
+  const PackBSTileOrder *const lhs = (const PackBSTileOrder *)a;
+  const PackBSTileOrder *const rhs = (const PackBSTileOrder *)b;
+
+  // Primary key: descending absolute sum of tx coeffs.
+  if (lhs->abs_sum_level != rhs->abs_sum_level)
+    return lhs->abs_sum_level > rhs->abs_sum_level ? -1 : 1;
+
+  // Tie-break on tile index; this comparator never returns 0.
+  return lhs->tile_idx > rhs->tile_idx ? 1 : -1;
+}
+
+// Hands out the next tile index of pack_bs_tile_order, or -1 when none remain.
+static AOM_INLINE int get_next_pack_bs_tile_idx(
+    AV1EncPackBSSync *const pack_bs_sync, const int num_tiles) {
+  assert(pack_bs_sync->next_job_idx <= num_tiles);
+  if (pack_bs_sync->next_job_idx == num_tiles) return -1;
+  const PackBSTileOrder *const order = pack_bs_sync->pack_bs_tile_order;
+  // Post-increment returns the current slot and advances to the next job.
+  return order[pack_bs_sync->next_job_idx++].tile_idx;
+}
+
+// Returns the prorated share of max_buf_size for one tile or tile group and
+// deducts it from *remain_buf_size; the last chunk takes all that remains.
+static AOM_INLINE size_t get_bs_chunk_size(int tg_or_tile_size,
+                                           const int frame_or_tg_size,
+                                           size_t *remain_buf_size,
+                                           size_t max_buf_size,
+                                           int is_last_chunk) {
+  assert(*remain_buf_size > 0);
+  if (is_last_chunk) {
+    const size_t leftover = *remain_buf_size;
+    *remain_buf_size = 0;
+    assert(leftover > 0);
+    return leftover;
+  }
+  const uint64_t scaled = (uint64_t)max_buf_size * tg_or_tile_size;
+  const size_t chunk = (size_t)(scaled / frame_or_tg_size);
+  *remain_buf_size -= chunk;
+  assert(*remain_buf_size > 0);
+  assert(chunk > 0);
+  return chunk;
+}
+
+// Initializes the pack bitstream params (flags, sizes, dst buffer slices) of all tiles.
+static void init_tile_pack_bs_params(AV1_COMP *const cpi, uint8_t *const dst,
+ struct aom_write_bit_buffer *saved_wb,
+ PackBSParams *const pack_bs_params_arr,
+ uint8_t obu_extn_header) {
+ MACROBLOCKD *const xd = &cpi->td.mb.e_mbd;
+ AV1_COMMON *const cm = &cpi->common;
+ const CommonTileParams *const tiles = &cm->tiles;
+ const int num_tiles = tiles->cols * tiles->rows;
+ // Fixed size tile groups for the moment
+ const int num_tg_hdrs = cpi->num_tg;
+ // Tile group size in terms of number of tiles (rounded up).
+ const int tg_size_in_tiles = (num_tiles + num_tg_hdrs - 1) / num_tg_hdrs;
+ uint8_t *tile_dst = dst;
+ uint8_t *tile_data_curr = dst;
+ // Max tile group count can not be more than MAX_TILES.
+ int tg_size_mi[MAX_TILES] = { 0 }; // Size of tile group in mi units
+ int tile_idx;
+ int tg_idx = 0;
+ int tile_count_in_tg = 0;
+ int new_tg = 1;
+
+ // Populate pack bitstream params of all tiles.
+ for (tile_idx = 0; tile_idx < num_tiles; tile_idx++) {
+ const TileInfo *const tile_info = &cpi->tile_data[tile_idx].tile_info;
+ PackBSParams *const pack_bs_params = &pack_bs_params_arr[tile_idx];
+ // Calculate tile size in mi units.
+ const int tile_size_mi = (tile_info->mi_col_end - tile_info->mi_col_start) *
+ (tile_info->mi_row_end - tile_info->mi_row_start);
+ int is_last_tile_in_tg = 0;
+ tile_count_in_tg++;
+ if (tile_count_in_tg == tg_size_in_tiles || tile_idx == (num_tiles - 1))
+ is_last_tile_in_tg = 1;
+
+ // Populate pack bitstream params of this tile.
+ pack_bs_params->curr_tg_hdr_size = 0;
+ pack_bs_params->obu_extn_header = obu_extn_header;
+ pack_bs_params->saved_wb = saved_wb;
+ pack_bs_params->obu_header_size = 0;
+ pack_bs_params->is_last_tile_in_tg = is_last_tile_in_tg;
+ pack_bs_params->new_tg = new_tg;
+ pack_bs_params->tile_col = tile_info->tile_col;
+ pack_bs_params->tile_row = tile_info->tile_row;
+ pack_bs_params->tile_size_mi = tile_size_mi;
+ tg_size_mi[tg_idx] += tile_size_mi;
+
+ if (new_tg) new_tg = 0; // only the first tile of a group carries new_tg
+ if (is_last_tile_in_tg) {
+ tile_count_in_tg = 0;
+ new_tg = 1;
+ tg_idx++;
+ }
+ }
+
+ assert(cpi->available_bs_size > 0);
+ size_t tg_buf_size[MAX_TILES] = { 0 };
+ size_t max_buf_size = cpi->available_bs_size;
+ size_t remain_buf_size = max_buf_size;
+ const int frame_size_mi = cm->mi_params.mi_rows * cm->mi_params.mi_cols;
+
+ tile_idx = 0;
+ // Prepare obu, tile group and frame header of each tile group.
+ for (tg_idx = 0; tg_idx < cpi->num_tg; tg_idx++) {
+ PackBSParams *const pack_bs_params = &pack_bs_params_arr[tile_idx];
+ int is_last_tg = tg_idx == cpi->num_tg - 1;
+ // Prorate bitstream buffer size based on tile group size and available
+ // buffer size. This buffer will be used to store headers and tile data.
+ tg_buf_size[tg_idx] =
+ get_bs_chunk_size(tg_size_mi[tg_idx], frame_size_mi, &remain_buf_size,
+ max_buf_size, is_last_tg);
+
+ pack_bs_params->dst = tile_dst;
+ pack_bs_params->tile_data_curr = tile_dst;
+
+ // Write obu, tile group and frame header at first tile in the tile
+ // group.
+ av1_write_obu_tg_tile_headers(cpi, xd, pack_bs_params, tile_idx);
+ tile_dst += tg_buf_size[tg_idx]; // start of the next tile group's buffer
+
+ // Exclude headers from tile group buffer size.
+ tg_buf_size[tg_idx] -= pack_bs_params->curr_tg_hdr_size;
+ tile_idx += tg_size_in_tiles; // index of the next group's first tile
+ }
+
+ tg_idx = 0;
+ // Calculate bitstream buffer size of each tile in the tile group.
+ for (tile_idx = 0; tile_idx < num_tiles; tile_idx++) {
+ PackBSParams *const pack_bs_params = &pack_bs_params_arr[tile_idx];
+
+ if (pack_bs_params->new_tg) {
+ max_buf_size = tg_buf_size[tg_idx];
+ remain_buf_size = max_buf_size;
+ }
+
+ // Prorate bitstream buffer size of this tile based on tile size and
+ // available buffer size. For this proration, header size is not accounted.
+ const size_t tile_buf_size = get_bs_chunk_size(
+ pack_bs_params->tile_size_mi, tg_size_mi[tg_idx], &remain_buf_size,
+ max_buf_size, pack_bs_params->is_last_tile_in_tg);
+ pack_bs_params->tile_buf_size = tile_buf_size;
+
+ // Update base address of bitstream buffer for tile and tile group.
+ if (pack_bs_params->new_tg) {
+ tile_dst = pack_bs_params->dst;
+ tile_data_curr = pack_bs_params->tile_data_curr;
+ // Account header size in first tile of a tile group.
+ pack_bs_params->tile_buf_size += pack_bs_params->curr_tg_hdr_size;
+ } else {
+ pack_bs_params->dst = tile_dst;
+ pack_bs_params->tile_data_curr = tile_data_curr;
+ }
+
+ if (pack_bs_params->is_last_tile_in_tg) tg_idx++;
+ tile_dst += pack_bs_params->tile_buf_size; // next tile starts after this slot
+ }
+}
+
+// Worker hook of pack bitstream multithreading: packs tiles until none remain.
+static int pack_bs_worker_hook(void *arg1, void *arg2) {
+ EncWorkerData *const thread_data = (EncWorkerData *)arg1;
+ PackBSParams *const pack_bs_params = (PackBSParams *)arg2; // indexed by tile_idx
+ AV1_COMP *const cpi = thread_data->cpi;
+ AV1_COMMON *const cm = &cpi->common;
+ AV1EncPackBSSync *const pack_bs_sync = &cpi->mt_info.pack_bs_sync;
+ const CommonTileParams *const tiles = &cm->tiles;
+ const int num_tiles = tiles->cols * tiles->rows;
+
+ while (1) {
+#if CONFIG_MULTITHREAD
+ pthread_mutex_lock(pack_bs_sync->mutex_); // guards the next_job_idx update
+#endif
+ const int tile_idx = get_next_pack_bs_tile_idx(pack_bs_sync, num_tiles);
+#if CONFIG_MULTITHREAD
+ pthread_mutex_unlock(pack_bs_sync->mutex_);
+#endif
+ if (tile_idx == -1) break; // every tile has been handed out
+ TileDataEnc *this_tile = &cpi->tile_data[tile_idx];
+ thread_data->td->mb.e_mbd.tile_ctx = &this_tile->tctx;
+
+ av1_pack_tile_info(cpi, thread_data->td, &pack_bs_params[tile_idx]);
+ }
+
+ return 1;
+}
+
+// Prepares thread data, workers and the tile job order of pack bitstream multithreading.
+static void prepare_pack_bs_workers(AV1_COMP *const cpi,
+ PackBSParams *const pack_bs_params,
+ AVxWorkerHook hook, const int num_workers) {
+ MultiThreadInfo *const mt_info = &cpi->mt_info;
+ for (int i = num_workers - 1; i >= 0; i--) {
+ AVxWorker *worker = &mt_info->workers[i];
+ EncWorkerData *const thread_data = &mt_info->tile_thr_data[i];
+ if (i == 0) thread_data->td = &cpi->td; // worker 0 uses the thread data in cpi
+
+ if (thread_data->td != &cpi->td) thread_data->td->mb = cpi->td.mb; // struct copy of cpi's mb
+
+ thread_data->cpi = cpi;
+ thread_data->start = i;
+ thread_data->thread_id = i;
+ av1_reset_pack_bs_thread_data(thread_data->td);
+
+ worker->hook = hook;
+ worker->data1 = thread_data;
+ worker->data2 = pack_bs_params;
+ }
+
+ AV1_COMMON *const cm = &cpi->common;
+ AV1EncPackBSSync *const pack_bs_sync = &mt_info->pack_bs_sync;
+ const uint16_t num_tiles = cm->tiles.rows * cm->tiles.cols;
+#if CONFIG_MULTITHREAD
+ if (pack_bs_sync->mutex_ == NULL) {
+ CHECK_MEM_ERROR(cm, pack_bs_sync->mutex_,
+ aom_malloc(sizeof(*pack_bs_sync->mutex_)));
+ if (pack_bs_sync->mutex_) pthread_mutex_init(pack_bs_sync->mutex_, NULL);
+ }
+#endif
+ pack_bs_sync->next_job_idx = 0;
+
+ PackBSTileOrder *const pack_bs_tile_order = pack_bs_sync->pack_bs_tile_order;
+ // Reset tile order data of pack bitstream
+ av1_zero_array(pack_bs_tile_order, num_tiles);
+
+ // Populate pack bitstream tile order structure
+ for (uint16_t tile_idx = 0; tile_idx < num_tiles; tile_idx++) {
+ pack_bs_tile_order[tile_idx].abs_sum_level =
+ cpi->tile_data[tile_idx].abs_sum_level;
+ pack_bs_tile_order[tile_idx].tile_idx = tile_idx;
+ }
+
+ // Sort tiles in descending order of abs_sum_level (see compare_tile_order).
+ qsort(pack_bs_tile_order, num_tiles, sizeof(*pack_bs_tile_order),
+ compare_tile_order);
+}
+
+// Accumulates data after pack bitstream processing: compacts tile chunks in dst and sums sizes.
+static void accumulate_pack_bs_data(
+ AV1_COMP *const cpi, const PackBSParams *const pack_bs_params_arr,
+ uint8_t *const dst, uint32_t *total_size, const FrameHeaderInfo *fh_info,
+ int *const largest_tile_id, unsigned int *max_tile_size,
+ uint32_t *const obu_header_size, uint8_t **tile_data_start,
+ const int num_workers) {
+ const AV1_COMMON *const cm = &cpi->common;
+ const CommonTileParams *const tiles = &cm->tiles;
+ const int tile_count = tiles->cols * tiles->rows;
+ // Fixed size tile groups for the moment
+ size_t curr_tg_data_size = 0;
+ int is_first_tg = 1;
+ uint8_t *curr_tg_start = dst;
+ size_t src_offset = 0;
+ size_t dst_offset = 0;
+
+ for (int tile_idx = 0; tile_idx < tile_count; tile_idx++) {
+ // PackBSParams stores all parameters required to pack tile and header
+ // info.
+ const PackBSParams *const pack_bs_params = &pack_bs_params_arr[tile_idx];
+ uint32_t tile_size = 0;
+
+ if (pack_bs_params->new_tg) {
+ curr_tg_start = dst + *total_size;
+ curr_tg_data_size = pack_bs_params->curr_tg_hdr_size;
+ *tile_data_start += pack_bs_params->curr_tg_hdr_size;
+ *obu_header_size = pack_bs_params->obu_header_size;
+ }
+ curr_tg_data_size +=
+ pack_bs_params->buf.size + (pack_bs_params->is_last_tile_in_tg ? 0 : 4);
+
+ if (pack_bs_params->buf.size > *max_tile_size) {
+ *largest_tile_id = tile_idx;
+ *max_tile_size = (unsigned int)pack_bs_params->buf.size;
+ }
+ tile_size +=
+ (uint32_t)pack_bs_params->buf.size + *pack_bs_params->total_size;
+
+ // Pack all the chunks of tile bitstreams together
+ if (tile_idx != 0) memmove(dst + dst_offset, dst + src_offset, tile_size); // tile 0: both offsets are 0
+
+ if (pack_bs_params->is_last_tile_in_tg)
+ av1_write_last_tile_info(
+ cpi, fh_info, pack_bs_params->saved_wb, &curr_tg_data_size,
+ curr_tg_start, &tile_size, tile_data_start, largest_tile_id,
+ &is_first_tg, *obu_header_size, pack_bs_params->obu_extn_header);
+ src_offset += pack_bs_params->tile_buf_size; // step over the slot reserved for this tile
+ dst_offset += tile_size;
+ *total_size += tile_size;
+ }
+
+ // Accumulate thread data
+ MultiThreadInfo *const mt_info = &cpi->mt_info;
+ for (int idx = num_workers - 1; idx >= 0; idx--) {
+ ThreadData const *td = mt_info->tile_thr_data[idx].td;
+ av1_accumulate_pack_bs_thread_data(cpi, td);
+ }
+}
+
+// Packs all tiles of the frame with the pack bitstream worker threads and
+// merges the per-tile results into dst.
+void av1_write_tile_obu_mt(
+    AV1_COMP *const cpi, uint8_t *const dst, uint32_t *total_size,
+    struct aom_write_bit_buffer *saved_wb, uint8_t obu_extn_header,
+    const FrameHeaderInfo *fh_info, int *const largest_tile_id,
+    unsigned int *max_tile_size, uint32_t *const obu_header_size,
+    uint8_t **tile_data_start) {
+  MultiThreadInfo *const mt_info = &cpi->mt_info;
+  const int num_workers = mt_info->num_mod_workers[MOD_PACK_BS];
+  PackBSParams params[MAX_TILES];
+  uint32_t sizes[MAX_TILES] = { 0 };
+  // Point each tile's total_size at its own zero-initialised counter.
+  for (int i = 0; i < MAX_TILES; ++i) params[i].total_size = &sizes[i];
+
+  init_tile_pack_bs_params(cpi, dst, saved_wb, params, obu_extn_header);
+  prepare_pack_bs_workers(cpi, params, pack_bs_worker_hook, num_workers);
+  launch_workers(mt_info, num_workers);
+  sync_enc_workers(mt_info, &cpi->common, num_workers);
+  // Compact the per-tile chunks and fold the results back into the outputs.
+  accumulate_pack_bs_data(cpi, params, dst, total_size, fh_info,
+                          largest_tile_id, max_tile_size, obu_header_size,
+                          tile_data_start, num_workers);
+}
+
// Deallocate memory for CDEF search multi-thread synchronization.
void av1_cdef_mt_dealloc(AV1CdefSync *cdef_sync) {
(void)cdef_sync;
@@ -1780,6 +2171,9 @@ static void update_next_job_info(AV1CdefSync *cdef_sync, int nvfb, int nhfb) {
// Initializes cdef_sync parameters.
static AOM_INLINE void cdef_reset_job_info(AV1CdefSync *cdef_sync) {
+#if CONFIG_MULTITHREAD
+ if (cdef_sync->mutex_) pthread_mutex_init(cdef_sync->mutex_, NULL);
+#endif // CONFIG_MULTITHREAD
cdef_sync->end_of_frame = 0;
cdef_sync->fbr = 0;
cdef_sync->fbc = 0;
@@ -1896,6 +2290,12 @@ static AOM_INLINE int compute_num_lr_workers(AV1_COMP *cpi) {
return compute_num_enc_workers(cpi, cpi->oxcf.max_threads);
}
+// Worker count for pack bitstream multi-threading; 1 when max_threads <= 1.
+static AOM_INLINE int compute_num_pack_bs_workers(AV1_COMP *cpi) {
+  return cpi->oxcf.max_threads <= 1 ? 1 : compute_num_enc_tile_mt_workers(
+      &cpi->common, cpi->oxcf.max_threads);
+}
+
int compute_num_mod_workers(AV1_COMP *cpi, MULTI_THREADED_MODULES mod_name) {
int num_mod_workers = 0;
switch (mod_name) {
@@ -1915,7 +2315,9 @@ int compute_num_mod_workers(AV1_COMP *cpi, MULTI_THREADED_MODULES mod_name) {
case MOD_CDEF_SEARCH:
num_mod_workers = compute_num_cdef_workers(cpi);
break;
+ case MOD_CDEF: num_mod_workers = compute_num_cdef_workers(cpi); break;
case MOD_LR: num_mod_workers = compute_num_lr_workers(cpi); break;
+ case MOD_PACK_BS: num_mod_workers = compute_num_pack_bs_workers(cpi); break;
default: assert(0); break;
}
return (num_mod_workers);