diff options
author | akshayragir833 <110660198+akshayragir833@users.noreply.github.com> | 2024-03-06 15:13:14 +0530 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-03-06 15:13:14 +0530 |
commit | 58381f7993b4af3a07191f55a4b50f5a943a9bb2 (patch) | |
tree | 16f3f156c028f640ed7670318778538bff1238d2 | |
parent | d0702a8795b2c1d35c7bed330889326a50b1b599 (diff) | |
download | libxaac-58381f7993b4af3a07191f55a4b50f5a943a9bb2.tar.gz |
Audio pre-roll and loudness measurement support for USAC (#79)
Tests done: Smoke test
34 files changed, 1420 insertions, 242 deletions
@@ -430,6 +430,7 @@ cc_library_static { "encoder/ixheaace_hybrid.c", "encoder/ixheaace_hybrid_init.c", "encoder/ixheaace_interface.c", + "encoder/ixheaace_loudness_measurement.c", "encoder/ixheaace_mdct_480.c", "encoder/ixheaace_mps_bitstream.c", "encoder/ixheaace_mps_dct.c", diff --git a/README_enc.md b/README_enc.md index 8f0d152..65ab9ba 100644 --- a/README_enc.md +++ b/README_enc.md @@ -52,6 +52,8 @@ The configuration file for DRC is placed in `encoder\test` directory(impd_drc_co [-esbr_hq:<esbr_hq_flag>] [-drc:<drc_flag>] [-inter_tes_enc:<inter_tes_enc_flag>] +[-rap:<random access interval in ms>] +[-stream_id:<stream identifier>] where, <paramfile> is the parameter file with multiple commands @@ -101,6 +103,9 @@ where, <esbr_hq_flag> Valid values are 0 (disable high quality eSBR) and 1 (enable high quality eSBR). Default is 0. <drc_flag> Valid values are 0 (disable DRC encoding) and 1 (enable DRC encoding). Default is 0. <inter_tes_enc_flag> Valid values are 0 (disable inter-TES encoding) and 1 (enable inter-TES encoding). Default is 0. + <random access interval in ms> is the time interval between audio preroll frames in ms. It is applicable only for AOT 42. Valid values are -1 (Audio preroll sent only at beginning of file) and greater than 1000 ms. Default is -1. + <stream identifier> It is the stream id used to uniquely identify configuration of a stream within a set of associated streams. It is applicable only for AOT 42. Valid values are 0 to 65535. Any value outside this range is type-casted to a value of unsigned short type. Default is 0. 
+ ``` Sample CLI: ``` diff --git a/docs/LIBXAAC-Enc-API.pdf b/docs/LIBXAAC-Enc-API.pdf Binary files differindex 9c2d011..6fd7a0c 100644 --- a/docs/LIBXAAC-Enc-API.pdf +++ b/docs/LIBXAAC-Enc-API.pdf diff --git a/docs/LIBXAAC-Enc-GSG.pdf b/docs/LIBXAAC-Enc-GSG.pdf Binary files differindex 008595b..7b67eab 100644 --- a/docs/LIBXAAC-Enc-GSG.pdf +++ b/docs/LIBXAAC-Enc-GSG.pdf diff --git a/encoder/drc_src/impd_drc_api.c b/encoder/drc_src/impd_drc_api.c index 0a1b6e1..75d96a7 100644 --- a/encoder/drc_src/impd_drc_api.c +++ b/encoder/drc_src/impd_drc_api.c @@ -18,6 +18,7 @@ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore */ +#include <string.h> #include "ixheaac_type_def.h" #include "ixheaac_error_standards.h" #include "ixheaace_error_codes.h" @@ -311,6 +312,22 @@ IA_ERRORCODE impd_drc_enc_init(VOID *pstr_drc_state, VOID *ptr_drc_scratch, return err_code; } +IA_ERRORCODE impd_loudness_info_init(VOID *pstr_drc_state, ia_drc_input_config *pstr_inp_config) { + IA_ERRORCODE err_code = IA_NO_ERROR; + ia_drc_enc_state *pstr_drc_state_local = pstr_drc_state; + + iusace_create_bit_buffer(&pstr_drc_state_local->str_bit_buf_cfg_ext, + pstr_drc_state_local->bit_buf_base_cfg_ext, + sizeof(pstr_drc_state_local->bit_buf_base_cfg_ext), 1); + + memcpy(&pstr_drc_state_local->str_gain_enc.str_loudness_info_set, + &pstr_inp_config->str_enc_loudness_info_set, + sizeof(ia_drc_loudness_info_set_struct)); + + err_code = impd_drc_write_measured_loudness_info(pstr_drc_state_local); + return err_code; +} + IA_ERRORCODE impd_drc_enc(VOID *pstr_drc_state, FLOAT32 **pptr_input, UWORD32 inp_offset, WORD32 *ptr_bits_written, VOID *pstr_scratch) { LOOPIDX i, j, k; diff --git a/encoder/drc_src/impd_drc_api.h b/encoder/drc_src/impd_drc_api.h index ec82298..8e6ec9b 100644 --- a/encoder/drc_src/impd_drc_api.h +++ b/encoder/drc_src/impd_drc_api.h @@ -36,8 +36,17 @@ typedef struct { ia_drc_uni_drc_gain_ext_struct str_enc_gain_extension; } ia_drc_input_config; +typedef struct { + 
ia_drc_enc_params_struct str_enc_params; + ia_drc_uni_drc_config_struct str_uni_drc_config; + ia_drc_loudness_info_set_struct str_enc_loudness_info_set; + ia_drc_uni_drc_gain_ext_struct str_enc_gain_extension; +} ia_drc_internal_config; + IA_ERRORCODE impd_drc_enc_init(VOID *pstr_drc_state, VOID *ptr_drc_scratch, ia_drc_input_config *pstr_inp_config); IA_ERRORCODE impd_drc_enc(VOID *pstr_drc_state, FLOAT32 **pptr_input, UWORD32 inp_offset, WORD32 *ptr_bits_written, VOID *pstr_scratch); + +IA_ERRORCODE impd_loudness_info_init(VOID *pstr_drc_state, ia_drc_input_config *pstr_inp_config);
\ No newline at end of file diff --git a/encoder/drc_src/impd_drc_enc.h b/encoder/drc_src/impd_drc_enc.h index a2a1d6b..444a817 100644 --- a/encoder/drc_src/impd_drc_enc.h +++ b/encoder/drc_src/impd_drc_enc.h @@ -38,3 +38,9 @@ IA_ERRORCODE impd_drc_write_loudness_info_set_extension( IA_ERRORCODE impd_drc_write_uni_drc_config(ia_drc_enc_state *pstr_drc_state, WORD32 *ptr_bit_cnt); VOID impd_drc_write_uni_drc_gain(ia_drc_enc_state *pstr_drc_state, WORD32 *ptr_bit_cnt); + +IA_ERRORCODE impd_drc_write_measured_loudness_info(ia_drc_enc_state *pstr_drc_state); + +IA_ERRORCODE impd_drc_write_loudness_info_set(ia_drc_enc_state *pstr_drc_state, + ia_bit_buf_struct *it_bit_buf, + WORD32 *ptr_bit_cnt);
\ No newline at end of file diff --git a/encoder/drc_src/impd_drc_mux.c b/encoder/drc_src/impd_drc_mux.c index 1818095..4d2718e 100644 --- a/encoder/drc_src/impd_drc_mux.c +++ b/encoder/drc_src/impd_drc_mux.c @@ -3005,6 +3005,20 @@ IA_ERRORCODE impd_drc_write_uni_drc_config(ia_drc_enc_state *pstr_drc_state, return err_code; } +IA_ERRORCODE impd_drc_write_measured_loudness_info(ia_drc_enc_state *pstr_drc_state) { + + IA_ERRORCODE err_code = IA_NO_ERROR; + ia_bit_buf_struct *it_bit_buf_lis = &pstr_drc_state->str_bit_buf_cfg_ext; + WORD32 bit_cnt_lis = 0; + err_code = impd_drc_write_loudness_info_set(pstr_drc_state, it_bit_buf_lis, &bit_cnt_lis); + if (err_code & IA_FATAL_ERROR) { + return (err_code); + } + pstr_drc_state->drc_config_ext_data_size_bit = bit_cnt_lis; + + return err_code; +} + IA_ERRORCODE impd_drc_enc_initial_gain(const WORD32 gain_coding_profile, FLOAT32 gain_initial, FLOAT32 *gain_initial_quant, WORD32 *code_size, WORD32 *code) { diff --git a/encoder/drc_src/impd_drc_uni_drc.h b/encoder/drc_src/impd_drc_uni_drc.h index 95f656a..34f844e 100644 --- a/encoder/drc_src/impd_drc_uni_drc.h +++ b/encoder/drc_src/impd_drc_uni_drc.h @@ -130,6 +130,8 @@ #define MIN_METHOD_VALUE (-116.0f) #define MAX_METHOD_VALUE (121.0f) #define MAX_FLT_VAL_DB (770.6367883810890080451095799195f) +#define DEFAULT_METHOD_VALUE (-31.0f) +#define DEFAULT_SAMPLE_PEAK_VALUE (-31.0f) typedef struct { WORD32 level_estim_k_weighting_type; diff --git a/encoder/iusace_cnst.h b/encoder/iusace_cnst.h index e439f5a..fb9234e 100644 --- a/encoder/iusace_cnst.h +++ b/encoder/iusace_cnst.h @@ -201,3 +201,12 @@ #define USACE_MAX_SCR_SIZE (733836) #define USACE_SCR_STACK (10 * 1024) #define MAX_USAC_ESBR_BITRATE (96000) + +#define MAX_PREROLL_FRAMES (3) +#define MAX_OUTPUT_BYTES_PER_CH (768) +#define MAXIMUM_VALUE_8BIT (255) +#define DEFAULT_RAP_INTERVAL_IN_MS (-1) +#define MIN_RAP_INTERVAL_IN_MS (1000) +#define MAX_PREROLL_CONFIG_SIZE (1024) +#define CC_NUM_PREROLL_FRAMES (1) +#define 
USAC_FIRST_FRAME_FLAG_DEFAULT_VALUE (1) diff --git a/encoder/iusace_config.h b/encoder/iusace_config.h index c479a1a..bf04c69 100644 --- a/encoder/iusace_config.h +++ b/encoder/iusace_config.h @@ -143,10 +143,13 @@ typedef struct { #define ID_EXT_ELE_FILL 0 #define ID_EXT_ELE_UNI_DRC 4 +#define ID_EXT_ELE_AUDIOPREROLL (3) #define ID_CONFIG_EXT_FILL 0 #define ID_CONFIG_EXT_DOWNMIX (1) #define ID_CONFIG_EXT_LOUDNESS_INFO (2) +#define ID_CONFIG_EXT_STREAM_ID (7) +#define CONFIG_EXT_LEN_STREAM_ID (2) #define NUM_COEFF (1024) typedef enum { @@ -223,6 +226,7 @@ typedef struct { WORD32 output_channel_pos[BS_MAX_NUM_OUT_CHANNELS]; WORD32 ccfl; ia_usac_enc_element_config_struct str_usac_element_config[USAC_MAX_ELEMENTS]; + UWORD16 stream_identifier; } ia_usac_config_struct; typedef struct { @@ -272,6 +276,16 @@ typedef struct { WORD32 drc_frame_size; ia_drc_input_config str_drc_cfg; WORD32 use_acelp_only; + WORD32 random_access_interval; + WORD32 preroll_flag; + WORD32 num_preroll_frames; + WORD32 preroll_idx; + WORD32 is_ipf; + WORD32 preroll_frame; + WORD32 is_first_frame; + ia_drc_internal_config str_internal_drc_cfg; + WORD32 use_measured_loudness; + UWORD16 stream_id; } ia_usac_encoder_config_struct; typedef struct { diff --git a/encoder/iusace_enc_main.c b/encoder/iusace_enc_main.c index 948c2de..e1b95e0 100644 --- a/encoder/iusace_enc_main.c +++ b/encoder/iusace_enc_main.c @@ -618,7 +618,14 @@ IA_ERRORCODE iusace_enc_init(ia_usac_encoder_config_struct *ptr_usac_config, p_audio_specific_config->channel_configuration = ptr_usac_config->channels; } } - + ia_usac_enc_element_config_struct *pstr_usac_elem_config = + &(pstr_asc_usac_config->str_usac_element_config[pstr_asc_usac_config->num_elements]); + pstr_asc_usac_config->usac_element_type[pstr_asc_usac_config->num_elements] = ID_USAC_EXT; + pstr_usac_elem_config->usac_ext_ele_type = ID_EXT_ELE_AUDIOPREROLL; + pstr_usac_elem_config->usac_ext_ele_dflt_len_present = 0; + pstr_usac_elem_config->usac_ext_ele_payload_present 
= 0; + pstr_usac_elem_config->usac_ext_ele_cfg_len = 0; + pstr_asc_usac_config->num_elements++; // DRC Config if (ptr_usac_config->use_drc_element) { ptr_usac_config->str_drc_cfg.str_uni_drc_config.str_channel_layout.base_ch_count = @@ -637,26 +644,40 @@ IA_ERRORCODE iusace_enc_init(ia_usac_encoder_config_struct *ptr_usac_config, if (ptr_usac_config->use_drc_element) { ia_usac_enc_element_config_struct *pstr_usac_elem_config = - &(pstr_asc_usac_config->str_usac_element_config[pstr_asc_usac_config->num_elements]); + &(pstr_asc_usac_config->str_usac_element_config[pstr_asc_usac_config->num_elements]); pstr_asc_usac_config->usac_element_type[pstr_asc_usac_config->num_elements] = ID_USAC_EXT; pstr_usac_elem_config->usac_ext_ele_type = ID_EXT_ELE_UNI_DRC; pstr_usac_elem_config->usac_ext_ele_dflt_len_present = 0; pstr_usac_elem_config->usac_ext_ele_payload_present = 0; pstr_usac_elem_config->drc_config_data = usac_data->str_drc_state.bit_buf_base_cfg; pstr_usac_elem_config->usac_ext_ele_cfg_len = - (usac_data->str_drc_state.drc_config_data_size_bit + 7) >> 3; + (usac_data->str_drc_state.drc_config_data_size_bit + 7) >> 3; pstr_asc_usac_config->num_elements++; } + } else { + err_code = + impd_loudness_info_init(&usac_data->str_drc_state, &ptr_usac_config->str_drc_cfg); + if (err_code) { + return err_code; + } } - if (ptr_usac_config->use_drc_element) // For Loudness - { - pstr_asc_usac_config->usac_config_ext_type[pstr_asc_usac_config->num_config_extensions] = - ID_CONFIG_EXT_LOUDNESS_INFO; - pstr_asc_usac_config->usac_config_ext_len[pstr_asc_usac_config->num_config_extensions] = - (usac_data->str_drc_state.drc_config_ext_data_size_bit + 7) >> 3; - pstr_asc_usac_config->usac_config_ext_buf[pstr_asc_usac_config->num_config_extensions] = - usac_data->str_drc_state.bit_buf_base_cfg_ext; - pstr_asc_usac_config->num_config_extensions++; + + pstr_asc_usac_config->usac_config_ext_type[pstr_asc_usac_config->num_config_extensions] = + ID_CONFIG_EXT_STREAM_ID; + 
pstr_asc_usac_config->usac_config_ext_len[pstr_asc_usac_config->num_config_extensions] = + CONFIG_EXT_LEN_STREAM_ID; + pstr_asc_usac_config->num_config_extensions++; + pstr_asc_usac_config->stream_identifier = ptr_usac_config->stream_id; + + pstr_asc_usac_config->usac_config_ext_type[pstr_asc_usac_config->num_config_extensions] = + ID_CONFIG_EXT_LOUDNESS_INFO; + pstr_asc_usac_config->usac_config_ext_len[pstr_asc_usac_config->num_config_extensions] = + (usac_data->str_drc_state.drc_config_ext_data_size_bit + 7) >> 3; + pstr_asc_usac_config->usac_config_ext_buf[pstr_asc_usac_config->num_config_extensions] = + usac_data->str_drc_state.bit_buf_base_cfg_ext; + pstr_asc_usac_config->num_config_extensions++; + + if (pstr_asc_usac_config->num_config_extensions) { pstr_asc_usac_config->usac_cfg_ext_present = 1; } @@ -778,7 +799,7 @@ IA_ERRORCODE iusace_enc_init(ia_usac_encoder_config_struct *ptr_usac_config, for (; elem_idx < pstr_asc_usac_config->num_elements; elem_idx++) { idx = elem_idx - pstr_asc_usac_config->num_ext_elements; - pstr_asc_usac_config->str_usac_element_config[idx].noise_filling = + pstr_asc_usac_config->str_usac_element_config[elem_idx].noise_filling = usac_data->noise_filling[idx]; usac_data->channel_elem_type[idx] = pstr_asc_usac_config->usac_element_type[elem_idx]; } @@ -792,6 +813,8 @@ IA_ERRORCODE iusace_enc_init(ia_usac_encoder_config_struct *ptr_usac_config, pstr_usac_elem_config->usac_ext_ele_dflt_len_present = 0; pstr_usac_elem_config->usac_ext_ele_payload_present = 0; pstr_asc_usac_config->num_elements++; + ptr_usac_config->num_ext_elements++; + ptr_usac_config->num_elements++; } if (ptr_usac_config->codec_mode == USAC_SWITCHED) { @@ -1045,7 +1068,7 @@ IA_ERRORCODE ixheaace_usac_encode(FLOAT32 **ptr_input, WORD32 len_next_high_rate; WORD8 elem_idx, nr_core_coder_channels = 0, chn = 0; WORD32 ch_offset = 0; - WORD32 elem_idx_max = ptr_usac_config->num_elements; + WORD32 elem_idx_max = ptr_usac_config->num_elements - 
ptr_usac_config->num_ext_elements; WORD32 td_buffer_offset = (TD_BUFFER_OFFSET * ptr_usac_config->ccfl) / FRAME_LEN_LONG; usac_independency_flg = ptr_usac_data->usac_independency_flag; @@ -1063,15 +1086,16 @@ IA_ERRORCODE ixheaace_usac_encode(FLOAT32 **ptr_input, ptr_usac_data->min_bits_needed = 0; } - if (ptr_usac_config->use_drc_element == 1) { - elem_idx_max -= 1; - } - num_bits = 0; - - iusace_write_bits_buf(pstr_it_bit_buff, usac_independency_flg, 1); - num_bits++; - + if (ptr_usac_config->preroll_flag) { + if (ptr_usac_config->iframes_interval != ptr_usac_config->num_preroll_frames) { + iusace_write_bits_buf(pstr_it_bit_buff, usac_independency_flg, 1); + num_bits++; + } + } else { + iusace_write_bits_buf(pstr_it_bit_buff, usac_independency_flg, 1); + num_bits++; + } for (elem_idx = 0; elem_idx < elem_idx_max; elem_idx++) { switch (ptr_usac_data->channel_elem_type[elem_idx]) { case USAC_ELEMENT_TYPE_SCE: @@ -1289,7 +1313,12 @@ IA_ERRORCODE ixheaace_usac_encode(FLOAT32 **ptr_input, } } } - + if (ptr_usac_config->preroll_flag) { + if (ptr_usac_config->iframes_interval != ptr_usac_config->num_preroll_frames) { + iusace_write_bits_buf(pstr_it_bit_buff, 0, 1); // extension element present + num_bits++; + } + } if (ptr_usac_config->use_drc_element) { WORD32 num_bits_ext_elem = 0; err = iusace_enc_ext_elemts(ID_EXT_ELE_UNI_DRC, ptr_usac_config, pstr_state, pstr_asc, @@ -1424,4 +1453,4 @@ IA_ERRORCODE ixheaace_usac_encode(FLOAT32 **ptr_input, } return err; -} +}
\ No newline at end of file diff --git a/encoder/iusace_main.h b/encoder/iusace_main.h index 76fc8e9..176f737 100644 --- a/encoder/iusace_main.h +++ b/encoder/iusace_main.h @@ -97,6 +97,9 @@ typedef struct { WORD32 min_bits_needed; WORD32 num_drc_bits; WORD32 use_acelp_only; + WORD32 prev_out_bytes[MAX_PREROLL_FRAMES]; + UWORD8 prev_out_data[MAX_PREROLL_FRAMES][MAX_OUTPUT_BYTES_PER_CH * IXHEAACE_MAX_CH_IN_BS_ELE]; + UWORD32 stereo_config_index; } ia_usac_data_struct; typedef struct { diff --git a/encoder/iusace_psy_utils.c b/encoder/iusace_psy_utils.c index 9f6b6df..fe06a07 100644 --- a/encoder/iusace_psy_utils.c +++ b/encoder/iusace_psy_utils.c @@ -160,6 +160,9 @@ static VOID iusace_sfb_init(WORD32 sample_rate, WORD32 block_type, WORD32 *ptr_s case 8000: ptr_sfb_params = pstr_sfb_info_tbls[0].cb_offset_long; break; + default: + ptr_sfb_params = pstr_sfb_info_tbls[8].cb_offset_long; + break; } } else { block_len = ccfl >> 3; @@ -202,6 +205,9 @@ static VOID iusace_sfb_init(WORD32 sample_rate, WORD32 block_type, WORD32 *ptr_s case 8000: ptr_sfb_params = pstr_sfb_info_tbls[0].cb_offset_short; break; + default: + ptr_sfb_params = pstr_sfb_info_tbls[8].cb_offset_short; + break; } } diff --git a/encoder/iusace_write_bitstream.c b/encoder/iusace_write_bitstream.c index bca4f0f..da218c6 100644 --- a/encoder/iusace_write_bitstream.c +++ b/encoder/iusace_write_bitstream.c @@ -509,7 +509,7 @@ WORD32 iusace_write_cpe(ia_sfb_params_struct *pstr_sfb_prms, ia_bit_buf_struct * pstr_sfb_prms->num_window_groups[ch], pstr_sfb_prms->max_sfb_ste); { - if (ms_mask == 3) { + if ((ms_mask == 3) && (pstr_usac_data->stereo_config_index == 0)) { bit_count += iusace_write_cplx_pred_data( it_bit_buf, pstr_sfb_prms->num_window_groups[ch], pstr_sfb_prms->max_sfb_ste, pstr_usac_data->complex_coef[ch], pstr_usac_data->pred_coef_re[ch], diff --git a/encoder/ixheaace_api.c b/encoder/ixheaace_api.c index 92c24d5..2659d40 100644 --- a/encoder/ixheaace_api.c +++ b/encoder/ixheaace_api.c @@ -136,6 
+136,7 @@ #include "ixheaace_api_defs.h" #include "ixheaace_write_adts_adif.h" +#include "ixheaace_loudness_measurement.h" #include "iusace_psy_utils.h" static WORD32 iusace_scratch_size(VOID) { @@ -527,6 +528,9 @@ static VOID ixheaace_set_default_config(ixheaace_api_struct *pstr_api_struct, pstr_usac_config->tns_select = USAC_TNS_DEFAULT_VALUE; pstr_usac_config->flag_noiseFilling = USAC_FLAG_NOISE_FILLING_DEFAULT_VALUE; pstr_usac_config->use_acelp_only = USAC_DEFAULT_ACELP_FLAG_VALUE; + pstr_usac_config->is_first_frame = USAC_FIRST_FRAME_FLAG_DEFAULT_VALUE; + pstr_usac_config->num_preroll_frames = CC_NUM_PREROLL_FRAMES; + pstr_usac_config->stream_id = USAC_DEFAULT_STREAM_ID_VALUE; } /* Initialize table pointers */ ia_enhaacplus_enc_init_aac_tabs(&(pstr_api_struct->pstr_aac_tabs)); @@ -681,7 +685,7 @@ static VOID ixheaace_validate_config_params(ixheaace_input_config *pstr_input_co { if ((pstr_input_config->codec_mode == USAC_SWITCHED || - pstr_input_config->codec_mode == USAC_ONLY_TD) && + pstr_input_config->codec_mode == USAC_ONLY_TD) && pstr_input_config->esbr_flag && pstr_input_config->i_samp_freq > 24000) { if (pstr_input_config->ccfl_idx == NO_SBR_CCFL_768) { pstr_input_config->ccfl_idx = SBR_8_3; // Use 8:3 eSBR @@ -709,6 +713,23 @@ static VOID ixheaace_validate_config_params(ixheaace_input_config *pstr_input_co pstr_input_config->frame_length = LEN_SUPERFRAME; } } + if (pstr_input_config->random_access_interval < MIN_RAP_INTERVAL_IN_MS) { + pstr_input_config->random_access_interval = DEFAULT_RAP_INTERVAL_IN_MS; + } + if (pstr_input_config->method_def > MAX_METHOD_DEFINITION_TYPE) { + pstr_input_config->method_def = METHOD_DEFINITION_PROGRAM_LOUDNESS; + } + if (pstr_input_config->measurement_system != MEASUREMENT_SYSTEM_BS_1770_3) { + pstr_input_config->measurement_system = MEASUREMENT_SYSTEM_BS_1770_3; + } + if (pstr_input_config->measured_loudness > MAX_METHOD_VALUE || + pstr_input_config->measured_loudness < MIN_METHOD_VALUE) { + 
pstr_input_config->measured_loudness = DEFAULT_METHOD_VALUE; + } + if (pstr_input_config->sample_peak_level > MAX_SAMPLE_PEAK_LEVEL || + pstr_input_config->sample_peak_level < MIN_SAMPLE_PEAK_LEVEL) { + pstr_input_config->sample_peak_level = DEFAULT_SAMPLE_PEAK_VALUE; + } } else { pstr_input_config->cplx_pred = 0; pstr_input_config->harmonic_sbr = 0; @@ -926,6 +947,25 @@ static IA_ERRORCODE ixheaace_set_config_params(ixheaace_api_struct *pstr_api_str pstr_usac_config->drc_frame_size = LEN_SUPERFRAME; break; } + pstr_usac_config->random_access_interval = pstr_input_config->random_access_interval; + if (pstr_usac_config->random_access_interval > 0) { + pstr_usac_config->random_access_interval = + (WORD32)((((WORD64)pstr_usac_config->random_access_interval * + pstr_input_config->i_native_samp_freq) + + (pstr_usac_config->ccfl * 1000 - 1)) / + (pstr_usac_config->ccfl * 1000)); + } + + if (pstr_usac_config->random_access_interval) { + pstr_usac_config->preroll_flag = 1; + } + if (pstr_usac_config->sbr_enable == 1) { + pstr_usac_config->num_preroll_frames++; + if (pstr_usac_config->sbr_harmonic == 1) { + pstr_usac_config->num_preroll_frames++; + } + } + pstr_usac_config->stream_id = pstr_input_config->stream_id; if (pstr_input_config->ccfl_idx < NO_SBR_CCFL_768 || pstr_input_config->ccfl_idx > SBR_4_1) { pstr_api_struct->config[0].ccfl_idx = NO_SBR_CCFL_1024; // default value } else { @@ -945,7 +985,7 @@ static IA_ERRORCODE ixheaace_set_config_params(ixheaace_api_struct *pstr_api_str (ixheaace_resampler_table *)&ixheaace_resamp_4_to_1_iir_filt_params; } pstr_usac_config->use_drc_element = pstr_input_config->use_drc_element; - if (pstr_usac_config->use_drc_element) { + { ia_drc_input_config *pstr_drc_cfg = (ia_drc_input_config *)pstr_input_config->pv_drc_cfg; pstr_drc_cfg->str_uni_drc_config.str_channel_layout.base_ch_count = pstr_input_config->i_channels; @@ -968,9 +1008,6 @@ static IA_ERRORCODE ixheaace_set_config_params(ixheaace_api_struct *pstr_api_str 
pstr_usac_config->str_drc_cfg.str_uni_drc_config.str_drc_coefficients_uni_drc ->drc_frame_size = pstr_usac_config->drc_frame_size; pstr_input_config->drc_frame_size = pstr_usac_config->drc_frame_size; - } else { - pstr_usac_config->drc_frame_size = 0; - pstr_input_config->drc_frame_size = 0; } } else { if ((pstr_input_config->i_channels > MAX_NUM_CORE_CODER_CHANNELS)) { @@ -1253,7 +1290,9 @@ static VOID ixheaace_fill_mem_tabs(ixheaace_api_struct *pstr_api_struct, WORD32 /* output */ { pstr_mem_info = &pstr_api_struct->pstr_mem_info[IA_ENHAACPLUSENC_OUTPUT_IDX]; - pstr_mem_info->ui_size = (MAXIMUM_CHANNEL_BITS_1024 / BYTE_NUMBIT) * num_channel; + pstr_mem_info->ui_size = + ((MAX_PREROLL_FRAMES + 1) * (MAX_CHANNEL_BITS / BYTE_NUMBIT) * num_channel) + + MAX_PREROLL_CONFIG_SIZE; pstr_mem_info->ui_alignment = BYTE_ALIGN_8; pstr_mem_info->ui_type = IA_MEMTYPE_OUTPUT; pstr_mem_info->ui_placement[0] = 0; @@ -1547,7 +1586,6 @@ static IA_ERRORCODE ixheaace_alloc_and_assign_mem(ixheaace_api_struct *pstr_api_ memset(p_temp, 0, (p_offset - p_temp)); p_temp = p_offset; - offset_size = 0; for (i = 0; i < pstr_usac_config->channels; i++) { pstr_usac_enc_data->ptr_time_data[i] = (FLOAT64 *)(p_offset); p_offset += IXHEAAC_GET_SIZE_ALIGNED((2 * (pstr_usac_config->ccfl) * sizeof(FLOAT64)), @@ -1680,6 +1718,138 @@ static IA_ERRORCODE ixheaace_alloc_and_assign_mem(ixheaace_api_struct *pstr_api_ } return err_code; } + +static VOID ixheaace_write_audio_preroll_data(ixheaace_api_struct *pstr_api_struct, + ia_bit_buf_struct *it_bit_buff) { + ixheaace_config_struct *pstr_config = &pstr_api_struct->config[0]; + ixheaace_state_struct *pstr_enc_state = pstr_api_struct->pstr_state; + ia_usac_data_struct *pstr_usac_data = &pstr_enc_state->str_usac_enc_data; + ia_usac_encoder_config_struct *pstr_usac_config = &pstr_config->usac_config; + WORD32 i, j, padding_bits; + + if (pstr_usac_config->is_ipf) { + if (pstr_usac_config->iframes_interval == pstr_usac_config->num_preroll_frames) { + WORD32 
config_len = 0, num_bits = 0, au_len = 0, config_bits = 0; + WORD32 bytes_to_write; + UWORD8 *ptr_out = (UWORD8 *)pstr_api_struct->pp_mem[IA_MEMTYPE_OUTPUT]; + WORD32 max_output_size = + ((MAX_CHANNEL_BITS / BYTE_NUMBIT) * pstr_usac_config->channels) * MAX_PREROLL_FRAMES + + MAX_PREROLL_CONFIG_SIZE; + UWORD8 *out_data = ptr_out + max_output_size; + UWORD8 residual_bits = 0, residual_data = 0; + memmove(ptr_out + max_output_size, ptr_out, pstr_enc_state->i_out_bytes); + pstr_usac_config->is_ipf = 0; + + config_bits = ixheaace_get_usac_config_bytes(NULL, &pstr_enc_state->audio_specific_config, + pstr_config->ccfl_idx); + config_len = (config_bits + 7) >> 3; + num_bits = iusace_write_escape_value(NULL, config_len, 4, 4, 8); + num_bits += (config_len * 8); // config data bits + num_bits++; // apply-crossfade + num_bits++; // apr-reserved + // bits for number of preroll frames + num_bits += iusace_write_escape_value(NULL, pstr_usac_config->num_preroll_frames, 2, 4, 0); + // bits for au_len + for (i = 0; i < pstr_usac_config->num_preroll_frames; i++) { + num_bits += iusace_write_escape_value(NULL, pstr_usac_data->prev_out_bytes[i], 16, 16, 0); + au_len += pstr_usac_data->prev_out_bytes[i]; + } + iusace_reset_bit_buffer(it_bit_buff); + // total bytes to write + bytes_to_write = (num_bits + 7) >> 3; + // usacIndependencyFlag + iusace_write_bits_buf(it_bit_buff, pstr_usac_data->usac_independency_flag, 1); + iusace_write_bits_buf(it_bit_buff, 1, 1); // usacExtElementPresent + iusace_write_bits_buf(it_bit_buff, 0, 1); // usacExtElementUseDefaultLength + + if (au_len + bytes_to_write >= MAXIMUM_VALUE_8BIT) { + iusace_write_escape_value(it_bit_buff, au_len + bytes_to_write + 2, 8, 16, 0); + } else { + iusace_write_bits_buf(it_bit_buff, au_len + bytes_to_write, 8); + } + + iusace_write_escape_value(it_bit_buff, config_len, 4, 4, 8); // configLen + // Config + ixheaace_get_usac_config_bytes(it_bit_buff, &pstr_enc_state->audio_specific_config, + pstr_config->ccfl_idx); + + if 
(config_bits % 8) { + iusace_write_bits_buf(it_bit_buff, 0, (UWORD8)((config_len << 3) - config_bits)); + } + + iusace_write_bits_buf(it_bit_buff, 0, 1); // applyCrossfade + iusace_write_bits_buf(it_bit_buff, 0, 1); // apr_reserved + // numPreRollFrames + iusace_write_escape_value(it_bit_buff, pstr_usac_config->num_preroll_frames, 2, 4, 0); + for (i = 0; i < pstr_usac_config->num_preroll_frames; i++) { + au_len = pstr_usac_data->prev_out_bytes[i]; + + if (pstr_usac_config->iframes_interval != 0) { + out_data = pstr_usac_data->prev_out_data[i]; + } + + // auLen + iusace_write_escape_value(it_bit_buff, au_len, 16, 16, 0); + + // AccessUnit + for (j = 0; j < au_len; j++) { + iusace_write_bits_buf(it_bit_buff, *out_data, 8); + out_data++; + } + } + + if (num_bits % 8) { + iusace_write_bits_buf(it_bit_buff, 0, (UWORD8)((bytes_to_write << 3) - num_bits)); + } + // current frame + au_len = pstr_enc_state->i_out_bits >> 3; + residual_bits = (UWORD8)(pstr_enc_state->i_out_bits - (au_len << 3)); + out_data = ptr_out + max_output_size; + for (j = 0; j < au_len; j++) { + iusace_write_bits_buf(it_bit_buff, *out_data, 8); + out_data++; + } + residual_data = *out_data >> (8 - residual_bits); + iusace_write_bits_buf(it_bit_buff, residual_data, residual_bits); + + padding_bits = 8 - (it_bit_buff->cnt_bits & 7); + if (padding_bits > 0 && padding_bits < 8) { + ptr_out[it_bit_buff->cnt_bits >> 3] = + (WORD8)((UWORD32)ptr_out[it_bit_buff->cnt_bits >> 3]) & (0xFF << padding_bits); + } + pstr_enc_state->i_out_bytes = (it_bit_buff->cnt_bits + 7) >> 3; + pstr_usac_config->preroll_idx++; + + if (!pstr_usac_config->is_first_frame) { + pstr_usac_config->preroll_idx = pstr_usac_config->num_preroll_frames + 1; + } + if (pstr_usac_config->is_first_frame) { + pstr_usac_config->is_first_frame = 0; + } + } else { + if (pstr_usac_config->preroll_idx < pstr_usac_config->num_preroll_frames) { + WORD32 *ptr_prev_out_bytes = pstr_usac_data->prev_out_bytes; + WORD32 pr_idx = 
pstr_usac_config->preroll_idx; + UWORD8 *ptr_out = (UWORD8 *)pstr_api_struct->pp_mem[IA_MEMTYPE_OUTPUT]; + ptr_prev_out_bytes[pr_idx] = pstr_enc_state->i_out_bytes; + memcpy(pstr_usac_data->prev_out_data[pr_idx++], ptr_out, pstr_enc_state->i_out_bytes); + pstr_usac_config->preroll_idx = pr_idx; + pstr_enc_state->i_out_bytes = 0; + } + } + } else { + for (j = 0; j < pstr_usac_config->num_preroll_frames - 1; j++) { + pstr_usac_data->prev_out_bytes[j] = pstr_usac_data->prev_out_bytes[j + 1]; + } + if (pstr_usac_config->num_preroll_frames) { + pstr_usac_data->prev_out_bytes[pstr_usac_config->num_preroll_frames - 1] = + pstr_enc_state->i_out_bytes; + } + pstr_usac_config->preroll_idx = pstr_usac_config->num_preroll_frames + 1; + } + return; +} + static IA_ERRORCODE ia_usac_enc_init(ixheaace_api_struct *pstr_api_struct, WORD32 ccfl_idx) { IA_ERRORCODE error = IA_NO_ERROR; WORD32 i = 0; @@ -1928,28 +2098,39 @@ static IA_ERRORCODE ia_usac_enc_init(ixheaace_api_struct *pstr_api_struct, WORD3 if (error & IA_FATAL_ERROR) { return error; } - + pstr_api_struct->pstr_state->str_usac_enc_data.frame_count = 0; + pstr_usac_config->is_ipf = 1; + pstr_enc_data->stereo_config_index = (pstr_enc_state->mps_enable == 1) ? 2 : 0; ia_bit_buf_struct *pstr_ia_asc_bit_buf; pstr_ia_asc_bit_buf = iusace_create_bit_buffer( &(pstr_api_struct->pstr_state->str_bit_buf), pstr_api_struct->pp_mem[IA_MEMTYPE_OUTPUT], pstr_api_struct->pstr_mem_info[IA_MEMTYPE_OUTPUT].ui_size, 1); if (pstr_usac_config->sbr_enable) { - pstr_api_struct->pstr_state->audio_specific_config.str_usac_config.str_usac_element_config - ->stereo_config_index = (pstr_api_struct->pstr_state->mps_enable == 1) ? 
2 : 0; - pstr_api_struct->pstr_state->audio_specific_config.str_usac_config.str_usac_element_config - ->str_usac_sbr_config.bs_inter_tes = pstr_usac_config->sbr_inter_tes_active; - pstr_api_struct->pstr_state->audio_specific_config.str_usac_config.str_usac_element_config - ->str_usac_sbr_config.bs_pvc = pstr_usac_config->sbr_pvc_active; - pstr_api_struct->pstr_state->audio_specific_config.str_usac_config.str_usac_element_config - ->str_usac_sbr_config.dflt_header_extra1 = 0; - pstr_api_struct->pstr_state->audio_specific_config.str_usac_config.str_usac_element_config - ->str_usac_sbr_config.dflt_header_extra2 = 0; - pstr_api_struct->pstr_state->audio_specific_config.str_usac_config.str_usac_element_config - ->str_usac_sbr_config.dflt_start_freq = 0; - pstr_api_struct->pstr_state->audio_specific_config.str_usac_config.str_usac_element_config - ->str_usac_sbr_config.dflt_stop_freq = 4; - pstr_api_struct->pstr_state->audio_specific_config.str_usac_config.str_usac_element_config - ->str_usac_sbr_config.harmonic_sbr = pstr_usac_config->sbr_harmonic; + for (UWORD32 idx = 0; idx < pstr_usac_config->num_elements; idx++) { + switch (pstr_enc_state->audio_specific_config.str_usac_config.usac_element_type[idx]) { + case ID_USAC_SCE: + case ID_USAC_CPE: + pstr_enc_state->audio_specific_config.str_usac_config.str_usac_element_config[idx] + .stereo_config_index = pstr_enc_data->stereo_config_index; + pstr_enc_state->audio_specific_config.str_usac_config.str_usac_element_config[idx] + .str_usac_sbr_config.bs_inter_tes = pstr_usac_config->sbr_inter_tes_active; + pstr_enc_state->audio_specific_config.str_usac_config.str_usac_element_config[idx] + .str_usac_sbr_config.bs_pvc = pstr_usac_config->sbr_pvc_active; + pstr_enc_state->audio_specific_config.str_usac_config.str_usac_element_config[idx] + .str_usac_sbr_config.dflt_header_extra1 = 0; + pstr_enc_state->audio_specific_config.str_usac_config.str_usac_element_config[idx] + .str_usac_sbr_config.dflt_header_extra2 = 0; + 
pstr_enc_state->audio_specific_config.str_usac_config.str_usac_element_config[idx] + .str_usac_sbr_config.dflt_start_freq = 0; + pstr_enc_state->audio_specific_config.str_usac_config.str_usac_element_config[idx] + .str_usac_sbr_config.dflt_stop_freq = 4; + pstr_enc_state->audio_specific_config.str_usac_config.str_usac_element_config[idx] + .str_usac_sbr_config.harmonic_sbr = pstr_usac_config->sbr_harmonic; + break; + default: + continue; + } + } } ixheaace_get_audiospecific_config_bytes(pstr_ia_asc_bit_buf, @@ -2771,9 +2952,7 @@ static IA_ERRORCODE iusace_process(ixheaace_api_struct *pstr_api_struct) { drc_sample = pstr_config->drc_frame_size * pstr_config->channels; core_coder_frame_length = pstr_config->ccfl; num_ch = pstr_config->channels; - usac_independency_flg = !(pstr_usac_data->usac_independency_flag_count % - pstr_usac_data->usac_independency_flag_interval); - pstr_usac_data->usac_independency_flag = usac_independency_flg; + usac_independency_flg = pstr_usac_data->usac_independency_flag; ps_inp_buf = (WORD16 *)pstr_api_struct->pp_mem[IA_MEMTYPE_INPUT]; pi1_inp_buf = (WORD8 *)pstr_api_struct->pp_mem[IA_MEMTYPE_INPUT]; ps_out_buf = (WORD8 *)pstr_api_struct->pp_mem[IA_MEMTYPE_OUTPUT]; @@ -2863,6 +3042,10 @@ static IA_ERRORCODE iusace_process(ixheaace_api_struct *pstr_api_struct) { time_signal_mps[idx] = (FLOAT32)ps_inp_buf[2 * idx]; time_signal_mps[num_samples_read / 2 + idx] = (FLOAT32)ps_inp_buf[2 * idx + 1]; } + ixheaace_mps_pstr_struct pstr_mps_enc = + (ixheaace_mps_pstr_struct)pstr_api_struct->pstr_mps_212_enc; + pstr_mps_enc->ptr_sac_encoder->independency_flag = usac_independency_flg; + error = ixheaace_mps_212_process(pstr_api_struct->pstr_mps_212_enc, time_signal_mps, num_samples_read, &mps_extension_payload); if (error) { @@ -3252,7 +3435,8 @@ static IA_ERRORCODE iusace_process(ixheaace_api_struct *pstr_api_struct) { pstr_api_struct->pstr_state->i_out_bytes = (padding_bits > 0 && padding_bits < 8) ? 
(pstr_it_bit_buff->cnt_bits + padding_bits) >> 3 : pstr_it_bit_buff->cnt_bits >> 3; - + pstr_api_struct->pstr_state->i_out_bits = pstr_it_bit_buff->cnt_bits; + ixheaace_write_audio_preroll_data(pstr_api_struct, pstr_it_bit_buff); pstr_state->str_usac_enc_data.frame_count++; pstr_usac_data->usac_independency_flag_count = (pstr_usac_data->usac_independency_flag_count + 1) % @@ -3292,6 +3476,93 @@ IA_ERRORCODE ixheaace_get_lib_id_strings(pVOID pv_output) { return err_code; } +static void ixheaace_config_drc_parameters(ixheaace_api_struct *pstr_api_struct, + ixheaace_input_config *pstr_input_config) { + ia_drc_input_config *pstr_drc_cfg; + pstr_drc_cfg = (ia_drc_input_config *)pstr_input_config->pv_drc_cfg; + + ia_drc_internal_config *pstr_internal_drc_cfg = + &pstr_api_struct->config[0].usac_config.str_internal_drc_cfg; + + ia_drc_loudness_info_set_struct *pstr_enc_loudness_info_set = + &pstr_drc_cfg->str_enc_loudness_info_set; + ia_drc_loudness_info_set_struct *pstr_enc_internal_loudness_info_set = + &pstr_internal_drc_cfg->str_enc_loudness_info_set; + + WORD32 n; + WORD32 i, k = 0; + + i = pstr_enc_loudness_info_set->loudness_info_count; + pstr_enc_loudness_info_set->loudness_info_count = + MIN(pstr_enc_loudness_info_set->loudness_info_count + + pstr_enc_internal_loudness_info_set->loudness_info_count, + MAX_LOUDNESS_INFO_COUNT); + + for (n = i; n < pstr_enc_loudness_info_set->loudness_info_count; n++, k++) { + memcpy(&pstr_enc_loudness_info_set->str_loudness_info[n], + &pstr_enc_internal_loudness_info_set->str_loudness_info[k], + sizeof(ia_drc_loudness_info_struct)); + } + i = pstr_enc_loudness_info_set->loudness_info_album_count; + k = 0; + pstr_enc_loudness_info_set->loudness_info_album_count = + MIN(pstr_enc_loudness_info_set->loudness_info_album_count + + pstr_enc_internal_loudness_info_set->loudness_info_album_count, + MAX_LOUDNESS_INFO_COUNT); + for (n = i; n < pstr_enc_loudness_info_set->loudness_info_album_count; n++, k++) { + 
memcpy(&pstr_enc_loudness_info_set->str_loudness_info_album[n], + &pstr_enc_internal_loudness_info_set->str_loudness_info_album[k], + sizeof(ia_drc_loudness_info_struct)); + } +} + +static void ixheaace_get_measured_loudness_info(ixheaace_api_struct *pstr_api_struct, + ixheaace_input_config *pstr_input_config) { + ia_drc_input_config *pstr_internal_drc_cfg; + if (!pstr_input_config->use_measured_loudness) { + pstr_internal_drc_cfg = + (ia_drc_input_config *)&pstr_api_struct->config[0].usac_config.str_internal_drc_cfg; + } else { + pstr_internal_drc_cfg = &pstr_api_struct->config[0].usac_config.str_drc_cfg; + } + memset(pstr_internal_drc_cfg, 0, sizeof(ia_drc_input_config)); + ia_drc_uni_drc_config_struct *pstr_uni_drc_config = &pstr_internal_drc_cfg->str_uni_drc_config; + ia_drc_loudness_info_set_struct *pstr_enc_loudness_info_set = + &pstr_internal_drc_cfg->str_enc_loudness_info_set; + { + WORD32 n, m; + pstr_uni_drc_config->sample_rate_present = 1; + pstr_uni_drc_config->loudness_info_set_present = 1; + pstr_enc_loudness_info_set->loudness_info_count = 1; + pstr_enc_loudness_info_set->loudness_info_count = + MIN(pstr_enc_loudness_info_set->loudness_info_count, MAX_LOUDNESS_INFO_COUNT); + for (n = 0; n < pstr_enc_loudness_info_set->loudness_info_count; n++) { + pstr_enc_loudness_info_set->str_loudness_info[n].drc_set_id = 0; + pstr_enc_loudness_info_set->str_loudness_info[n].downmix_id = 0; + pstr_enc_loudness_info_set->str_loudness_info[n].sample_peak_level_present = 1; + pstr_enc_loudness_info_set->str_loudness_info[n].sample_peak_level = + pstr_input_config->sample_peak_level; + pstr_enc_loudness_info_set->str_loudness_info[n].true_peak_level_present = 0; + pstr_enc_loudness_info_set->str_loudness_info[n].measurement_count = 1; + pstr_enc_loudness_info_set->str_loudness_info[n].measurement_count = + MIN(pstr_enc_loudness_info_set->str_loudness_info[n].measurement_count, + MAX_MEASUREMENT_COUNT); + + for (m = 0; m < 
pstr_enc_loudness_info_set->str_loudness_info[n].measurement_count; m++) { + pstr_enc_loudness_info_set->str_loudness_info[n] + .str_loudness_measure[m] + .method_definition = pstr_input_config->method_def; + pstr_enc_loudness_info_set->str_loudness_info[n].str_loudness_measure[m].method_value = + (FLOAT32)pstr_input_config->measured_loudness; + pstr_enc_loudness_info_set->str_loudness_info[n] + .str_loudness_measure[m] + .measurement_system = pstr_input_config->measurement_system; + pstr_enc_loudness_info_set->str_loudness_info[n].str_loudness_measure[m].reliability = 3; + } + } + } +} + IA_ERRORCODE ixheaace_allocate(pVOID pv_input, pVOID pv_output) { IA_ERRORCODE err_code = IA_NO_ERROR; WORD32 ui_api_size; @@ -3326,6 +3597,24 @@ IA_ERRORCODE ixheaace_allocate(pVOID pv_input, pVOID pv_output) { pstr_api_struct = (ixheaace_api_struct *)pstr_output_config->pv_ia_process_api_obj; memset(pstr_api_struct, 0, sizeof(*pstr_api_struct)); + + if (pstr_input_config->aot == AOT_USAC) { + if (pstr_input_config->use_drc_element == 0) { + pstr_input_config->use_measured_loudness = 1; + } else { + pstr_input_config->use_measured_loudness = 0; + } + ixheaace_get_measured_loudness_info(pstr_api_struct, pstr_input_config); + + if (!pstr_input_config->use_measured_loudness) + ixheaace_config_drc_parameters(pstr_api_struct, pstr_input_config); + + if (pstr_input_config->use_measured_loudness) { + memcpy(pstr_input_config->pv_drc_cfg, &pstr_api_struct->config[0].usac_config.str_drc_cfg, + sizeof(ia_drc_input_config)); + } + } + ixheaace_set_default_config(pstr_api_struct, pstr_input_config); err_code = ixheaace_set_config_params(pstr_api_struct, pstr_input_config); @@ -3361,6 +3650,12 @@ IA_ERRORCODE ixheaace_allocate(pVOID pv_input, pVOID pv_output) { pstr_output_config->malloc_count++; + if (pstr_input_config->aot == AOT_USAC) { + if (pstr_input_config->use_measured_loudness) { + pstr_api_struct->config[0].usac_config.use_measured_loudness = 1; + } + } + 
ixheaace_fill_mem_tabs(pstr_api_struct, pstr_input_config->aot); err_code = ixheaace_alloc_and_assign_mem(pstr_api_struct, pstr_output_config); @@ -3459,9 +3754,7 @@ IA_ERRORCODE ixheaace_init(pVOID pstr_obj_ixheaace, pVOID pv_input, pVOID pv_out ixheaace_get_audiospecific_config_bytes(pstr_ia_asc_bit_buf, &pstr_api_struct->pstr_state->audio_specific_config, - pstr_api_struct->pstr_state->aot - - , + pstr_api_struct->pstr_state->aot, pstr_input_config->ccfl_idx); pstr_api_struct->pstr_state->i_out_bytes = (pstr_ia_asc_bit_buf->cnt_bits + 7) >> 3; @@ -3595,12 +3888,43 @@ IA_ERRORCODE ixheaace_process(pVOID pstr_obj_ixheaace, pVOID pv_input, pVOID pv_ error = IA_EXHEAACE_EXE_NONFATAL_QUANTIZATION_INSUFFICIENT_BITRES; } } else { - error = iusace_process(pstr_api_struct); - if ((error == IA_NO_ERROR) && (pstr_api_struct->pstr_state->is_quant_spec_zero)) { - error = IA_EXHEAACE_EXE_NONFATAL_USAC_QUANTIZATION_SPECTRUM_ZERO; + ia_usac_encoder_config_struct *usac_config = &pstr_api_struct->config[0].usac_config; + if (usac_config->iframes_interval <= usac_config->num_preroll_frames) { + pstr_api_struct->pstr_state->str_usac_enc_data.usac_independency_flag = 1; + if (usac_config->iframes_interval == usac_config->num_preroll_frames && + usac_config->is_first_frame == 0) { + usac_config->is_ipf = 1; + } + } else { + pstr_api_struct->pstr_state->str_usac_enc_data.usac_independency_flag = 0; } - if ((error == IA_NO_ERROR) && (pstr_api_struct->pstr_state->is_gain_limited)) { - error = IA_EXHEAACE_EXE_NONFATAL_USAC_QUANTIZATION_INSUFFICIENT_BITRES; + if (pstr_api_struct->pstr_state->str_usac_enc_data.frame_count > + usac_config->num_preroll_frames) { + if (usac_config->iframes_interval <= usac_config->num_preroll_frames) { + pstr_api_struct->pstr_state->str_usac_enc_data.usac_independency_flag = 1; + } else { + pstr_api_struct->pstr_state->str_usac_enc_data.usac_independency_flag = 0; + } + } + + { + error = iusace_process(pstr_api_struct); + if (error & IA_FATAL_ERROR) { + 
pstr_output_config->i_out_bytes = 0; + return error; + } + if ((error == IA_NO_ERROR) && (pstr_api_struct->pstr_state->is_quant_spec_zero)) { + error = IA_EXHEAACE_EXE_NONFATAL_USAC_QUANTIZATION_SPECTRUM_ZERO; + } + if ((error == IA_NO_ERROR) && (pstr_api_struct->pstr_state->is_gain_limited)) { + error = IA_EXHEAACE_EXE_NONFATAL_USAC_QUANTIZATION_INSUFFICIENT_BITRES; + } + } + + usac_config->iframes_interval++; + if (usac_config->iframes_interval == + (usac_config->random_access_interval - usac_config->num_preroll_frames)) { + usac_config->iframes_interval = 0; } } pstr_output_config->i_out_bytes = pstr_api_struct->pstr_state->i_out_bytes; diff --git a/encoder/ixheaace_api.h b/encoder/ixheaace_api.h index 3bd594a..3c99db2 100644 --- a/encoder/ixheaace_api.h +++ b/encoder/ixheaace_api.h @@ -108,6 +108,13 @@ typedef struct { WORD32 hq_esbr; FLAG write_program_config_element; ixheaace_aac_enc_config aac_config; + WORD32 random_access_interval; + UWORD32 method_def; + FLAG use_measured_loudness; + FLOAT64 measured_loudness; + UWORD32 measurement_system; + FLOAT32 sample_peak_level; + UWORD16 stream_id; } ixheaace_input_config; typedef struct { diff --git a/encoder/ixheaace_asc_write.c b/encoder/ixheaace_asc_write.c index 436ef57..3bf2736 100644 --- a/encoder/ixheaace_asc_write.c +++ b/encoder/ixheaace_asc_write.c @@ -89,6 +89,10 @@ static WORD32 iusace_config_extension(ia_bit_buf_struct *pstr_it_bit_buff, pstr_usac_config->usac_config_ext_buf[j][i], 8); } break; + case ID_CONFIG_EXT_STREAM_ID: + bit_cnt += + iusace_write_bits_buf(pstr_it_bit_buff, pstr_usac_config->stream_identifier, 16); + break; default: for (i = 0; i < pstr_usac_config->usac_config_ext_len[j]; i++) { bit_cnt += iusace_write_bits_buf( @@ -610,3 +614,88 @@ WORD32 ixheaace_get_audiospecific_config_bytes( } return bit_cnt; } + +WORD32 ixheaace_get_usac_config_bytes( + ia_bit_buf_struct *pstr_it_bit_buff, + ixheaace_audio_specific_config_struct *pstr_audio_specific_config, WORD32 ccfl_idx) { + WORD32 
sbr_ratio_idx; + ia_usac_config_struct *pstr_usac_config = &(pstr_audio_specific_config->str_usac_config); + WORD32 ia_ccfl_tbl[5] = {768, 1024, 768, 1024, 1024}; + WORD32 i, tmp, bit_cnt = 0; + pstr_audio_specific_config->core_sbr_framelength_index = + ccfl_idx; // 768 core coder frame length without SBR + pstr_usac_config->ccfl = ia_ccfl_tbl[pstr_audio_specific_config->core_sbr_framelength_index]; + tmp = 0x1f; + for (i = 0; i < sizeof(ia_usac_sampl_freq_table) / sizeof(ia_usac_sampl_freq_table[0]); i++) { + if (ia_usac_sampl_freq_table[i] == pstr_audio_specific_config->sampling_frequency) { + tmp = i; + break; + } + } + pstr_audio_specific_config->samp_freq_index = (UWORD32)tmp; + + if (pstr_audio_specific_config->samp_freq_index == 0x1f) { + bit_cnt += iusace_write_bits_buf(pstr_it_bit_buff, 0x1f, 5); + bit_cnt += iusace_write_bits_buf(pstr_it_bit_buff, + (pstr_audio_specific_config->sampling_frequency), 24); + } else { + bit_cnt += + iusace_write_bits_buf(pstr_it_bit_buff, (pstr_audio_specific_config->samp_freq_index), 5); + } + + bit_cnt += iusace_write_bits_buf(pstr_it_bit_buff, + (pstr_audio_specific_config->core_sbr_framelength_index), 3); + + bit_cnt += iusace_write_bits_buf(pstr_it_bit_buff, + (pstr_audio_specific_config->channel_configuration), 5); + + if (pstr_audio_specific_config->channel_configuration == 0) { + bit_cnt += iusace_write_escape_value( + pstr_it_bit_buff, pstr_audio_specific_config->num_audio_channels, 5, 8, 16); + + for (i = 0; i < pstr_audio_specific_config->num_audio_channels; i++) { + tmp = pstr_audio_specific_config->output_channel_pos[i]; + bit_cnt += iusace_write_bits_buf(pstr_it_bit_buff, tmp, 5); + } + } + + sbr_ratio_idx = ixheaace_sbr_ratio(pstr_audio_specific_config->core_sbr_framelength_index); + + bit_cnt += iusace_encoder_config(pstr_it_bit_buff, pstr_usac_config, sbr_ratio_idx, + &pstr_audio_specific_config->str_aac_config); + + bit_cnt += iusace_write_bits_buf(pstr_it_bit_buff, pstr_usac_config->usac_cfg_ext_present, 
1); + if (pstr_usac_config->usac_cfg_ext_present) { + bit_cnt += iusace_config_extension(pstr_it_bit_buff, pstr_usac_config); + } + + if (sbr_ratio_idx) + pstr_audio_specific_config->sbr_present_flag = 1; + else + pstr_audio_specific_config->sbr_present_flag = 0; + + pstr_audio_specific_config->ext_audio_object_type = 0; + + if (pstr_audio_specific_config->ext_audio_object_type == AOT_SBR) { + pstr_audio_specific_config->ext_sync_word = 0x2b7; + bit_cnt += + iusace_write_bits_buf(pstr_it_bit_buff, (pstr_audio_specific_config->ext_sync_word), 11); + + bit_cnt += iusace_write_bits_buf(pstr_it_bit_buff, + (pstr_audio_specific_config->ext_audio_object_type), 5); + + bit_cnt += iusace_write_bits_buf(pstr_it_bit_buff, + (pstr_audio_specific_config->sbr_present_flag), 1); + + if (pstr_audio_specific_config->sbr_present_flag == 1) { + bit_cnt += iusace_write_bits_buf(pstr_it_bit_buff, + (pstr_audio_specific_config->ext_samp_freq_index), 4); + + if (pstr_audio_specific_config->ext_samp_freq_index == 0xf) { + bit_cnt += iusace_write_bits_buf( + pstr_it_bit_buff, (pstr_audio_specific_config->ext_sampling_frequency), 24); + } + } + } + return bit_cnt; +}
\ No newline at end of file diff --git a/encoder/ixheaace_asc_write.h b/encoder/ixheaace_asc_write.h index 27596c5..32f4eb1 100644 --- a/encoder/ixheaace_asc_write.h +++ b/encoder/ixheaace_asc_write.h @@ -41,4 +41,8 @@ typedef struct { WORD32 ixheaace_get_audiospecific_config_bytes( ia_bit_buf_struct *pstr_it_bit_buff, ixheaace_audio_specific_config_struct *pstr_audio_specific_config, WORD32 aot, - WORD32 ccfl_idx);
\ No newline at end of file + WORD32 ccfl_idx); + +WORD32 ixheaace_get_usac_config_bytes( + ia_bit_buf_struct *pstr_it_bit_buff, + ixheaace_audio_specific_config_struct *pstr_audio_specific_config, WORD32 ccfl_idx);
\ No newline at end of file diff --git a/encoder/ixheaace_config_params.h b/encoder/ixheaace_config_params.h index 600e94e..28cd5b9 100644 --- a/encoder/ixheaace_config_params.h +++ b/encoder/ixheaace_config_params.h @@ -57,3 +57,4 @@ #define USAC_FLAG_NOISE_FILLING_DEFAULT_VALUE (0) #define USAC_BITRATE_DEFAULT_VALUE (96000) #define USAC_DEFAULT_ACELP_FLAG_VALUE (0) +#define USAC_DEFAULT_STREAM_ID_VALUE (0) diff --git a/encoder/ixheaace_error_handler.h b/encoder/ixheaace_error_handler.h index fb0d8af..ac8c226 100644 --- a/encoder/ixheaace_error_handler.h +++ b/encoder/ixheaace_error_handler.h @@ -51,6 +51,7 @@ IA_ERRORCODE ia_error_handler(ia_error_info_struct *p_mod_err_info, WORD8 *pb_co } \ if (pstr_drc_cfg_user) { \ free_global(pstr_drc_cfg_user); \ + pstr_drc_cfg_user = NULL; \ } \ if (ia_stsz_size != NULL) { \ pv_output->free_xheaace(ia_stsz_size); \ diff --git a/encoder/ixheaace_loudness_measurement.c b/encoder/ixheaace_loudness_measurement.c new file mode 100644 index 0000000..dfb01b3 --- /dev/null +++ b/encoder/ixheaace_loudness_measurement.c @@ -0,0 +1,391 @@ +/****************************************************************************** + * * + * Copyright (C) 2024 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + ***************************************************************************** + * Originally developed and contributed by Ittiam Systems Pvt. 
Ltd, Bangalore + */ +#include <stdlib.h> +#include <string.h> +#include <math.h> +#include <float.h> +#include "ixheaac_type_def.h" +#include "ixheaac_error_standards.h" +#include "ixheaace_error_codes.h" +#include "ixheaace_api.h" +#include "ixheaace_loudness_measurement.h" +#include "iusace_cnst.h" +#include "ixheaac_constants.h" + +FLOAT64 a_coeff_pre_flt[12][3] = { + {0, -1.84460946989011, 0.85584332293064}, /* 96000Hz sample_rate*/ + {0, -1.83091998796233, 0.84414226108785}, /* 88200Hz sample_rate*/ + {0, -1.76738637827624, 0.79175893605869}, /* 64000Hz sample_rate*/ + {0, -1.69065929318241, 0.73248077421585}, /* 48000Hz sample_rate*/ + {0, -1.66365511325602, 0.71259542807323}, /* 44100Hz sample_rate*/ + {0, -1.53904509625064, 0.62696685598156}, /* 32000Hz sample_rate*/ + {0, -1.39023460519282, 0.53683848126040}, /* 24000Hz sample_rate*/ + {0, -1.33830533606613, 0.50824455891360}, /* 22050Hz sample_rate*/ + {0, -1.10153376910699, 0.39491236874986}, /* 16000Hz sample_rate*/ + {0, -0.82398044060334, 0.29429059828526}, /* 12000Hz sample_rate*/ + {0, -0.73075690963163, 0.26764083061798}, /* 11025Hz sample_rate*/ + {0, -0.29338078241492, 0.18687510604541}}; /* 8000Hz sample_rate*/ + +FLOAT64 b_coeff_pre_flt[12][3] = { + {1.55971422897580, -2.92674157825108, 1.37826120231582}, /* 96000Hz sample_rate*/ + {1.55751537557965, -2.90562707992635, 1.36133397747221}, /* 88200Hz sample_rate*/ + {1.54734277602520, -2.80819560855113, 1.28522539030837}, /* 64000Hz sample_rate*/ + {1.53512485958697, -2.69169618940638, 1.19839281085285}, /* 48000Hz sample_rate*/ + {1.53084123005034, -2.65097999515473, 1.16907907992158}, /* 44100Hz sample_rate*/ + {1.51117789956876, -2.46488941336014, 1.04163327352229}, /* 32000Hz sample_rate*/ + {1.48790022096228, -2.24620546814114, 0.90490912324644}, /* 24000Hz sample_rate*/ + {1.47982535097775, -2.17072861285683, 0.86084248472655}, /* 22050Hz sample_rate*/ + {1.44329522349136, -1.83157538126046, 0.68165875741197}, /* 16000Hz sample_rate*/ + 
{1.40101638596118, -1.44343141964020, 0.51272519136094}, /* 12000Hz sample_rate*/ + {1.38693639705635, -1.31515305817747, 0.46510058210747}, /* 11025Hz sample_rate*/ + {1.32162356892998, -0.72625549131569, 0.29812624601620}}; /* 8000Hz sample_rate*/ + +FLOAT64 a_coeff_RLB_flt[12][3] = { + {0, -1.99501754472472, 0.99502375904092}, /* 96000Hz sample_rate*/ + {0, -1.99457751545034, 0.99458487587805}, /* 88200Hz sample_rate*/ + {0, -1.99253095794890, 0.99254492277827}, /* 64000Hz sample_rate*/ + {0, -1.99004745483398, 0.99007225036621}, /* 48000Hz sample_rate*/ + {0, -1.98916967362979, 0.98919903578704}, /* 44100Hz sample_rate*/ + {0, -1.98508966898868, 0.98514532066955}, /* 32000Hz sample_rate*/ + {0, -1.98014412622893, 0.98024281785928}, /* 24000Hz sample_rate*/ + {0, -1.97839760259012, 0.97851441950325}, /* 22050Hz sample_rate*/ + {0, -1.97028952800443, 0.97051049053584}, /* 16000Hz sample_rate*/ + {0, -1.96048317995201, 0.96087407552357}, /* 12000Hz sample_rate*/ + {0, -1.95712192483092, 0.95758214578578}, /* 11025Hz sample_rate*/ + {0, -1.94101334282922, 0.94188430416850}}; /* 8000Hz sample_rate*/ + +FLOAT64 b_coeff_RLB_flt[12][3] = { + {1.00247575433736, -2.00497008989074, 1.00247575433736}, /* 96000Hz sample_rate*/ + {1.00225631275593, -2.00453006061636, 1.00225631275593}, /* 88200Hz sample_rate*/ + {1.00123633620603, -2.00248350311492, 1.00123633620603}, /* 64000Hz sample_rate*/ + {1.0, -2.0, 1.0}, /* 48000Hz sample_rate*/ + {0.99956006454251, -1.9991201290850, 0.99956006454251}, /* 44100Hz sample_rate*/ + {0.99751647782627, -1.9950329556525, 0.99751647782627}, /* 32000Hz sample_rate*/ + {0.99508528374654, -1.99009667139495, 0.99508528374654}, /* 24000Hz sample_rate*/ + {0.99422108456853, -1.98835014775614, 0.99422108456853}, /* 22050Hz sample_rate*/ + {0.99021912008482, -1.98024207317045, 0.99021912008482}, /* 16000Hz sample_rate*/ + {0.98540091257869, -1.97043572511803, 0.98540091257869}, /* 12000Hz sample_rate*/ + {0.98375494770979, -1.96707446999694, 
0.98375494770979}, /* 11025Hz sample_rate*/ + {0.97590602690115, -1.95096588799524, 0.97590602690115}}; /* 8000Hz sample_rate*/ + +static WORD32 ixheaace_map_sample_rate(WORD32 sample_rate, + ixheaace_loudness_struct *pstr_loudness_hdl) { + WORD32 mapped_sample_rate = sample_rate; + + if ((mapped_sample_rate >= 0) && (mapped_sample_rate < 9391)) { + mapped_sample_rate = 8000; + pstr_loudness_hdl->sample_rate_idx = 11; + } else if ((mapped_sample_rate >= 9391) && (mapped_sample_rate < 11502)) { + mapped_sample_rate = 11025; + pstr_loudness_hdl->sample_rate_idx = 10; + } else if ((mapped_sample_rate >= 11502) && (mapped_sample_rate < 13856)) { + mapped_sample_rate = 12000; + pstr_loudness_hdl->sample_rate_idx = 9; + } else if ((mapped_sample_rate >= 13856) && (mapped_sample_rate < 18783)) { + mapped_sample_rate = 16000; + pstr_loudness_hdl->sample_rate_idx = 8; + } else if ((mapped_sample_rate >= 18783) && (mapped_sample_rate < 23004)) { + mapped_sample_rate = 22050; + pstr_loudness_hdl->sample_rate_idx = 7; + } else if ((mapped_sample_rate >= 23004) && (mapped_sample_rate < 27713)) { + mapped_sample_rate = 24000; + pstr_loudness_hdl->sample_rate_idx = 6; + } else if ((mapped_sample_rate >= 27713) && (mapped_sample_rate < 37566)) { + mapped_sample_rate = 32000; + pstr_loudness_hdl->sample_rate_idx = 5; + } else if ((mapped_sample_rate >= 37566) && (mapped_sample_rate < 46009)) { + mapped_sample_rate = 44100; + pstr_loudness_hdl->sample_rate_idx = 4; + } else if ((mapped_sample_rate >= 46009) && (mapped_sample_rate < 55426)) { + mapped_sample_rate = 48000; + pstr_loudness_hdl->sample_rate_idx = 3; + } else if ((mapped_sample_rate >= 55426) && (mapped_sample_rate < 75132)) { + mapped_sample_rate = 64000; + pstr_loudness_hdl->sample_rate_idx = 2; + } else if ((mapped_sample_rate >= 75132) && (mapped_sample_rate < 92017)) { + mapped_sample_rate = 88200; + pstr_loudness_hdl->sample_rate_idx = 1; + } else if (mapped_sample_rate >= 92017) { + mapped_sample_rate = 96000; + 
pstr_loudness_hdl->sample_rate_idx = 0; + } else { + mapped_sample_rate = 48000; + pstr_loudness_hdl->sample_rate_idx = 3; + } + return mapped_sample_rate; +} + +WORD32 ixheaace_loudness_info_get_handle_size() { + return IXHEAAC_GET_SIZE_ALIGNED(sizeof(ixheaace_loudness_struct), BYTE_ALIGN_8); +} +IA_ERRORCODE ixheaace_loudness_init_params(pVOID loudness_handle, + ixheaace_input_config *pstr_input_config, + ixheaace_output_config *pstr_output_config) { + UWORD32 count = 0; + UWORD8 temp_count = 0; + IA_ERRORCODE err_code = IA_NO_ERROR; + ixheaace_loudness_struct *pstr_loudness_hdl = (ixheaace_loudness_struct *)loudness_handle; + memset(pstr_loudness_hdl, 0, sizeof(ixheaace_loudness_struct)); + pstr_loudness_hdl->sample_rate = + ixheaace_map_sample_rate(pstr_input_config->i_samp_freq, pstr_loudness_hdl); + + pstr_output_config->samp_freq = pstr_loudness_hdl->sample_rate; + pstr_loudness_hdl->length = pstr_input_config->aac_config.length; + pstr_loudness_hdl->pcm_sz = pstr_input_config->ui_pcm_wd_sz; + if (pstr_loudness_hdl->pcm_sz != 16) { + return (IA_EXHEAACE_CONFIG_FATAL_PCM_WDSZ); + } + pstr_loudness_hdl->n_channels = pstr_input_config->i_channels; + if (pstr_loudness_hdl->n_channels > 2 || pstr_loudness_hdl->n_channels < 1) { + return (IA_EXHEAACE_CONFIG_FATAL_NUM_CHANNELS); + } + pstr_loudness_hdl->num_samples_per_ch = (pstr_loudness_hdl->sample_rate / 10); + pstr_loudness_hdl->sum_square = 0; + pstr_loudness_hdl->mom_loudness_first_time_flag = 1; + + pstr_loudness_hdl->count_fn_call_mmstl = 0; + pstr_loudness_hdl->sl_first_time_flag = 1; + pstr_loudness_hdl->local_sl_count = 0; + pstr_loudness_hdl->short_term_loudness_overlap = IXHEAACE_SL_OVERLAP; + + pstr_loudness_hdl->no_of_mf = IXHEAACE_SEC_TO_100MS_FACTOR; + pstr_loudness_hdl->no_of_mf -= IXHEAACE_MOMENTARY_LOUDNESS_OVERLAP; + pstr_loudness_hdl->no_of_stf = + (((pstr_loudness_hdl->no_of_mf + IXHEAACE_MOMENTARY_LOUDNESS_OVERLAP) - 30) / + (30 - pstr_loudness_hdl->short_term_loudness_overlap)) + + 1; + + 
pstr_loudness_hdl->tot_int_val_stf_passing_abs_gate = 0; + pstr_loudness_hdl->curr_stf_no = 0; + pstr_loudness_hdl->loop_curr_stf_no = 0; + pstr_loudness_hdl->no_of_stf_passing_abs_gate = 0; + pstr_loudness_hdl->max_lra_count = pstr_loudness_hdl->no_of_stf; + pstr_loudness_hdl->loop_ml_count_fn_call = 0; + pstr_loudness_hdl->ml_count_fn_call = 0; + pstr_loudness_hdl->max_il_buf_size = pstr_loudness_hdl->no_of_mf; + pstr_loudness_hdl->get_intergrated_loudness = 1; + pstr_loudness_hdl->max_sample_val = FLT_EPSILON; + + for (count = 0; count < pstr_loudness_hdl->n_channels; count++) { + for (temp_count = 0; temp_count < IXHEAACE_LOUDNESS_NUM_TAPS; temp_count++) { + pstr_loudness_hdl->w[0][count][temp_count] = 0; + pstr_loudness_hdl->w[1][count][temp_count] = 0; + } + } + + for (count = 0; count < 4; count++) { + pstr_loudness_hdl->prev_four_sum_square[count] = 0; + } + + for (count = 0; count < 30; count++) { + pstr_loudness_hdl->prev_thirty_sum_square[count] = 0; + } + + count = 0; + while (count < (pstr_loudness_hdl->no_of_stf)) { + pstr_loudness_hdl->temp_stf_instances_loudness[count] = 0; + count++; + } + + count = 0; + while (count < (pstr_loudness_hdl->no_of_stf)) { + pstr_loudness_hdl->stf_instances[count].short_term_loudness = + IXHEAACE_DEFAULT_SHORT_TERM_LOUDENSS; + pstr_loudness_hdl->stf_instances[count].int_val = 0; + pstr_loudness_hdl->stf_instances[count].passes_abs_gate = FALSE; + count++; + } + + count = 0; + while (count < pstr_loudness_hdl->no_of_mf) { + pstr_loudness_hdl->mf_instances[count].momentary_loudness = + IXHEAACE_DEFAULT_MOMENTARY_LOUDENSS; + pstr_loudness_hdl->mf_instances[count].int_val = 0.0; + pstr_loudness_hdl->mf_instances[count].passes_abs_gate = FALSE; + count++; + } + return err_code; +} + +static FLOAT64 ixheaace_loudness_gen_flt(FLOAT64 *a, FLOAT64 *b, FLOAT64 *w, FLOAT64 input) { + FLOAT64 output = 0; + UWORD8 count; + + for (count = 0; count <= IXHEAACE_LOUDNESS_NUM_TAPS - 2; count++) { + w[count] = w[count + 1]; + } + 
w[IXHEAACE_LOUDNESS_NUM_TAPS - 1] = 0; + + for (count = 1; count <= IXHEAACE_LOUDNESS_NUM_TAPS - 1; count++) { + w[IXHEAACE_LOUDNESS_NUM_TAPS - 1] += a[count] * w[IXHEAACE_LOUDNESS_NUM_TAPS - count - 1]; + } + w[IXHEAACE_LOUDNESS_NUM_TAPS - 1] = input - w[IXHEAACE_LOUDNESS_NUM_TAPS - 1]; + + for (count = 0; count <= IXHEAACE_LOUDNESS_NUM_TAPS - 1; count++) { + output += b[count] * w[IXHEAACE_LOUDNESS_NUM_TAPS - count - 1]; + } + + return output; +} + +static FLOAT64 ixheaace_loudness_k_flt(FLOAT64 input, ixheaace_loudness_struct *pstr_loudness_hdl, + UWORD8 channel_no) { + FLOAT64 temp; + temp = ixheaace_loudness_gen_flt(a_coeff_pre_flt[pstr_loudness_hdl->sample_rate_idx], + b_coeff_pre_flt[pstr_loudness_hdl->sample_rate_idx], + pstr_loudness_hdl->w[IXHEAACE_LOUDNESS_PRE_FLT][channel_no], + input); + + temp = ixheaace_loudness_gen_flt(a_coeff_RLB_flt[pstr_loudness_hdl->sample_rate_idx], + b_coeff_RLB_flt[pstr_loudness_hdl->sample_rate_idx], + pstr_loudness_hdl->w[IXHEAACE_LOUDNESS_RLB_FLT][channel_no], + temp); + + return temp; +} + +static VOID ixheaace_measure_sum_square(WORD16 **input, + ixheaace_loudness_struct *pstr_loudness_hdl) { + FLOAT64 tot_one_channel = 0; + FLOAT64 sum_square = 0; + UWORD32 count = 0; + FLOAT64 temp = 0; + UWORD8 channel_no = 0; + FLOAT64 input_sample; + for (channel_no = 0; channel_no < pstr_loudness_hdl->n_channels; channel_no++) { + tot_one_channel = 0; + for (count = 0; count < pstr_loudness_hdl->num_samples_per_ch; count++) { + input_sample = (FLOAT64)input[channel_no][count] / 32768.0; + pstr_loudness_hdl->max_sample_val = + MAX(fabs(input_sample), pstr_loudness_hdl->max_sample_val); + temp = ixheaace_loudness_k_flt(input_sample, pstr_loudness_hdl, channel_no); + tot_one_channel = tot_one_channel + (temp * temp); + } + sum_square += tot_one_channel; + } + pstr_loudness_hdl->sum_square = sum_square; +} + +static FLOAT64 ixheaace_measure_momentary_loudness(ixheaace_loudness_struct *pstr_loudness_hdl) { + FLOAT64 sum = 0; + FLOAT64 
momentary_loudness; + UWORD32 count = 0; + FLOAT64 old_ml_val, db_old_ml_val; + { + for (count = 0; count <= 2; count++) { + pstr_loudness_hdl->prev_four_sum_square[count] = + pstr_loudness_hdl->prev_four_sum_square[count + 1]; + sum += pstr_loudness_hdl->prev_four_sum_square[count]; + } + + pstr_loudness_hdl->prev_four_sum_square[3] = pstr_loudness_hdl->sum_square; + sum += pstr_loudness_hdl->prev_four_sum_square[3]; + + if ((pstr_loudness_hdl->mom_loudness_first_time_flag == 1) && + (pstr_loudness_hdl->count_fn_call_mmstl <= 2)) { + momentary_loudness = IXHEAACE_LOUDNESS_DONT_PASS; + } else { + pstr_loudness_hdl->mom_loudness_first_time_flag = 0; + momentary_loudness = + -0.691 + 10 * log10(sum / ((FLOAT64)(4 * pstr_loudness_hdl->num_samples_per_ch))); + + if (pstr_loudness_hdl->get_intergrated_loudness == 1) { + old_ml_val = + pstr_loudness_hdl->mf_instances[pstr_loudness_hdl->loop_ml_count_fn_call].int_val; + pstr_loudness_hdl->mf_instances[pstr_loudness_hdl->loop_ml_count_fn_call].int_val = + sum / ((FLOAT64)(4 * pstr_loudness_hdl->num_samples_per_ch)); + db_old_ml_val = pstr_loudness_hdl->mf_instances[pstr_loudness_hdl->loop_ml_count_fn_call] + .momentary_loudness; + pstr_loudness_hdl->mf_instances[pstr_loudness_hdl->loop_ml_count_fn_call] + .momentary_loudness = momentary_loudness; + if (pstr_loudness_hdl->mf_instances[pstr_loudness_hdl->loop_ml_count_fn_call] + .momentary_loudness >= IXHEAACE_ABS_GATE) { + if (db_old_ml_val < IXHEAACE_ABS_GATE) { + pstr_loudness_hdl->no_of_mf_passing_abs_gate++; + old_ml_val = 0; + } + + pstr_loudness_hdl->tot_int_val_mf_passing_abs_gate = + pstr_loudness_hdl->tot_int_val_mf_passing_abs_gate + + (pstr_loudness_hdl->mf_instances[pstr_loudness_hdl->loop_ml_count_fn_call].int_val - + old_ml_val); + } else { + if (db_old_ml_val >= IXHEAACE_ABS_GATE) { + pstr_loudness_hdl->no_of_mf_passing_abs_gate--; + pstr_loudness_hdl->tot_int_val_mf_passing_abs_gate = + pstr_loudness_hdl->tot_int_val_mf_passing_abs_gate - old_ml_val; + } + } 
+ + pstr_loudness_hdl->loop_ml_count_fn_call++; + if (pstr_loudness_hdl->ml_count_fn_call < pstr_loudness_hdl->max_il_buf_size) + pstr_loudness_hdl->ml_count_fn_call++; + + pstr_loudness_hdl->loop_ml_count_fn_call = + pstr_loudness_hdl->loop_ml_count_fn_call % pstr_loudness_hdl->max_il_buf_size; + } + } + } + return (momentary_loudness); +} + +FLOAT64 ixheaace_measure_integrated_loudness(pVOID loudness_handle) { + UWORD32 count = 0; + FLOAT64 avg = 0; + FLOAT64 loudness = 0; + ixheaace_loudness_struct *pstr_loudness_hdl = (ixheaace_loudness_struct *)loudness_handle; + pstr_loudness_hdl->no_of_mf_passing_rel_gate = 0; + pstr_loudness_hdl->tot_int_val_mf_passing_rel_gate = 0; + + avg = (pstr_loudness_hdl->tot_int_val_mf_passing_abs_gate / + pstr_loudness_hdl->no_of_mf_passing_abs_gate); + pstr_loudness_hdl->rel_gate = -0.691 + 10 * log10(avg) - 10; + + while (count < pstr_loudness_hdl->ml_count_fn_call) { + if (pstr_loudness_hdl->mf_instances[count].momentary_loudness >= + pstr_loudness_hdl->rel_gate) { + pstr_loudness_hdl->no_of_mf_passing_rel_gate++; + pstr_loudness_hdl->tot_int_val_mf_passing_rel_gate += + pstr_loudness_hdl->mf_instances[count].int_val; + } + count++; + } + + loudness = -0.691 + 10 * log10((pstr_loudness_hdl->tot_int_val_mf_passing_rel_gate / + (FLOAT64)pstr_loudness_hdl->no_of_mf_passing_rel_gate)); + + return loudness; +} + +FLOAT64 ixheaace_measure_loudness(pVOID loudness_handle, WORD16 **samples) { + FLOAT64 loudness_value; + ixheaace_loudness_struct *pstr_loudness_hdl = (ixheaace_loudness_struct *)loudness_handle; + ixheaace_measure_sum_square(samples, pstr_loudness_hdl); + loudness_value = ixheaace_measure_momentary_loudness(pstr_loudness_hdl); + pstr_loudness_hdl->count_fn_call_mmstl++; + return loudness_value; +} + +FLOAT32 ixheaace_measure_sample_peak_value(pVOID loudness_handle) { + FLOAT32 sample_peak_value; + ixheaace_loudness_struct *pstr_loudness_hdl = (ixheaace_loudness_struct *)loudness_handle; + sample_peak_value = (FLOAT32)(20 * 
log10(pstr_loudness_hdl->max_sample_val)); + return sample_peak_value; +}
\ No newline at end of file diff --git a/encoder/ixheaace_loudness_measurement.h b/encoder/ixheaace_loudness_measurement.h new file mode 100644 index 0000000..3856d99 --- /dev/null +++ b/encoder/ixheaace_loudness_measurement.h @@ -0,0 +1,98 @@
+/******************************************************************************
+ *                                                                            *
+ * Copyright (C) 2024 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ *****************************************************************************
+ * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
+ */
+
+#pragma once
+
+#define IXHEAACE_LOUDNESS_NUM_TAPS (3)
+#define IXHEAACE_LOUDNESS_PRE_FLT (0)
+#define IXHEAACE_LOUDNESS_RLB_FLT (1)
+#define IXHEAACE_LOUDNESS_DONT_PASS (-1)
+/* Absolute gate; presumably in loudness units as in ITU-R BS.1770 - confirm. */
+#define IXHEAACE_ABS_GATE (-70)
+#define IXHEAACE_MOMENTARY_LOUDNESS_OVERLAP (3)
+#define IXHEAACE_SL_OVERLAP (20)
+#define BYTE_ALIGN_8 (8)
+/* Evaluates each argument twice: do not pass expressions with side effects. */
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+/* "LOUDENSS" (sic): the spelling is part of the public macro names. */
+#define IXHEAACE_DEFAULT_SHORT_TERM_LOUDENSS (-1000)
+#define IXHEAACE_DEFAULT_MOMENTARY_LOUDENSS (-1000)
+/* NOTE(review): 60 * 10 is the number of 100 ms blocks in one minute, although the name
+ * says "SEC" - confirm the intended unit. */
+#define IXHEAACE_SEC_TO_100MS_FACTOR (60 * 10)
+
+typedef struct {
+  BOOL passes_abs_gate;
+  FLOAT64 short_term_loudness;
+  FLOAT64 int_val;
+} short_term_frame_t;
+
+/* One stored momentary measurement. ixheaace_measure_integrated_loudness() compares
+ * momentary_loudness against the relative gate and accumulates int_val. */
+typedef struct {
+  BOOL passes_abs_gate;
+  FLOAT64 momentary_loudness;
+  FLOAT64 int_val;
+} momentary_frame_t;
+
+/* State of one loudness measurement; callers treat it as an opaque handle (pVOID). */
+typedef struct {
+  UWORD32 num_samples_per_ch;
+  UWORD32 n_channels;
+  UWORD32 length;
+  UWORD32 sample_rate;
+  UWORD32 pcm_sz;
+  FLOAT64 sum_square;
+  FLOAT64 prev_four_sum_square[4];
+  FLOAT64 w[2][2][4];
+  UWORD32 count_fn_call_mmstl; /* incremented once per ixheaace_measure_loudness() call */
+  UWORD32 mom_loudness_first_time_flag;
+  FLOAT64 average_loudness_val;
+  FLOAT64 prev_thirty_sum_square[30];
+  WORD32 sl_first_time_flag;
+  WORD32 local_sl_count;
+  UWORD32 short_term_loudness_overlap;
+  short_term_frame_t stf_instances[100];
+  UWORD32 no_of_stf;
+  UWORD32 curr_stf_no;
+  UWORD32 loop_curr_stf_no;
+  UWORD32 no_of_stf_passing_abs_gate;
+  FLOAT64 tot_int_val_stf_passing_abs_gate;
+  FLOAT64 temp_stf_instances_loudness[100];
+  BOOL get_LRA;
+  UWORD32 max_lra_count;
+  UWORD32 no_of_mf;
+
+  /* Count and summed int_val of momentary frames that passed the absolute gate; their
+   * ratio is the mean used to derive rel_gate. */
+  UWORD32 no_of_mf_passing_abs_gate;
+  FLOAT64 tot_int_val_mf_passing_abs_gate;
+  /* Recomputed from scratch by every ixheaace_measure_integrated_loudness() call. */
+  UWORD32 no_of_mf_passing_rel_gate;
+  FLOAT64 tot_int_val_mf_passing_rel_gate;
+  FLOAT64 rel_gate;
+  UWORD32 ml_count_fn_call;      /* saturates at max_il_buf_size */
+  UWORD32 loop_ml_count_fn_call; /* wraps modulo max_il_buf_size */
+  momentary_frame_t mf_instances[1000];
+  WORD32 get_intergrated_loudness;
+  UWORD32 max_il_buf_size;
+  FLOAT64 max_sample_val; /* input of 20 * log10() in ixheaace_measure_sample_peak_value() */
+  WORD32 sample_rate_idx;
+} ixheaace_loudness_struct;
+
+/* Prepares the state behind loudness_handle from the encoder configuration. The
+ * definition is not part of this header; callers treat a non-zero return as failure. */
+IA_ERRORCODE ixheaace_loudness_init_params(pVOID loudness_handle,
+                                           ixheaace_input_config *pstr_input_config,
+                                           ixheaace_output_config *pstr_output_config);
+
+/* Processes one block of samples and returns its momentary loudness. */
+FLOAT64 ixheaace_measure_loudness(pVOID loudness_handle, WORD16 **samples);
+
+/* Returns the gated integrated loudness over the stored momentary frames. */
+FLOAT64 ixheaace_measure_integrated_loudness(pVOID loudness_handle);
+
+/* Size handed to the allocator by callers when creating a loudness handle. Declared with
+ * (void) so that the compiler rejects calls that pass arguments. */
+WORD32 ixheaace_loudness_info_get_handle_size(void);
+
+/* Returns 20 * log10(max_sample_val) of the given handle. */
+FLOAT32 ixheaace_measure_sample_peak_value(pVOID loudness_handle);
\ No newline at end of file diff --git a/encoder/ixheaace_mps_bitstream.c b/encoder/ixheaace_mps_bitstream.c index 069c5da..1be1fd8 100644 --- a/encoder/ixheaace_mps_bitstream.c +++ b/encoder/ixheaace_mps_bitstream.c @@ -686,7 +686,7 @@ IA_ERRORCODE ixheaace_mps_212_write_spatial_frame( ixheaace_mps_212_write_smg_data(pstr_bit_buf, &pstr_spatial_frame->smg_data, num_param_sets, pstr_specific_config->num_bands); } - ixheaace_byte_align_buffer(pstr_bit_buf); + *ptr_output_bits = ia_enhaacplus_enc_get_bits_available(pstr_bit_buf); if ((*ptr_output_bits) > (output_buffer_size * 8)) { return IA_EXHEAACE_CONFIG_NONFATAL_MPS_INVALID_CONFIG; diff --git a/encoder/ixheaace_mps_enc.c b/encoder/ixheaace_mps_enc.c index 671825a..dec61dc 100644 --- a/encoder/ixheaace_mps_enc.c +++ b/encoder/ixheaace_mps_enc.c @@ -416,17 +416,19 @@ static IA_ERRORCODE ixheaace_mps_212_encode( } pstr_frame_data = &pstr_space_enc->pstr_bitstream_formatter->frame; - if (pstr_space_enc->num_discard_out_frames > 0) { - pstr_space_enc->independency_count = 0; - pstr_space_enc->independency_flag = 1; - } else { - pstr_space_enc->independency_flag = (pstr_space_enc->independency_count == 0) ? 1 : 0; - if (pstr_space_enc->independency_factor > 0) { - pstr_space_enc->independency_count++; - pstr_space_enc->independency_count = - pstr_space_enc->independency_count % ((WORD32)pstr_space_enc->independency_factor); + if (aot != AOT_USAC) { + if (pstr_space_enc->num_discard_out_frames > 0) { + pstr_space_enc->independency_count = 0; + pstr_space_enc->independency_flag = 1; } else { - pstr_space_enc->independency_count = -1; + pstr_space_enc->independency_flag = (pstr_space_enc->independency_count == 0) ? 
1 : 0; + if (pstr_space_enc->independency_factor > 0) { + pstr_space_enc->independency_count++; + pstr_space_enc->independency_count = + pstr_space_enc->independency_count % ((WORD32)pstr_space_enc->independency_factor); + } else { + pstr_space_enc->independency_count = -1; + } } } diff --git a/encoder/ixheaace_sbr_frame_info_gen.c b/encoder/ixheaace_sbr_frame_info_gen.c index 1265470..2460728 100644 --- a/encoder/ixheaace_sbr_frame_info_gen.c +++ b/encoder/ixheaace_sbr_frame_info_gen.c @@ -945,9 +945,7 @@ ixheaace_frame_info_generator(ixheaace_pstr_sbr_env_frame pstr_sbr_env_frame, ixheaace_freq_res *ptr_tuning_freq = (ixheaace_freq_res *)(ptr_v_tuning + 3); ixheaace_freq_res freq_res_fix = pstr_sbr_env_frame->freq_res_fix; - if (pstr_sbr_env_frame->use_low_freq_res == 1) { - freq_res_fix = FREQ_RES_LOW; - } + if (is_ld_sbr) { if ((!tran_flag && ptr_v_pre_transient_info[1]) && (num_time_slots - ptr_v_pre_transient_info[0] < 4)) { diff --git a/encoder/ixheaace_sbr_main.c b/encoder/ixheaace_sbr_main.c index 8182365..7836985 100644 --- a/encoder/ixheaace_sbr_main.c +++ b/encoder/ixheaace_sbr_main.c @@ -81,7 +81,11 @@ VOID ixheaace_set_usac_sbr_params(ixheaace_pstr_sbr_enc pstr_env_enc, WORD32 usa pstr_env_enc->str_sbr_bs.usac_indep_flag = usac_indep_flag; pstr_env_enc->str_sbr_hdr.sbr_pre_proc = sbr_pre_proc; pstr_env_enc->str_sbr_hdr.sbr_pvc_active = sbr_pvc_active; - pstr_env_enc->str_sbr_hdr.sbr_pvc_mode = sbr_pvc_mode; + if (pstr_env_enc->str_sbr_cfg.num_ch == 2) { + pstr_env_enc->str_sbr_hdr.sbr_pvc_mode = 0; + } else { + pstr_env_enc->str_sbr_hdr.sbr_pvc_mode = sbr_pvc_mode; + } pstr_env_enc->str_sbr_hdr.sbr_inter_tes_active = inter_tes_active; pstr_env_enc->str_sbr_hdr.sbr_harmonic = sbr_harmonic; for (ch = 0; ch < pstr_env_enc->str_sbr_cfg.num_ch; ch++) { @@ -188,7 +192,11 @@ static IA_ERRORCODE ixheaace_create_env_channel( e = ixheaac_shl32(1, params->e); - pstr_env->enc_env_data.freq_res_fix = FREQ_RES_HIGH; + if (params->use_low_freq_res == 1) { + 
pstr_env->enc_env_data.freq_res_fix = FREQ_RES_LOW; + } else { + pstr_env->enc_env_data.freq_res_fix = FREQ_RES_HIGH; + } pstr_env->enc_env_data.sbr_xpos_mode = (ixheaace_sbr_xpos_mode)params->sbr_xpos_mode; pstr_env->enc_env_data.sbr_xpos_ctrl = params->sbr_xpos_ctrl; @@ -697,8 +705,10 @@ ixheaace_env_encode_frame(ixheaace_pstr_sbr_enc pstr_env_encoder, FLOAT32 *ptr_s for (WORD32 k = 0; k < num_bytes; k++) { ixheaace_write_bits(&pstr_env_encoder->str_cmon_data.str_sbr_bit_buf, *ptr_mps_data++, 8); } - ixheaace_write_bits(&pstr_env_encoder->str_cmon_data.str_sbr_bit_buf, *ptr_mps_data++, - mps_bits & 0x7); + if (mps_bits & 0x7) { + ixheaace_write_bits(&pstr_env_encoder->str_cmon_data.str_sbr_bit_buf, + (*ptr_mps_data++) >> (8 - (mps_bits & 0x7)), mps_bits & 0x7); + } } ixheaace_assemble_sbr_bitstream(&pstr_env_encoder->str_cmon_data, diff --git a/encoder/ixheaace_sbr_write_bitstream.c b/encoder/ixheaace_sbr_write_bitstream.c index 1fa53f0..1afe4d1 100644 --- a/encoder/ixheaace_sbr_write_bitstream.c +++ b/encoder/ixheaace_sbr_write_bitstream.c @@ -561,13 +561,13 @@ static WORD32 ixheaace_write_synthetic_coding_data(ixheaace_pstr_sbr_env_data ps payload_cnt_bits += ixheaace_write_bits(pstr_bs_handle, pstr_sbr_env_info->add_harmonic[i], 1); } - } - if (USAC_SBR == sbr_codec && 0 != sbr_pvc_mode) { - if (pstr_sbr_env_info->sbr_sinusoidal_pos_flag) { - payload_cnt_bits += ixheaace_write_bits(pstr_bs_handle, 1, 1); - payload_cnt_bits += ixheaace_write_bits(pstr_bs_handle, 31, 5); - } else { - payload_cnt_bits += ixheaace_write_bits(pstr_bs_handle, 0, 1); + if (USAC_SBR == sbr_codec && 0 != sbr_pvc_mode) { + if (pstr_sbr_env_info->sbr_sinusoidal_pos_flag) { + payload_cnt_bits += ixheaace_write_bits(pstr_bs_handle, 1, 1); + payload_cnt_bits += ixheaace_write_bits(pstr_bs_handle, 31, 5); + } else { + payload_cnt_bits += ixheaace_write_bits(pstr_bs_handle, 0, 1); + } } } return payload_cnt_bits; diff --git a/encoder/ixheaace_struct_def.h b/encoder/ixheaace_struct_def.h index 
4f0c3c7..cbf4a0b 100644 --- a/encoder/ixheaace_struct_def.h +++ b/encoder/ixheaace_struct_def.h @@ -106,6 +106,8 @@ typedef struct ixheaace_state_struct { ixheaace_mps_515_memory_struct *mps_515_pers_mem; WORD32 is_quant_spec_zero; WORD32 is_gain_limited; + WORD32 i_out_bits; + VOID *loudness_handle; } ixheaace_state_struct; typedef struct ixheaace_api_struct { diff --git a/encoder/libxaacenc.cmake b/encoder/libxaacenc.cmake index d2a7193..3a3772a 100644 --- a/encoder/libxaacenc.cmake +++ b/encoder/libxaacenc.cmake @@ -59,6 +59,7 @@ list( "${XAAC_ROOT}/encoder/ixheaace_hybrid.c" "${XAAC_ROOT}/encoder/ixheaace_hybrid_init.c" "${XAAC_ROOT}/encoder/ixheaace_interface.c" + "${XAAC_ROOT}/encoder/ixheaace_loudness_measurement.c" "${XAAC_ROOT}/encoder/ixheaace_mdct_480.c" "${XAAC_ROOT}/encoder/ixheaace_mps_bitstream.c" "${XAAC_ROOT}/encoder/ixheaace_mps_dct.c" diff --git a/fuzzer/xaac_enc_fuzzer.cpp b/fuzzer/xaac_enc_fuzzer.cpp index 1952e97..5148025 100644 --- a/fuzzer/xaac_enc_fuzzer.cpp +++ b/fuzzer/xaac_enc_fuzzer.cpp @@ -32,6 +32,7 @@ extern "C" { #include "impd_drc_uni_drc.h" #include "impd_drc_api.h" #include "ixheaace_api.h" +#include "ixheaace_loudness_measurement.h" } static constexpr WORD32 k_sample_rates[] = {7350, 8000, 11025, 12000, 16000, 22050, 24000, @@ -341,7 +342,11 @@ static VOID ixheaace_fuzzer_flag(ixheaace_input_config *pstr_in_cfg, pstr_in_cfg->use_drc_element = fuzzed_data->ConsumeBool(); pstr_in_cfg->inter_tes_active = fuzzed_data->ConsumeBool(); pstr_in_cfg->codec_mode = fuzzed_data->ConsumeIntegral<WORD8>(); - + pstr_in_cfg->random_access_interval = fuzzed_data->ConsumeIntegral<WORD32>(); + pstr_in_cfg->method_def = fuzzed_data->ConsumeIntegral<WORD32>(); + pstr_in_cfg->measurement_system = fuzzed_data->ConsumeIntegral<WORD32>(); + pstr_in_cfg->measured_loudness = fuzzed_data->ConsumeIntegral<WORD32>(); + pstr_in_cfg->stream_id = fuzzed_data->ConsumeIntegral<UWORD16>(); /* DRC */ if (pstr_in_cfg->use_drc_element == 1) { 
ixheaace_read_drc_config_params(&pstr_drc_cfg->str_enc_params, @@ -351,8 +356,107 @@ static VOID ixheaace_fuzzer_flag(ixheaace_input_config *pstr_in_cfg, } }
+/**
+ * De-interleaves fuzz bytes into per-channel 16-bit sample buffers.
+ *
+ * Byte i of input_vec becomes sample (i / num_channels) of channel (i % num_channels).
+ * Positions beyond the end of input_vec are filled with 0, so the function is safe for a
+ * short or empty vector.
+ *
+ * @param input_vec     Bytes obtained from the fuzzed data provider.
+ * @param num_samples   Total number of samples to store (all channels together).
+ * @param num_channels  Number of channel buffers in data; must be positive.
+ * @param data          One writable buffer per channel, each holding at least
+ *                      num_samples / num_channels samples.
+ * @return 0 on success, -1 when num_channels is not positive.
+ */
+IA_ERRORCODE ia_enhaacplus_enc_pcm_data_read(std::vector<WORD8> input_vec, UWORD32 num_samples,
+                                             WORD32 num_channels, WORD16 **data)
+{
+  if (num_channels <= 0) {
+    return -1;
+  }
+  for (UWORD32 count = 0; count < num_samples; count++)
+  {
+    UWORD32 sample_no = (count / num_channels);
+    UWORD8 channel_no = (UWORD8)(count % num_channels);
+    /* Never index past the bytes that were actually consumed. */
+    data[channel_no][sample_no] = (count < input_vec.size()) ? input_vec[count] : 0;
+  }
+  return 0;
+}
+
+/* Frees the first num_allocated channel buffers, the pointer table and the handle. */
+static VOID ixheaace_free_loudness_buffers(WORD16 **samples, WORD32 num_allocated,
+                                           VOID *loudness_handle)
+{
+  if (samples != NULL) {
+    while (num_allocated > 0) {
+      num_allocated--;
+      free_global(samples[num_allocated]);
+    }
+    free_global(samples);
+  }
+  free_global(loudness_handle);
+}
+
+/**
+ * First pass of the fuzzer: feeds fuzz data through the loudness measurement in blocks of
+ * samp_freq / 10 samples per channel and stores the result in
+ * pstr_in_cfg->measured_loudness.
+ *
+ * @param pstr_in_cfg   Input configuration; measured_loudness is updated.
+ * @param pstr_out_cfg  Output configuration; samp_freq is read after
+ *                      ixheaace_loudness_init_params() has run.
+ * @param fuzzed_data   Data provider that is consumed block by block.
+ * @return 0 on success (or when the configuration yields an empty block and there is
+ *         nothing to measure), -1 on allocation, initialisation or read failure.
+ */
+static IA_ERRORCODE ixheaace_calculate_loudness_measure(ixheaace_input_config *pstr_in_cfg,
+    ixheaace_output_config *pstr_out_cfg, FuzzedDataProvider *fuzzed_data)
+{
+  WORD32 input_size;
+  WORD32 frame_len;
+  WORD32 count = 0;
+  IA_ERRORCODE err_code = 0;
+  WORD16 **samples = NULL;
+  VOID *loudness_handle = malloc_global(ixheaace_loudness_info_get_handle_size(),
+                                        DEFAULT_MEM_ALIGN_8);
+  if (loudness_handle == NULL) {
+    printf("fatal error: libxaac encoder: Memory allocation failed");
+    return -1;
+  }
+
+  err_code = ixheaace_loudness_init_params(loudness_handle, pstr_in_cfg, pstr_out_cfg);
+
+  if (err_code) {
+    free_global(loudness_handle);
+    return -1;
+  }
+
+  frame_len = pstr_out_cfg->samp_freq / 10;
+  if (frame_len <= 0 || pstr_in_cfg->i_channels <= 0) {
+    /* An empty block would consume no data and the loop below would never terminate. */
+    free_global(loudness_handle);
+    return 0;
+  }
+  input_size = frame_len * pstr_in_cfg->i_channels;
+
+  samples = (WORD16 **)malloc_global(pstr_in_cfg->i_channels * sizeof(*samples),
+                                     DEFAULT_MEM_ALIGN_8);
+  if (samples == NULL)
+  {
+    printf("fatal error: libxaac encoder: Memory allocation failed");
+    free_global(loudness_handle);
+    return -1;
+  }
+  for (count = 0; count < pstr_in_cfg->i_channels; count++)
+  {
+    samples[count] = (WORD16 *)malloc_global(frame_len * sizeof(*samples[count]),
+                                             DEFAULT_MEM_ALIGN_8);
+    if (samples[count] == NULL)
+    {
+      printf("fatal error: libxaac encoder: Memory allocation failed");
+      ixheaace_free_loudness_buffers(samples, count, loudness_handle);
+      return -1;
+    }
+  }
+
+  /* NOTE(review): count advances in samples while remaining_bytes() shrinks in bytes, so
+   * the loop stops before all data is consumed; kept as in the original - confirm intent. */
+  count = 0;
+  while ((size_t)count <= fuzzed_data->remaining_bytes())
+  {
+    std::vector<WORD8> input_vec = fuzzed_data->ConsumeBytes<WORD8>(input_size);
+    /* The reader zero-fills every position not covered by input_vec. */
+    err_code = ia_enhaacplus_enc_pcm_data_read(input_vec, input_size,
+                                               pstr_in_cfg->i_channels, samples);
+    if (err_code) {
+      ixheaace_free_loudness_buffers(samples, pstr_in_cfg->i_channels, loudness_handle);
+      return -1;
+    }
+    pstr_in_cfg->measured_loudness = ixheaace_measure_loudness(loudness_handle, samples);
+    count += input_size;
+  }
+  if (pstr_in_cfg->method_def == METHOD_DEFINITION_PROGRAM_LOUDNESS) {
+    pstr_in_cfg->measured_loudness = ixheaace_measure_integrated_loudness(loudness_handle);
+  }
+  ixheaace_free_loudness_buffers(samples, pstr_in_cfg->i_channels, loudness_handle);
+  return err_code;
+}
+ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { FuzzedDataProvider fuzzed_data(data, size); + FuzzedDataProvider fuzzed_data_loudness(data, size); /* Error code */ IA_ERRORCODE err_code = 0; @@ -362,6 +466,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { pWORD8 pb_inp_buf = NULL; WORD32 input_size = 0; + WORD32 read_inp_data = 1; WORD32 num_proc_iterations = 0; /* ******************************************************************/ @@ -383,7 +488,26 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { /* Parse input configuration parameters */ /* ******************************************************************/ ixheaace_fuzzer_flag(pstr_in_cfg, pstr_drc_cfg, &fuzzed_data); - + + /*1st pass -> Loudness Measurement */ + if (pstr_in_cfg->aot == AOT_USAC) + { + err_code = ixheaace_calculate_loudness_measure(pstr_in_cfg, pstr_out_cfg, + &fuzzed_data_loudness); + if (err_code) { + if (pstr_drc_cfg) { +
free(pstr_drc_cfg); + pstr_drc_cfg = NULL; + } + /* Fatal error code */ + if (err_code & 0x80000000) { + ixheaace_delete((pVOID)pstr_out_cfg); + return 0; + } + return -1; + } + } + err_code = ixheaace_create((pVOID)pstr_in_cfg, (pVOID)pstr_out_cfg); if (err_code) { if (pstr_drc_cfg) { @@ -410,17 +534,23 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { memset(pb_inp_buf, 0, input_size); while (fuzzed_data.remaining_bytes()) { - if (fuzzed_data.ConsumeBool()) { - std::vector<WORD8> inputVec = fuzzed_data.ConsumeBytes<WORD8>(input_size); - if (inputVec.size()) { - memcpy(pb_inp_buf, inputVec.data(), inputVec.size()); + if (read_inp_data) { + if (fuzzed_data.ConsumeBool()) { + std::vector<WORD8> input_vec = fuzzed_data.ConsumeBytes<WORD8>(input_size); + if (input_vec.size()) { + memcpy(pb_inp_buf, input_vec.data(), input_vec.size()); + } + } else { + memset(pb_inp_buf, fuzzed_data.ConsumeIntegral<WORD8>(), input_size); } - } else { - memset(pb_inp_buf, fuzzed_data.ConsumeIntegral<WORD8>(), input_size); } ixheaace_process(pv_ia_process_api_obj, (pVOID)pstr_in_cfg, (pVOID)pstr_out_cfg); num_proc_iterations++; - + if (pstr_out_cfg->i_out_bytes == 0) { + read_inp_data = 0; + } else { + read_inp_data = 1; + } /* Stop processing after 500 frames */ if (num_proc_iterations > 500) break; diff --git a/test/encoder/impd_drc_config_params.txt b/test/encoder/impd_drc_config_params.txt index cad2fa2..fc2ac26 100644 --- a/test/encoder/impd_drc_config_params.txt +++ b/test/encoder/impd_drc_config_params.txt @@ -169,48 +169,3 @@ attack:2.0 decay:5.0 start_sub_band_index:44 #end gain parameters -#loudness info parameters -loudness_info_count:1 -#n=0 -drc_set_id:1 -downmix_id:1 -sample_peak_level_present:1 -sample_peak_level:0.0 -true_peak_level_present:1 -true_peak_level:1.0 -true_peak_level_measurement_system:2 -true_peak_level_reliability:3 -measurement_count:2 -#m=0 -method_definition:1 -method_value:-10.0 -measurement_system:2 -reliability:3 -#m=1 
-method_definition:3 -method_value:-6.0 -measurement_system:1 -reliability:3 -#end loudness info parameters -#loudness info album parameters -loudness_info_album_count:1 -#n=0 -drc_set_id:1 -downmix_id:1 -sample_peak_level_present:0 -#sample_peak_level: -true_peak_level_present:0 -#true_peak_level: -#true_peak_level_measurement_system: -#true_peak_level_reliability: -measurement_count:1 -#m=0 -method_definition:1 -method_value:-10.0 -measurement_system:2 -reliability:3 -#end loudness info album parameters -#####str_downmix_instructions##### -#n=0 -target_layout:0 -downmix_coefficients_present:0 diff --git a/test/encoder/impd_drc_user_config.c b/test/encoder/impd_drc_user_config.c index 92d2bcf..c6363b7 100644 --- a/test/encoder/impd_drc_user_config.c +++ b/test/encoder/impd_drc_user_config.c @@ -232,93 +232,6 @@ VOID ixheaace_read_drc_config_params(FILE *fp, ia_drc_enc_params_struct *pstr_en } } } - - pstr_enc_loudness_info_set->loudness_info_count = impd_drc_get_integer_value(fp); - pstr_enc_loudness_info_set->loudness_info_count = - MIN(pstr_enc_loudness_info_set->loudness_info_count, MAX_LOUDNESS_INFO_COUNT); - for (n = 0; n < pstr_enc_loudness_info_set->loudness_info_count; n++) { - pstr_enc_loudness_info_set->str_loudness_info[n].drc_set_id = impd_drc_get_integer_value(fp); - pstr_enc_loudness_info_set->str_loudness_info[n].downmix_id = impd_drc_get_integer_value(fp); - pstr_enc_loudness_info_set->str_loudness_info[n].sample_peak_level_present = - impd_drc_get_integer_value(fp); - if (1 == pstr_enc_loudness_info_set->str_loudness_info[n].sample_peak_level_present) { - pstr_enc_loudness_info_set->str_loudness_info[n].sample_peak_level = - impd_drc_get_float_value(fp); - } - pstr_enc_loudness_info_set->str_loudness_info[n].true_peak_level_present = - impd_drc_get_integer_value(fp); - if (1 == pstr_enc_loudness_info_set->str_loudness_info[n].true_peak_level_present) { - pstr_enc_loudness_info_set->str_loudness_info[n].true_peak_level = - 
impd_drc_get_float_value(fp); - pstr_enc_loudness_info_set->str_loudness_info[n].true_peak_level_measurement_system = - impd_drc_get_integer_value(fp); - pstr_enc_loudness_info_set->str_loudness_info[n].true_peak_level_reliability = - impd_drc_get_integer_value(fp); - } - pstr_enc_loudness_info_set->str_loudness_info[n].measurement_count = - impd_drc_get_integer_value(fp); - pstr_enc_loudness_info_set->str_loudness_info[n].measurement_count = - MIN(pstr_enc_loudness_info_set->str_loudness_info[n].measurement_count, - MAX_MEASUREMENT_COUNT); - - for (m = 0; m < pstr_enc_loudness_info_set->str_loudness_info[n].measurement_count; m++) { - pstr_enc_loudness_info_set->str_loudness_info[n].str_loudness_measure[m].method_definition = - impd_drc_get_integer_value(fp); - pstr_enc_loudness_info_set->str_loudness_info[n].str_loudness_measure[m].method_value = - impd_drc_get_float_value(fp); - pstr_enc_loudness_info_set->str_loudness_info[n] - .str_loudness_measure[m] - .measurement_system = impd_drc_get_integer_value(fp); - pstr_enc_loudness_info_set->str_loudness_info[n].str_loudness_measure[m].reliability = - impd_drc_get_integer_value(fp); - } - } - - pstr_enc_loudness_info_set->loudness_info_album_count = impd_drc_get_integer_value(fp); - pstr_enc_loudness_info_set->loudness_info_album_count = - MIN(pstr_enc_loudness_info_set->loudness_info_album_count, MAX_LOUDNESS_INFO_COUNT); - for (n = 0; n < pstr_enc_loudness_info_set->loudness_info_album_count; n++) { - pstr_enc_loudness_info_set->str_loudness_info_album[n].drc_set_id = - impd_drc_get_integer_value(fp); - pstr_enc_loudness_info_set->str_loudness_info_album[n].downmix_id = - impd_drc_get_integer_value(fp); - pstr_enc_loudness_info_set->str_loudness_info_album[n].sample_peak_level_present = - impd_drc_get_integer_value(fp); - if (1 == pstr_enc_loudness_info_set->str_loudness_info_album[n].sample_peak_level_present) { - pstr_enc_loudness_info_set->str_loudness_info_album[n].sample_peak_level = - 
impd_drc_get_float_value(fp); - } - pstr_enc_loudness_info_set->str_loudness_info_album[n].true_peak_level_present = - impd_drc_get_integer_value(fp); - if (1 == pstr_enc_loudness_info_set->str_loudness_info_album[n].true_peak_level_present) { - pstr_enc_loudness_info_set->str_loudness_info_album[n].true_peak_level = - impd_drc_get_float_value(fp); - pstr_enc_loudness_info_set->str_loudness_info_album[n].true_peak_level_measurement_system = - impd_drc_get_integer_value(fp); - pstr_enc_loudness_info_set->str_loudness_info_album[n].true_peak_level_reliability = - impd_drc_get_integer_value(fp); - } - pstr_enc_loudness_info_set->str_loudness_info_album[n].measurement_count = - impd_drc_get_integer_value(fp); - pstr_enc_loudness_info_set->str_loudness_info_album[n].measurement_count = - MIN(pstr_enc_loudness_info_set->str_loudness_info_album[n].measurement_count, - MAX_MEASUREMENT_COUNT); - for (m = 0; m < pstr_enc_loudness_info_set->str_loudness_info_album[n].measurement_count; - m++) { - pstr_enc_loudness_info_set->str_loudness_info_album[n] - .str_loudness_measure[m] - .method_definition = impd_drc_get_integer_value(fp); - pstr_enc_loudness_info_set->str_loudness_info_album[n] - .str_loudness_measure[m] - .method_value = impd_drc_get_float_value(fp); - pstr_enc_loudness_info_set->str_loudness_info_album[n] - .str_loudness_measure[m] - .measurement_system = impd_drc_get_integer_value(fp); - pstr_enc_loudness_info_set->str_loudness_info_album[n].str_loudness_measure[m].reliability = - impd_drc_get_integer_value(fp); - } - } - /*********** str_channel_layout *************/ pstr_uni_drc_config->str_channel_layout.layout_signaling_present = 0; diff --git a/test/encoder/ixheaace_testbench.c b/test/encoder/ixheaace_testbench.c index 94d5891..2dee450 100644 --- a/test/encoder/ixheaace_testbench.c +++ b/test/encoder/ixheaace_testbench.c @@ -33,6 +33,7 @@ #include "ixheaace_api.h" #include "ixheaac_error_standards.h" #include "ixheaace_error_handler.h" +#include 
"ixheaace_loudness_measurement.h" VOID ia_enhaacplus_enc_error_handler_init(); VOID ia_testbench_error_handler_init(); @@ -70,6 +71,31 @@ int ia_enhaacplus_enc_fread(void *buf, int size, int bytes, FILE *fp) { return (int)fread(buf, size, bytes, fp); }
+/**
+ * Reads interleaved 16-bit PCM samples from in_file into per-channel buffers.
+ *
+ * Sample i of the stream is stored as sample (i / num_channels) of channel
+ * (i % num_channels). When the end of the file is reached before num_samples samples were
+ * read, the message is printed once and the remaining positions are filled with 0.
+ *
+ * @param in_file       Input stream positioned at the next sample.
+ * @param num_samples   Total number of samples to store (all channels together).
+ * @param num_channels  Number of channel buffers in data; must be positive.
+ * @param data          One writable buffer per channel, each holding at least
+ *                      num_samples / num_channels samples.
+ * @return IA_NO_ERROR on success (including a short read at end of file), -1 on a read
+ *         error or when num_channels is not positive.
+ */
+IA_ERRORCODE ia_enhaacplus_enc_pcm_data_read(FILE *in_file, UWORD32 num_samples,
+                                             WORD32 num_channels, WORD16 **data) {
+  UWORD32 count;
+  WORD32 eof_reached = 0;
+
+  if (num_channels <= 0) {
+    return -1;
+  }
+
+  for (count = 0; count < num_samples; count++) {
+    WORD16 sample = 0;
+    UWORD32 sample_no = count / num_channels;
+    UWORD8 channel_no = (UWORD8)(count % num_channels);
+
+    if (!eof_reached && fread(&sample, sizeof(WORD16), 1, in_file) != 1) {
+      if (!feof(in_file)) {
+        printf("Error reading a file.\n");
+        return -1;
+      }
+      printf("End of file reached.\n");
+      eof_reached = 1;
+      /* A partial read may have touched the variable; pad with silence instead. */
+      sample = 0;
+    }
+    data[channel_no][sample_no] = sample;
+  }
+  return IA_NO_ERROR;
+}
+ int ia_enhaacplus_enc_fwrite(void *pb_buf, FILE *pf_out, WORD32 i_out_bytes) { fwrite(pb_buf, sizeof(char), i_out_bytes, pf_out); return 1; @@ -201,6 +227,8 @@ void ia_enhaacplus_enc_print_usage() { printf("\n[-esbr_hq:<esbr_hq_flag>]"); printf("\n[-drc:<drc_flag>]"); printf("\n[-inter_tes_enc:<inter_tes_enc_flag>]"); + printf("\n[-rap:<random access interval in ms>]"); + printf("\n[-stream_id:<stream identifier>]"); printf("\n\nwhere, \n <paramfile> is the parameter file with multiple commands"); printf("\n <inputfile> is the input 16-bit WAV or PCM file name"); printf("\n <outputfile> is the output ADTS/ES file name"); @@ -279,7 +307,16 @@ void ia_enhaacplus_enc_print_usage() { "1 (enable DRC encoding). Default is 0."); printf( "\n <inter_tes_enc_flag> Valid values are 0 (disable inter - TES encoding) and " - "1 (enable inter - TES encoding). Default is 0.\n"); + "1 (enable inter - TES encoding). Default is 0."); + printf( + "\n <random access interval in ms> is the time interval between audio preroll frames in " + "ms. It is applicable only for AOT 42."
+ "\n Valid values are -1 (Audio preroll sent only at beginning of file) and " + "greater than 1000 ms. Default is -1."); + printf( + "\n <stream identifier> is the stream id used to uniquely identify configuration of a " + "stream within a set of associated streams." + "\n It is applicable only for AOT 42. Valid values are 0 to 65535. Default is 0."); exit(1); } @@ -380,6 +417,14 @@ static VOID ixheaace_parse_config_param(WORD32 argc, pWORD8 argv[], pVOID ptr_en pCHAR8 pb_arg_val = (pCHAR8)(argv[i] + 15); pstr_enc_api->input_config.inter_tes_active = atoi(pb_arg_val); } + if (!strncmp((const char *)argv[i], "-rap:", 5)) { + pCHAR8 pb_arg_val = (pCHAR8)(argv[i] + 5); + pstr_enc_api->input_config.random_access_interval = atoi(pb_arg_val); + } + if (!strncmp((const char *)argv[i], "-stream_id:", 11)) { + pCHAR8 pb_arg_val = (pCHAR8)(argv[i] + 11); + pstr_enc_api->input_config.stream_id = atoi(pb_arg_val); + } } return; @@ -921,12 +966,91 @@ static VOID ixheaace_print_config_params(ixheaace_input_config *pstr_input_confi printf("\nDRC : 1"); ixheaace_print_drc_config_params(pstr_input_config, pstr_input_config_user); } + + if (pstr_input_config->random_access_interval != + pstr_input_config_user->random_access_interval) { + printf("\nRandom access interval (Invalid config value, setting to default) : %d", + pstr_input_config->random_access_interval); + } } printf( "\n*************************************************************************************" "***********\n\n"); } +
+/**
+ * First pass over the input file: measures loudness block by block and then restores the
+ * original file position so that encoding starts from the same place.
+ *
+ * Each block holds samp_freq / 10 samples per channel. The same block length is used for
+ * the buffer size, the read size and the loop step, so a read can never exceed the
+ * buffers. Only whole blocks within aac_config.length / 2 samples are processed.
+ *
+ * @param pstr_in_cfg   Input configuration; measured_loudness is updated, and
+ *                      sample_peak_level as well when method_def is
+ *                      METHOD_DEFINITION_PROGRAM_LOUDNESS.
+ * @param pstr_out_cfg  Output configuration; samp_freq is read after
+ *                      ixheaace_loudness_init_params() has run.
+ * @param in_file       Open input file; its position is saved and restored.
+ * @return IA_NO_ERROR on success, -1 on any failure.
+ */
+static IA_ERRORCODE ixheaace_calculate_loudness_measure(ixheaace_input_config *pstr_in_cfg,
+                                                        ixheaace_output_config *pstr_out_cfg,
+                                                        FILE *in_file) {
+  long temp_pos;
+  WORD32 frame_len;
+  WORD32 no_samples_per_frame;
+  WORD32 total_samples;
+  WORD32 count = 0;
+  WORD32 num_alloc = 0;
+  WORD16 **samples = NULL;
+  VOID *loudness_handle = NULL;
+  IA_ERRORCODE err_code = -1;
+
+  temp_pos = ftell(in_file);
+  if (temp_pos < 0) {
+    printf("fatal error: libxaac encoder: Reading input file position failed");
+    return -1;
+  }
+
+  loudness_handle =
+      malloc_global(ixheaace_loudness_info_get_handle_size(), DEFAULT_MEM_ALIGN_8);
+  if (loudness_handle == NULL) {
+    printf("fatal error: libxaac encoder: Memory allocation failed");
+    return -1;
+  }
+
+  if (ixheaace_loudness_init_params(loudness_handle, pstr_in_cfg, pstr_out_cfg)) {
+    goto cleanup;
+  }
+
+  frame_len = pstr_out_cfg->samp_freq / 10;
+  if (frame_len <= 0 || pstr_in_cfg->i_channels <= 0) {
+    /* An empty block would make the measurement loop below run forever. */
+    printf("fatal error: libxaac encoder: Invalid sample rate or channel count");
+    goto cleanup;
+  }
+  no_samples_per_frame = frame_len * pstr_in_cfg->i_channels;
+
+  samples =
+      (WORD16 **)malloc_global(pstr_in_cfg->i_channels * sizeof(*samples), DEFAULT_MEM_ALIGN_8);
+  if (samples == NULL) {
+    printf("fatal error: libxaac encoder: Memory allocation failed");
+    goto cleanup;
+  }
+  for (num_alloc = 0; num_alloc < pstr_in_cfg->i_channels; num_alloc++) {
+    samples[num_alloc] =
+        (WORD16 *)malloc_global(frame_len * sizeof(*samples[num_alloc]), DEFAULT_MEM_ALIGN_8);
+    if (samples[num_alloc] == NULL) {
+      printf("fatal error: libxaac encoder: Memory allocation failed");
+      goto cleanup;
+    }
+    memset(samples[num_alloc], 0, frame_len * sizeof(*samples[num_alloc]));
+  }
+
+  /* length is in bytes; 16-bit samples are assumed, hence the division by two. */
+  total_samples = (WORD32)(pstr_in_cfg->aac_config.length / 2);
+  while (total_samples - count >= no_samples_per_frame) {
+    if (ia_enhaacplus_enc_pcm_data_read(in_file, (UWORD32)no_samples_per_frame,
+                                        pstr_in_cfg->i_channels, samples)) {
+      printf("fatal error: libxaac encoder: Reading PCM data failed");
+      goto cleanup;
+    }
+    pstr_in_cfg->measured_loudness = ixheaace_measure_loudness(loudness_handle, samples);
+    count += no_samples_per_frame;
+  }
+
+  if (pstr_in_cfg->method_def == METHOD_DEFINITION_PROGRAM_LOUDNESS) {
+    pstr_in_cfg->measured_loudness = ixheaace_measure_integrated_loudness(loudness_handle);
+    pstr_in_cfg->sample_peak_level = ixheaace_measure_sample_peak_value(loudness_handle);
+  }
+
+  /* The encoder pass must start where this pass started. */
+  if (fseek(in_file, temp_pos, SEEK_SET) != 0) {
+    printf("fatal error: libxaac encoder: Restoring input file position failed");
+    goto cleanup;
+  }
+  err_code = IA_NO_ERROR;
+
+cleanup:
+  if (samples != NULL) {
+    while (num_alloc > 0) {
+      num_alloc--;
+      free_global(samples[num_alloc]);
+    }
+    free_global(samples);
+  }
+  free_global(loudness_handle);
+  return err_code;
+}
+ IA_ERRORCODE ia_enhaacplus_enc_main_process(ixheaace_app_context *pstr_context, WORD32 argc, pWORD8 argv[]) { LOOPIDX frame_count = 0; @@ -1019,6 +1143,10 @@ IA_ERRORCODE ia_enhaacplus_enc_main_process(ixheaace_app_context *pstr_context, pstr_in_cfg->user_tns_flag = 0; pstr_in_cfg->user_esbr_flag = 0; pstr_in_cfg->i_use_adts = !pstr_context->use_ga_hdr; + pstr_in_cfg->random_access_interval = DEFAULT_RAP_INTERVAL_IN_MS; + pstr_in_cfg->method_def = METHOD_DEFINITION_PROGRAM_LOUDNESS; + pstr_in_cfg->measurement_system = MEASUREMENT_SYSTEM_BS_1770_3; + /* ******************************************************************/ /* Parse input configuration parameters */ /* ******************************************************************/ @@ -1090,6 +1218,19 @@ IA_ERRORCODE ia_enhaacplus_enc_main_process(ixheaace_app_context *pstr_context, pstr_in_cfg->aac_config.length = i_total_length; } + /*1st pass -> Loudness Measurement */ + if (pstr_in_cfg->aot == AOT_USAC) { + err_code = + ixheaace_calculate_loudness_measure(pstr_in_cfg, pstr_out_cfg, pstr_context->pf_inp); + if (err_code) { + printf("\n Error in calculating loudness.\n"); + exit(1); + } else { + printf("\n loudness level : %lf", pstr_in_cfg->measured_loudness); + printf("\n sample_peak_level : %lf \n", pstr_in_cfg->sample_peak_level); + } + } + ixheaace_input_config pstr_in_cfg_user = *pstr_in_cfg; ia_drc_input_config *pstr_drc_cfg_user = NULL; @@ -1175,6 +1316,7 @@ IA_ERRORCODE ia_enhaacplus_enc_main_process(ixheaace_app_context *pstr_context, if (pstr_drc_cfg_user) { free_global(pstr_drc_cfg_user); + pstr_drc_cfg_user = NULL; } start_offset_samples = 0; @@ -1214,7 +1356,6 @@ IA_ERRORCODE ia_enhaacplus_enc_main_process(ixheaace_app_context *pstr_context, /*****************************************************************************/ /* Print frame number
*/ /*****************************************************************************/ - frame_count++; fprintf(stdout, "Frames Processed [%d]\r", frame_count); fflush(stdout); @@ -1234,20 +1375,16 @@ IA_ERRORCODE ia_enhaacplus_enc_main_process(ixheaace_app_context *pstr_context, i_out_bytes = pstr_out_cfg->i_out_bytes; if (max_frame_size < i_out_bytes) max_frame_size = i_out_bytes; + if (i_out_bytes) { + frame_count++; + ia_stsz_size[frame_count - 1] = pstr_out_cfg->i_out_bytes; - if (pstr_in_cfg->usac_en || pstr_in_cfg->i_use_es || !(pstr_in_cfg->i_use_adts)) { - if (i_out_bytes) - ia_stsz_size[frame_count - 1] = i_out_bytes; - else - frame_count--; - } - - ia_enhaacplus_enc_fwrite(pb_out_buf, pstr_context->pf_out, i_out_bytes); - fflush(pstr_context->pf_out); - - i_bytes_read = ia_enhaacplus_enc_fread((pVOID)pb_inp_buf, sizeof(WORD8), input_size, - pstr_context->pf_inp); + ia_enhaacplus_enc_fwrite(pb_out_buf, pstr_context->pf_out, i_out_bytes); + fflush(pstr_context->pf_out); + i_bytes_read = ia_enhaacplus_enc_fread((pVOID)pb_inp_buf, sizeof(WORD8), input_size, + pstr_context->pf_inp); + } if (frame_count == expected_frame_count) break; } |