FAAC源码分析之faacEncEncode
FAAC编码代码流程图
通用的AAC编码系统框图(偷来的图)
对比可以发现,其实FAAC的编码和一般的AAC编码大致一致,主要包括心理声学模型处理部分和量化处理两部分,还包括一些优化处理的过程。实现的源码+注释:
int FAACAPI faacEncEncode(faacEncHandle hEncoder, int32_t *inputBuffer, unsigned int samplesInput, unsigned char *outputBuffer, unsigned int bufferSize ) { unsigned int channel, i; int sb, frameBytes; unsigned int offset; BitStream *bitStream; /* bitstream used for writing the frame to */ TnsInfo *tnsInfo_for_LTP; TnsInfo *tnsDecInfo; #ifdef DRM int desbits, diff; double fix; #endif /* local copy's of parameters */ ChannelInfo *channelInfo = hEncoder->channelInfo; CoderInfo *coderInfo = hEncoder->coderInfo; unsigned int numChannels = hEncoder->numChannels; unsigned int sampleRate = hEncoder->sampleRate; unsigned int aacObjectType = hEncoder->config.aacObjectType; unsigned int mpegVersion = hEncoder->config.mpegVersion; unsigned int useLfe = hEncoder->config.useLfe; unsigned int useTns = hEncoder->config.useTns; unsigned int allowMidside = hEncoder->config.allowMidside; unsigned int bandWidth = hEncoder->config.bandWidth; unsigned int shortctl = hEncoder->config.shortctl; /* Increase frame number */ hEncoder->frameNum++; if (samplesInput == 0) hEncoder->flushFrame++; /* After 4 flush frames all samples have been encoded, return 0 bytes written */ if (hEncoder->flushFrame > 4) return 0; /* Determine the channel configuration */ GetChannelInfo(channelInfo, numChannels, useLfe); /* Update current sample buffers */ for (channel = 0; channel < numChannels; channel++) { double *tmp; if (hEncoder->sampleBuff[channel]) { for(i = 0; i < FRAME_LEN; i++) { hEncoder->ltpTimeBuff[channel][i] = hEncoder->sampleBuff[channel][i]; } } if (hEncoder->nextSampleBuff[channel]) { for(i = 0; i < FRAME_LEN; i++) { hEncoder->ltpTimeBuff[channel][FRAME_LEN + i] = hEncoder->nextSampleBuff[channel][i]; } } if (!hEncoder->sampleBuff[channel]) hEncoder->sampleBuff[channel] = (double*)AllocMemory(FRAME_LEN*sizeof(double)); tmp = hEncoder->sampleBuff[channel]; hEncoder->sampleBuff[channel] = hEncoder->nextSampleBuff[channel]; hEncoder->nextSampleBuff[channel] = hEncoder->next2SampleBuff[channel]; hEncoder->next2SampleBuff[channel] = hEncoder->next3SampleBuff[channel]; hEncoder->next3SampleBuff[channel] = tmp; if (samplesInput == 0) { /* start flushing*/ for (i = 0; i < FRAME_LEN; i++) hEncoder->next3SampleBuff[channel][i] = 0.0; } else { int samples_per_channel = samplesInput/numChannels; /* handle the various input formats and channel remapping */ switch( hEncoder->config.inputFormat ) { case FAAC_INPUT_16BIT: { short *input_channel = (short*)inputBuffer + hEncoder->config.channel_map[channel]; for (i = 0; i < samples_per_channel; i++) { hEncoder->next3SampleBuff[channel][i] = (double)*input_channel; input_channel += numChannels; } } break; case FAAC_INPUT_32BIT: { int32_t *input_channel = (int32_t*)inputBuffer + hEncoder->config.channel_map[channel]; for (i = 0; i < samples_per_channel; i++) { hEncoder->next3SampleBuff[channel][i] = (1.0/256) * (double)*input_channel; input_channel += numChannels; } } break; case FAAC_INPUT_FLOAT: { float *input_channel = (float*)inputBuffer + hEncoder->config.channel_map[channel]; for (i = 0; i < samples_per_channel; i++) { hEncoder->next3SampleBuff[channel][i] = (double)*input_channel; input_channel += numChannels; } } break; default: return -1; /* invalid input format */ break; } for (i = (int)(samplesInput/numChannels); i < FRAME_LEN; i++) hEncoder->next3SampleBuff[channel][i] = 0.0; } /* Psychoacoustics */ /* Update buffers and run FFT on new samples */ /* LFE psychoacoustic can run without it */ if (!channelInfo[channel].lfe || channelInfo[channel].cpe) { // 心理声学模型的缓冲区更新, 计算当前帧能量值 hEncoder->psymodel->PsyBufferUpdate( &hEncoder->fft_tables, &hEncoder->gpsyInfo, &hEncoder->psyInfo[channel], hEncoder->next3SampleBuff[channel], bandWidth, hEncoder->srInfo->cb_width_short, hEncoder->srInfo->num_cb_short); } } if (hEncoder->frameNum <= 3) /* Still filling up the buffers */ return 0; // 内部调用实现检测瞬变信号, 判断长短块 /* Psychoacoustics */ hEncoder->psymodel->PsyCalculate(channelInfo, &hEncoder->gpsyInfo, hEncoder->psyInfo, hEncoder->srInfo->cb_width_long, hEncoder->srInfo->num_cb_long, hEncoder->srInfo->cb_width_short, hEncoder->srInfo->num_cb_short, numChannels); // 长短块切换 hEncoder->psymodel->BlockSwitch(coderInfo, hEncoder->psyInfo, numChannels); /* force block type */ if (shortctl == SHORTCTL_NOSHORT) { for (channel = 0; channel < numChannels; channel++) { coderInfo[channel].block_type = ONLY_LONG_WINDOW; } } if (shortctl == SHORTCTL_NOLONG) { for (channel = 0; channel < numChannels; channel++) { coderInfo[channel].block_type = ONLY_SHORT_WINDOW; } } /* AAC Filterbank, MDCT with overlap and add */ for (channel = 0; channel < numChannels; channel++) { int k; FilterBank(hEncoder, &coderInfo[channel], hEncoder->sampleBuff[channel], hEncoder->freqBuff[channel], hEncoder->overlapBuff[channel], MOVERLAPPED); if (coderInfo[channel].block_type == ONLY_SHORT_WINDOW) { for (k = 0; k < 8; k++) { specFilter(hEncoder->freqBuff[channel]+k*BLOCK_LEN_SHORT, sampleRate, bandWidth, BLOCK_LEN_SHORT); } } else { specFilter(hEncoder->freqBuff[channel], sampleRate, bandWidth, BLOCK_LEN_LONG); } } /* TMP: Build sfb offset table and other stuff */ for (channel = 0; channel < numChannels; channel++) { channelInfo[channel].msInfo.is_present = 0; if (coderInfo[channel].block_type == ONLY_SHORT_WINDOW) { coderInfo[channel].max_sfb = hEncoder->srInfo->num_cb_short; coderInfo[channel].nr_of_sfb = hEncoder->srInfo->num_cb_short; coderInfo[channel].num_window_groups = 1; coderInfo[channel].window_group_length[0] = 8; coderInfo[channel].window_group_length[1] = 0; coderInfo[channel].window_group_length[2] = 0; coderInfo[channel].window_group_length[3] = 0; coderInfo[channel].window_group_length[4] = 0; coderInfo[channel].window_group_length[5] = 0; coderInfo[channel].window_group_length[6] = 0; coderInfo[channel].window_group_length[7] = 0; offset = 0; for (sb = 0; sb < coderInfo[channel].nr_of_sfb; sb++) { coderInfo[channel].sfb_offset[sb] = offset; offset += hEncoder->srInfo->cb_width_short[sb]; } coderInfo[channel].sfb_offset[coderInfo[channel].nr_of_sfb] = offset; } else { coderInfo[channel].max_sfb = hEncoder->srInfo->num_cb_long; coderInfo[channel].nr_of_sfb = hEncoder->srInfo->num_cb_long; coderInfo[channel].num_window_groups = 1; coderInfo[channel].window_group_length[0] = 1; offset = 0; for (sb = 0; sb < coderInfo[channel].nr_of_sfb; sb++) { coderInfo[channel].sfb_offset[sb] = offset; offset += hEncoder->srInfo->cb_width_long[sb]; } coderInfo[channel].sfb_offset[coderInfo[channel].nr_of_sfb] = offset; } } /* Perform TNS analysis and filtering */ for (channel = 0; channel < numChannels; channel++) { if ((!channelInfo[channel].lfe) && (useTns)) { TnsEncode(&(coderInfo[channel].tnsInfo), coderInfo[channel].max_sfb, coderInfo[channel].max_sfb, (WINDOW_TYPE)coderInfo[channel].block_type, coderInfo[channel].sfb_offset, hEncoder->freqBuff[channel]); } else { coderInfo[channel].tnsInfo.tnsDataPresent = 0; /* TNS not used for LFE */ } } for(channel = 0; channel < numChannels; channel++) { if((coderInfo[channel].tnsInfo.tnsDataPresent != 0) && (useTns)) tnsInfo_for_LTP = &(coderInfo[channel].tnsInfo); else tnsInfo_for_LTP = NULL; if(channelInfo[channel].present && (!channelInfo[channel].lfe) && (coderInfo[channel].block_type != ONLY_SHORT_WINDOW) && (mpegVersion == MPEG4) && (aacObjectType == LTP)) { LtpEncode(hEncoder, &coderInfo[channel], &(coderInfo[channel].ltpInfo), tnsInfo_for_LTP, hEncoder->freqBuff[channel], hEncoder->ltpTimeBuff[channel]); } else { coderInfo[channel].ltpInfo.global_pred_flag = 0; } } for(channel = 0; channel < numChannels; channel++) { if ((aacObjectType == MAIN) && (!channelInfo[channel].lfe)) { int numPredBands = min(coderInfo[channel].max_pred_sfb, coderInfo[channel].nr_of_sfb); PredCalcPrediction(hEncoder->freqBuff[channel], coderInfo[channel].requantFreq, coderInfo[channel].block_type, numPredBands, (coderInfo[channel].block_type==ONLY_SHORT_WINDOW)? hEncoder->srInfo->cb_width_short:hEncoder->srInfo->cb_width_long, coderInfo, channelInfo, channel); } else { coderInfo[channel].pred_global_flag = 0; } } for (channel = 0; channel < numChannels; channel++) { if (coderInfo[channel].block_type == ONLY_SHORT_WINDOW) { SortForGrouping(&coderInfo[channel], &hEncoder->psyInfo[channel], &channelInfo[channel], hEncoder->srInfo->cb_width_short, hEncoder->freqBuff[channel]); } CalcAvgEnrg(&coderInfo[channel], hEncoder->freqBuff[channel]); // reduce LFE bandwidth if (!channelInfo[channel].cpe && channelInfo[channel].lfe) { coderInfo[channel].nr_of_sfb = coderInfo[channel].max_sfb = 3; } } MSEncode(coderInfo, channelInfo, hEncoder->freqBuff, numChannels, allowMidside); for (channel = 0; channel < numChannels; channel++) { CalcAvgEnrg(&coderInfo[channel], hEncoder->freqBuff[channel]); } #ifdef DRM /* loop the quantization until the desired bit-rate is reached */ diff = 1; /* to enter while loop */ hEncoder->aacquantCfg.quality = 120; /* init quality setting */ while (diff > 0) { /* if too many bits, do it again */ #endif /* Quantize and code the signal */ for (channel = 0; channel < numChannels; channel++) { if (coderInfo[channel].block_type == ONLY_SHORT_WINDOW) { AACQuantize(&coderInfo[channel], &hEncoder->psyInfo[channel], &channelInfo[channel], hEncoder->srInfo->cb_width_short, hEncoder->srInfo->num_cb_short, hEncoder->freqBuff[channel], &(hEncoder->aacquantCfg)); } else { AACQuantize(&coderInfo[channel], &hEncoder->psyInfo[channel], &channelInfo[channel], hEncoder->srInfo->cb_width_long, hEncoder->srInfo->num_cb_long, hEncoder->freqBuff[channel], &(hEncoder->aacquantCfg)); } } #ifdef DRM /* Write the AAC bitstream */ bitStream = OpenBitStream(bufferSize, outputBuffer); WriteBitstream(hEncoder, coderInfo, channelInfo, bitStream, numChannels); /* Close the bitstream and return the number of bytes written */ frameBytes = CloseBitStream(bitStream); /* now calculate desired bits and compare with actual encoded bits */ desbits = (int) ((double) numChannels * (hEncoder->config.bitRate * FRAME_LEN) / hEncoder->sampleRate); diff = ((frameBytes - 1 /* CRC */) * 8) - desbits; /* do linear correction according to relative difference */ fix = (double) desbits / ((frameBytes - 1 /* CRC */) * 8); /* speed up convergence. A value of 0.92 gives approx up to 10 iterations */ if (fix > 0.92) fix = 0.92; hEncoder->aacquantCfg.quality *= fix; /* quality should not go lower than 1, set diff to exit loop */ if (hEncoder->aacquantCfg.quality <= 1) diff = -1; } #endif // fix max_sfb in CPE mode for (channel = 0; channel < numChannels; channel++) { if (channelInfo[channel].present && (channelInfo[channel].cpe) && (channelInfo[channel].ch_is_left)) { CoderInfo *cil, *cir; cil = &coderInfo[channel]; cir = &coderInfo[channelInfo[channel].paired_ch]; cil->max_sfb = cir->max_sfb = max(cil->max_sfb, cir->max_sfb); cil->nr_of_sfb = cir->nr_of_sfb = cil->max_sfb; } } MSReconstruct(coderInfo, channelInfo, numChannels); for (channel = 0; channel < numChannels; channel++) { /* If short window, reconstruction not needed for prediction */ if ((coderInfo[channel].block_type == ONLY_SHORT_WINDOW)) { int sind; for (sind = 0; sind < BLOCK_LEN_LONG; sind++) { coderInfo[channel].requantFreq[sind] = 0.0; } } else { if((coderInfo[channel].tnsInfo.tnsDataPresent != 0) && (useTns)) tnsDecInfo = &(coderInfo[channel].tnsInfo); else tnsDecInfo = NULL; if ((!channelInfo[channel].lfe) && (aacObjectType == LTP)) { /* no reconstruction needed for LFE channel*/ LtpReconstruct(&coderInfo[channel], &(coderInfo[channel].ltpInfo), coderInfo[channel].requantFreq); if(tnsDecInfo != NULL) TnsDecodeFilterOnly(&(coderInfo[channel].tnsInfo), coderInfo[channel].nr_of_sfb, coderInfo[channel].max_sfb, (WINDOW_TYPE)coderInfo[channel].block_type, coderInfo[channel].sfb_offset, coderInfo[channel].requantFreq); IFilterBank(hEncoder, &coderInfo[channel], coderInfo[channel].requantFreq, coderInfo[channel].ltpInfo.time_buffer, coderInfo[channel].ltpInfo.ltp_overlap_buffer, MOVERLAPPED); LtpUpdate(&(coderInfo[channel].ltpInfo), coderInfo[channel].ltpInfo.time_buffer, coderInfo[channel].ltpInfo.ltp_overlap_buffer, BLOCK_LEN_LONG); } } } #ifndef DRM /* Write the AAC bitstream */ bitStream = OpenBitStream(bufferSize, outputBuffer); WriteBitstream(hEncoder, coderInfo, channelInfo, bitStream, numChannels); /* Close the bitstream and return the number of bytes written */ frameBytes = CloseBitStream(bitStream); /* Adjust quality to get correct average bitrate */ if (hEncoder->config.bitRate) { double fix; int desbits = numChannels * (hEncoder->config.bitRate * FRAME_LEN) / hEncoder->sampleRate; int diff = (frameBytes * 8) - desbits; hEncoder->bitDiff += diff; fix = (double)hEncoder->bitDiff / desbits; fix *= 0.01; fix = max(fix, -0.2); fix = min(fix, 0.2); if (((diff > 0) && (fix > 0.0)) || ((diff < 0) && (fix < 0.0))) { hEncoder->aacquantCfg.quality *= (1.0 - fix); if (hEncoder->aacquantCfg.quality > 300) hEncoder->aacquantCfg.quality = 300; if (hEncoder->aacquantCfg.quality < 50) hEncoder->aacquantCfg.quality = 50; } } #endif return frameBytes; }
郑重声明:本站内容如果来自互联网及其他传播媒体,其版权均属原媒体及文章作者所有。转载目的在于传递更多信息及用于网络分享,并不代表本站赞同其观点和对其真实性负责,也不构成任何其他建议。