diff --git a/libavcodec/x86/h264_weight.asm b/libavcodec/x86/h264_weight.asm index b23e804dcf..4759a063a6 100644 --- a/libavcodec/x86/h264_weight.asm +++ b/libavcodec/x86/h264_weight.asm @@ -70,8 +70,8 @@ SECTION .text packuswb m0, m1 %endmacro -INIT_MMX -cglobal h264_weight_16_mmxext, 6, 6, 0 +INIT_MMX mmxext +cglobal h264_weight_16, 6, 6, 0 WEIGHT_SETUP .nextrow: WEIGHT_OP 0, 4 @@ -83,8 +83,8 @@ cglobal h264_weight_16_mmxext, 6, 6, 0 jnz .nextrow REP_RET -%macro WEIGHT_FUNC_MM 3 -cglobal h264_weight_%1_%3, 6, 6, %2 +%macro WEIGHT_FUNC_MM 2 +cglobal h264_weight_%1, 6, 6, %2 WEIGHT_SETUP .nextrow: WEIGHT_OP 0, mmsize/2 @@ -95,13 +95,13 @@ cglobal h264_weight_%1_%3, 6, 6, %2 REP_RET %endmacro -INIT_MMX -WEIGHT_FUNC_MM 8, 0, mmxext -INIT_XMM -WEIGHT_FUNC_MM 16, 8, sse2 +INIT_MMX mmxext +WEIGHT_FUNC_MM 8, 0 +INIT_XMM sse2 +WEIGHT_FUNC_MM 16, 8 -%macro WEIGHT_FUNC_HALF_MM 3 -cglobal h264_weight_%1_%3, 6, 6, %2 +%macro WEIGHT_FUNC_HALF_MM 2 +cglobal h264_weight_%1, 6, 6, %2 WEIGHT_SETUP sar r2d, 1 lea r3, [r1*2] @@ -120,10 +120,10 @@ cglobal h264_weight_%1_%3, 6, 6, %2 REP_RET %endmacro -INIT_MMX -WEIGHT_FUNC_HALF_MM 4, 0, mmxext -INIT_XMM -WEIGHT_FUNC_HALF_MM 8, 8, sse2 +INIT_MMX mmxext +WEIGHT_FUNC_HALF_MM 4, 0 +INIT_XMM sse2 +WEIGHT_FUNC_HALF_MM 8, 8 %macro BIWEIGHT_SETUP 0 %if ARCH_X86_64 @@ -135,12 +135,32 @@ WEIGHT_FUNC_HALF_MM 8, 8, sse2 add off_regd, 1 or off_regd, 1 add r4, 1 + cmp r5, 128 + jne .normal + sar r5, 1 + sar r6, 1 + sar off_regd, 1 + sub r4, 1 +.normal +%if cpuflag(ssse3) + movd m4, r5d + movd m0, r6d +%else movd m3, r5d movd m4, r6d +%endif movd m5, off_regd movd m6, r4d pslld m5, m6 psrld m5, 1 +%if cpuflag(ssse3) + punpcklbw m4, m0 + pshuflw m4, m4, 0 + pshuflw m5, m5, 0 + punpcklqdq m4, m4 + punpcklqdq m5, m5 + +%else %if mmsize == 16 pshuflw m3, m3, 0 pshuflw m4, m4, 0 @@ -154,6 +174,7 @@ WEIGHT_FUNC_HALF_MM 8, 8, sse2 pshufw m5, m5, 0 %endif pxor m7, m7 +%endif %endmacro %macro BIWEIGHT_STEPA 3 @@ -174,8 +195,8 @@ WEIGHT_FUNC_HALF_MM 8, 8, sse2 packuswb m0, m1 %endmacro -INIT_MMX -cglobal h264_biweight_16_mmxext, 7, 8, 0 +INIT_MMX mmxext +cglobal h264_biweight_16, 7, 8, 0 BIWEIGHT_SETUP movifnidn r3d, r3m .nextrow: @@ -193,8 +214,8 @@ cglobal h264_biweight_16_mmxext, 7, 8, 0 jnz .nextrow REP_RET -%macro BIWEIGHT_FUNC_MM 3 -cglobal h264_biweight_%1_%3, 7, 8, %2 +%macro BIWEIGHT_FUNC_MM 2 +cglobal h264_biweight_%1, 7, 8, %2 BIWEIGHT_SETUP movifnidn r3d, r3m .nextrow: @@ -209,13 +230,13 @@ cglobal h264_biweight_%1_%3, 7, 8, %2 REP_RET %endmacro -INIT_MMX -BIWEIGHT_FUNC_MM 8, 0, mmxext -INIT_XMM -BIWEIGHT_FUNC_MM 16, 8, sse2 +INIT_MMX mmxext +BIWEIGHT_FUNC_MM 8, 0 +INIT_XMM sse2 +BIWEIGHT_FUNC_MM 16, 8 -%macro BIWEIGHT_FUNC_HALF_MM 3 -cglobal h264_biweight_%1_%3, 7, 8, %2 +%macro BIWEIGHT_FUNC_HALF_MM 2 +cglobal h264_biweight_%1, 7, 8, %2 BIWEIGHT_SETUP movifnidn r3d, r3m sar r3, 1 @@ -238,40 +259,10 @@ cglobal h264_biweight_%1_%3, 7, 8, %2 REP_RET %endmacro -INIT_MMX -BIWEIGHT_FUNC_HALF_MM 4, 0, mmxext -INIT_XMM -BIWEIGHT_FUNC_HALF_MM 8, 8, sse2 - -%macro BIWEIGHT_SSSE3_SETUP 0 -%if ARCH_X86_64 -%define off_regd r7d -%else -%define off_regd r3d -%endif - mov off_regd, r7m - add off_regd, 1 - or off_regd, 1 - add r4, 1 - cmp r5, 128 - jne .normal - sar r5, 1 - sar r6, 1 - sar off_regd, 1 - sub r4, 1 -.normal - movd m4, r5d - movd m0, r6d - movd m5, off_regd - movd m6, r4d - pslld m5, m6 - psrld m5, 1 - punpcklbw m4, m0 - pshuflw m4, m4, 0 - pshuflw m5, m5, 0 - punpcklqdq m4, m4 - punpcklqdq m5, m5 -%endmacro +INIT_MMX mmxext +BIWEIGHT_FUNC_HALF_MM 4, 0 +INIT_XMM sse2 +BIWEIGHT_FUNC_HALF_MM 8, 8 %macro BIWEIGHT_SSSE3_OP 0 pmaddubsw m0, m4 @@ -283,9 +274,9 @@ BIWEIGHT_FUNC_HALF_MM 8, 8, sse2 packuswb m0, m2 %endmacro -INIT_XMM -cglobal h264_biweight_16_ssse3, 7, 8, 8 - BIWEIGHT_SSSE3_SETUP +INIT_XMM ssse3 +cglobal h264_biweight_16, 7, 8, 8 + BIWEIGHT_SETUP movifnidn r3d, r3m .nextrow: @@ -302,9 +293,9 @@ cglobal h264_biweight_16_ssse3, 7, 8, 8 jnz .nextrow REP_RET -INIT_XMM -cglobal h264_biweight_8_ssse3, 7, 8, 8 - BIWEIGHT_SSSE3_SETUP +INIT_XMM ssse3 +cglobal h264_biweight_8, 7, 8, 8 + BIWEIGHT_SETUP movifnidn r3d, r3m sar r3, 1 lea r4, [r2*2] diff --git a/libavresample/audio_convert.c b/libavresample/audio_convert.c index e9835c8e8b..dcf8a39b06 100644 --- a/libavresample/audio_convert.c +++ b/libavresample/audio_convert.c @@ -284,9 +284,10 @@ AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, return ac; } -int ff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in, int len) +int ff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in) { int use_generic = 1; + int len = in->nb_samples; /* determine whether to use the optimized function based on pointer and samples alignment in both the input and output */ diff --git a/libavresample/audio_convert.h b/libavresample/audio_convert.h index 2b8bface7d..bc27223140 100644 --- a/libavresample/audio_convert.h +++ b/libavresample/audio_convert.h @@ -72,13 +72,16 @@ AudioConvert *ff_audio_convert_alloc(AVAudioResampleContext *avr, * examined to determine whether to use the generic or optimized conversion * function (when available). * + * The number of samples to convert is determined by in->nb_samples. The output + * buffer must be large enough to handle this many samples. out->nb_samples is + * set by this function before a successful return. + * * @param ac AudioConvert context * @param out output audio data * @param in input audio data - * @param len number of samples to convert * @return 0 on success, negative AVERROR code on failure */ -int ff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in, int len); +int ff_audio_convert(AudioConvert *ac, AudioData *out, AudioData *in); /* arch-specific initialization functions */ diff --git a/libavresample/resample.c b/libavresample/resample.c index f0af1ffd58..381d673717 100644 --- a/libavresample/resample.c +++ b/libavresample/resample.c @@ -394,10 +394,9 @@ static int resample(ResampleContext *c, void *dst, const void *src, return dst_index; } -int ff_audio_resample(ResampleContext *c, AudioData *dst, AudioData *src, - int *consumed) +int ff_audio_resample(ResampleContext *c, AudioData *dst, AudioData *src) { - int ch, in_samples, in_leftover, out_samples = 0; + int ch, in_samples, in_leftover, consumed = 0, out_samples = 0; int ret = AVERROR(EINVAL); in_samples = src ? src->nb_samples : 0; @@ -430,7 +429,7 @@ int ff_audio_resample(ResampleContext *c, AudioData *dst, AudioData *src, /* resample each channel plane */ for (ch = 0; ch < c->buffer->channels; ch++) { out_samples = resample(c, (void *)dst->data[ch], - (const void *)c->buffer->data[ch], consumed, + (const void *)c->buffer->data[ch], &consumed, c->buffer->nb_samples, dst->allocated_samples, ch + 1 == c->buffer->channels); } @@ -440,7 +439,7 @@ int ff_audio_resample(ResampleContext *c, AudioData *dst, AudioData *src, } /* drain consumed samples from the internal buffer */ - ff_audio_data_drain(c->buffer, *consumed); + ff_audio_data_drain(c->buffer, consumed); av_dlog(c->avr, "resampled %d in + %d leftover to %d out + %d leftover\n", in_samples, in_leftover, out_samples, c->buffer->nb_samples); diff --git a/libavresample/resample.h b/libavresample/resample.h index b42fdbbaac..7534e26ad4 100644 --- a/libavresample/resample.h +++ b/libavresample/resample.h @@ -61,10 +61,8 @@ void ff_audio_resample_free(ResampleContext **c); * @param c ResampleContext * @param dst destination audio data * @param src source audio data - * @param consumed number of samples consumed from the source - * @return number of samples written to the destination + * @return 0 on success, negative AVERROR code on failure */ -int ff_audio_resample(ResampleContext *c, AudioData *dst, AudioData *src, - int *consumed); +int ff_audio_resample(ResampleContext *c, AudioData *dst, AudioData *src); #endif /* AVRESAMPLE_RESAMPLE_H */ diff --git a/libavresample/utils.c b/libavresample/utils.c index ad49e880af..5591f1575e 100644 --- a/libavresample/utils.c +++ b/libavresample/utils.c @@ -313,8 +313,8 @@ int attribute_align_arg avresample_convert(AVAudioResampleContext *avr, if (ret < 0) return ret; av_dlog(avr, "[convert] %s to in_buffer\n", current_buffer->name); - ret = ff_audio_convert(avr->ac_in, avr->in_buffer, current_buffer, - current_buffer->nb_samples); + ret = ff_audio_convert(avr->ac_in, avr->in_buffer, + current_buffer); if (ret < 0) return ret; } else { @@ -342,7 +342,6 @@ int attribute_align_arg avresample_convert(AVAudioResampleContext *avr, if (avr->resample_needed) { AudioData *resample_out; - int consumed = 0; if (!avr->out_convert_needed && direct_output && out_samples > 0) resample_out = &output_buffer; @@ -351,7 +350,7 @@ int attribute_align_arg avresample_convert(AVAudioResampleContext *avr, av_dlog(avr, "[resample] %s to %s\n", current_buffer->name, resample_out->name); ret = ff_audio_resample(avr->resample, resample_out, - current_buffer, &consumed); + current_buffer); if (ret < 0) return ret; @@ -381,8 +380,7 @@ int attribute_align_arg avresample_convert(AVAudioResampleContext *avr, if (direct_output && out_samples >= current_buffer->nb_samples) { /* convert directly to output */ av_dlog(avr, "[convert] %s to output\n", current_buffer->name); - ret = ff_audio_convert(avr->ac_out, &output_buffer, current_buffer, - current_buffer->nb_samples); + ret = ff_audio_convert(avr->ac_out, &output_buffer, current_buffer); if (ret < 0) return ret; @@ -395,7 +393,7 @@ int attribute_align_arg avresample_convert(AVAudioResampleContext *avr, return ret; av_dlog(avr, "[convert] %s to out_buffer\n", current_buffer->name); ret = ff_audio_convert(avr->ac_out, avr->out_buffer, - current_buffer, current_buffer->nb_samples); + current_buffer); if (ret < 0) return ret; current_buffer = avr->out_buffer;