mirror of
https://code.videolan.org/videolan/vlc
synced 2024-09-12 13:44:56 +02:00
add ARM/NEON version of simple channel mixer
Signed-off-by: Jean-Baptiste Kempf <jb@videolan.org> Signed-off-by: Rafaël Carré <funman@videolan.org>
This commit is contained in:
parent
c311b5fa97
commit
04a376f95c
@ -289,6 +289,7 @@ $Id$
|
||||
* shm: Shared memory framebuffer access module
|
||||
* sid: Sidplay demuxer
|
||||
* simple_channel_mixer: channel mixer
|
||||
* simple_channel_mixer_neon: channel mixer using NEON assembly
|
||||
* skins2: Skinnable interface, new generation
|
||||
* smf: Standard MIDI file demuxer
|
||||
* smooth: Microsoft Smooth Streaming input
|
||||
|
@ -8,6 +8,13 @@ libaudio_format_neon_plugin_la_SOURCES = \
|
||||
libaudio_format_neon_plugin_la_CFLAGS = $(AM_CFLAGS)
|
||||
libaudio_format_neon_plugin_la_LIBADD = $(AM_LIBADD)
|
||||
|
||||
libsimple_channel_mixer_neon_plugin_la_SOURCES = \
|
||||
simple_channel_mixer.S \
|
||||
simple_channel_mixer.c
|
||||
libsimple_channel_mixer_neon_plugin_la_CFLAGS = $(AM_CFLAGS)
|
||||
libsimple_channel_mixer_neon_plugin_la_LIBADD = $(AM_LIBADD)
|
||||
libsimple_channel_mixer_neon_plugin_la_DEPENDENCIES =
|
||||
|
||||
libchroma_yuv_neon_plugin_la_SOURCES = \
|
||||
i420_yuyv.S \
|
||||
i422_yuyv.S \
|
||||
@ -30,6 +37,7 @@ libyuv_rgb_neon_plugin_la_LIBADD = $(AM_LIBADD)
|
||||
|
||||
libvlc_LTLIBRARIES += \
|
||||
libaudio_format_neon_plugin.la \
|
||||
libsimple_channel_mixer_neon_plugin.la \
|
||||
libchroma_yuv_neon_plugin.la \
|
||||
libvolume_neon_plugin.la \
|
||||
libyuv_rgb_neon_plugin.la \
|
||||
|
279
modules/arm_neon/simple_channel_mixer.S
Normal file
279
modules/arm_neon/simple_channel_mixer.S
Normal file
@ -0,0 +1,279 @@
|
||||
@*****************************************************************************
|
||||
@ simple_channel_mixer.S : ARM NEON channel mixer
|
||||
@*****************************************************************************
|
||||
@ Copyright (C) 2012 David Geldreich <david.geldreich@free.fr>
|
||||
@ Sébastien Toque
|
||||
@
|
||||
@ This program is free software; you can redistribute it and/or modify
|
||||
@ it under the terms of the GNU General Public License as published by
|
||||
@ the Free Software Foundation; either version 2 of the License, or
|
||||
@ (at your option) any later version.
|
||||
@
|
||||
@ This program is distributed in the hope that it will be useful,
|
||||
@ but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
@ GNU General Public License for more details.
|
||||
@
|
||||
@ You should have received a copy of the GNU General Public License
|
||||
@ along with this program; if not, write to the Free Software Foundation,
|
||||
@ Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
|
||||
@****************************************************************************/
|
||||
|
||||
.fpu neon
|
||||
.text
|
||||
.align
|
||||
|
||||
#define DST r0
|
||||
#define SRC r1
|
||||
#define NUM r2
|
||||
#define LFE r3
|
||||
#define COEFF r4
|
||||
|
||||
coeff_7to2:
|
||||
.float 0.5
|
||||
.float 0.5
|
||||
.float 0.25
|
||||
.float 0.25
|
||||
.global convert_7to2_neon_asm
|
||||
.type convert_7to2_neon_asm, %function
|
||||
convert_7to2_neon_asm:
|
||||
push {r4,lr}
|
||||
|
||||
adr COEFF, coeff_7to2
|
||||
vld1.32 {q0},[COEFF]
|
||||
0: @ use local label
|
||||
vld1.32 {q2},[SRC]! @ load 0,1,2,3
|
||||
vmul.f32 q2,q2,q0 @ 0.5*src[0] 0.5*src[1] 0.25*src[2] 0.25*src[3]
|
||||
vld1.32 {d6},[SRC]! @ load 4,5
|
||||
vmul.f32 d6,d6,d1 @ 0.25*src[4] 0.25*src[5]
|
||||
vadd.f32 d4,d4,d5 @ 0.5*src[0] + 0.25*src[2]
|
||||
@ 0.5*src[1] + 0.25*src[3]
|
||||
vadd.f32 d4,d4,d6 @ 0.5*src[0] + 0.25*src[2] + 0.25*src[4]
|
||||
@ 0.5*src[1] + 0.25*src[3] + 0.25*src[5]
|
||||
flds s14,[SRC] @ load 6
|
||||
vdup.32 d7,d7[0]
|
||||
teq LFE,#0
|
||||
ite eq
|
||||
addeq SRC,SRC,#4
|
||||
addne SRC,SRC,#8 @ skip the lfe channel
|
||||
vadd.f32 d4,d4,d7 @ 0.5*src[0] + 0.25*src[2] + 0.25*src[4] + src[6]
|
||||
@ 0.5*src[1] + 0.25*src[3] + 0.25*src[5] + src[6]
|
||||
vst1.32 d4, [DST]!
|
||||
subs NUM,NUM,#1
|
||||
bne 0b
|
||||
|
||||
pop {r4,pc}
|
||||
|
||||
|
||||
coeff_5to2:
|
||||
.float 0.5
|
||||
.float 0.5
|
||||
.float 0.33
|
||||
.float 0.33
|
||||
.global convert_5to2_neon_asm
|
||||
.type convert_5to2_neon_asm, %function
|
||||
convert_5to2_neon_asm:
|
||||
push {r4,lr}
|
||||
|
||||
adr COEFF, coeff_5to2
|
||||
vld1.32 {q0},[COEFF] @ load constants
|
||||
0: @ use local label
|
||||
vld1.32 {q1},[SRC]! @ load 0,1,2,3
|
||||
flds s8,[SRC] @ load 4
|
||||
vdup.32 d4,d4[0]
|
||||
teq LFE,#0
|
||||
ite eq
|
||||
addeq SRC,SRC,#4
|
||||
addne SRC,SRC,#8 @ skip the lfe channel
|
||||
vmul.f32 q1,q1,q0 @ 0.5*src[0] 0.5*src[1] 0.33*src[2] 0.33*src[3]/3
|
||||
vadd.f32 d2,d2,d3 @ 0.5*src[0] + 0.33*src[2]
|
||||
@ 0.5*src[1] + 0.33*src[3]
|
||||
vadd.f32 d2,d2,d4 @ 0.5*src[0] + 0.33*src[2] + src[4]
|
||||
@ 0.5*src[1] + 0.33*src[3] + src[4]
|
||||
vst1.32 d2,[DST]!
|
||||
subs NUM,NUM,#1
|
||||
bne 0b
|
||||
|
||||
pop {r4,pc}
|
||||
|
||||
|
||||
coeff_4to2:
|
||||
.float 0.5
|
||||
.float 0.5
|
||||
.global convert_4to2_neon_asm
|
||||
.type convert_4to2_neon_asm, %function
|
||||
convert_4to2_neon_asm:
|
||||
push {r4,lr}
|
||||
|
||||
adr COEFF, coeff_4to2
|
||||
vld1.32 {d0},[COEFF] @ load constants
|
||||
0: @ use local label
|
||||
vld1.32 {q1},[SRC]!
|
||||
vmul.f32 d2,d2,d0 @ 0.5*src[0] 0.5*src[1]
|
||||
vdup.32 d4,d3[0] @ dup src[2]
|
||||
vdup.32 d3,d3[1] @ dup src[3]
|
||||
vadd.f32 d2,d2,d3 @ +src[3]
|
||||
vadd.f32 d2,d2,d4 @ +src[2]
|
||||
vst1.32 d2,[DST]!
|
||||
subs NUM,NUM,#1
|
||||
bne 0b
|
||||
|
||||
pop {r4,pc}
|
||||
|
||||
|
||||
coeff_3to2:
|
||||
.float 0.5
|
||||
.float 0.5
|
||||
.global convert_3to2_neon_asm
|
||||
.type convert_3to2_neon_asm, %function
|
||||
convert_3to2_neon_asm:
|
||||
push {r4,lr}
|
||||
|
||||
adr COEFF, coeff_3to2
|
||||
vld1.32 {d0},[COEFF] @ load constants
|
||||
0: @ use local label
|
||||
vld1.32 {d1},[SRC]! @ load 0,1
|
||||
flds s4,[SRC] @ load 2
|
||||
vdup.32 d2,d2[0]
|
||||
teq LFE,#0
|
||||
ite eq
|
||||
addeq SRC,SRC,#4
|
||||
addne SRC,SRC,#8 @ skip the lfe channel
|
||||
vmul.f32 d1,d1,d0 @ 0.5*src[0] 0.5*src[1]
|
||||
vadd.f32 d1,d1,d2 @ 0.5*src[0] + src[2]
|
||||
@ 0.5*src[1] + src[2]
|
||||
vst1.32 d1,[DST]!
|
||||
subs NUM,NUM,#1
|
||||
bne 0b
|
||||
|
||||
pop {r4,pc}
|
||||
|
||||
|
||||
coeff_7to1:
|
||||
.float 0.25
|
||||
.float 0.25
|
||||
.float 0.125
|
||||
.float 0.125
|
||||
.global convert_7to1_neon_asm
|
||||
.type convert_7to1_neon_asm, %function
|
||||
convert_7to1_neon_asm:
|
||||
push {r4,lr}
|
||||
|
||||
adr COEFF, coeff_7to1
|
||||
vld1.32 {q0},[COEFF]
|
||||
0: @ use local label
|
||||
vld1.32 {q1},[SRC]! @ load 0,1,2,3
|
||||
vmul.f32 q1,q1,q0 @ 0.25*src[0] 0.25*src[1] 0.125*src[2] 0.125*src[3]
|
||||
vld1.32 {d4},[SRC]! @ load 4,5
|
||||
vmul.f32 d4,d4,d1 @ 0.125*src[4] 0.125*src[5]
|
||||
vadd.f32 d2,d2,d3
|
||||
vadd.f32 d2,d2,d4
|
||||
flds s10,[SRC] @ load 6
|
||||
teq LFE,#0
|
||||
ite eq
|
||||
addeq SRC,SRC,#4
|
||||
addne SRC,SRC,#8 @ skip the lfe channel
|
||||
vadd.f32 s4,s4,s5
|
||||
vadd.f32 s4,s4,s10
|
||||
fsts s4,[DST]
|
||||
add DST,DST,#4
|
||||
subs NUM,NUM,#1
|
||||
bne 0b
|
||||
|
||||
pop {r4,pc}
|
||||
|
||||
|
||||
coeff_5to1:
|
||||
.float 0.25
|
||||
.float 0.25
|
||||
.float 0.16666667
|
||||
.float 0.16666667
|
||||
.global convert_5to1_neon_asm
|
||||
.type convert_5to1_neon_asm, %function
|
||||
convert_5to1_neon_asm:
|
||||
push {r4,lr}
|
||||
|
||||
adr COEFF, coeff_5to1
|
||||
vld1.32 {q0},[COEFF]
|
||||
0: @ use local label
|
||||
vld1.32 {q1},[SRC]! @ load 0,1,2,3
|
||||
vmul.f32 q1,q1,q0 @ 0.25*src[0] 0.25*src[1] src[2]/6 src[3]/6
|
||||
vadd.f32 d2,d2,d3
|
||||
flds s10,[SRC] @ load 4
|
||||
teq LFE,#0
|
||||
ite eq
|
||||
addeq SRC,SRC,#4
|
||||
addne SRC,SRC,#8 @ skip the lfe channel
|
||||
vadd.f32 s4,s4,s5
|
||||
vadd.f32 s4,s4,s10
|
||||
fsts s4,[DST]
|
||||
add DST,DST,#4
|
||||
subs NUM,NUM,#1
|
||||
bne 0b
|
||||
|
||||
pop {r4,pc}
|
||||
|
||||
|
||||
coeff_7to4:
|
||||
.float 0.5
|
||||
.float 0.5
|
||||
.float 0.16666667
|
||||
.float 0.16666667
|
||||
.global convert_7to4_neon_asm
|
||||
.type convert_7to4_neon_asm, %function
|
||||
convert_7to4_neon_asm:
|
||||
push {r4,lr}
|
||||
|
||||
adr COEFF, coeff_7to4
|
||||
vld1.32 {q0},[COEFF]
|
||||
0: @ use local label
|
||||
vld1.32 {q1},[SRC]! @ load 0,1,2,3
|
||||
vmul.f32 q1,q1,q0 @ 0.5*src[0] 0.5*src[1] src[2]/6 src[3]/6
|
||||
vld1.32 {d5},[SRC]! @ load 4,5
|
||||
flds s14,[SRC] @ load 6
|
||||
vadd.f32 d2,d2,d3 @ 0.5*src[0] + src[2]/6
|
||||
@ 0.5*src[1] + src[3]/6
|
||||
vdup.32 d4,d7[0] @ so q2 : src[6] src[6] src[4] src[5]
|
||||
vadd.f32 q2,q2,q1 @ src[6] + 0.5*src[0] + src[2]/6
|
||||
@ src[6] + 0.5*src[1] + src[3]/6
|
||||
@ src[4] + src[2]/6
|
||||
@ src[5] + src[3]/6
|
||||
teq LFE,#0
|
||||
ite eq
|
||||
addeq SRC,SRC,#4
|
||||
addne SRC,SRC,#8 @ skip the lfe channel
|
||||
vst1.32 {q2}, [DST]!
|
||||
subs NUM,NUM,#1
|
||||
bne 0b
|
||||
|
||||
pop {r4,pc}
|
||||
|
||||
|
||||
coeff_5to4:
|
||||
.float 0.5
|
||||
.float 0.5
|
||||
.global convert_5to4_neon_asm
|
||||
.type convert_5to4_neon_asm, %function
|
||||
convert_5to4_neon_asm:
|
||||
push {r4,lr}
|
||||
|
||||
adr COEFF, coeff_5to4
|
||||
vld1.32 {d0},[COEFF]
|
||||
0: @ use local label
|
||||
vld1.32 {q1},[SRC]! @ load 0,1,2,3
|
||||
vmul.f32 d2,d2,d0 @ 0.5*src[0] 0.5*src[1]
|
||||
flds s8,[SRC] @ load 4
|
||||
vdup.32 d4,d4[0]
|
||||
vadd.f32 d2,d2,d4 @ 0.5*src[0] + src[4]
|
||||
@ 0.5*src[1] + src[4]
|
||||
@ src[2]
|
||||
@ src[3]
|
||||
teq LFE,#0
|
||||
ite eq
|
||||
addeq SRC,SRC,#4
|
||||
addne SRC,SRC,#8 @ skip the lfe channel
|
||||
vst1.32 {q1}, [DST]!
|
||||
subs NUM,NUM,#1
|
||||
bne 0b
|
||||
|
||||
pop {r4,pc}
|
175
modules/arm_neon/simple_channel_mixer.c
Normal file
175
modules/arm_neon/simple_channel_mixer.c
Normal file
@ -0,0 +1,175 @@
|
||||
/*****************************************************************************
|
||||
* simple_channel_mixer.c : simple channel mixer plug-in using NEON assembly
|
||||
*****************************************************************************
|
||||
* Copyright (C) 2002, 2004, 2006-2009, 2012 the VideoLAN team
|
||||
* $Id$
|
||||
*
|
||||
* Authors: Gildas Bazin <gbazin@videolan.org>
|
||||
* David Geldreich <david.geldreich@free.fr>
|
||||
* Sébastien Toque
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301, USA.
|
||||
*****************************************************************************/
|
||||
|
||||
/*****************************************************************************
|
||||
* Preamble
|
||||
*****************************************************************************/
|
||||
#ifdef HAVE_CONFIG_H
|
||||
# include "config.h"
|
||||
#endif
|
||||
|
||||
#include <vlc_common.h>
|
||||
#include <vlc_plugin.h>
|
||||
#include <vlc_aout.h>
|
||||
#include <vlc_filter.h>
|
||||
#include <vlc_block.h>
|
||||
#include <vlc_cpu.h>
|
||||
#include <assert.h>
|
||||
|
||||
/*****************************************************************************
|
||||
* Module descriptor
|
||||
*****************************************************************************/
|
||||
static int OpenFilter( vlc_object_t * );
|
||||
|
||||
vlc_module_begin ()
|
||||
set_description( N_("Audio filter for simple channel mixing using NEON assembly") )
|
||||
set_category( CAT_AUDIO )
|
||||
set_subcategory( SUBCAT_AUDIO_MISC )
|
||||
set_capability( "audio filter", 20 )
|
||||
set_callbacks( OpenFilter, NULL )
|
||||
vlc_module_end ()
|
||||
|
||||
#define FILTER_WRAPPER(in, out) \
|
||||
void convert_##in##to##out##_neon_asm(float *dst, const float *src, int num, bool lfeChannel); \
|
||||
static block_t *Filter_##in##to##out (filter_t *p_filter, block_t *p_block) \
|
||||
{ \
|
||||
block_t *p_out; \
|
||||
if (!FilterInit( p_filter, p_block, &p_out )) \
|
||||
return NULL; \
|
||||
const float *p_src = (const float *)p_block->p_buffer; \
|
||||
float *p_dest = (float *)p_out->p_buffer; \
|
||||
convert_##in##to##out##_neon_asm( p_dest, p_src, p_block->i_nb_samples, \
|
||||
p_filter->fmt_in.audio.i_physical_channels & AOUT_CHAN_LFE ); \
|
||||
block_Release( p_block ); \
|
||||
return p_out; \
|
||||
}
|
||||
|
||||
#define TRY_FILTER(in, out) \
|
||||
if ( b_input_##in && b_output_##out ) \
|
||||
{ \
|
||||
p_filter->pf_audio_filter = Filter_##in##to##out ; \
|
||||
return VLC_SUCCESS; \
|
||||
}
|
||||
|
||||
/*****************************************************************************
|
||||
* Filter:
|
||||
*****************************************************************************/
|
||||
static bool FilterInit( filter_t *p_filter, block_t *p_block, block_t **pp_out )
|
||||
{
|
||||
if( !p_block || !p_block->i_nb_samples )
|
||||
{
|
||||
if( p_block )
|
||||
block_Release( p_block );
|
||||
return false;
|
||||
}
|
||||
|
||||
size_t i_out_size = p_block->i_nb_samples *
|
||||
p_filter->fmt_out.audio.i_bitspersample *
|
||||
p_filter->fmt_out.audio.i_channels / 8;
|
||||
|
||||
block_t *p_out = filter_NewAudioBuffer( p_filter, i_out_size );
|
||||
if( !p_out )
|
||||
{
|
||||
msg_Warn( p_filter, "can't get output buffer" );
|
||||
block_Release( p_block );
|
||||
return false;
|
||||
}
|
||||
|
||||
p_out->i_nb_samples = p_block->i_nb_samples;
|
||||
p_out->i_dts = p_block->i_dts;
|
||||
p_out->i_pts = p_block->i_pts;
|
||||
p_out->i_length = p_block->i_length;
|
||||
|
||||
int i_input_nb = aout_FormatNbChannels( &p_filter->fmt_in.audio );
|
||||
int i_output_nb = aout_FormatNbChannels( &p_filter->fmt_out.audio );
|
||||
p_out->i_buffer = p_block->i_buffer * i_output_nb / i_input_nb;
|
||||
|
||||
*pp_out = p_out;
|
||||
return true;
|
||||
}
|
||||
|
||||
FILTER_WRAPPER(7,2)
|
||||
FILTER_WRAPPER(5,2)
|
||||
FILTER_WRAPPER(4,2)
|
||||
FILTER_WRAPPER(3,2)
|
||||
FILTER_WRAPPER(7,1)
|
||||
FILTER_WRAPPER(5,1)
|
||||
FILTER_WRAPPER(7,4)
|
||||
FILTER_WRAPPER(5,4)
|
||||
|
||||
/*****************************************************************************
|
||||
* OpenFilter:
|
||||
*****************************************************************************/
|
||||
static int OpenFilter( vlc_object_t *p_this )
|
||||
{
|
||||
filter_t *p_filter = (filter_t *)p_this;
|
||||
|
||||
if (!vlc_CPU_ARM_NEON())
|
||||
return VLC_EGENERIC;
|
||||
|
||||
audio_format_t fmt_in = p_filter->fmt_in.audio;
|
||||
audio_format_t fmt_out = p_filter->fmt_out.audio;
|
||||
|
||||
fmt_in.i_format = p_filter->fmt_in.i_codec;
|
||||
fmt_out.i_format = p_filter->fmt_out.i_codec;
|
||||
|
||||
if( fmt_in.i_format != VLC_CODEC_FL32 ||
|
||||
fmt_in.i_format != fmt_out.i_format ||
|
||||
fmt_in.i_rate != fmt_out.i_rate )
|
||||
{
|
||||
return VLC_EGENERIC;
|
||||
}
|
||||
|
||||
if( fmt_in.i_physical_channels == fmt_out.i_physical_channels &&
|
||||
fmt_in.i_original_channels == fmt_out.i_original_channels )
|
||||
{
|
||||
return VLC_EGENERIC;
|
||||
}
|
||||
|
||||
const bool b_input_7 = (fmt_in.i_physical_channels & ~AOUT_CHAN_LFE) == AOUT_CHANS_7_0;
|
||||
const bool b_input_5 = ( (fmt_in.i_physical_channels & AOUT_CHANS_5_0) == AOUT_CHANS_5_0 ||
|
||||
(fmt_in.i_physical_channels & AOUT_CHANS_5_0_MIDDLE) == AOUT_CHANS_5_0_MIDDLE );
|
||||
const bool b_input_4 = (fmt_in.i_physical_channels & ~AOUT_CHAN_LFE) == AOUT_CHANS_4_CENTER_REAR;
|
||||
const bool b_input_3 = (fmt_in.i_physical_channels & ~AOUT_CHAN_LFE) == AOUT_CHANS_3_0;
|
||||
|
||||
const bool b_output_1 = fmt_out.i_physical_channels == AOUT_CHAN_CENTER;
|
||||
const bool b_output_2 = fmt_out.i_physical_channels == AOUT_CHANS_2_0;
|
||||
const bool b_output_4 = fmt_out.i_physical_channels == AOUT_CHANS_4_0;
|
||||
|
||||
/* Only conversion to Mono, Stereo and 4.0 right now */
|
||||
/* Only from 7/7.1/5/5.1/3/3.1/2.0
|
||||
* XXX 5.X rear and middle are handled the same way */
|
||||
|
||||
TRY_FILTER(7,2)
|
||||
TRY_FILTER(5,2)
|
||||
TRY_FILTER(4,2)
|
||||
TRY_FILTER(3,2)
|
||||
TRY_FILTER(7,1)
|
||||
TRY_FILTER(5,1)
|
||||
TRY_FILTER(7,4)
|
||||
TRY_FILTER(5,4)
|
||||
|
||||
return VLC_EGENERIC;
|
||||
}
|
Loading…
Reference in New Issue
Block a user