Skip to content
Snippets Groups Projects
Commit e36b639f authored by Luca Barbato's avatar Luca Barbato
Browse files

Partially address issue 299; apparently no performance change

Originally committed as revision 11303 to svn://svn.ffmpeg.org/ffmpeg/trunk
parent 8d8d178d
No related branches found
No related tags found
No related merge requests found
...@@ -51,6 +51,27 @@ ...@@ -51,6 +51,27 @@
dst += stride;\ dst += stride;\
src += stride; src += stride;
#define CHROMA_MC8_ALTIVEC_CORE_SIMPLE \
vsrc2ssH = (vec_s16_t)vec_mergeh(zero_u8v,(vec_u8_t)vsrc2uc);\
\
psum = vec_mladd(vA, vsrc0ssH, v32ss);\
psum = vec_mladd(vB, vsrc1ssH, psum);\
psum = vec_mladd(vC, vsrc2ssH, psum);\
psum = vec_sr(psum, v6us);\
\
vdst = vec_ld(0, dst);\
ppsum = (vec_u8_t)vec_pack(psum, psum);\
vfdst = vec_perm(vdst, ppsum, fperm);\
\
OP_U8_ALTIVEC(fsum, vfdst, vdst);\
\
vec_st(fsum, 0, dst);\
\
vsrc0ssH = vsrc1ssH;\
vsrc1ssH = vsrc2ssH;\
\
dst += stride;\
src += stride;
void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, int h, int x, int y) { void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, int h, int x, int y) {
POWERPC_PERF_DECLARE(PREFIX_h264_chroma_mc8_num, 1); POWERPC_PERF_DECLARE(PREFIX_h264_chroma_mc8_num, 1);
...@@ -109,6 +130,7 @@ void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, in ...@@ -109,6 +130,7 @@ void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, in
vsrc0ssH = (vec_s16_t)vec_mergeh(zero_u8v,(vec_u8_t)vsrc0uc); vsrc0ssH = (vec_s16_t)vec_mergeh(zero_u8v,(vec_u8_t)vsrc0uc);
vsrc1ssH = (vec_s16_t)vec_mergeh(zero_u8v,(vec_u8_t)vsrc1uc); vsrc1ssH = (vec_s16_t)vec_mergeh(zero_u8v,(vec_u8_t)vsrc1uc);
if (ABCD[3]) {
if (!loadSecond) {// -> !reallyBadAlign if (!loadSecond) {// -> !reallyBadAlign
for (i = 0 ; i < h ; i++) { for (i = 0 ; i < h ; i++) {
vsrcCuc = vec_ld(stride + 0, src); vsrcCuc = vec_ld(stride + 0, src);
...@@ -130,6 +152,26 @@ void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, in ...@@ -130,6 +152,26 @@ void PREFIX_h264_chroma_mc8_altivec(uint8_t * dst, uint8_t * src, int stride, in
CHROMA_MC8_ALTIVEC_CORE CHROMA_MC8_ALTIVEC_CORE
} }
}
} else {
if (!loadSecond) {// -> !reallyBadAlign
for (i = 0 ; i < h ; i++) {
vsrcCuc = vec_ld(stride + 0, src);
vsrc2uc = vec_perm(vsrcCuc, vsrcCuc, vsrcperm0);
CHROMA_MC8_ALTIVEC_CORE_SIMPLE
}
} else {
vec_u8_t vsrcDuc;
for (i = 0 ; i < h ; i++) {
vsrcCuc = vec_ld(stride + 0, src);
vsrcDuc = vec_ld(stride + 16, src);
vsrc2uc = vec_perm(vsrcCuc, vsrcDuc, vsrcperm0);
CHROMA_MC8_ALTIVEC_CORE_SIMPLE
}
}
} }
POWERPC_PERF_STOP_COUNT(PREFIX_h264_chroma_mc8_num, 1); POWERPC_PERF_STOP_COUNT(PREFIX_h264_chroma_mc8_num, 1);
} }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment