Skip to content
Snippets Groups Projects
Commit 2fdf9cb2 authored by Falk Hüffner's avatar Falk Hüffner
Browse files

Communicate proper aliasing to gcc (needed for 4.1).

Originally committed as revision 4384 to svn://svn.ffmpeg.org/ffmpeg/trunk
parent 1a2f5491
No related branches found
No related tags found
No related merge requests found
...@@ -56,13 +56,33 @@ static inline uint64_t WORD_VEC(uint64_t x) ...@@ -56,13 +56,33 @@ static inline uint64_t WORD_VEC(uint64_t x)
return x; return x;
} }
#define ldq(p) (*(const uint64_t *) (p))
#define ldl(p) (*(const int32_t *) (p))
#define stl(l, p) do { *(uint32_t *) (p) = (l); } while (0)
#define stq(l, p) do { *(uint64_t *) (p) = (l); } while (0)
#define sextw(x) ((int16_t) (x)) #define sextw(x) ((int16_t) (x))
#ifdef __GNUC__ #ifdef __GNUC__
/*
 * Load/store helpers that tell GCC about aliasing (needed for GCC 4.1).
 *
 * Each macro casts p to a pointer to an anonymous union that overlays the
 * wide integer (__l) with an array of p's own element type (__s), and then
 * accesses __l.  Because the union contains the element type of *(p), the
 * compiler treats the wide access as possibly aliasing the objects p points
 * to, instead of assuming the two types never alias.
 *
 *   ldq(p)     - read a uint64_t from p
 *   ldl(p)     - read an int32_t from p
 *   stq(l, p)  - write l to p as a uint64_t
 *   stl(l, p)  - write l to p as an int32_t
 *
 * p appears only once outside __typeof__/sizeof in each expansion.  The
 * stored value is parenthesized, as in the plain-cast variants of stq/stl.
 */
#define ldq(p)                                                       \
    (((union {                                                       \
        uint64_t __l;                                                \
        __typeof__(*(p)) __s[sizeof (uint64_t) / sizeof *(p)];       \
    } *) (p))->__l)
#define ldl(p)                                                       \
    (((union {                                                       \
        int32_t __l;                                                 \
        __typeof__(*(p)) __s[sizeof (int32_t) / sizeof *(p)];        \
    } *) (p))->__l)
#define stq(l, p)                                                    \
    do {                                                             \
        (((union {                                                   \
            uint64_t __l;                                            \
            __typeof__(*(p)) __s[sizeof (uint64_t) / sizeof *(p)];   \
        } *) (p))->__l) = (l);                                       \
    } while (0)
#define stl(l, p)                                                    \
    do {                                                             \
        (((union {                                                   \
            int32_t __l;                                             \
            __typeof__(*(p)) __s[sizeof (int32_t) / sizeof *(p)];    \
        } *) (p))->__l) = (l);                                       \
    } while (0)
struct unaligned_long { uint64_t l; } __attribute__((packed)); struct unaligned_long { uint64_t l; } __attribute__((packed));
#define ldq_u(p) (*(const uint64_t *) (((uint64_t) (p)) & ~7ul)) #define ldq_u(p) (*(const uint64_t *) (((uint64_t) (p)) & ~7ul))
#define uldq(a) (((const struct unaligned_long *) (a))->l) #define uldq(a) (((const struct unaligned_long *) (a))->l)
...@@ -132,6 +152,10 @@ struct unaligned_long { uint64_t l; } __attribute__((packed)); ...@@ -132,6 +152,10 @@ struct unaligned_long { uint64_t l; } __attribute__((packed));
#elif defined(__DECC) /* Digital/Compaq/hp "ccc" compiler */ #elif defined(__DECC) /* Digital/Compaq/hp "ccc" compiler */
#include <c_asm.h> #include <c_asm.h>
/*
 * Load/store helpers for the DEC C branch, implemented as plain pointer
 * casts: ldq/ldl read a uint64_t/int32_t from p, stq/stl write l to p as a
 * uint64_t/int32_t.
 */
#define ldq(p) (*(const uint64_t *) (p))
#define ldl(p) (*(const int32_t *) (p))
#define stq(l, p) do { *(uint64_t *) (p) = (l); } while (0)
#define stl(l, p) do { *(int32_t *) (p) = (l); } while (0)
#define ldq_u(a) asm ("ldq_u %v0,0(%a0)", a) #define ldq_u(a) asm ("ldq_u %v0,0(%a0)", a)
#define uldq(a) (*(const __unaligned uint64_t *) (a)) #define uldq(a) (*(const __unaligned uint64_t *) (a))
#define cmpbge(a, b) asm ("cmpbge %a0,%a1,%v0", a, b) #define cmpbge(a, b) asm ("cmpbge %a0,%a1,%v0", a, b)
......
...@@ -235,25 +235,22 @@ static inline void idct_col2(DCTELEM *col) ...@@ -235,25 +235,22 @@ static inline void idct_col2(DCTELEM *col)
{ {
int i; int i;
uint64_t l, r; uint64_t l, r;
uint64_t *lcol = (uint64_t *) col;
for (i = 0; i < 8; ++i) { for (i = 0; i < 8; ++i) {
int_fast32_t a0 = col[0] + (1 << (COL_SHIFT - 1)) / W4; int_fast32_t a0 = col[i] + (1 << (COL_SHIFT - 1)) / W4;
a0 *= W4; a0 *= W4;
col[0] = a0 >> COL_SHIFT; col[i] = a0 >> COL_SHIFT;
++col;
} }
l = lcol[0]; l = ldq(col + 0 * 4); r = ldq(col + 1 * 4);
r = lcol[1]; stq(l, col + 2 * 4); stq(r, col + 3 * 4);
lcol[ 2] = l; lcol[ 3] = r; stq(l, col + 4 * 4); stq(r, col + 5 * 4);
lcol[ 4] = l; lcol[ 5] = r; stq(l, col + 6 * 4); stq(r, col + 7 * 4);
lcol[ 6] = l; lcol[ 7] = r; stq(l, col + 8 * 4); stq(r, col + 9 * 4);
lcol[ 8] = l; lcol[ 9] = r; stq(l, col + 10 * 4); stq(r, col + 11 * 4);
lcol[10] = l; lcol[11] = r; stq(l, col + 12 * 4); stq(r, col + 13 * 4);
lcol[12] = l; lcol[13] = r; stq(l, col + 14 * 4); stq(r, col + 15 * 4);
lcol[14] = l; lcol[15] = r;
} }
void simple_idct_axp(DCTELEM *block) void simple_idct_axp(DCTELEM *block)
...@@ -275,22 +272,20 @@ void simple_idct_axp(DCTELEM *block) ...@@ -275,22 +272,20 @@ void simple_idct_axp(DCTELEM *block)
if (rowsZero) { if (rowsZero) {
idct_col2(block); idct_col2(block);
} else if (rowsConstant) { } else if (rowsConstant) {
uint64_t *lblock = (uint64_t *) block;
idct_col(block); idct_col(block);
for (i = 0; i < 8; i += 2) { for (i = 0; i < 8; i += 2) {
uint64_t v = (uint16_t) block[i * 8]; uint64_t v = (uint16_t) block[0];
uint64_t w = (uint16_t) block[i * 8 + 8]; uint64_t w = (uint16_t) block[8];
v |= v << 16; v |= v << 16;
w |= w << 16; w |= w << 16;
v |= v << 32; v |= v << 32;
w |= w << 32; w |= w << 32;
lblock[0] = v; stq(v, block + 0 * 4);
lblock[1] = v; stq(v, block + 1 * 4);
lblock[2] = w; stq(w, block + 2 * 4);
lblock[3] = w; stq(w, block + 3 * 4);
lblock += 4; block += 4 * 4;
} }
} else { } else {
for (i = 0; i < 8; i++) for (i = 0; i < 8; i++)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment