add missing source file

2008-10-03 16:03:15 +00:00 · 2008-10-03 16:03:15 +00:00 · e8b45f1de4
commit e8b45f1de4
parent 50c9bd86cb
3 changed files with 412 additions and 101 deletions
--- a/dct_decode_mmx.c
+++ b/dct_decode_mmx.c
@ -0,0 +1,409 @@
 /********************************************************************
 *                                                                  *
 * THIS FILE IS PART OF THE OggTheora SOFTWARE CODEC SOURCE CODE.   *
 * USE, DISTRIBUTION AND REPRODUCTION OF THIS LIBRARY SOURCE IS     *
 * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE *
 * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING.       *
 *                                                                  *
 * THE Theora SOURCE CODE IS COPYRIGHT (C) 2002-2008                *
 * by the Xiph.Org Foundation http://www.xiph.org/                  *
 *                                                                  *
 ********************************************************************
  function:
  last mod: $Id$
 ********************************************************************/
 #include <stdlib.h>
 #include "codec_internal.h"
 #if defined(USE_ASM)
 /*Four packed 16-bit words, each equal to 3.
   Used by the loop filters below as the pmullw multiplier for the
    3*(p2-p1) term.*/
 static const ogg_int64_t OC_V3 __attribute__((aligned(8),used))=
  0x0003000300030003LL;
 /*Four packed 16-bit words, each equal to 4.
   Added by the loop filters below as the rounding bias before the
    arithmetic shift right by 3.*/
 static const ogg_int64_t OC_V4 __attribute__((aligned(8),used))=
  0x0004000400040004LL;
 /*Loop-filters 8 horizontally adjacent pixel columns across a horizontal
    edge using MMX.
   _pix:     On entry, the address of the row just below the edge.
             The function steps back two rows, reads four consecutive rows of
              8 bytes each, and rewrites the two middle rows.
   _ystride: Byte offset between consecutive rows.
   _ll:      Four 16-bit copies of the filter limit L (loaded with one movq).
   The MMX state is left dirty; no emms is executed here.*/
 static void loop_filter_v(unsigned char *_pix,int _ystride,
                           const ogg_int16_t *_ll){
  long esi;
  /*Point _pix at the first of the four rows that take part in the filter.*/
  _pix-=_ystride*2;
  __asm__ __volatile__(
    /*mm0=0*/
    "pxor %%mm0,%%mm0\n\t"
    /*esi=_ystride*3*/
    "lea (%[ystride],%[ystride],2),%[s]\n\t"
    /*mm7=_pix[0...8]*/
    "movq (%[pix]),%%mm7\n\t"
    /*mm4=_pix[0...8+_ystride*3]*/
    "movq (%[pix],%[s]),%%mm4\n\t"
    /*mm6=_pix[0...8]*/
    "movq %%mm7,%%mm6\n\t"
    /*Expand unsigned _pix[0...3] to 16 bits.*/
    "punpcklbw %%mm0,%%mm6\n\t"
    "movq %%mm4,%%mm5\n\t"
    /*Expand unsigned _pix[4...8] to 16 bits.*/
    "punpckhbw %%mm0,%%mm7\n\t"
    /*Expand other arrays too.*/
    "punpcklbw %%mm0,%%mm4\n\t"
    "punpckhbw %%mm0,%%mm5\n\t"
    /*mm7:mm6=_pix[0...8]-_pix[0...8+_ystride*3]:*/
    "psubw %%mm4,%%mm6\n\t"
    "psubw %%mm5,%%mm7\n\t"
    /*mm5=mm4=_pix[0...8+_ystride]*/
    "movq (%[pix],%[ystride]),%%mm4\n\t"
    /*mm1=mm3=mm2=_pix[0...8+_ystride*2]*/
    "movq (%[pix],%[ystride],2),%%mm2\n\t"
    "movq %%mm4,%%mm5\n\t"
    "movq %%mm2,%%mm3\n\t"
    /*mm1 keeps the packed bytes of row 2 for the final update.*/
    "movq %%mm2,%%mm1\n\t"
    /*Expand these arrays.*/
    "punpckhbw %%mm0,%%mm5\n\t"
    "punpcklbw %%mm0,%%mm4\n\t"
    "punpckhbw %%mm0,%%mm3\n\t"
    "punpcklbw %%mm0,%%mm2\n\t"
    /*Preload the constant 3 (mm0 is no longer needed as zero).*/
    "movq %[OC_V3],%%mm0\n\t"
    /*mm3:mm2=_pix[0...8+_ystride*2]-_pix[0...8+_ystride]*/
    "psubw %%mm5,%%mm3\n\t"
    "psubw %%mm4,%%mm2\n\t"
    /*Scale by 3.*/
    "pmullw %%mm0,%%mm3\n\t"
    "pmullw %%mm0,%%mm2\n\t"
    /*Preload the constant 4.*/
    "movq %[OC_V4],%%mm0\n\t"
    /*f=mm3:mm2==_pix[0...8]-_pix[0...8+_ystride*3]+
       3*(_pix[0...8+_ystride*2]-_pix[0...8+_ystride])*/
    "paddw %%mm7,%%mm3\n\t"
    "paddw %%mm6,%%mm2\n\t"
    /*Add 4.*/
    "paddw %%mm0,%%mm3\n\t"
    "paddw %%mm0,%%mm2\n\t"
    /*"Divide" by 8.*/
    "psraw $3,%%mm3\n\t"
    "psraw $3,%%mm2\n\t"
    /*Now compute lflim of mm3:mm2 cf. Section 7.10 of the spec.*/
    /*Free up mm5 by repacking row 1 (mm5:mm4) into the bytes of mm4.*/
    "packuswb %%mm5,%%mm4\n\t"
    /*mm0=L L L L*/
    "movq (%[ll]),%%mm0\n\t"
    /*if(R_i<-2L||R_i>2L)R_i=0:*/
    "movq %%mm2,%%mm5\n\t"
    "pxor %%mm6,%%mm6\n\t"
    "movq %%mm0,%%mm7\n\t"
    "psubw %%mm0,%%mm6\n\t"
    "psllw $1,%%mm7\n\t"
    "psllw $1,%%mm6\n\t"
    /*mm2==R_3 R_2 R_1 R_0*/
    /*mm5==R_3 R_2 R_1 R_0*/
    /*mm6==-2L -2L -2L -2L*/
    /*mm7==2L 2L 2L 2L*/
    "pcmpgtw %%mm2,%%mm7\n\t"
    "pcmpgtw %%mm6,%%mm5\n\t"
    "pand %%mm7,%%mm2\n\t"
    "movq %%mm0,%%mm7\n\t"
    "pand %%mm5,%%mm2\n\t"
    "psllw $1,%%mm7\n\t"
    "movq %%mm3,%%mm5\n\t"
    /*mm3==R_7 R_6 R_5 R_4*/
    /*mm5==R_7 R_6 R_5 R_4*/
    /*mm6==-2L -2L -2L -2L*/
    /*mm7==2L 2L 2L 2L*/
    "pcmpgtw %%mm3,%%mm7\n\t"
    "pcmpgtw %%mm6,%%mm5\n\t"
    "pand %%mm7,%%mm3\n\t"
    "movq %%mm0,%%mm7\n\t"
    "pand %%mm5,%%mm3\n\t"
    /*if(R_i<-L)R_i'=R_i+2L;
      if(R_i>L)R_i'=R_i-2L;
      if(R_i<-L||R_i>L)R_i=-R_i':*/
    /*Halve -2L to get -L, and double the copy of L in mm7 to get 2L.*/
    "psraw $1,%%mm6\n\t"
    "movq %%mm2,%%mm5\n\t"
    "psllw $1,%%mm7\n\t"
    /*mm2==R_3 R_2 R_1 R_0*/
    /*mm5==R_3 R_2 R_1 R_0*/
    /*mm6==-L -L -L -L*/
    /*mm0==L L L L*/
    /*mm5=R_i>L?FF:00*/
    "pcmpgtw %%mm0,%%mm5\n\t"
    /*mm6=-L>R_i?FF:00*/
    "pcmpgtw %%mm2,%%mm6\n\t"
    /*mm7=R_i>L?2L:0*/
    "pand %%mm5,%%mm7\n\t"
    /*mm2=R_i>L?R_i-2L:R_i*/
    "psubw %%mm7,%%mm2\n\t"
    "movq %%mm0,%%mm7\n\t"
    /*mm5=-L>R_i||R_i>L*/
    "por %%mm6,%%mm5\n\t"
    "psllw $1,%%mm7\n\t"
    /*mm7=-L>R_i?2L:0*/
    "pand %%mm6,%%mm7\n\t"
    "pxor %%mm6,%%mm6\n\t"
    /*mm2=-L>R_i?R_i+2L:R_i*/
    "paddw %%mm7,%%mm2\n\t"
    /*mm6=-L -L -L -L again, ready for the upper four residuals.*/
    "psubw %%mm0,%%mm6\n\t"
    /*mm5=-L>R_i||R_i>L?R_i':0*/
    "pand %%mm2,%%mm5\n\t"
    "movq %%mm0,%%mm7\n\t"
    /*mm2=-L>R_i||R_i>L?0:R_i*/
    "psubw %%mm5,%%mm2\n\t"
    "psllw $1,%%mm7\n\t"
    /*mm2=-L>R_i||R_i>L?-R_i':R_i*/
    "psubw %%mm5,%%mm2\n\t"
    "movq %%mm3,%%mm5\n\t"
    /*mm3==R_7 R_6 R_5 R_4*/
    /*mm5==R_7 R_6 R_5 R_4*/
    /*mm6==-L -L -L -L*/
    /*mm0==L L L L*/
    /*mm6=-L>R_i?FF:00*/
    "pcmpgtw %%mm3,%%mm6\n\t"
    /*mm5=R_i>L?FF:00*/
    "pcmpgtw %%mm0,%%mm5\n\t"
    /*mm7=R_i>L?2L:0*/
    "pand %%mm5,%%mm7\n\t"
    /*mm3=R_i>L?R_i-2L:R_i*/
    "psubw %%mm7,%%mm3\n\t"
    /*L is not needed any more, so mm0 itself becomes 2L.*/
    "psllw $1,%%mm0\n\t"
    /*mm5=-L>R_i||R_i>L*/
    "por %%mm6,%%mm5\n\t"
    /*mm0=-L>R_i?2L:0*/
    "pand %%mm6,%%mm0\n\t"
    /*mm3=-L>R_i?R_i+2L:R_i*/
    "paddw %%mm0,%%mm3\n\t"
    /*mm5=-L>R_i||R_i>L?R_i':0*/
    "pand %%mm3,%%mm5\n\t"
    /*mm3=-L>R_i||R_i>L?0:R_i*/
    "psubw %%mm5,%%mm3\n\t"
    /*mm3=-L>R_i||R_i>L?-R_i':R_i*/
    "psubw %%mm5,%%mm3\n\t"
    /*Unfortunately, there's no unsigned byte+signed byte with unsigned
       saturation op code, so we have to promote things back 16 bits.*/
    "pxor %%mm0,%%mm0\n\t"
    "movq %%mm4,%%mm5\n\t"
    "punpcklbw %%mm0,%%mm4\n\t"
    "punpckhbw %%mm0,%%mm5\n\t"
    "movq %%mm1,%%mm6\n\t"
    "punpcklbw %%mm0,%%mm1\n\t"
    "punpckhbw %%mm0,%%mm6\n\t"
    /*_pix[0...8+_ystride]+=R_i*/
    "paddw %%mm2,%%mm4\n\t"
    "paddw %%mm3,%%mm5\n\t"
    /*_pix[0...8+_ystride*2]-=R_i*/
    "psubw %%mm2,%%mm1\n\t"
    "psubw %%mm3,%%mm6\n\t"
    /*Pack back to bytes with unsigned saturation.*/
    "packuswb %%mm5,%%mm4\n\t"
    "packuswb %%mm6,%%mm1\n\t"
    /*Write it back out.*/
    "movq %%mm4,(%[pix],%[ystride])\n\t"
    "movq %%mm1,(%[pix],%[ystride],2)\n\t"
    /*esi is an early-clobber scratch output that holds _ystride*3.*/
    :[s]"=&S"(esi)
    :[pix]"r"(_pix),[ystride]"r"((long)_ystride),[ll]"r"(_ll),
     [OC_V3]"m"(OC_V3),[OC_V4]"m"(OC_V4)
    :"memory"
  );
 }
 /*This code implements the bulk of loop_filter_h().
   Data are striped p0 p1 p2 p3 ... p0 p1 p2 p3 ..., so in order to load all
    four p0's to one register we must transpose the values in four mmx regs.
   When half is done we repeat this for the rest.
   _pix:     Address of the leftmost of the four pixels read on the first row.
             Four bytes are read from each of four consecutive rows, and the
              two middle bytes of each row (offsets 1 and 2) are rewritten.
   _ystride: Byte offset between consecutive rows.
   _ll:      Four 16-bit copies of the filter limit L (loaded with one movq).
   The MMX state is left dirty; no emms is executed here.*/
 static void loop_filter_h4(unsigned char *_pix,long _ystride,
                            const ogg_int16_t *_ll){
  long esi;
  long edi;
  __asm__ __volatile__(
    /*x x x x 3 2 1 0*/
    "movd (%[pix]),%%mm0\n\t"
    /*esi=_ystride*3*/
    "lea (%[ystride],%[ystride],2),%[s]\n\t"
    /*x x x x 7 6 5 4*/
    "movd (%[pix],%[ystride]),%%mm1\n\t"
    /*x x x x B A 9 8*/
    "movd (%[pix],%[ystride],2),%%mm2\n\t"
    /*x x x x F E D C*/
    "movd (%[pix],%[s]),%%mm3\n\t"
    /*mm0=7 3 6 2 5 1 4 0*/
    "punpcklbw %%mm1,%%mm0\n\t"
    /*mm2=F B E A D 9 C 8*/
    "punpcklbw %%mm3,%%mm2\n\t"
    /*mm1=7 3 6 2 5 1 4 0*/
    "movq %%mm0,%%mm1\n\t"
    /*mm0=F B 7 3 E A 6 2*/
    "punpckhwd %%mm2,%%mm0\n\t"
    /*mm1=D 9 5 1 C 8 4 0*/
    "punpcklwd %%mm2,%%mm1\n\t"
    /*mm7=0; it stays zero for the rest of the function.*/
    "pxor %%mm7,%%mm7\n\t"
    /*mm5=D 9 5 1 C 8 4 0*/
    "movq %%mm1,%%mm5\n\t"
    /*mm1=x C x 8 x 4 x 0==pix[0]*/
    "punpcklbw %%mm7,%%mm1\n\t"
    /*mm5=x D x 9 x 5 x 1==pix[1]*/
    "punpckhbw %%mm7,%%mm5\n\t"
    /*mm3=F B 7 3 E A 6 2*/
    "movq %%mm0,%%mm3\n\t"
    /*mm0=x E x A x 6 x 2==pix[2]*/
    "punpcklbw %%mm7,%%mm0\n\t"
    /*mm3=x F x B x 7 x 3==pix[3]*/
    "punpckhbw %%mm7,%%mm3\n\t"
    /*mm1=mm1-mm3==pix[0]-pix[3]*/
    "psubw %%mm3,%%mm1\n\t"
    /*Save a copy of pix[2] for later.*/
    "movq %%mm0,%%mm4\n\t"
    /*mm0=mm0-mm5==pix[2]-pix[1]*/
    "psubw %%mm5,%%mm0\n\t"
    /*Scale by 3.*/
    "pmullw %[OC_V3],%%mm0\n\t"
    /*f=mm0==_pix[0]-_pix[3]+ 3*(_pix[2]-_pix[1])*/
    "paddw %%mm1,%%mm0\n\t"
    /*Add 4.*/
    "paddw %[OC_V4],%%mm0\n\t"
    /*"Divide" by 8, producing the residuals R_i.*/
    "psraw $3,%%mm0\n\t"
    /*Now compute lflim of mm0 cf. Section 7.10 of the spec.*/
    /*mm6=L L L L*/
    "movq (%[ll]),%%mm6\n\t"
    /*if(R_i<-2L||R_i>2L)R_i=0:*/
    "movq %%mm0,%%mm1\n\t"
    "pxor %%mm2,%%mm2\n\t"
    "movq %%mm6,%%mm3\n\t"
    "psubw %%mm6,%%mm2\n\t"
    "psllw $1,%%mm3\n\t"
    "psllw $1,%%mm2\n\t"
    /*mm0==R_3 R_2 R_1 R_0*/
    /*mm1==R_3 R_2 R_1 R_0*/
    /*mm2==-2L -2L -2L -2L*/
    /*mm3==2L 2L 2L 2L*/
    "pcmpgtw %%mm0,%%mm3\n\t"
    "pcmpgtw %%mm2,%%mm1\n\t"
    "pand %%mm3,%%mm0\n\t"
    "pand %%mm1,%%mm0\n\t"
    /*if(R_i<-L)R_i'=R_i+2L;
      if(R_i>L)R_i'=R_i-2L;
      if(R_i<-L||R_i>L)R_i=-R_i':*/
    /*Halve -2L to get -L.*/
    "psraw $1,%%mm2\n\t"
    "movq %%mm0,%%mm1\n\t"
    "movq %%mm6,%%mm3\n\t"
    /*mm0==R_3 R_2 R_1 R_0*/
    /*mm1==R_3 R_2 R_1 R_0*/
    /*mm2==-L -L -L -L*/
    /*mm6==L L L L*/
    /*mm2=-L>R_i?FF:00*/
    "pcmpgtw %%mm0,%%mm2\n\t"
    /*mm1=R_i>L?FF:00*/
    "pcmpgtw %%mm6,%%mm1\n\t"
    /*mm3=2L 2L 2L 2L*/
    "psllw $1,%%mm3\n\t"
    /*mm6=2L 2L 2L 2L*/
    "psllw $1,%%mm6\n\t"
    /*mm3=R_i>L?2L:0*/
    "pand %%mm1,%%mm3\n\t"
    /*mm6=-L>R_i?2L:0*/
    "pand %%mm2,%%mm6\n\t"
    /*mm0=R_i>L?R_i-2L:R_i*/
    "psubw %%mm3,%%mm0\n\t"
    /*mm1=-L>R_i||R_i>L*/
    "por %%mm2,%%mm1\n\t"
    /*mm0=-L>R_i?R_i+2L:R_i*/
    "paddw %%mm6,%%mm0\n\t"
    /*mm1=-L>R_i||R_i>L?R_i':0*/
    "pand %%mm0,%%mm1\n\t"
    /*mm0=-L>R_i||R_i>L?0:R_i*/
    "psubw %%mm1,%%mm0\n\t"
    /*mm0=-L>R_i||R_i>L?-R_i':R_i*/
    "psubw %%mm1,%%mm0\n\t"
    /*_pix[1]+=R_i;*/
    "paddw %%mm0,%%mm5\n\t"
    /*_pix[2]-=R_i;*/
    "psubw %%mm0,%%mm4\n\t"
    /*mm5=x x x x D 9 5 1*/
    "packuswb %%mm7,%%mm5\n\t"
    /*mm4=x x x x E A 6 2*/
    "packuswb %%mm7,%%mm4\n\t"
    /*mm5=E D A 9 6 5 2 1*/
    "punpcklbw %%mm4,%%mm5\n\t"
    /*Each 16-bit store below writes the new pix[1],pix[2] pair of one row.*/
    /*edi=6 5 2 1*/
    "movd %%mm5,%%edi\n\t"
    "movw %%di,1(%[pix])\n\t"
    /*Why is there such a big stall here?*/
    "psrlq $32,%%mm5\n\t"
    "shrl $16,%%edi\n\t"
    "movw %%di,1(%[pix],%[ystride])\n\t"
    /*edi=E D A 9*/
    "movd %%mm5,%%edi\n\t"
    "movw %%di,1(%[pix],%[ystride],2)\n\t"
    "shrl $16,%%edi\n\t"
    "movw %%di,1(%[pix],%[s])\n\t"
    /*The template names %edi/%di directly, so the [d] scratch output is
       pinned to that register with the "D" constraint.*/
    :[s]"=&S"(esi),[d]"=&D"(edi),
     [pix]"+r"(_pix),[ystride]"+r"(_ystride),[ll]"+r"(_ll)
    :[OC_V3]"m"(OC_V3),[OC_V4]"m"(OC_V4)
    :"memory"
  );
 }
 /*Loop-filters a vertical edge that is eight rows tall.
   _pix:     Address of the first pixel to the right of the edge on the top
              row; filtering starts two bytes to the left of it.
   _ystride: Byte offset between consecutive rows.
   _ll:      Four 16-bit copies of the filter limit, passed through unchanged.
   The work is done by two calls to loop_filter_h4(), one for rows 0...3 and
    one for rows 4...7.*/
 static void loop_filter_h(unsigned char *_pix,int _ystride,
                           const ogg_int16_t *_ll){
  unsigned char *upper_rows;
  unsigned char *lower_rows;
  upper_rows=_pix-2;
  /*The second group of four rows starts 4*_ystride bytes further on.*/
  lower_rows=upper_rows+(_ystride<<2);
  loop_filter_h4(upper_rows,_ystride,_ll);
  loop_filter_h4(lower_rows,_ystride,_ll);
 }
 /*Runs the MMX loop filter over the three planes of pbi->LastFrameRecon.
   pbi:    Decoder state; supplies the per-fragment flags
            (display_fragments), the per-fragment pixel offsets
            (recon_pixel_index_table) and the plane geometry.
   FLimit: Filter limit; 0 disables filtering entirely.
   For every fragment whose flag is set, the left and top edges are filtered
    when a neighbour exists on that side, and the right and bottom edges are
    filtered when a neighbour exists there and its flag is clear.
   emms is executed once after all planes have been processed.*/
 static void loop_filter_mmx(PB_INSTANCE *pbi, int FLimit){
  ogg_int16_t __attribute__((aligned(8)))  ll[4];
  unsigned char *flag = pbi->display_fragments;
  ogg_uint32_t *frag = pbi->recon_pixel_index_table;
  int plane;
  if(FLimit==0)return;
  /*Broadcast the limit to all four 16-bit lanes read by the filters.*/
  ll[0]=ll[1]=ll[2]=ll[3]=FLimit;
  for(plane=0;plane<3;plane++){
    ogg_uint32_t *plane_first=frag;
    ogg_uint32_t *plane_last;
    int stride;
    int h;
    if(plane==0){
      /*Y plane.*/
      plane_last=frag+pbi->YPlaneFragments;
      h=pbi->HFragments;
      stride=pbi->YStride;
    }
    else{
      /*U and V planes; halving the row width is 4:2:0 specific.*/
      plane_last=frag+pbi->UVPlaneFragments;
      h=pbi->HFragments>>1;
      stride=pbi->UVStride;
    }
    while(frag<plane_last){
      ogg_uint32_t *row_first=frag;
      ogg_uint32_t *row_last=frag+h;
      for(;frag<row_last;frag++,flag++){
        unsigned char *pix;
        if(!flag[0])continue;
        pix=&pbi->LastFrameRecon[frag[0]];
        /*Left edge: only when this is not the first fragment of the row.*/
        if(frag>row_first)loop_filter_h(pix,stride,ll);
        /*Top edge: only when this is not the first row of the plane.*/
        if(row_first>plane_first)loop_filter_v(pix,stride,ll);
        /*Right edge: only when the right neighbour's flag is clear.*/
        if(frag+1<row_last&&!flag[1])loop_filter_h(pix+8,stride,ll);
        /*Bottom edge: only when the flag of the fragment below is clear.*/
        if(frag+h<plane_last&&!flag[h]){
          loop_filter_v(&pbi->LastFrameRecon[frag[h]],stride,ll);
        }
      }
    }
  }
  /*Leave MMX mode so that later x87 floating-point code works.*/
  __asm__ __volatile__("emms\n\t");
 }
 /*Registers the MMX loop filter defined in this file.
   funcs: Function table whose LoopFilter entry is overwritten with
           loop_filter_mmx; no other entry is touched.*/
 void dsp_mmx_dct_decode_init(DspFunctions *funcs){
  funcs->LoopFilter=loop_filter_mmx;
 }
 #endif /* USE_ASM */
--- a/libtheora-x86_64.patch
+++ b/libtheora-x86_64.patch
@ -1,98 +0,0 @@
 diff -up libtheora-1.0RC1/lib/Makefile.am.x86_64 libtheora-1.0RC1/lib/Makefile.am
 --- libtheora-1.0RC1/lib/Makefile.am.x86_64	2008-10-03 11:52:51.000000000 -0400
 +++ libtheora-1.0RC1/lib/Makefile.am	2008-10-03 11:53:08.000000000 -0400
@@ -52,7 +52,6 @@ encoder_sources = \
 if CPU_x86_64
 enc_arch_dir = enc/x86_64
 encoder_arch_sources= \
 -	$(enc_arch_dir)/dct_decode_mmx.c \
 	$(enc_arch_dir)/dsp_mmx.c \
 	$(enc_arch_dir)/dsp_mmxext.c \
 	$(enc_arch_dir)/recon_mmx.c \
 diff -up libtheora-1.0RC1/lib/Makefile.in.x86_64 libtheora-1.0RC1/lib/Makefile.in
 --- libtheora-1.0RC1/lib/Makefile.in.x86_64	2008-10-03 11:54:42.000000000 -0400
 +++ libtheora-1.0RC1/lib/Makefile.in	2008-10-03 11:55:05.000000000 -0400
@@ -38,10 +38,7 @@ subdir = lib
 DIST_COMMON = $(noinst_HEADERS) $(srcdir)/Makefile.am \
 	$(srcdir)/Makefile.in
 ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
 -am__aclocal_m4_deps = $(top_srcdir)/m4/as-ac-expand.m4 \
 -	$(top_srcdir)/m4/ogg.m4 $(top_srcdir)/m4/pkg.m4 \
 -	$(top_srcdir)/m4/sdl.m4 $(top_srcdir)/m4/vorbis.m4 \
 -	$(top_srcdir)/configure.ac
 +am__aclocal_m4_deps = $(top_srcdir)/configure.ac
 am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
 	$(ACLOCAL_M4)
 mkinstalldirs = $(install_sh) -d
@@ -84,7 +81,7 @@ am__objects_3 = apiwrapper.lo bitpack.lo
 @CPU_x86_32_TRUE@@CPU_x86_64_FALSE@@THEORA_DISABLE_ENCODE_FALSE@	recon_mmx.lo \
 @CPU_x86_32_TRUE@@CPU_x86_64_FALSE@@THEORA_DISABLE_ENCODE_FALSE@	idct_mmx.lo \
 @CPU_x86_32_TRUE@@CPU_x86_64_FALSE@@THEORA_DISABLE_ENCODE_FALSE@	fdct_mmx.lo
 -@CPU_x86_64_TRUE@@THEORA_DISABLE_ENCODE_FALSE@am__objects_4 = dct_decode_mmx.lo \
 +@CPU_x86_64_TRUE@@THEORA_DISABLE_ENCODE_FALSE@am__objects_4 =  \
 @CPU_x86_64_TRUE@@THEORA_DISABLE_ENCODE_FALSE@	dsp_mmx.lo \
 @CPU_x86_64_TRUE@@THEORA_DISABLE_ENCODE_FALSE@	dsp_mmxext.lo \
 @CPU_x86_64_TRUE@@THEORA_DISABLE_ENCODE_FALSE@	recon_mmx.lo \
@@ -119,7 +116,6 @@ libtheoradec_la_OBJECTS = $(am_libtheora
 libtheoradec_la_LINK = $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) \
 	$(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \
 	$(libtheoradec_la_LDFLAGS) $(LDFLAGS) -o $@
 -libtheoraenc_la_LIBADD = libtheoradec.la
 libtheoraenc_la_DEPENDENCIES = libtheoradec.la
 am__libtheoraenc_la_SOURCES_DIST = cpu.c \
 	$(enc_arch_dir)/dct_decode_mmx.c $(enc_arch_dir)/dsp_mmx.c \
@@ -166,7 +162,6 @@ AUTOCONF = @AUTOCONF@
 AUTOHEADER = @AUTOHEADER@
 AUTOMAKE = @AUTOMAKE@
 AWK = @AWK@
 -BINDIR = @BINDIR@
 BUILDABLE_EXAMPLES = @BUILDABLE_EXAMPLES@
 CC = @CC@
 CCDEPMODE = @CCDEPMODE@
@@ -182,7 +177,6 @@ DEBUG = @DEBUG@
 DEFS = @DEFS@
 DEPDIR = @DEPDIR@
 DLLTOOL = @DLLTOOL@
 -DOCDIR = @DOCDIR@
 DSYMUTIL = @DSYMUTIL@
 ECHO = @ECHO@
 ECHO_C = @ECHO_C@
@@ -200,14 +194,12 @@ HAVE_PDFLATEX = @HAVE_PDFLATEX@
 HAVE_PKG_CONFIG = @HAVE_PKG_CONFIG@
 HAVE_TRANSFIG = @HAVE_TRANSFIG@
 HAVE_VALGRIND = @HAVE_VALGRIND@
 -INCLUDEDIR = @INCLUDEDIR@
 INSTALL = @INSTALL@
 INSTALL_DATA = @INSTALL_DATA@
 INSTALL_PROGRAM = @INSTALL_PROGRAM@
 INSTALL_SCRIPT = @INSTALL_SCRIPT@
 INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
 LDFLAGS = @LDFLAGS@
 -LIBDIR = @LIBDIR@
 LIBOBJS = @LIBOBJS@
 LIBS = @LIBS@
 LIBTOOL = @LIBTOOL@
@@ -311,6 +303,7 @@ target_alias = @target_alias@
 target_cpu = @target_cpu@
 target_os = @target_os@
 target_vendor = @target_vendor@
 +top_build_prefix = @top_build_prefix@
 top_builddir = @top_builddir@
 top_srcdir = @top_srcdir@
 INCLUDES = -I$(top_srcdir)/include -I$(top_srcdir)/lib -I$(top_srcdir)/lib/dec -I$(top_srcdir)/lib/enc
@@ -372,7 +365,6 @@ lib_LTLIBRARIES = libtheoradec.la libthe
 @CPU_x86_32_TRUE@@CPU_x86_64_FALSE@@THEORA_DISABLE_ENCODE_FALSE@	$(enc_arch_dir)/fdct_mmx.c
 @CPU_x86_64_TRUE@@THEORA_DISABLE_ENCODE_FALSE@encoder_arch_sources = \
 -@CPU_x86_64_TRUE@@THEORA_DISABLE_ENCODE_FALSE@	$(enc_arch_dir)/dct_decode_mmx.c \
 @CPU_x86_64_TRUE@@THEORA_DISABLE_ENCODE_FALSE@	$(enc_arch_dir)/dsp_mmx.c \
 @CPU_x86_64_TRUE@@THEORA_DISABLE_ENCODE_FALSE@	$(enc_arch_dir)/dsp_mmxext.c \
 @CPU_x86_64_TRUE@@THEORA_DISABLE_ENCODE_FALSE@	$(enc_arch_dir)/recon_mmx.c \
@@ -448,6 +440,7 @@ libtheoraenc_la_LDFLAGS = \
   -version-info @THENC_LIB_CURRENT@:@THENC_LIB_REVISION@:@THENC_LIB_AGE@ \
   @THEORAENC_LDFLAGS@ $(OGG_LIBS)
 +libtheoraenc_la_LIBADD = libtheoradec.la
 libtheora_la_SOURCES = \
   cpu.c \
 	$(decoder_arch_sources) \
--- a/libtheora.spec
+++ b/libtheora.spec
@ -7,8 +7,8 @@ License: BSD
 Group: System Environment/Libraries
 URL: http://www.theora.org
 Source0: http://downloads.xiph.org/releases/theora/libtheora-1.0RC1.tar.bz2
-# fix the build on x86-64
+# missing from tarball
-Patch0: libtheora-x86_64.patch
+Source1: dct_decode_mmx.c
 Patch2: libtheora-1.0beta3-undef-nonweak-symbols.patch
 BuildRoot: %{_tmppath}/%{name}-%{version}-%{release}-root-%(%{__id_u} -n) 
 BuildRequires: libogg-devel >= 2:1.1
@ -57,7 +57,7 @@ with theora bitstreams.
 %prep
 %setup -q -n libtheora-1.0RC1
-%patch0 -p1 -b .x86_64
+cp %{SOURCE1} lib/enc/x86_64
 %patch2 -p1
 # no custom CFLAGS please
 sed -i 's/CFLAGS="$CFLAGS $cflags_save"/CFLAGS="$cflags_save"/g' configure