yadif.h

Go to the documentation of this file.
00001 /*
00002  * Copyright (C) 2006 Michael Niedermayer <michaelni@gmx.at>
00003  *
00004  * This file is part of MPlayer.
00005  *
00006  * MPlayer is free software; you can redistribute it and/or modify
00007  * it under the terms of the GNU General Public License as published by
00008  * the Free Software Foundation; either version 2 of the License, or
00009  * (at your option) any later version.
00010  *
00011  * MPlayer is distributed in the hope that it will be useful,
00012  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00013  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014  * GNU General Public License for more details.
00015  *
00016  * You should have received a copy of the GNU General Public License along
00017  * with MPlayer; if not, write to the Free Software Foundation, Inc.,
00018  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
00019  */
00020 
00021 /* */
/*
 * Inline-assembly implementation. It is only compiled when CAN_COMPILE_SSE2
 * is defined and the compiler reports itself as GCC newer than 3.0; in that
 * case HAVE_YADIF_SSE2 is defined so other code can test for it.
 */
00022 #if defined(CAN_COMPILE_SSE2) && ((__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ > 0))
00023 
00024 #define HAVE_YADIF_SSE2
00025 
/*
 * LOAD4(mem, dst): asm-template fragment that loads 32 bits from `mem` into
 * `dst` (movd) and interleaves the low bytes with %mm7 (punpcklbw).
 * With %mm7 == 0 this widens 4 unsigned bytes to 4 16-bit words.
 */
00026 #define LOAD4(mem,dst) \
00027             "movd      "mem", "#dst" \n\t"\
00028             "punpcklbw %%mm7, "#dst" \n\t"
00029 
/*
 * PABS(tmp, dst): per-word absolute value of `dst`.
 * Computes tmp = 0 - dst, then dst = max(dst, tmp) (signed words).
 * `tmp` is overwritten.
 */
00030 #define PABS(tmp,dst) \
00031             "pxor     "#tmp", "#tmp" \n\t"\
00032             "psubw    "#dst", "#tmp" \n\t"\
00033             "pmaxsw   "#tmp", "#dst" \n\t"
00034 
/*
 * CHECK(pj, mj): evaluate one candidate direction.
 * Loads 8 bytes from cur+mrefs+pj and from cur+prefs+mj, then produces:
 *   %mm5 = per-sample (a+b)>>1 of the two loads, taken one byte further in
 *          (psrlq $8) and widened to words. pavgb rounds up, so the low bit
 *          of a^b (masked with pb1) is subtracted to get the floor.
 *   %mm2 = score: for each of 4 samples, the sum of the absolute byte
 *          differences at byte offsets 0, 1 and 2 from the load address.
 * Overwrites %mm2-%mm5; expects %mm7 == 0.
 */
00035 #define CHECK(pj,mj) \
00036             "movq "#pj"(%[cur],%[mrefs]), %%mm2 \n\t" /* cur[x-refs-1+j] */\
00037             "movq "#mj"(%[cur],%[prefs]), %%mm3 \n\t" /* cur[x+refs-1-j] */\
00038             "movq      %%mm2, %%mm4 \n\t"\
00039             "movq      %%mm2, %%mm5 \n\t"\
00040             "pxor      %%mm3, %%mm4 \n\t"\
00041             "pavgb     %%mm3, %%mm5 \n\t"\
00042             "pand     %[pb1], %%mm4 \n\t"\
00043             "psubusb   %%mm4, %%mm5 \n\t"\
00044             "psrlq     $8,    %%mm5 \n\t"\
00045             "punpcklbw %%mm7, %%mm5 \n\t" /* (cur[x-refs+j] + cur[x+refs-j])>>1 */\
00046             "movq      %%mm2, %%mm4 \n\t"\
00047             "psubusb   %%mm3, %%mm2 \n\t"\
00048             "psubusb   %%mm4, %%mm3 \n\t"\
00049             "pmaxub    %%mm3, %%mm2 \n\t"\
00050             "movq      %%mm2, %%mm3 \n\t"\
00051             "movq      %%mm2, %%mm4 \n\t" /* ABS(cur[x-refs-1+j] - cur[x+refs-1-j]) */\
00052             "psrlq      $8,   %%mm3 \n\t" /* ABS(cur[x-refs  +j] - cur[x+refs  -j]) */\
00053             "psrlq     $16,   %%mm4 \n\t" /* ABS(cur[x-refs+1+j] - cur[x+refs+1-j]) */\
00054             "punpcklbw %%mm7, %%mm2 \n\t"\
00055             "punpcklbw %%mm7, %%mm3 \n\t"\
00056             "punpcklbw %%mm7, %%mm4 \n\t"\
00057             "paddw     %%mm3, %%mm2 \n\t"\
00058             "paddw     %%mm4, %%mm2 \n\t" /* score */
00059 
/*
 * CHECK1: accept the candidate left by CHECK() where it improves the score.
 * In: %mm0 = spatial_score, %mm1 = spatial_pred, %mm2 = score, %mm5 = pred.
 * Per word, where score < spatial_score: spatial_score = score and
 * spatial_pred = candidate pred; other lanes keep their values.
 * The comparison mask is saved in %mm6 for a following CHECK2.
 * Overwrites %mm3 and %mm5.
 */
00060 #define CHECK1 \
00061             "movq      %%mm0, %%mm3 \n\t"\
00062             "pcmpgtw   %%mm2, %%mm3 \n\t" /* if(score < spatial_score) */\
00063             "pminsw    %%mm2, %%mm0 \n\t" /* spatial_score= score; */\
00064             "movq      %%mm3, %%mm6 \n\t"\
00065             "pand      %%mm3, %%mm5 \n\t"\
00066             "pandn     %%mm1, %%mm3 \n\t"\
00067             "por       %%mm5, %%mm3 \n\t"\
00068             "movq      %%mm3, %%mm1 \n\t" /* spatial_pred= (cur[x-refs+j] + cur[x+refs-j])>>1; */
00069 
/*
 * CHECK2: same selection as CHECK1, but gated by the mask CHECK1 left in %mm6.
 * mask + 1 is 0 where CHECK1 accepted and 1 where it did not; shifted left by
 * 14 this becomes a 0 / 0x4000 penalty that is added (with signed saturation)
 * to the new score before comparing.
 * Overwrites %mm2, %mm3, %mm5 and %mm6.
 */
00070 #define CHECK2 /* pretend not to have checked dir=2 if dir=1 was bad.\
00071                   hurts both quality and speed, but matches the C version. */\
00072             "paddw    %[pw1], %%mm6 \n\t"\
00073             "psllw     $14,   %%mm6 \n\t"\
00074             "paddsw    %%mm6, %%mm2 \n\t"\
00075             "movq      %%mm0, %%mm3 \n\t"\
00076             "pcmpgtw   %%mm2, %%mm3 \n\t"\
00077             "pminsw    %%mm2, %%mm0 \n\t"\
00078             "pand      %%mm3, %%mm5 \n\t"\
00079             "pandn     %%mm1, %%mm3 \n\t"\
00080             "por       %%mm5, %%mm3 \n\t"\
00081             "movq      %%mm3, %%mm1 \n\t"
00082 
/*
 * yadif_filter_line_mmx2: inline-assembly version of the line filter, working
 * in the MMX registers %mm0-%mm7.
 *
 * Every loop iteration widens 4 input bytes per operand to 16-bit words,
 * runs the filter on those 4 samples in parallel and stores 4 result bytes to
 * dst with one movd. The loop advances x by 4 while x < w, so the last group
 * is always written as a full 4 bytes.
 *
 *   p       only p->mode is read; the extra b/f temporal check (the code
 *           between "cmp $2" and label 1) is skipped when mode >= 2.
 *   dst     output pointer, advanced by 4 per iteration.
 *   prev, cur, next
 *           input pointers, advanced by 4 per iteration. They are addressed
 *           at offsets -refs and +refs, and the prev2/next2 pair also at
 *           -2*refs and +2*refs when mode < 2.
 *   w       loop bound (see above).
 *   refs    offset applied as +refs / -refs to the input pointers
 *           (presumably the line stride in bytes; confirm against the caller).
 *   parity  selects the prev2/next2 pair: non-zero -> (prev, cur),
 *           zero -> (cur, next).
 *
 * The edge-direction loads are 8-byte movq reads that start as early as 3
 * bytes before cur-refs / cur+refs and extend up to 8 bytes after them.
 *
 * NOTE(review): the asm statements declare no clobbers for the MMX registers
 * and no emms is executed in this function; presumably the caller takes care
 * of that - confirm. The final movd writes 4 bytes although its output
 * operand is declared as the single byte *dst.
 */
00083 static void yadif_filter_line_mmx2(struct vf_priv_s *p, uint8_t *dst, uint8_t *prev, uint8_t *cur, uint8_t *next, int w, int refs, int parity){
00084     static const uint64_t pw_1 = 0x0001000100010001ULL;
00085     static const uint64_t pb_1 = 0x0101010101010101ULL;
00086     const int mode = p->mode;
00087     uint64_t tmp0, tmp1, tmp2, tmp3;
00088     int x;
00089 
/*
 * FILTER is the whole per-line loop. The prev2/next2 operand names are string
 * literals pasted into the asm template at preprocessing time, which is why
 * the macro is expanded once per parity below instead of branching inside
 * the loop. tmp0..tmp3 are memory spill slots for c, d, e and diff.
 */
00090 #define FILTER\
00091     for(x=0; x<w; x+=4){\
00092         __asm__ volatile(\
00093             "pxor      %%mm7, %%mm7 \n\t"\
00094             LOAD4("(%[cur],%[mrefs])", %%mm0) /* c = cur[x-refs] */\
00095             LOAD4("(%[cur],%[prefs])", %%mm1) /* e = cur[x+refs] */\
00096             LOAD4("(%["prev2"])", %%mm2) /* prev2[x] */\
00097             LOAD4("(%["next2"])", %%mm3) /* next2[x] */\
00098             "movq      %%mm3, %%mm4 \n\t"\
00099             "paddw     %%mm2, %%mm3 \n\t"\
00100             "psraw     $1,    %%mm3 \n\t" /* d = (prev2[x] + next2[x])>>1 */\
00101             "movq      %%mm0, %[tmp0] \n\t" /* c */\
00102             "movq      %%mm3, %[tmp1] \n\t" /* d */\
00103             "movq      %%mm1, %[tmp2] \n\t" /* e */\
00104             "psubw     %%mm4, %%mm2 \n\t"\
00105             PABS(      %%mm4, %%mm2) /* temporal_diff0 */\
00106             LOAD4("(%[prev],%[mrefs])", %%mm3) /* prev[x-refs] */\
00107             LOAD4("(%[prev],%[prefs])", %%mm4) /* prev[x+refs] */\
00108             "psubw     %%mm0, %%mm3 \n\t"\
00109             "psubw     %%mm1, %%mm4 \n\t"\
00110             PABS(      %%mm5, %%mm3)\
00111             PABS(      %%mm5, %%mm4)\
00112             "paddw     %%mm4, %%mm3 \n\t" /* temporal_diff1 */\
00113             "psrlw     $1,    %%mm2 \n\t"\
00114             "psrlw     $1,    %%mm3 \n\t"\
00115             "pmaxsw    %%mm3, %%mm2 \n\t"\
00116             LOAD4("(%[next],%[mrefs])", %%mm3) /* next[x-refs] */\
00117             LOAD4("(%[next],%[prefs])", %%mm4) /* next[x+refs] */\
00118             "psubw     %%mm0, %%mm3 \n\t"\
00119             "psubw     %%mm1, %%mm4 \n\t"\
00120             PABS(      %%mm5, %%mm3)\
00121             PABS(      %%mm5, %%mm4)\
00122             "paddw     %%mm4, %%mm3 \n\t" /* temporal_diff2 */\
00123             "psrlw     $1,    %%mm3 \n\t"\
00124             "pmaxsw    %%mm3, %%mm2 \n\t"\
00125             "movq      %%mm2, %[tmp3] \n\t" /* diff */\
00126 \
00127             "paddw     %%mm0, %%mm1 \n\t"\
00128             "paddw     %%mm0, %%mm0 \n\t"\
00129             "psubw     %%mm1, %%mm0 \n\t"\
00130             "psrlw     $1,    %%mm1 \n\t" /* spatial_pred */\
00131             PABS(      %%mm2, %%mm0)      /* ABS(c-e) */\
00132 \
00133             "movq -1(%[cur],%[mrefs]), %%mm2 \n\t" /* cur[x-refs-1] */\
00134             "movq -1(%[cur],%[prefs]), %%mm3 \n\t" /* cur[x+refs-1] */\
00135             "movq      %%mm2, %%mm4 \n\t"\
00136             "psubusb   %%mm3, %%mm2 \n\t"\
00137             "psubusb   %%mm4, %%mm3 \n\t"\
00138             "pmaxub    %%mm3, %%mm2 \n\t"\
00139             "pshufw $9,%%mm2, %%mm3 \n\t"\
00140             "punpcklbw %%mm7, %%mm2 \n\t" /* ABS(cur[x-refs-1] - cur[x+refs-1]) */\
00141             "punpcklbw %%mm7, %%mm3 \n\t" /* ABS(cur[x-refs+1] - cur[x+refs+1]) */\
00142             "paddw     %%mm2, %%mm0 \n\t"\
00143             "paddw     %%mm3, %%mm0 \n\t"\
00144             "psubw    %[pw1], %%mm0 \n\t" /* spatial_score */\
00145 \
00146             CHECK(-2,0)\
00147             CHECK1\
00148             CHECK(-3,1)\
00149             CHECK2\
00150             CHECK(0,-2)\
00151             CHECK1\
00152             CHECK(1,-3)\
00153             CHECK2\
00154 \
00155             /* if(p->mode<2) ... */\
00156             "movq    %[tmp3], %%mm6 \n\t" /* diff */\
00157             "cmp       $2, %[mode] \n\t"\
00158             "jge       1f \n\t"\
00159             LOAD4("(%["prev2"],%[mrefs],2)", %%mm2) /* prev2[x-2*refs] */\
00160             LOAD4("(%["next2"],%[mrefs],2)", %%mm4) /* next2[x-2*refs] */\
00161             LOAD4("(%["prev2"],%[prefs],2)", %%mm3) /* prev2[x+2*refs] */\
00162             LOAD4("(%["next2"],%[prefs],2)", %%mm5) /* next2[x+2*refs] */\
00163             "paddw     %%mm4, %%mm2 \n\t"\
00164             "paddw     %%mm5, %%mm3 \n\t"\
00165             "psrlw     $1,    %%mm2 \n\t" /* b */\
00166             "psrlw     $1,    %%mm3 \n\t" /* f */\
00167             "movq    %[tmp0], %%mm4 \n\t" /* c */\
00168             "movq    %[tmp1], %%mm5 \n\t" /* d */\
00169             "movq    %[tmp2], %%mm7 \n\t" /* e */\
00170             "psubw     %%mm4, %%mm2 \n\t" /* b-c */\
00171             "psubw     %%mm7, %%mm3 \n\t" /* f-e */\
00172             "movq      %%mm5, %%mm0 \n\t"\
00173             "psubw     %%mm4, %%mm5 \n\t" /* d-c */\
00174             "psubw     %%mm7, %%mm0 \n\t" /* d-e */\
00175             "movq      %%mm2, %%mm4 \n\t"\
00176             "pminsw    %%mm3, %%mm2 \n\t"\
00177             "pmaxsw    %%mm4, %%mm3 \n\t"\
00178             "pmaxsw    %%mm5, %%mm2 \n\t"\
00179             "pminsw    %%mm5, %%mm3 \n\t"\
00180             "pmaxsw    %%mm0, %%mm2 \n\t" /* max */\
00181             "pminsw    %%mm0, %%mm3 \n\t" /* min */\
00182             "pxor      %%mm4, %%mm4 \n\t"\
00183             "pmaxsw    %%mm3, %%mm6 \n\t"\
00184             "psubw     %%mm2, %%mm4 \n\t" /* -max */\
00185             "pmaxsw    %%mm4, %%mm6 \n\t" /* diff= MAX3(diff, min, -max); */\
00186             "1: \n\t"\
00187 \
00188             "movq    %[tmp1], %%mm2 \n\t" /* d */\
00189             "movq      %%mm2, %%mm3 \n\t"\
00190             "psubw     %%mm6, %%mm2 \n\t" /* d-diff */\
00191             "paddw     %%mm6, %%mm3 \n\t" /* d+diff */\
00192             "pmaxsw    %%mm2, %%mm1 \n\t"\
00193             "pminsw    %%mm3, %%mm1 \n\t" /* d = clip(spatial_pred, d-diff, d+diff); */\
00194             "packuswb  %%mm1, %%mm1 \n\t"\
00195 \
00196             :[tmp0]"=m"(tmp0),\
00197              [tmp1]"=m"(tmp1),\
00198              [tmp2]"=m"(tmp2),\
00199              [tmp3]"=m"(tmp3)\
00200             :[prev] "r"(prev),\
00201              [cur]  "r"(cur),\
00202              [next] "r"(next),\
00203              [prefs]"r"((x86_reg)refs),\
00204              [mrefs]"r"((x86_reg)-refs),\
00205              [pw1]  "m"(pw_1),\
00206              [pb1]  "m"(pb_1),\
00207              [mode] "g"(mode)\
00208         );\
00209         __asm__ volatile("movd %%mm1, %0" :"=m"(*dst));\
00210         dst += 4;\
00211         prev+= 4;\
00212         cur += 4;\
00213         next+= 4;\
00214     }
00215 
/* Same operand choice as the C version: parity ? (prev, cur) : (cur, next). */
00216     if(parity){
00217 #define prev2 "prev"
00218 #define next2 "cur"
00219         FILTER
00220 #undef prev2
00221 #undef next2
00222     }else{
00223 #define prev2 "cur"
00224 #define next2 "next"
00225         FILTER
00226 #undef prev2
00227 #undef next2
00228     }
00229 }
/* Remove the asm helper macros so they are not visible to the code below. */
00230 #undef LOAD4
00231 #undef PABS
00232 #undef CHECK
00233 #undef CHECK1
00234 #undef CHECK2
00235 #undef FILTER
00236 
00237 #endif
00238 
00239 static void yadif_filter_line_c(struct vf_priv_s *p, uint8_t *dst, uint8_t *prev, uint8_t *cur, uint8_t *next, int w, int refs, int parity){
00240     int x;
00241     uint8_t *prev2= parity ? prev : cur ;
00242     uint8_t *next2= parity ? cur  : next;
00243     for(x=0; x<w; x++){
00244         int c= cur[-refs];
00245         int d= (prev2[0] + next2[0])>>1;
00246         int e= cur[+refs];
00247         int temporal_diff0= FFABS(prev2[0] - next2[0]);
00248         int temporal_diff1=( FFABS(prev[-refs] - c) + FFABS(prev[+refs] - e) )>>1;
00249         int temporal_diff2=( FFABS(next[-refs] - c) + FFABS(next[+refs] - e) )>>1;
00250         int diff= FFMAX3(temporal_diff0>>1, temporal_diff1, temporal_diff2);
00251         int spatial_pred= (c+e)>>1;
00252         int spatial_score= FFABS(cur[-refs-1] - cur[+refs-1]) + FFABS(c-e)
00253                          + FFABS(cur[-refs+1] - cur[+refs+1]) - 1;
00254 
00255 #define CHECK(j)\
00256     {   int score= FFABS(cur[-refs-1+j] - cur[+refs-1-j])\
00257                  + FFABS(cur[-refs  +j] - cur[+refs  -j])\
00258                  + FFABS(cur[-refs+1+j] - cur[+refs+1-j]);\
00259         if(score < spatial_score){\
00260             spatial_score= score;\
00261             spatial_pred= (cur[-refs  +j] + cur[+refs  -j])>>1;\
00262 
00263         CHECK(-1) CHECK(-2) }} }}
00264         CHECK( 1) CHECK( 2) }} }}
00265 
00266         if(p->mode<2){
00267             int b= (prev2[-2*refs] + next2[-2*refs])>>1;
00268             int f= (prev2[+2*refs] + next2[+2*refs])>>1;
00269 #if 0
00270             int a= cur[-3*refs];
00271             int g= cur[+3*refs];
00272             int max= FFMAX3(d-e, d-c, FFMIN3(FFMAX(b-c,f-e),FFMAX(b-c,b-a),FFMAX(f-g,f-e)) );
00273             int min= FFMIN3(d-e, d-c, FFMAX3(FFMIN(b-c,f-e),FFMIN(b-c,b-a),FFMIN(f-g,f-e)) );
00274 #else
00275             int max= FFMAX3(d-e, d-c, FFMIN(b-c, f-e));
00276             int min= FFMIN3(d-e, d-c, FFMAX(b-c, f-e));
00277 #endif
00278 
00279             diff= FFMAX3(diff, min, -max);
00280         }
00281 
00282         if(spatial_pred > d + diff)
00283            spatial_pred = d + diff;
00284         else if(spatial_pred < d - diff)
00285            spatial_pred = d - diff;
00286 
00287         dst[0] = spatial_pred;
00288 
00289         dst++;
00290         cur++;
00291         prev++;
00292         next++;
00293         prev2++;
00294         next2++;
00295     }
00296 }
00297 

Generated on Tue May 25 08:05:00 2010 for VLC by  doxygen 1.5.6