00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
/* Compile the inline-assembly line filter only when the build defines
 * CAN_COMPILE_SSE2 and the compiler reports GCC 3.1 or newer. */
00022 #if defined(CAN_COMPILE_SSE2) && ((__GNUC__ > 3) || (__GNUC__ == 3 && __GNUC_MINOR__ > 0))
00023
/* Marks that the assembly filter in this section was compiled in.
 * NOTE(review): presumably tested by the code that selects a line filter;
 * that code is not visible here. */
00024 #define HAVE_YADIF_SSE2
00025
/* LOAD4(mem, dst): asm fragment that loads 4 bytes from the address string
 * `mem` into MMX register `dst` and interleaves them with the bytes of mm7.
 * With mm7 == 0 (the filter clears it at the start of every iteration) this
 * widens the 4 unsigned bytes to 4 16-bit words. */
00026 #define LOAD4(mem,dst) \
00027 "movd "mem", "#dst" \n\t"\
00028 "punpcklbw %%mm7, "#dst" \n\t"
00029
/* PABS(tmp, dst): asm fragment computing the per-word absolute value of
 * `dst` as max(dst, 0 - dst) on signed 16-bit words. `tmp` is overwritten
 * (it ends up holding the negated input). */
00030 #define PABS(tmp,dst) \
00031 "pxor "#tmp", "#tmp" \n\t"\
00032 "psubw "#dst", "#tmp" \n\t"\
00033 "pmaxsw "#tmp", "#dst" \n\t"
00034
/* CHECK(pj, mj): asm fragment that evaluates one diagonal direction for four
 * pixels at once. `pj` is the byte offset applied to the row above
 * (cur + mrefs) and `mj` the offset applied to the row below (cur + prefs).
 * The call sites use pj = -1 + j and mj = -1 - j for j = -1, -2, 1, 2, which
 * lines up with the three-tap window of the CHECK(j) macro in the C version.
 *
 * Results:  mm5 = candidate prediction, (above + below) >> 1, as 4 words
 *           mm2 = score, the sum of three neighbouring absolute differences
 * Clobbers: mm3, mm4.  Requires mm7 == 0 and the [pb1] operand. */
00035 #define CHECK(pj,mj) \
00036 "movq "#pj"(%[cur],%[mrefs]), %%mm2 \n\t" /* 8 bytes of the row above, from offset pj */\
00037 "movq "#mj"(%[cur],%[prefs]), %%mm3 \n\t" /* 8 bytes of the row below, from offset mj */\
00038 "movq %%mm2, %%mm4 \n\t"\
00039 "movq %%mm2, %%mm5 \n\t"\
00040 "pxor %%mm3, %%mm4 \n\t"\
00041 "pavgb %%mm3, %%mm5 \n\t" /* byte average, rounded up */\
00042 "pand %[pb1], %%mm4 \n\t" /* low bit of (above ^ below) = the rounding bit */\
00043 "psubusb %%mm4, %%mm5 \n\t" /* remove the rounding: (above + below) >> 1 */\
00044 "psrlq $8, %%mm5 \n\t" /* drop the first byte: keep the centre tap */\
00045 "punpcklbw %%mm7, %%mm5 \n\t" /* widen the low 4 bytes to words */\
00046 "movq %%mm2, %%mm4 \n\t"\
00047 "psubusb %%mm3, %%mm2 \n\t"\
00048 "psubusb %%mm4, %%mm3 \n\t"\
00049 "pmaxub %%mm3, %%mm2 \n\t" /* per-byte |above - below| */\
00050 "movq %%mm2, %%mm3 \n\t"\
00051 "movq %%mm2, %%mm4 \n\t" \
00052 "psrlq $8, %%mm3 \n\t" /* differences shifted by one byte */\
00053 "psrlq $16, %%mm4 \n\t" /* differences shifted by two bytes */\
00054 "punpcklbw %%mm7, %%mm2 \n\t"\
00055 "punpcklbw %%mm7, %%mm3 \n\t"\
00056 "punpcklbw %%mm7, %%mm4 \n\t"\
00057 "paddw %%mm3, %%mm2 \n\t"\
00058 "paddw %%mm4, %%mm2 \n\t" /* mm2 = sum of the three taps = score */
00059
/* CHECK1: asm fragment that accepts the direction just evaluated by CHECK
 * wherever its score (mm2) is lower than the best score so far (mm0).
 * For each of the four words:
 *   mm0 = min(mm0, mm2)             best score
 *   mm1 = improved ? mm5 : mm1      best prediction
 *   mm6 = improved ? 0xFFFF : 0     mask, consumed by a following CHECK2
 * Clobbers: mm3, mm5. */
00060 #define CHECK1 \
00061 "movq %%mm0, %%mm3 \n\t"\
00062 "pcmpgtw %%mm2, %%mm3 \n\t" /* mask = best score > new score */\
00063 "pminsw %%mm2, %%mm0 \n\t" \
00064 "movq %%mm3, %%mm6 \n\t" /* keep the mask for CHECK2 */\
00065 "pand %%mm3, %%mm5 \n\t"\
00066 "pandn %%mm1, %%mm3 \n\t"\
00067 "por %%mm5, %%mm3 \n\t"\
00068 "movq %%mm3, %%mm1 \n\t"
00069
/* CHECK2: asm fragment that accepts the direction just evaluated by CHECK
 * only where the preceding CHECK1 also improved the score.
 *
 * mm6 holds CHECK1's mask (0xFFFF where it improved, 0 elsewhere). Adding 1
 * and shifting left by 14 turns that into a penalty of 0 or 0x4000, which is
 * added with signed saturation to the new score in mm2. A word whose
 * first-step direction was rejected is therefore effectively disqualified.
 * This mirrors the C version, where the second-step check is nested inside
 * the first step's "score improved" branch.
 *
 * The definition must continue onto the instruction lines: without the
 * trailing backslash CHECK2 expands to nothing and the strings below become
 * stray tokens at file scope.
 *
 * Updates mm0 (best score) and mm1 (best prediction) like CHECK1.
 * Clobbers: mm2, mm3, mm5, mm6.  Requires the [pw1] operand. */
#define CHECK2 \
    "paddw %[pw1], %%mm6 \n\t"\
    "psllw $14, %%mm6 \n\t"\
    "paddsw %%mm6, %%mm2 \n\t"\
    "movq %%mm0, %%mm3 \n\t"\
    "pcmpgtw %%mm2, %%mm3 \n\t"\
    "pminsw %%mm2, %%mm0 \n\t"\
    "pand %%mm3, %%mm5 \n\t"\
    "pandn %%mm1, %%mm3 \n\t"\
    "por %%mm5, %%mm3 \n\t"\
    "movq %%mm3, %%mm1 \n\t"
00082
/*
 * Inline-assembly counterpart of yadif_filter_line_c(): filters one line,
 * four pixels per loop iteration, using the 64-bit MMX registers.
 *
 * p      - only p->mode is read; mode >= 2 skips the extra check that uses
 *          the rows two lines above and below
 * dst    - output line; each iteration stores 4 bytes
 * prev, cur, next - the current position in three fields; rows are addressed
 *          at -refs and +refs (and at -2*refs and +2*refs when mode < 2)
 * w      - pixel count; the loop advances by 4, so when w is not a multiple
 *          of 4 the last iteration still reads and writes a full group
 * refs   - byte distance between rows
 * parity - selects which two of prev/cur/next act as the "prev2"/"next2"
 *          pair (prev and cur when non-zero, cur and next otherwise)
 *
 * The direction search issues 8-byte loads that start up to 3 bytes before
 * the current position in the rows above and below.
 *
 * NOTE(review): the first asm statement declares no clobbers, and its result
 * is handed to the second asm statement in mm1; this relies on the compiler
 * not touching the MMX registers in between.
 * NOTE(review): no emms is executed here; presumably the caller is expected
 * to do so before using x87 floating point - confirm.
 */
00083 static void yadif_filter_line_mmx2(struct vf_priv_s *p, uint8_t *dst, uint8_t *prev, uint8_t *cur, uint8_t *next, int w, int refs, int parity){
00084 static const uint64_t pw_1 = 0x0001000100010001ULL; /* four 16-bit words, each 1 */
00085 static const uint64_t pb_1 = 0x0101010101010101ULL; /* eight bytes, each 1 */
00086 const int mode = p->mode;
00087 uint64_t tmp0, tmp1, tmp2, tmp3; /* spill slots for c, d, e and diff */
00088 int x;
00089
00090 #define FILTER\
00091 for(x=0; x<w; x+=4){\
00092 __asm__ volatile(\
00093 "pxor %%mm7, %%mm7 \n\t" /* mm7 = 0, needed by LOAD4 and CHECK */\
00094 LOAD4("(%[cur],%[mrefs])", %%mm0) /* c = cur[-refs] */\
00095 LOAD4("(%[cur],%[prefs])", %%mm1) /* e = cur[+refs] */\
00096 LOAD4("(%["prev2"])", %%mm2) /* prev2[0] */\
00097 LOAD4("(%["next2"])", %%mm3) /* next2[0] */\
00098 "movq %%mm3, %%mm4 \n\t"\
00099 "paddw %%mm2, %%mm3 \n\t"\
00100 "psraw $1, %%mm3 \n\t" /* d = (prev2[0] + next2[0]) >> 1 */\
00101 "movq %%mm0, %[tmp0] \n\t" /* save c */\
00102 "movq %%mm3, %[tmp1] \n\t" /* save d */\
00103 "movq %%mm1, %[tmp2] \n\t" /* save e */\
00104 "psubw %%mm4, %%mm2 \n\t"\
00105 PABS( %%mm4, %%mm2) /* temporal_diff0 = |prev2[0] - next2[0]| */\
00106 LOAD4("(%[prev],%[mrefs])", %%mm3) /* prev[-refs] */\
00107 LOAD4("(%[prev],%[prefs])", %%mm4) /* prev[+refs] */\
00108 "psubw %%mm0, %%mm3 \n\t"\
00109 "psubw %%mm1, %%mm4 \n\t"\
00110 PABS( %%mm5, %%mm3)\
00111 PABS( %%mm5, %%mm4)\
00112 "paddw %%mm4, %%mm3 \n\t" /* |prev[-refs] - c| + |prev[+refs] - e| */\
00113 "psrlw $1, %%mm2 \n\t" /* temporal_diff0 >> 1 */\
00114 "psrlw $1, %%mm3 \n\t" /* temporal_diff1 */\
00115 "pmaxsw %%mm3, %%mm2 \n\t"\
00116 LOAD4("(%[next],%[mrefs])", %%mm3) /* next[-refs] */\
00117 LOAD4("(%[next],%[prefs])", %%mm4) /* next[+refs] */\
00118 "psubw %%mm0, %%mm3 \n\t"\
00119 "psubw %%mm1, %%mm4 \n\t"\
00120 PABS( %%mm5, %%mm3)\
00121 PABS( %%mm5, %%mm4)\
00122 "paddw %%mm4, %%mm3 \n\t" \
00123 "psrlw $1, %%mm3 \n\t" /* temporal_diff2 */\
00124 "pmaxsw %%mm3, %%mm2 \n\t" /* diff = max of the three temporal terms */\
00125 "movq %%mm2, %[tmp3] \n\t" /* save diff */\
00126 /* initial spatial prediction and score for the vertical direction */\
00127 "paddw %%mm0, %%mm1 \n\t"\
00128 "paddw %%mm0, %%mm0 \n\t"\
00129 "psubw %%mm1, %%mm0 \n\t" /* 2c - (c + e) = c - e */\
00130 "psrlw $1, %%mm1 \n\t" /* spatial_pred = (c + e) >> 1 */\
00131 PABS( %%mm2, %%mm0) /* |c - e| */\
00132 \
00133 "movq -1(%[cur],%[mrefs]), %%mm2 \n\t" /* 8 bytes from cur[-refs-1] */\
00134 "movq -1(%[cur],%[prefs]), %%mm3 \n\t" /* 8 bytes from cur[+refs-1] */\
00135 "movq %%mm2, %%mm4 \n\t"\
00136 "psubusb %%mm3, %%mm2 \n\t"\
00137 "psubusb %%mm4, %%mm3 \n\t"\
00138 "pmaxub %%mm3, %%mm2 \n\t" /* per-byte |above - below| */\
00139 "pshufw $9,%%mm2, %%mm3 \n\t" /* low half of mm3 = bytes 2..5: the +1 neighbours */\
00140 "punpcklbw %%mm7, %%mm2 \n\t" /* low 4 bytes of mm2: the -1 neighbours */\
00141 "punpcklbw %%mm7, %%mm3 \n\t" \
00142 "paddw %%mm2, %%mm0 \n\t"\
00143 "paddw %%mm3, %%mm0 \n\t"\
00144 "psubw %[pw1], %%mm0 \n\t" /* spatial_score = sum of the three terms - 1 */\
00145 /* direction search: mm0 = best score, mm1 = best prediction */\
00146 CHECK(-2,0) /* j = -1 */\
00147 CHECK1\
00148 CHECK(-3,1) /* j = -2, counted only where j = -1 improved */\
00149 CHECK2\
00150 CHECK(0,-2) /* j = +1 */\
00151 CHECK1\
00152 CHECK(1,-3) /* j = +2, counted only where j = +1 improved */\
00153 CHECK2\
00154 \
00155 /* widen diff using the rows two lines away, unless mode >= 2 */\
00156 "movq %[tmp3], %%mm6 \n\t" /* mm6 = diff */\
00157 "cmp $2, %[mode] \n\t"\
00158 "jge 1f \n\t" /* mode >= 2: skip to label 1 */\
00159 LOAD4("(%["prev2"],%[mrefs],2)", %%mm2) /* prev2[-2*refs] */\
00160 LOAD4("(%["next2"],%[mrefs],2)", %%mm4) /* next2[-2*refs] */\
00161 LOAD4("(%["prev2"],%[prefs],2)", %%mm3) /* prev2[+2*refs] */\
00162 LOAD4("(%["next2"],%[prefs],2)", %%mm5) /* next2[+2*refs] */\
00163 "paddw %%mm4, %%mm2 \n\t"\
00164 "paddw %%mm5, %%mm3 \n\t"\
00165 "psrlw $1, %%mm2 \n\t" /* b */\
00166 "psrlw $1, %%mm3 \n\t" /* f */\
00167 "movq %[tmp0], %%mm4 \n\t" /* c */\
00168 "movq %[tmp1], %%mm5 \n\t" /* d */\
00169 "movq %[tmp2], %%mm7 \n\t" /* e (mm7 is no longer zero from here on) */\
00170 "psubw %%mm4, %%mm2 \n\t" /* b - c */\
00171 "psubw %%mm7, %%mm3 \n\t" /* f - e */\
00172 "movq %%mm5, %%mm0 \n\t"\
00173 "psubw %%mm4, %%mm5 \n\t" /* d - c */\
00174 "psubw %%mm7, %%mm0 \n\t" /* d - e */\
00175 "movq %%mm2, %%mm4 \n\t"\
00176 "pminsw %%mm3, %%mm2 \n\t" /* min(b - c, f - e) */\
00177 "pmaxsw %%mm4, %%mm3 \n\t" /* max(b - c, f - e) */\
00178 "pmaxsw %%mm5, %%mm2 \n\t"\
00179 "pminsw %%mm5, %%mm3 \n\t"\
00180 "pmaxsw %%mm0, %%mm2 \n\t" /* max = the FFMAX3 term of the C version */\
00181 "pminsw %%mm0, %%mm3 \n\t" /* min = the FFMIN3 term of the C version */\
00182 "pxor %%mm4, %%mm4 \n\t"\
00183 "pmaxsw %%mm3, %%mm6 \n\t"\
00184 "psubw %%mm2, %%mm4 \n\t" /* -max */\
00185 "pmaxsw %%mm4, %%mm6 \n\t" /* diff = max(diff, min, -max) */\
00186 "1: \n\t"\
00187 /* clamp the spatial prediction to [d - diff, d + diff] */\
00188 "movq %[tmp1], %%mm2 \n\t" /* d */\
00189 "movq %%mm2, %%mm3 \n\t"\
00190 "psubw %%mm6, %%mm2 \n\t" /* d - diff */\
00191 "paddw %%mm6, %%mm3 \n\t" /* d + diff */\
00192 "pmaxsw %%mm2, %%mm1 \n\t"\
00193 "pminsw %%mm3, %%mm1 \n\t" \
00194 "packuswb %%mm1, %%mm1 \n\t" /* words back to bytes, unsigned saturation */\
00195 \
00196 :[tmp0]"=m"(tmp0),\
00197 [tmp1]"=m"(tmp1),\
00198 [tmp2]"=m"(tmp2),\
00199 [tmp3]"=m"(tmp3)\
00200 :[prev] "r"(prev),\
00201 [cur] "r"(cur),\
00202 [next] "r"(next),\
00203 [prefs]"r"((x86_reg)refs),\
00204 [mrefs]"r"((x86_reg)-refs),\
00205 [pw1] "m"(pw_1),\
00206 [pb1] "m"(pb_1),\
00207 [mode] "g"(mode)\
00208 );\
00209 __asm__ volatile("movd %%mm1, %0" :"=m"(*dst)); /* store the 4 result bytes left in mm1 */\
00210 dst += 4;\
00211 prev+= 4;\
00212 cur += 4;\
00213 next+= 4;\
00214 }
00215
00216 if(parity){
00217 #define prev2 "prev" /* operand names spliced into the asm text by FILTER */
00218 #define next2 "cur"
00219 FILTER
00220 #undef prev2
00221 #undef next2
00222 }else{
00223 #define prev2 "cur"
00224 #define next2 "next"
00225 FILTER
00226 #undef prev2
00227 #undef next2
00228 }
00229 }
00230 #undef LOAD4
00231 #undef PABS
00232 #undef CHECK
00233 #undef CHECK1
00234 #undef CHECK2
00235 #undef FILTER
00236
00237 #endif
00238
00239 static void yadif_filter_line_c(struct vf_priv_s *p, uint8_t *dst, uint8_t *prev, uint8_t *cur, uint8_t *next, int w, int refs, int parity){
00240 int x;
00241 uint8_t *prev2= parity ? prev : cur ;
00242 uint8_t *next2= parity ? cur : next;
00243 for(x=0; x<w; x++){
00244 int c= cur[-refs];
00245 int d= (prev2[0] + next2[0])>>1;
00246 int e= cur[+refs];
00247 int temporal_diff0= FFABS(prev2[0] - next2[0]);
00248 int temporal_diff1=( FFABS(prev[-refs] - c) + FFABS(prev[+refs] - e) )>>1;
00249 int temporal_diff2=( FFABS(next[-refs] - c) + FFABS(next[+refs] - e) )>>1;
00250 int diff= FFMAX3(temporal_diff0>>1, temporal_diff1, temporal_diff2);
00251 int spatial_pred= (c+e)>>1;
00252 int spatial_score= FFABS(cur[-refs-1] - cur[+refs-1]) + FFABS(c-e)
00253 + FFABS(cur[-refs+1] - cur[+refs+1]) - 1;
00254
00255 #define CHECK(j)\
00256 { int score= FFABS(cur[-refs-1+j] - cur[+refs-1-j])\
00257 + FFABS(cur[-refs +j] - cur[+refs -j])\
00258 + FFABS(cur[-refs+1+j] - cur[+refs+1-j]);\
00259 if(score < spatial_score){\
00260 spatial_score= score;\
00261 spatial_pred= (cur[-refs +j] + cur[+refs -j])>>1;\
00262
00263 CHECK(-1) CHECK(-2) }} }}
00264 CHECK( 1) CHECK( 2) }} }}
00265
00266 if(p->mode<2){
00267 int b= (prev2[-2*refs] + next2[-2*refs])>>1;
00268 int f= (prev2[+2*refs] + next2[+2*refs])>>1;
00269 #if 0
00270 int a= cur[-3*refs];
00271 int g= cur[+3*refs];
00272 int max= FFMAX3(d-e, d-c, FFMIN3(FFMAX(b-c,f-e),FFMAX(b-c,b-a),FFMAX(f-g,f-e)) );
00273 int min= FFMIN3(d-e, d-c, FFMAX3(FFMIN(b-c,f-e),FFMIN(b-c,b-a),FFMIN(f-g,f-e)) );
00274 #else
00275 int max= FFMAX3(d-e, d-c, FFMIN(b-c, f-e));
00276 int min= FFMIN3(d-e, d-c, FFMAX(b-c, f-e));
00277 #endif
00278
00279 diff= FFMAX3(diff, min, -max);
00280 }
00281
00282 if(spatial_pred > d + diff)
00283 spatial_pred = d + diff;
00284 else if(spatial_pred < d - diff)
00285 spatial_pred = d - diff;
00286
00287 dst[0] = spatial_pred;
00288
00289 dst++;
00290 cur++;
00291 prev++;
00292 next++;
00293 prev2++;
00294 next2++;
00295 }
00296 }
00297