/src/libvpx/vp8/common/x86/idct_blk_sse2.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
3 | | * |
4 | | * Use of this source code is governed by a BSD-style license |
5 | | * that can be found in the LICENSE file in the root of the source |
6 | | * tree. An additional intellectual property rights grant can be found |
7 | | * in the file PATENTS. All contributing project authors may |
8 | | * be found in the AUTHORS file in the root of the source tree. |
9 | | */ |
10 | | |
11 | | #include "vpx_config.h" |
12 | | #include "vp8_rtcd.h" |
13 | | |
/* Kernels called by the block routines below; their definitions are not in
 * this file. NOTE(review): judging only by the "2x" in their names and by the
 * call sites (coefficients advance 32 shorts and dst advances 8 bytes between
 * calls), each call presumably handles two side-by-side 4x4 blocks -- confirm
 * against the kernel definitions. */
14 | | void vp8_idct_dequant_0_2x_sse2(short *q, short *dq, unsigned char *dst, |
15 | | int dst_stride); |
16 | | void vp8_idct_dequant_full_2x_sse2(short *q, short *dq, unsigned char *dst, |
17 | | int dst_stride); |
18 | | |
/*
 * Runs the dequant + IDCT + add kernels over one luma region, visited as
 * 4 rows with a left and a right kernel call per row.
 *
 *   q      coefficient buffer. Each row consumes 64 shorts; the right-hand
 *          call of a row starts 32 shorts after the left-hand one.
 *   dq     dequantization factors, forwarded unchanged to the kernels.
 *   dst    destination pixels. The right-hand call starts 8 bytes after the
 *          left-hand one; consecutive rows are 4 * stride bytes apart.
 *   stride forwarded to the kernels and used for the row step above.
 *   eobs   16 bytes, two consecutive bytes per kernel call (presumably the
 *          per-block end-of-block counts -- confirm with the caller).
 *
 * Kernel selection for each pair of eobs bytes:
 *   - both bytes zero              -> nothing is called,
 *   - any bit other than bit 0 set -> vp8_idct_dequant_full_2x_sse2,
 *   - otherwise                    -> vp8_idct_dequant_0_2x_sse2.
 *
 * The two bytes are read individually and OR-ed together rather than loaded
 * through a (short *) cast. The decisions are the same as testing the 16-bit
 * value against 0 and 0xfefe, but the code no longer type-puns a char buffer
 * or assumes that eobs is 2-byte aligned.
 */
void vp8_dequant_idct_add_y_block_sse2(short *q, short *dq, unsigned char *dst,
                                       int stride, char *eobs) {
  int row;

  for (row = 0; row < 4; ++row) {
    int pair;

    for (pair = 0; pair < 2; ++pair) {
      /* Union of the bits of the two bytes that belong to this call. */
      const int eob_bits = eobs[2 * pair] | eobs[2 * pair + 1];

      if (!eob_bits) continue;

      if (eob_bits & 0xfe) {
        vp8_idct_dequant_full_2x_sse2(q + 32 * pair, dq, dst + 8 * pair,
                                      stride);
      } else {
        vp8_idct_dequant_0_2x_sse2(q + 32 * pair, dq, dst + 8 * pair, stride);
      }
    }

    q += 64;
    dst += stride * 4;
    eobs += 4;
  }
}
43 | | |
/*
 * Runs the dequant + IDCT + add kernels over the two chroma destinations.
 * Four kernel calls are considered in order: upper and lower half of dst_u,
 * then upper and lower half of dst_v.
 *
 *   q      coefficient buffer; call k (0..3) reads from q + 32 * k.
 *   dq     dequantization factors, forwarded unchanged to the kernels.
 *   dst_u  first destination; its lower half starts 4 * stride bytes in.
 *   dst_v  second destination, laid out the same way.
 *   stride forwarded to the kernels and used for the half-block step above.
 *   eobs   8 bytes, two consecutive bytes per kernel call (presumably the
 *          per-block end-of-block counts -- confirm with the caller).
 *
 * Kernel selection for each pair of eobs bytes:
 *   - both bytes zero              -> nothing is called,
 *   - any bit other than bit 0 set -> vp8_idct_dequant_full_2x_sse2,
 *   - otherwise                    -> vp8_idct_dequant_0_2x_sse2.
 *
 * The two bytes are read individually and OR-ed together rather than loaded
 * through a (short *) cast. The decisions are the same as testing the 16-bit
 * value against 0 and 0xfefe, but the code no longer type-puns a char buffer
 * or assumes that eobs is 2-byte aligned.
 */
void vp8_dequant_idct_add_uv_block_sse2(short *q, short *dq,
                                        unsigned char *dst_u,
                                        unsigned char *dst_v, int stride,
                                        char *eobs) {
  int step;

  for (step = 0; step < 4; ++step) {
    /* Union of the bits of the two bytes that belong to this call. */
    const int eob_bits = eobs[2 * step] | eobs[2 * step + 1];

    if (eob_bits) {
      /* Steps 0-1 write to dst_u, steps 2-3 to dst_v. */
      unsigned char *dst = (step < 2) ? dst_u : dst_v;

      /* Odd steps address the lower half of the current destination. */
      if (step & 1) dst += stride * 4;

      if (eob_bits & 0xfe) {
        vp8_idct_dequant_full_2x_sse2(q + 32 * step, dq, dst, stride);
      } else {
        vp8_idct_dequant_0_2x_sse2(q + 32 * step, dq, dst, stride);
      }
    }
  }
}
84 | 810M | } |