/src/libvpx/vp8/common/x86/idct_blk_sse2.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) 2010 The WebM project authors. All Rights Reserved. |
3 | | * |
4 | | * Use of this source code is governed by a BSD-style license |
5 | | * that can be found in the LICENSE file in the root of the source |
6 | | * tree. An additional intellectual property rights grant can be found |
7 | | * in the file PATENTS. All contributing project authors may |
8 | | * be found in the AUTHORS file in the root of the source tree. |
9 | | */ |
10 | | |
11 | | #include "vpx_config.h" |
12 | | #include "vp8_rtcd.h" |
13 | | |
14 | | void vp8_idct_dequant_0_2x_sse2(short *q, short *dq, unsigned char *dst, |
15 | | int dst_stride); |
16 | | void vp8_idct_dequant_full_2x_sse2(short *q, short *dq, unsigned char *dst, |
17 | | int dst_stride); |
18 | | |
19 | | void vp8_dequant_idct_add_y_block_sse2(short *q, short *dq, unsigned char *dst, |
20 | 290M | int stride, char *eobs) { |
21 | 290M | int i; |
22 | | |
23 | 1.45G | for (i = 0; i < 4; ++i) { |
24 | 1.16G | if (((short *)(eobs))[0]) { |
25 | 881M | if (((short *)(eobs))[0] & 0xfefe) { |
26 | 416M | vp8_idct_dequant_full_2x_sse2(q, dq, dst, stride); |
27 | 464M | } else { |
28 | 464M | vp8_idct_dequant_0_2x_sse2(q, dq, dst, stride); |
29 | 464M | } |
30 | 881M | } |
31 | 1.16G | if (((short *)(eobs))[1]) { |
32 | 917M | if (((short *)(eobs))[1] & 0xfefe) { |
33 | 456M | vp8_idct_dequant_full_2x_sse2(q + 32, dq, dst + 8, stride); |
34 | 460M | } else { |
35 | 460M | vp8_idct_dequant_0_2x_sse2(q + 32, dq, dst + 8, stride); |
36 | 460M | } |
37 | 917M | } |
38 | 1.16G | q += 64; |
39 | 1.16G | dst += stride * 4; |
40 | 1.16G | eobs += 4; |
41 | 1.16G | } |
42 | 290M | } |
43 | | |
44 | | void vp8_dequant_idct_add_uv_block_sse2(short *q, short *dq, |
45 | | unsigned char *dst_u, |
46 | | unsigned char *dst_v, int stride, |
47 | 869M | char *eobs) { |
48 | 869M | if (((short *)(eobs))[0]) { |
49 | 128M | if (((short *)(eobs))[0] & 0xfefe) { |
50 | 52.8M | vp8_idct_dequant_full_2x_sse2(q, dq, dst_u, stride); |
51 | 75.2M | } else { |
52 | 75.2M | vp8_idct_dequant_0_2x_sse2(q, dq, dst_u, stride); |
53 | 75.2M | } |
54 | 128M | } |
55 | 869M | q += 32; |
56 | 869M | dst_u += stride * 4; |
57 | | |
58 | 869M | if (((short *)(eobs))[1]) { |
59 | 136M | if (((short *)(eobs))[1] & 0xfefe) { |
60 | 69.1M | vp8_idct_dequant_full_2x_sse2(q, dq, dst_u, stride); |
61 | 69.1M | } else { |
62 | 67.1M | vp8_idct_dequant_0_2x_sse2(q, dq, dst_u, stride); |
63 | 67.1M | } |
64 | 136M | } |
65 | 869M | q += 32; |
66 | | |
67 | 869M | if (((short *)(eobs))[2]) { |
68 | 112M | if (((short *)(eobs))[2] & 0xfefe) { |
69 | 65.6M | vp8_idct_dequant_full_2x_sse2(q, dq, dst_v, stride); |
70 | 65.6M | } else { |
71 | 47.3M | vp8_idct_dequant_0_2x_sse2(q, dq, dst_v, stride); |
72 | 47.3M | } |
73 | 112M | } |
74 | 869M | q += 32; |
75 | 869M | dst_v += stride * 4; |
76 | | |
77 | 869M | if (((short *)(eobs))[3]) { |
78 | 107M | if (((short *)(eobs))[3] & 0xfefe) { |
79 | 57.7M | vp8_idct_dequant_full_2x_sse2(q, dq, dst_v, stride); |
80 | 57.7M | } else { |
81 | 49.2M | vp8_idct_dequant_0_2x_sse2(q, dq, dst_v, stride); |
82 | 49.2M | } |
83 | 107M | } |
84 | 869M | } |