/work/openh264/codec/common/src/deblocking_common.cpp
Line | Count | Source |
1 | | #include "deblocking_common.h" |
2 | | #include "macros.h" |
3 | | |
4 | | // C code only |
5 | | void DeblockLumaLt4_c (uint8_t* pPix, int32_t iStrideX, int32_t iStrideY, int32_t iAlpha, int32_t iBeta, |
6 | 0 | int8_t* pTc) { |
7 | 0 | for (int32_t i = 0; i < 16; i++) { |
8 | 0 | int32_t iTc0 = pTc[i >> 2]; |
9 | 0 | if (iTc0 >= 0) { |
10 | 0 | int32_t p0 = pPix[-iStrideX]; |
11 | 0 | int32_t p1 = pPix[-2 * iStrideX]; |
12 | 0 | int32_t p2 = pPix[-3 * iStrideX]; |
13 | 0 | int32_t q0 = pPix[0]; |
14 | 0 | int32_t q1 = pPix[iStrideX]; |
15 | 0 | int32_t q2 = pPix[2 * iStrideX]; |
16 | 0 | bool bDetaP0Q0 = WELS_ABS (p0 - q0) < iAlpha; |
17 | 0 | bool bDetaP1P0 = WELS_ABS (p1 - p0) < iBeta; |
18 | 0 | bool bDetaQ1Q0 = WELS_ABS (q1 - q0) < iBeta; |
19 | 0 | int32_t iTc = iTc0; |
20 | 0 | if (bDetaP0Q0 && bDetaP1P0 && bDetaQ1Q0) { |
21 | 0 | bool bDetaP2P0 = WELS_ABS (p2 - p0) < iBeta; |
22 | 0 | bool bDetaQ2Q0 = WELS_ABS (q2 - q0) < iBeta; |
23 | 0 | if (bDetaP2P0) { |
24 | 0 | pPix[-2 * iStrideX] = p1 + WELS_CLIP3 ((p2 + ((p0 + q0 + 1) >> 1) - (p1 * (1 << 1))) >> 1, -iTc0, iTc0); |
25 | 0 | iTc++; |
26 | 0 | } |
27 | 0 | if (bDetaQ2Q0) { |
28 | 0 | pPix[iStrideX] = q1 + WELS_CLIP3 ((q2 + ((p0 + q0 + 1) >> 1) - (q1 * (1 << 1))) >> 1, -iTc0, iTc0); |
29 | 0 | iTc++; |
30 | 0 | } |
31 | 0 | int32_t iDeta = WELS_CLIP3 ((((q0 - p0) * (1 << 2)) + (p1 - q1) + 4) >> 3, -iTc, iTc); |
32 | 0 | pPix[-iStrideX] = WelsClip1 (p0 + iDeta); /* p0' */ |
33 | 0 | pPix[0] = WelsClip1 (q0 - iDeta); /* q0' */ |
34 | 0 | } |
35 | 0 | } |
36 | 0 | pPix += iStrideY; |
37 | 0 | } |
38 | 0 | } |
39 | 0 | void DeblockLumaEq4_c (uint8_t* pPix, int32_t iStrideX, int32_t iStrideY, int32_t iAlpha, int32_t iBeta) { |
40 | 0 | int32_t p0, p1, p2, q0, q1, q2; |
41 | 0 | int32_t iDetaP0Q0; |
42 | 0 | bool bDetaP1P0, bDetaQ1Q0; |
43 | 0 | for (int32_t i = 0; i < 16; i++) { |
44 | 0 | p0 = pPix[-iStrideX]; |
45 | 0 | p1 = pPix[-2 * iStrideX]; |
46 | 0 | p2 = pPix[-3 * iStrideX]; |
47 | 0 | q0 = pPix[0]; |
48 | 0 | q1 = pPix[iStrideX]; |
49 | 0 | q2 = pPix[2 * iStrideX]; |
50 | 0 | iDetaP0Q0 = WELS_ABS (p0 - q0); |
51 | 0 | bDetaP1P0 = WELS_ABS (p1 - p0) < iBeta; |
52 | 0 | bDetaQ1Q0 = WELS_ABS (q1 - q0) < iBeta; |
53 | 0 | if ((iDetaP0Q0 < iAlpha) && bDetaP1P0 && bDetaQ1Q0) { |
54 | 0 | if (iDetaP0Q0 < ((iAlpha >> 2) + 2)) { |
55 | 0 | bool bDetaP2P0 = WELS_ABS (p2 - p0) < iBeta; |
56 | 0 | bool bDetaQ2Q0 = WELS_ABS (q2 - q0) < iBeta; |
57 | 0 | if (bDetaP2P0) { |
58 | 0 | const int32_t p3 = pPix[-4 * iStrideX]; |
59 | 0 | pPix[-iStrideX] = (p2 + (p1 * (1 << 1)) + (p0 * (1 << 1)) + (q0 * (1 << 1)) + q1 + 4) >> 3; //p0 |
60 | 0 | pPix[-2 * iStrideX] = (p2 + p1 + p0 + q0 + 2) >> 2; //p1 |
61 | 0 | pPix[-3 * iStrideX] = ((p3 * (1 << 1)) + p2 + (p2 * (1 << 1)) + p1 + p0 + q0 + 4) >> 3; //p2 |
62 | 0 | } else { |
63 | 0 | pPix[-1 * iStrideX] = ((p1 * (1 << 1)) + p0 + q1 + 2) >> 2; //p0 |
64 | 0 | } |
65 | 0 | if (bDetaQ2Q0) { |
66 | 0 | const int32_t q3 = pPix[3 * iStrideX]; |
67 | 0 | pPix[0] = (p1 + (p0 * (1 << 1)) + (q0 * (1 << 1)) + (q1 * (1 << 1)) + q2 + 4) >> 3; //q0 |
68 | 0 | pPix[iStrideX] = (p0 + q0 + q1 + q2 + 2) >> 2; //q1 |
69 | 0 | pPix[2 * iStrideX] = ((q3 * (1 << 1)) + q2 + (q2 * (1 << 1)) + q1 + q0 + p0 + 4) >> 3; //q2 |
70 | 0 | } else { |
71 | 0 | pPix[0] = ((q1 * (1 << 1)) + q0 + p1 + 2) >> 2; //q0 |
72 | 0 | } |
73 | 0 | } else { |
74 | 0 | pPix[-iStrideX] = ((p1 * (1 << 1)) + p0 + q1 + 2) >> 2; //p0 |
75 | 0 | pPix[ 0] = ((q1 * (1 << 1)) + q0 + p1 + 2) >> 2; //q0 |
76 | 0 | } |
77 | 0 | } |
78 | 0 | pPix += iStrideY; |
79 | 0 | } |
80 | 0 | } |
81 | 0 | void DeblockLumaLt4V_c (uint8_t* pPix, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* tc) { |
82 | 0 | DeblockLumaLt4_c (pPix, iStride, 1, iAlpha, iBeta, tc); |
83 | 0 | } |
84 | 0 | void DeblockLumaLt4H_c (uint8_t* pPix, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* tc) { |
85 | 0 | DeblockLumaLt4_c (pPix, 1, iStride, iAlpha, iBeta, tc); |
86 | 0 | } |
87 | 0 | void DeblockLumaEq4V_c (uint8_t* pPix, int32_t iStride, int32_t iAlpha, int32_t iBeta) { |
88 | 0 | DeblockLumaEq4_c (pPix, iStride, 1, iAlpha, iBeta); |
89 | 0 | } |
90 | 0 | void DeblockLumaEq4H_c (uint8_t* pPix, int32_t iStride, int32_t iAlpha, int32_t iBeta) { |
91 | 0 | DeblockLumaEq4_c (pPix, 1, iStride, iAlpha, iBeta); |
92 | 0 | } |
93 | | void DeblockChromaLt4_c (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStrideX, int32_t iStrideY, int32_t iAlpha, |
94 | 0 | int32_t iBeta, int8_t* pTc) { |
95 | 0 | int32_t p0, p1, q0, q1, iDeta; |
96 | 0 | bool bDetaP0Q0, bDetaP1P0, bDetaQ1Q0; |
97 | |
|
98 | 0 | for (int32_t i = 0; i < 8; i++) { |
99 | 0 | int32_t iTc0 = pTc[i >> 1]; |
100 | 0 | if (iTc0 > 0) { |
101 | 0 | p0 = pPixCb[-iStrideX]; |
102 | 0 | p1 = pPixCb[-2 * iStrideX]; |
103 | 0 | q0 = pPixCb[0]; |
104 | 0 | q1 = pPixCb[iStrideX]; |
105 | |
|
106 | 0 | bDetaP0Q0 = WELS_ABS (p0 - q0) < iAlpha; |
107 | 0 | bDetaP1P0 = WELS_ABS (p1 - p0) < iBeta; |
108 | 0 | bDetaQ1Q0 = WELS_ABS (q1 - q0) < iBeta; |
109 | 0 | if (bDetaP0Q0 && bDetaP1P0 && bDetaQ1Q0) { |
110 | 0 | iDeta = WELS_CLIP3 ((((q0 - p0) * (1 << 2)) + (p1 - q1) + 4) >> 3, -iTc0, iTc0); |
111 | 0 | pPixCb[-iStrideX] = WelsClip1 (p0 + iDeta); /* p0' */ |
112 | 0 | pPixCb[0] = WelsClip1 (q0 - iDeta); /* q0' */ |
113 | 0 | } |
114 | | |
115 | |
|
116 | 0 | p0 = pPixCr[-iStrideX]; |
117 | 0 | p1 = pPixCr[-2 * iStrideX]; |
118 | 0 | q0 = pPixCr[0]; |
119 | 0 | q1 = pPixCr[iStrideX]; |
120 | |
|
121 | 0 | bDetaP0Q0 = WELS_ABS (p0 - q0) < iAlpha; |
122 | 0 | bDetaP1P0 = WELS_ABS (p1 - p0) < iBeta; |
123 | 0 | bDetaQ1Q0 = WELS_ABS (q1 - q0) < iBeta; |
124 | |
|
125 | 0 | if (bDetaP0Q0 && bDetaP1P0 && bDetaQ1Q0) { |
126 | 0 | iDeta = WELS_CLIP3 ((((q0 - p0) * (1 << 2)) + (p1 - q1) + 4) >> 3, -iTc0, iTc0); |
127 | 0 | pPixCr[-iStrideX] = WelsClip1 (p0 + iDeta); /* p0' */ |
128 | 0 | pPixCr[0] = WelsClip1 (q0 - iDeta); /* q0' */ |
129 | 0 | } |
130 | 0 | } |
131 | 0 | pPixCb += iStrideY; |
132 | 0 | pPixCr += iStrideY; |
133 | 0 | } |
134 | 0 | } |
135 | | void DeblockChromaEq4_c (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStrideX, int32_t iStrideY, int32_t iAlpha, |
136 | 0 | int32_t iBeta) { |
137 | 0 | int32_t p0, p1, q0, q1; |
138 | 0 | bool bDetaP0Q0, bDetaP1P0, bDetaQ1Q0; |
139 | 0 | for (int32_t i = 0; i < 8; i++) { |
140 | | //cb |
141 | 0 | p0 = pPixCb[-iStrideX]; |
142 | 0 | p1 = pPixCb[-2 * iStrideX]; |
143 | 0 | q0 = pPixCb[0]; |
144 | 0 | q1 = pPixCb[iStrideX]; |
145 | 0 | bDetaP0Q0 = WELS_ABS (p0 - q0) < iAlpha; |
146 | 0 | bDetaP1P0 = WELS_ABS (p1 - p0) < iBeta; |
147 | 0 | bDetaQ1Q0 = WELS_ABS (q1 - q0) < iBeta; |
148 | 0 | if (bDetaP0Q0 && bDetaP1P0 && bDetaQ1Q0) { |
149 | 0 | pPixCb[-iStrideX] = ((p1 * (1 << 1)) + p0 + q1 + 2) >> 2; /* p0' */ |
150 | 0 | pPixCb[0] = ((q1 * (1 << 1)) + q0 + p1 + 2) >> 2; /* q0' */ |
151 | 0 | } |
152 | | |
153 | | //cr |
154 | 0 | p0 = pPixCr[-iStrideX]; |
155 | 0 | p1 = pPixCr[-2 * iStrideX]; |
156 | 0 | q0 = pPixCr[0]; |
157 | 0 | q1 = pPixCr[iStrideX]; |
158 | 0 | bDetaP0Q0 = WELS_ABS (p0 - q0) < iAlpha; |
159 | 0 | bDetaP1P0 = WELS_ABS (p1 - p0) < iBeta; |
160 | 0 | bDetaQ1Q0 = WELS_ABS (q1 - q0) < iBeta; |
161 | 0 | if (bDetaP0Q0 && bDetaP1P0 && bDetaQ1Q0) { |
162 | 0 | pPixCr[-iStrideX] = ((p1 * (1 << 1)) + p0 + q1 + 2) >> 2; /* p0' */ |
163 | 0 | pPixCr[0] = ((q1 * (1 << 1)) + q0 + p1 + 2) >> 2; /* q0' */ |
164 | 0 | } |
165 | 0 | pPixCr += iStrideY; |
166 | 0 | pPixCb += iStrideY; |
167 | 0 | } |
168 | 0 | } |
169 | | void DeblockChromaLt4V_c (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta, |
170 | 0 | int8_t* tc) { |
171 | 0 | DeblockChromaLt4_c (pPixCb, pPixCr, iStride, 1, iAlpha, iBeta, tc); |
172 | 0 | } |
173 | | void DeblockChromaLt4H_c (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta, |
174 | 0 | int8_t* tc) { |
175 | 0 | DeblockChromaLt4_c (pPixCb, pPixCr, 1, iStride, iAlpha, iBeta, tc); |
176 | 0 | } |
177 | 0 | void DeblockChromaEq4V_c (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta) { |
178 | 0 | DeblockChromaEq4_c (pPixCb, pPixCr, iStride, 1, iAlpha, iBeta); |
179 | 0 | } |
180 | 0 | void DeblockChromaEq4H_c (uint8_t* pPixCb, uint8_t* pPixCr, int32_t iStride, int32_t iAlpha, int32_t iBeta) { |
181 | 0 | DeblockChromaEq4_c (pPixCb, pPixCr, 1, iStride, iAlpha, iBeta); |
182 | 0 | } |
183 | | |
184 | | void DeblockChromaLt42_c (uint8_t* pPixCbCr, int32_t iStrideX, int32_t iStrideY, int32_t iAlpha, |
185 | 0 | int32_t iBeta, int8_t* pTc) { |
186 | 0 | int32_t p0, p1, q0, q1, iDeta; |
187 | 0 | bool bDetaP0Q0, bDetaP1P0, bDetaQ1Q0; |
188 | |
|
189 | 0 | for (int32_t i = 0; i < 8; i++) { |
190 | 0 | int32_t iTc0 = pTc[i >> 1]; |
191 | 0 | if (iTc0 > 0) { |
192 | 0 | p0 = pPixCbCr[-iStrideX]; |
193 | 0 | p1 = pPixCbCr[-2 * iStrideX]; |
194 | 0 | q0 = pPixCbCr[0]; |
195 | 0 | q1 = pPixCbCr[iStrideX]; |
196 | |
|
197 | 0 | bDetaP0Q0 = WELS_ABS (p0 - q0) < iAlpha; |
198 | 0 | bDetaP1P0 = WELS_ABS (p1 - p0) < iBeta; |
199 | 0 | bDetaQ1Q0 = WELS_ABS (q1 - q0) < iBeta; |
200 | 0 | if (bDetaP0Q0 && bDetaP1P0 && bDetaQ1Q0) { |
201 | 0 | iDeta = WELS_CLIP3 ((((q0 - p0) * (1 << 2)) + (p1 - q1) + 4) >> 3, -iTc0, iTc0); |
202 | 0 | pPixCbCr[-iStrideX] = WelsClip1 (p0 + iDeta); /* p0' */ |
203 | 0 | pPixCbCr[0] = WelsClip1 (q0 - iDeta); /* q0' */ |
204 | 0 | } |
205 | | |
206 | |
|
207 | 0 | } |
208 | 0 | pPixCbCr += iStrideY; |
209 | 0 | } |
210 | 0 | } |
211 | | void DeblockChromaEq42_c (uint8_t* pPixCbCr, int32_t iStrideX, int32_t iStrideY, int32_t iAlpha, |
212 | 0 | int32_t iBeta) { |
213 | 0 | int32_t p0, p1, q0, q1; |
214 | 0 | bool bDetaP0Q0, bDetaP1P0, bDetaQ1Q0; |
215 | 0 | for (int32_t i = 0; i < 8; i++) { |
216 | 0 | p0 = pPixCbCr[-iStrideX]; |
217 | 0 | p1 = pPixCbCr[-2 * iStrideX]; |
218 | 0 | q0 = pPixCbCr[0]; |
219 | 0 | q1 = pPixCbCr[iStrideX]; |
220 | 0 | bDetaP0Q0 = WELS_ABS (p0 - q0) < iAlpha; |
221 | 0 | bDetaP1P0 = WELS_ABS (p1 - p0) < iBeta; |
222 | 0 | bDetaQ1Q0 = WELS_ABS (q1 - q0) < iBeta; |
223 | 0 | if (bDetaP0Q0 && bDetaP1P0 && bDetaQ1Q0) { |
224 | 0 | pPixCbCr[-iStrideX] = ((p1 * (1 << 1)) + p0 + q1 + 2) >> 2; /* p0' */ |
225 | 0 | pPixCbCr[0] = ((q1 * (1 << 1)) + q0 + p1 + 2) >> 2; /* q0' */ |
226 | 0 | } |
227 | |
|
228 | 0 | pPixCbCr += iStrideY; |
229 | 0 | } |
230 | 0 | } |
231 | | |
232 | | void DeblockChromaLt4V2_c (uint8_t* pPixCbCr, int32_t iStride, int32_t iAlpha, int32_t iBeta, |
233 | 0 | int8_t* tc) { |
234 | 0 | DeblockChromaLt42_c (pPixCbCr, iStride, 1, iAlpha, iBeta, tc); |
235 | 0 | } |
236 | | void DeblockChromaLt4H2_c (uint8_t* pPixCbCr, int32_t iStride, int32_t iAlpha, int32_t iBeta, |
237 | 0 | int8_t* tc) { |
238 | |
|
239 | 0 | DeblockChromaLt42_c (pPixCbCr, 1, iStride, iAlpha, iBeta, tc); |
240 | 0 | } |
241 | 0 | void DeblockChromaEq4V2_c (uint8_t* pPixCbCr, int32_t iStride, int32_t iAlpha, int32_t iBeta) { |
242 | 0 | DeblockChromaEq42_c (pPixCbCr, iStride, 1, iAlpha, iBeta); |
243 | 0 | } |
244 | 0 | void DeblockChromaEq4H2_c (uint8_t* pPixCbCr, int32_t iStride, int32_t iAlpha, int32_t iBeta) { |
245 | 0 | DeblockChromaEq42_c (pPixCbCr, 1, iStride, iAlpha, iBeta); |
246 | 0 | } |
247 | | |
// Collapses each of the 24 entries of pNonZeroCount to a flag in place:
// 0 stays 0, every other value (positive or negative) becomes 1.
void WelsNonZeroCount_c (int8_t* pNonZeroCount) {
  int8_t* const pEnd = pNonZeroCount + 24;
  for (int8_t* pCur = pNonZeroCount; pCur != pEnd; ++pCur) {
    *pCur = (*pCur != 0) ? 1 : 0;
  }
}
254 | | |
255 | | #ifdef X86_ASM |
256 | | extern "C" { |
257 | | void DeblockLumaLt4H_ssse3 (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* pTc) { |
258 | | ENFORCE_STACK_ALIGN_1D (uint8_t, uiBuf, 16 * 8, 16); |
259 | | |
260 | | DeblockLumaTransposeH2V_sse2 (pPixY - 4, iStride, &uiBuf[0]); |
261 | | DeblockLumaLt4V_ssse3 (&uiBuf[4 * 16], 16, iAlpha, iBeta, pTc); |
262 | | DeblockLumaTransposeV2H_sse2 (pPixY - 4, iStride, &uiBuf[0]); |
263 | | } |
264 | | |
265 | | void DeblockLumaEq4H_ssse3 (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta) { |
266 | | ENFORCE_STACK_ALIGN_1D (uint8_t, uiBuf, 16 * 8, 16); |
267 | | |
268 | | DeblockLumaTransposeH2V_sse2 (pPixY - 4, iStride, &uiBuf[0]); |
269 | | DeblockLumaEq4V_ssse3 (&uiBuf[4 * 16], 16, iAlpha, iBeta); |
270 | | DeblockLumaTransposeV2H_sse2 (pPixY - 4, iStride, &uiBuf[0]); |
271 | | } |
272 | | |
273 | | } |
274 | | |
275 | | #endif |
276 | | |
277 | | #ifdef HAVE_MMI |
278 | | extern "C" { |
279 | | void DeblockLumaLt4H_mmi (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta, int8_t* pTc) { |
280 | | ENFORCE_STACK_ALIGN_1D (uint8_t, uiBuf, 16 * 8, 16); |
281 | | |
282 | | DeblockLumaTransposeH2V_mmi (pPixY - 4, iStride, &uiBuf[0]); |
283 | | DeblockLumaLt4V_mmi (&uiBuf[4 * 16], 16, iAlpha, iBeta, pTc); |
284 | | DeblockLumaTransposeV2H_mmi (pPixY - 4, iStride, &uiBuf[0]); |
285 | | } |
286 | | |
287 | | void DeblockLumaEq4H_mmi (uint8_t* pPixY, int32_t iStride, int32_t iAlpha, int32_t iBeta) { |
288 | | ENFORCE_STACK_ALIGN_1D (uint8_t, uiBuf, 16 * 8, 16); |
289 | | |
290 | | DeblockLumaTransposeH2V_mmi (pPixY - 4, iStride, &uiBuf[0]); |
291 | | DeblockLumaEq4V_mmi (&uiBuf[4 * 16], 16, iAlpha, iBeta); |
292 | | DeblockLumaTransposeV2H_mmi (pPixY - 4, iStride, &uiBuf[0]); |
293 | | } |
294 | | } |
295 | | #endif//HAVE_MMI |