/src/libavc/encoder/ih264e_half_pel.c
Line | Count | Source |
1 | | /****************************************************************************** |
2 | | * |
3 | | * Copyright (C) 2015 The Android Open Source Project |
4 | | * |
5 | | * Licensed under the Apache License, Version 2.0 (the "License"); |
6 | | * you may not use this file except in compliance with the License. |
7 | | * You may obtain a copy of the License at: |
8 | | * |
9 | | * http://www.apache.org/licenses/LICENSE-2.0 |
10 | | * |
11 | | * Unless required by applicable law or agreed to in writing, software |
12 | | * distributed under the License is distributed on an "AS IS" BASIS, |
13 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | * See the License for the specific language governing permissions and |
15 | | * limitations under the License. |
16 | | * |
17 | | ***************************************************************************** |
18 | | * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore |
19 | | */ |
20 | | |
21 | | /** |
22 | | ******************************************************************************* |
23 | | * @file |
24 | | * ih264e_half_pel.c |
25 | | * |
26 | | * @brief |
27 | | * This file contains functions that are used for computing subpixel planes |
28 | | * |
29 | | * @author |
30 | | * ittiam |
31 | | * |
32 | | * @par List of Functions: |
33 | | * - ih264e_sixtapfilter_horz |
34 | | * - ih264e_sixtap_filter_2dvh_vert |
35 | | * |
36 | | * @remarks |
37 | | * none |
38 | | * |
39 | | ******************************************************************************* |
40 | | */ |
41 | | |
42 | | /*****************************************************************************/ |
43 | | /* File Includes */ |
44 | | /*****************************************************************************/ |
45 | | |
46 | | /* System Include Files */ |
47 | | #include <stdio.h> |
48 | | #include <assert.h> |
49 | | #include <limits.h> |
50 | | |
51 | | /* User Include Files */ |
52 | | #include "ih264_typedefs.h" |
53 | | |
54 | | #include "ih264_macros.h" |
55 | | #include "ih264_defs.h" |
56 | | #include "ih264_mem_fns.h" |
57 | | #include "ih264_padding.h" |
58 | | #include "ih264_intra_pred_filters.h" |
59 | | #include "ih264_inter_pred_filters.h" |
60 | | #include "ih264_deblk_edge_filters.h" |
61 | | #include "ih264_platform_macros.h" |
62 | | |
63 | | #include "ih264e_half_pel.h" |
64 | | |
65 | | |
66 | | /*****************************************************************************/ |
67 | | /* Function Definitions */ |
68 | | /*****************************************************************************/ |
69 | | |
70 | | /** |
71 | | ******************************************************************************* |
72 | | * |
73 | | * @brief |
74 | | * Interprediction luma filter for horizontal input (Filter run for width = 17 |
75 | | * and height =16) |
76 | | * |
77 | | * @par Description: |
78 | | * Applies a 6 tap horizontal filter .The output is clipped to 8 bits |
79 | | * sec 8.4.2.2.1 titled "Luma sample interpolation process" |
80 | | * |
81 | | * @param[in] pu1_src |
82 | | * UWORD8 pointer to the source |
83 | | * |
84 | | * @param[out] pu1_dst |
85 | | * UWORD8 pointer to the destination |
86 | | * |
87 | | * @param[in] src_strd |
88 | | * integer source stride |
89 | | * |
90 | | * @param[in] dst_strd |
91 | | * integer destination stride |
92 | | * |
93 | | * @returns |
94 | | * |
95 | | * @remarks |
96 | | * none |
97 | | * |
98 | | ******************************************************************************* |
99 | | */ |
100 | | void ih264e_sixtapfilter_horz(UWORD8 *pu1_src, |
101 | | UWORD8 *pu1_dst, |
102 | | WORD32 src_strd, |
103 | | WORD32 dst_strd) |
104 | 330k | { |
105 | 330k | UWORD32 u4_i, u4_j; |
106 | 330k | UWORD32 u4_w, u4_h; |
107 | | |
108 | | /* width and height of interpolation */ |
109 | 330k | u4_w = HP_PL_WD; |
110 | 330k | u4_h = MB_SIZE; |
111 | | |
112 | 330k | pu1_src -= 2; |
113 | | |
114 | 5.58M | for (u4_i = 0; u4_i < u4_h; u4_i++) |
115 | 5.25M | { |
116 | 93.0M | for (u4_j = 0; u4_j < u4_w; u4_j++, pu1_dst++, pu1_src++) |
117 | 87.7M | { |
118 | 87.7M | WORD16 i16_temp; |
119 | | |
120 | 87.7M | i16_temp = ih264_g_six_tap[0] * (*pu1_src + pu1_src[5]) |
121 | 87.7M | + ih264_g_six_tap[1] * (pu1_src[1] + pu1_src[4]) |
122 | 87.7M | + ih264_g_six_tap[2] * (pu1_src[2] + pu1_src[3]); |
123 | | |
124 | 87.7M | i16_temp = (i16_temp + 16) >> 5; |
125 | | |
126 | 87.7M | *pu1_dst = CLIP_U8(i16_temp); |
127 | 87.7M | } |
128 | 5.25M | pu1_src += src_strd - u4_w; |
129 | 5.25M | pu1_dst += dst_strd - u4_w; |
130 | 5.25M | } |
131 | 330k | } |
132 | | |
133 | | /** |
134 | | ******************************************************************************* |
135 | | * |
136 | | * @brief |
137 | | * This function implements a two stage cascaded six tap filter. It applies |
138 | | * the six tap filter in the vertical direction on the predictor values, |
139 | | * followed by applying the same filter in the horizontal direction on the |
140 | | * output of the first stage. The six tap filtering operation is described in |
141 | | * sec 8.4.2.2.1 titled "Luma sample interpolation process" (Filter run for |
142 | | * width = 17 and height = 17) |
143 | | * |
144 | | * @par Description: |
145 | | * The function interpolates the predictors first in the vertical direction and |
146 | | * then in the horizontal direction to output the (1/2,1/2). The output of the |
147 | | * first stage of the filter is stored in the buffer pointed to by |
148 | | * pi16_pred1(only in C) in 16 bit precision. |
149 | | * |
150 | | * @param[in] pu1_src |
151 | | * UWORD8 pointer to the source |
152 | | * |
153 | | * @param[out] pu1_dst1 |
154 | | * UWORD8 pointer to the destination (Horizontal filtered output) |
155 | | * |
156 | | * @param[out] pu1_dst2 |
157 | | * UWORD8 pointer to the destination (output after applying vertical filter to |
158 | | * the intermediate horizontal output) |
159 | | * |
160 | | * @param[in] src_strd |
161 | | * integer source stride |
162 | | |
163 | | * @param[in] dst_strd |
164 | | * integer destination stride of pu1_dst |
165 | | * |
166 | | * @param[in] pi4_pred |
167 | | * Pointer to 16bit intermediate buffer (used only in c) |
168 | | * |
169 | | * @param[in] i4_pred_strd |
170 | | * integer destination stride of pi16_pred1 |
171 | | * |
172 | | * @returns |
173 | | * |
174 | | * @remarks |
175 | | * none |
176 | | * |
177 | | ******************************************************************************* |
178 | | */ |
179 | | void ih264e_sixtap_filter_2dvh_vert(UWORD8 *pu1_src, |
180 | | UWORD8 *pu1_dst1, |
181 | | UWORD8 *pu1_dst2, |
182 | | WORD32 src_strd, |
183 | | WORD32 dst_strd, |
184 | | WORD32 *pi4_pred, |
185 | | WORD32 i4_pred_strd) |
186 | 330k | { |
187 | 330k | WORD32 row, col; |
188 | 330k | WORD32 tmp; |
189 | 330k | WORD32 *pi4_pred_temp = pi4_pred; |
190 | 330k | WORD32 ht = HP_PL_HT, wd = HP_PL_WD; |
191 | | |
192 | 5.91M | for (row = 0; row < ht; row++) |
193 | 5.58M | { |
194 | 126M | for (col = -2; col < wd + 3; col++) |
195 | 120M | { |
196 | 120M | tmp = ih264_g_six_tap[0] * (pu1_src[col - 2 * src_strd] + pu1_src[col + 3 * src_strd]) + |
197 | 120M | ih264_g_six_tap[1] * (pu1_src[col - 1 * src_strd] + pu1_src[col + 2 * src_strd]) + |
198 | 120M | ih264_g_six_tap[2] * (pu1_src[col] + pu1_src[col + 1 * src_strd]); |
199 | | |
200 | 120M | pi4_pred_temp[col] = tmp; |
201 | 120M | } |
202 | | |
203 | 5.58M | pu1_src += src_strd; |
204 | 5.58M | pi4_pred_temp += i4_pred_strd; |
205 | 5.58M | } |
206 | | |
207 | 5.93M | for (row = 0; row < ht; row++) |
208 | 5.60M | { |
209 | 100M | for (col = 0; col < wd; col++) |
210 | 94.5M | { |
211 | 94.5M | tmp = (pi4_pred[col - 2] + pi4_pred[col + 3]) + |
212 | 94.5M | ih264_g_six_tap[1] * (pi4_pred[col - 1] + pi4_pred[col + 2]) + |
213 | 94.5M | ih264_g_six_tap[2] * (pi4_pred[col] + pi4_pred[col + 1]); |
214 | | |
215 | 94.5M | tmp = (tmp + 512) >> 10; |
216 | | |
217 | 94.5M | pu1_dst2[col] = CLIP_U8(tmp); |
218 | 94.5M | pu1_dst1[col] = CLIP_U8((pi4_pred[col] + 16) >> 5); |
219 | 94.5M | } |
220 | 5.60M | pi4_pred += i4_pred_strd; |
221 | 5.60M | pu1_dst2 += dst_strd; |
222 | 5.60M | pu1_dst1 += dst_strd; |
223 | 5.60M | } |
224 | 330k | } |
225 | | |