/src/tesseract/src/lstm/networkio.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /////////////////////////////////////////////////////////////////////// |
2 | | // File: networkio.cpp |
3 | | // Description: Network input/output data, allowing float/int implementations. |
4 | | // Author: Ray Smith |
5 | | // |
6 | | // (C) Copyright 2014, Google Inc. |
7 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
8 | | // you may not use this file except in compliance with the License. |
9 | | // You may obtain a copy of the License at |
10 | | // http://www.apache.org/licenses/LICENSE-2.0 |
11 | | // Unless required by applicable law or agreed to in writing, software |
12 | | // distributed under the License is distributed on an "AS IS" BASIS, |
13 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | // See the License for the specific language governing permissions and |
15 | | // limitations under the License. |
16 | | /////////////////////////////////////////////////////////////////////// |
17 | | |
18 | | #include "networkio.h" |
19 | | #include <cfloat> // for FLT_MAX |
20 | | #include <cmath> |
21 | | |
22 | | #include <allheaders.h> |
23 | | #include "functions.h" |
24 | | #include "statistc.h" |
25 | | #include "tprintf.h" |
26 | | |
27 | | namespace tesseract { |
28 | | |
// Floor applied to certainty (log-probability) outputs.
const float kMinCertainty = -20.0f;
// The probability whose log equals kMinCertainty; inputs below this clamp.
const float kMinProb = std::exp(kMinCertainty);
33 | | |
34 | | // Resizes to a specific size as a 2-d temp buffer. No batches, no y-dim. |
35 | 0 | void NetworkIO::Resize2d(bool int_mode, int width, int num_features) { |
36 | 0 | stride_map_ = StrideMap(); |
37 | 0 | int_mode_ = int_mode; |
38 | 0 | if (int_mode_) { |
39 | 0 | i_.ResizeNoInit(width, num_features, GetPadding(num_features)); |
40 | 0 | } else { |
41 | 0 | f_.ResizeNoInit(width, num_features); |
42 | 0 | } |
43 | 0 | } |
44 | | |
45 | | // Resizes to a specific stride_map. |
46 | 3.19M | void NetworkIO::ResizeToMap(bool int_mode, const StrideMap &stride_map, int num_features) { |
47 | | // If this method crashes with this == nullptr, |
48 | | // it most likely got here through an uninitialized scratch element, |
49 | | // ie call NetworkScratch::IO::Resizexxx() not NetworkIO::Resizexxx()!! |
50 | 3.19M | stride_map_ = stride_map; |
51 | 3.19M | int_mode_ = int_mode; |
52 | 3.19M | if (int_mode_) { |
53 | 3.00M | i_.ResizeNoInit(stride_map.Width(), num_features, GetPadding(num_features)); |
54 | 3.00M | } else { |
55 | 187k | f_.ResizeNoInit(stride_map.Width(), num_features); |
56 | 187k | } |
57 | 3.19M | ZeroInvalidElements(); |
58 | 3.19M | } |
59 | | |
60 | | // Shrinks image size by x_scale,y_scale, and use given number of features. |
61 | 187k | void NetworkIO::ResizeScaled(const NetworkIO &src, int x_scale, int y_scale, int num_features) { |
62 | 187k | StrideMap stride_map = src.stride_map_; |
63 | 187k | stride_map.ScaleXY(x_scale, y_scale); |
64 | 187k | ResizeToMap(src.int_mode_, stride_map, num_features); |
65 | 187k | } |
66 | | |
67 | | // Resizes to just 1 x-coord, whatever the input. |
68 | 187k | void NetworkIO::ResizeXTo1(const NetworkIO &src, int num_features) { |
69 | 187k | StrideMap stride_map = src.stride_map_; |
70 | 187k | stride_map.ReduceWidthTo1(); |
71 | 187k | ResizeToMap(src.int_mode_, stride_map, num_features); |
72 | 187k | } |
73 | | |
74 | | // Initialize all the array to zero. |
75 | 0 | void NetworkIO::Zero() { |
76 | 0 | int width = Width(); |
77 | | // Zero out the everything. Column-by-column in case it is aligned. |
78 | 0 | for (int t = 0; t < width; ++t) { |
79 | 0 | ZeroTimeStep(t); |
80 | 0 | } |
81 | 0 | } |
82 | | |
83 | | // Initializes to zero all elements of the array that do not correspond to |
84 | | // valid image positions. (If a batch of different-sized images are packed |
85 | | // together, then there will be padding pixels.) |
86 | 3.56M | void NetworkIO::ZeroInvalidElements() { |
87 | 3.56M | int num_features = NumFeatures(); |
88 | 3.56M | int full_width = stride_map_.Size(FD_WIDTH); |
89 | 3.56M | int full_height = stride_map_.Size(FD_HEIGHT); |
90 | 3.56M | StrideMap::Index b_index(stride_map_); |
91 | 3.56M | do { |
92 | 3.56M | int end_x = b_index.MaxIndexOfDim(FD_WIDTH) + 1; |
93 | 3.56M | if (end_x < full_width) { |
94 | | // The width is small, so fill for every valid y. |
95 | 0 | StrideMap::Index y_index(b_index); |
96 | 0 | int fill_size = num_features * (full_width - end_x); |
97 | 0 | do { |
98 | 0 | StrideMap::Index z_index(y_index); |
99 | 0 | z_index.AddOffset(end_x, FD_WIDTH); |
100 | 0 | if (int_mode_) { |
101 | 0 | ZeroVector(fill_size, i_[z_index.t()]); |
102 | 0 | } else { |
103 | 0 | ZeroVector(fill_size, f_[z_index.t()]); |
104 | 0 | } |
105 | 0 | } while (y_index.AddOffset(1, FD_HEIGHT)); |
106 | 0 | } |
107 | 3.56M | int end_y = b_index.MaxIndexOfDim(FD_HEIGHT) + 1; |
108 | 3.56M | if (end_y < full_height) { |
109 | | // The height is small, so fill in the space in one go. |
110 | 0 | StrideMap::Index y_index(b_index); |
111 | 0 | y_index.AddOffset(end_y, FD_HEIGHT); |
112 | 0 | int fill_size = num_features * full_width * (full_height - end_y); |
113 | 0 | if (int_mode_) { |
114 | 0 | ZeroVector(fill_size, i_[y_index.t()]); |
115 | 0 | } else { |
116 | 0 | ZeroVector(fill_size, f_[y_index.t()]); |
117 | 0 | } |
118 | 0 | } |
119 | 3.56M | } while (b_index.AddOffset(1, FD_BATCH)); |
120 | 3.56M | } |
121 | | |
122 | | // Helper computes a black point and white point to contrast-enhance an image. |
123 | | // The computation is based on the assumption that the image is of a single line |
124 | | // of text, so a horizontal line through the middle of the image passes through |
125 | | // at least some of it, so local minima and maxima are a good proxy for black |
126 | | // and white pixel samples. |
127 | 187k | static void ComputeBlackWhite(Image pix, float *black, float *white) { |
128 | 187k | int width = pixGetWidth(pix); |
129 | 187k | int height = pixGetHeight(pix); |
130 | 187k | STATS mins(0, 255), maxes(0, 255); |
131 | 187k | if (width >= 3) { |
132 | 187k | int y = height / 2; |
133 | 187k | l_uint32 *line = pixGetData(pix) + pixGetWpl(pix) * y; |
134 | 187k | int prev = GET_DATA_BYTE(line, 0); |
135 | 187k | int curr = GET_DATA_BYTE(line, 1); |
136 | 7.71M | for (int x = 1; x + 1 < width; ++x) { |
137 | 7.53M | int next = GET_DATA_BYTE(line, x + 1); |
138 | 7.53M | if ((curr < prev && curr <= next) || (curr <= prev && curr < next)) { |
139 | | // Local minimum. |
140 | 1.46M | mins.add(curr, 1); |
141 | 1.46M | } |
142 | 7.53M | if ((curr > prev && curr >= next) || (curr >= prev && curr > next)) { |
143 | | // Local maximum. |
144 | 1.46M | maxes.add(curr, 1); |
145 | 1.46M | } |
146 | 7.53M | prev = curr; |
147 | 7.53M | curr = next; |
148 | 7.53M | } |
149 | 187k | } |
150 | 187k | if (mins.get_total() == 0) { |
151 | 15.1k | mins.add(0, 1); |
152 | 15.1k | } |
153 | 187k | if (maxes.get_total() == 0) { |
154 | 14.7k | maxes.add(255, 1); |
155 | 14.7k | } |
156 | 187k | *black = mins.ile(0.25); |
157 | 187k | *white = maxes.ile(0.75); |
158 | 187k | } |
159 | | |
160 | | // Sets up the array from the given image, using the currently set int_mode_. |
161 | | // If the image width doesn't match the shape, the image is truncated or padded |
162 | | // with noise to match. |
163 | 187k | void NetworkIO::FromPix(const StaticShape &shape, const Image pix, TRand *randomizer) { |
164 | 187k | std::vector<Image> pixes(1, pix); |
165 | 187k | FromPixes(shape, pixes, randomizer); |
166 | 187k | } |
167 | | |
168 | | // Sets up the array from the given set of images, using the currently set |
169 | | // int_mode_. If the image width doesn't match the shape, the images are |
170 | | // truncated or padded with noise to match. |
171 | | void NetworkIO::FromPixes(const StaticShape &shape, const std::vector<Image> &pixes, |
172 | 187k | TRand *randomizer) { |
173 | 187k | int target_height = shape.height(); |
174 | 187k | int target_width = shape.width(); |
175 | 187k | std::vector<std::pair<int, int>> h_w_pairs; |
176 | 187k | for (auto &&pix : pixes) { |
177 | 187k | Image var_pix = pix; |
178 | 187k | int width = pixGetWidth(var_pix); |
179 | 187k | if (target_width != 0) { |
180 | 0 | width = target_width; |
181 | 0 | } |
182 | 187k | int height = pixGetHeight(var_pix); |
183 | 187k | if (target_height != 0) { |
184 | 187k | height = target_height; |
185 | 187k | } |
186 | 187k | h_w_pairs.emplace_back(height, width); |
187 | 187k | } |
188 | 187k | stride_map_.SetStride(h_w_pairs); |
189 | 187k | ResizeToMap(int_mode(), stride_map_, shape.depth()); |
190 | | // Iterate over the images again to copy the data. |
191 | 375k | for (size_t b = 0; b < pixes.size(); ++b) { |
192 | 187k | Image pix = pixes[b]; |
193 | 187k | float black = 0.0f, white = 255.0f; |
194 | 187k | if (shape.depth() != 3) { |
195 | 187k | ComputeBlackWhite(pix, &black, &white); |
196 | 187k | } |
197 | 187k | float contrast = (white - black) / 2.0f; |
198 | 187k | if (contrast <= 0.0f) { |
199 | 17 | contrast = 1.0f; |
200 | 17 | } |
201 | 187k | if (shape.height() == 1) { |
202 | 0 | Copy1DGreyImage(b, pix, black, contrast, randomizer); |
203 | 187k | } else { |
204 | 187k | Copy2DImage(b, pix, black, contrast, randomizer); |
205 | 187k | } |
206 | 187k | } |
207 | 187k | } |
208 | | |
209 | | // Copies the given pix to *this at the given batch index, stretching and |
210 | | // clipping the pixel values so that [black, black + 2*contrast] maps to the |
211 | | // dynamic range of *this, ie [-1,1] for a float and (-127,127) for int. |
212 | | // This is a 2-d operation in the sense that the output depth is the number |
213 | | // of input channels, the height is the height of the image, and the width |
214 | | // is the width of the image, or truncated/padded with noise if the width |
215 | | // is a fixed size. |
216 | 187k | void NetworkIO::Copy2DImage(int batch, Image pix, float black, float contrast, TRand *randomizer) { |
217 | 187k | int width = pixGetWidth(pix); |
218 | 187k | int height = pixGetHeight(pix); |
219 | 187k | int wpl = pixGetWpl(pix); |
220 | 187k | StrideMap::Index index(stride_map_); |
221 | 187k | index.AddOffset(batch, FD_BATCH); |
222 | 187k | int t = index.t(); |
223 | 187k | int target_height = stride_map_.Size(FD_HEIGHT); |
224 | 187k | int target_width = stride_map_.Size(FD_WIDTH); |
225 | 187k | int num_features = NumFeatures(); |
226 | 187k | bool color = num_features == 3; |
227 | 187k | if (width > target_width) { |
228 | 0 | width = target_width; |
229 | 0 | } |
230 | 187k | uint32_t *line = pixGetData(pix); |
231 | 6.94M | for (int y = 0; y < target_height; ++y, line += wpl) { |
232 | 6.75M | int x = 0; |
233 | 6.75M | if (y < height) { |
234 | 291M | for (x = 0; x < width; ++x, ++t) { |
235 | 284M | if (color) { |
236 | 0 | int f = 0; |
237 | 0 | for (int c = COLOR_RED; c <= COLOR_BLUE; ++c) { |
238 | 0 | int pixel = GET_DATA_BYTE(line + x, c); |
239 | 0 | SetPixel(t, f++, pixel, black, contrast); |
240 | 0 | } |
241 | 284M | } else { |
242 | 284M | int pixel = GET_DATA_BYTE(line, x); |
243 | 284M | SetPixel(t, 0, pixel, black, contrast); |
244 | 284M | } |
245 | 284M | } |
246 | 6.75M | } |
247 | 6.75M | for (; x < target_width; ++x) { |
248 | 0 | Randomize(t++, 0, num_features, randomizer); |
249 | 0 | } |
250 | 6.75M | } |
251 | 187k | } |
252 | | |
253 | | // Copies the given pix to *this at the given batch index, as Copy2DImage |
254 | | // above, except that the output depth is the height of the input image, the |
255 | | // output height is 1, and the output width as for Copy2DImage. |
256 | | // The image is thus treated as a 1-d set of vertical pixel strips. |
257 | | void NetworkIO::Copy1DGreyImage(int batch, Image pix, float black, float contrast, |
258 | 0 | TRand *randomizer) { |
259 | 0 | int width = pixGetWidth(pix); |
260 | 0 | int height = pixGetHeight(pix); |
261 | 0 | ASSERT_HOST(height == NumFeatures()); |
262 | 0 | int wpl = pixGetWpl(pix); |
263 | 0 | StrideMap::Index index(stride_map_); |
264 | 0 | index.AddOffset(batch, FD_BATCH); |
265 | 0 | int t = index.t(); |
266 | 0 | int target_width = stride_map_.Size(FD_WIDTH); |
267 | 0 | if (width > target_width) { |
268 | 0 | width = target_width; |
269 | 0 | } |
270 | 0 | int x; |
271 | 0 | for (x = 0; x < width; ++x, ++t) { |
272 | 0 | for (int y = 0; y < height; ++y) { |
273 | 0 | uint32_t *line = pixGetData(pix) + wpl * y; |
274 | 0 | int pixel = GET_DATA_BYTE(line, x); |
275 | 0 | SetPixel(t, y, pixel, black, contrast); |
276 | 0 | } |
277 | 0 | } |
278 | 0 | for (; x < target_width; ++x) { |
279 | 0 | Randomize(t++, 0, height, randomizer); |
280 | 0 | } |
281 | 0 | } |
282 | | |
283 | | // Helper stores the pixel value in i_ or f_ according to int_mode_. |
284 | | // t: is the index from the StrideMap corresponding to the current |
285 | | // [batch,y,x] position |
286 | | // f: is the index into the depth/channel |
287 | | // pixel: the value of the pixel from the image (in one channel) |
288 | | // black: the pixel value to map to the lowest of the range of *this |
289 | | // contrast: the range of pixel values to stretch to half the range of *this. |
290 | 284M | void NetworkIO::SetPixel(int t, int f, int pixel, float black, float contrast) { |
291 | 284M | float float_pixel = (pixel - black) / contrast - 1.0f; |
292 | 284M | if (int_mode_) { |
293 | 284M | i_[t][f] = ClipToRange<int>(IntCastRounded((INT8_MAX + 1) * float_pixel), -INT8_MAX, INT8_MAX); |
294 | 284M | } else { |
295 | 0 | f_[t][f] = float_pixel; |
296 | 0 | } |
297 | 284M | } |
298 | | |
299 | | // Converts the array to a Pix. Must be pixDestroyed after use. |
300 | 0 | Image NetworkIO::ToPix() const { |
301 | | // Count the width of the image, and find the max multiplication factor. |
302 | 0 | int im_width = stride_map_.Size(FD_WIDTH); |
303 | 0 | int im_height = stride_map_.Size(FD_HEIGHT); |
304 | 0 | int num_features = NumFeatures(); |
305 | 0 | int feature_factor = 1; |
306 | 0 | if (num_features == 3) { |
307 | | // Special hack for color. |
308 | 0 | num_features = 1; |
309 | 0 | feature_factor = 3; |
310 | 0 | } |
311 | 0 | Image pix = pixCreate(im_width, im_height * num_features, 32); |
312 | 0 | StrideMap::Index index(stride_map_); |
313 | 0 | do { |
314 | 0 | int im_x = index.index(FD_WIDTH); |
315 | 0 | int top_im_y = index.index(FD_HEIGHT); |
316 | 0 | int im_y = top_im_y; |
317 | 0 | int t = index.t(); |
318 | 0 | if (int_mode_) { |
319 | 0 | const int8_t *features = i_[t]; |
320 | 0 | for (int y = 0; y < num_features; ++y, im_y += im_height) { |
321 | 0 | int pixel = features[y * feature_factor]; |
322 | | // 1 or 2 features use greyscale. |
323 | 0 | int red = ClipToRange<int>(pixel + 128, 0, 255); |
324 | 0 | int green = red, blue = red; |
325 | 0 | if (feature_factor == 3) { |
326 | | // With 3 features assume RGB color. |
327 | 0 | green = ClipToRange<int>(features[y * feature_factor + 1] + 128, 0, 255); |
328 | 0 | blue = ClipToRange<int>(features[y * feature_factor + 2] + 128, 0, 255); |
329 | 0 | } else if (num_features > 3) { |
330 | | // More than 3 features use false yellow/blue color, assuming a signed |
331 | | // input in the range [-1,1]. |
332 | 0 | red = abs(pixel) * 2; |
333 | 0 | if (pixel >= 0) { |
334 | 0 | green = red; |
335 | 0 | blue = 0; |
336 | 0 | } else { |
337 | 0 | blue = red; |
338 | 0 | green = red = 0; |
339 | 0 | } |
340 | 0 | } |
341 | 0 | pixSetPixel(pix, im_x, im_y, |
342 | 0 | (red << L_RED_SHIFT) | (green << L_GREEN_SHIFT) | (blue << L_BLUE_SHIFT)); |
343 | 0 | } |
344 | 0 | } else { |
345 | 0 | const float *features = f_[t]; |
346 | 0 | for (int y = 0; y < num_features; ++y, im_y += im_height) { |
347 | 0 | float pixel = features[y * feature_factor]; |
348 | | // 1 or 2 features use greyscale. |
349 | 0 | int red = ClipToRange<int>(IntCastRounded((pixel + 1.0f) * 127.5f), 0, 255); |
350 | 0 | int green = red, blue = red; |
351 | 0 | if (feature_factor == 3) { |
352 | | // With 3 features assume RGB color. |
353 | 0 | pixel = features[y * feature_factor + 1]; |
354 | 0 | green = ClipToRange<int>(IntCastRounded((pixel + 1.0f) * 127.5f), 0, 255); |
355 | 0 | pixel = features[y * feature_factor + 2]; |
356 | 0 | blue = ClipToRange<int>(IntCastRounded((pixel + 1.0f) * 127.5f), 0, 255); |
357 | 0 | } else if (num_features > 3) { |
358 | | // More than 3 features use false yellow/blue color, assuming a signed |
359 | | // input in the range [-1,1]. |
360 | 0 | red = ClipToRange<int>(IntCastRounded(std::fabs(pixel) * 255), 0, 255); |
361 | 0 | if (pixel >= 0) { |
362 | 0 | green = red; |
363 | 0 | blue = 0; |
364 | 0 | } else { |
365 | 0 | blue = red; |
366 | 0 | green = red = 0; |
367 | 0 | } |
368 | 0 | } |
369 | 0 | pixSetPixel(pix, im_x, im_y, |
370 | 0 | (red << L_RED_SHIFT) | (green << L_GREEN_SHIFT) | (blue << L_BLUE_SHIFT)); |
371 | 0 | } |
372 | 0 | } |
373 | 0 | } while (index.Increment()); |
374 | 0 | return pix; |
375 | 0 | } |
376 | | |
377 | | // Prints the first and last num timesteps of the array for each feature. |
378 | 0 | void NetworkIO::Print(int num) const { |
379 | 0 | int num_features = NumFeatures(); |
380 | 0 | for (int y = 0; y < num_features; ++y) { |
381 | 0 | for (int t = 0; t < Width(); ++t) { |
382 | 0 | if (num == 0 || t < num || t + num >= Width()) { |
383 | 0 | if (int_mode_) { |
384 | 0 | tprintf(" %g", static_cast<float>(i_[t][y]) / INT8_MAX); |
385 | 0 | } else { |
386 | 0 | tprintf(" %g", f_[t][y]); |
387 | 0 | } |
388 | 0 | } |
389 | 0 | } |
390 | 0 | tprintf("\n"); |
391 | 0 | } |
392 | 0 | } |
393 | | |
394 | | // Copies a single time step from src. |
395 | 69.5M | void NetworkIO::CopyTimeStepFrom(int dest_t, const NetworkIO &src, int src_t) { |
396 | 69.5M | ASSERT_HOST(int_mode_ == src.int_mode_); |
397 | 69.5M | if (int_mode_) { |
398 | 69.5M | memcpy(i_[dest_t], src.i_[src_t], i_.dim2() * sizeof(i_[0][0])); |
399 | 69.5M | } else { |
400 | 0 | memcpy(f_[dest_t], src.f_[src_t], f_.dim2() * sizeof(f_[0][0])); |
401 | 0 | } |
402 | 69.5M | } |
403 | | |
404 | | // Copies a part of single time step from src. |
405 | | void NetworkIO::CopyTimeStepGeneral(int dest_t, int dest_offset, int num_features, |
406 | 2.51G | const NetworkIO &src, int src_t, int src_offset) { |
407 | 2.51G | ASSERT_HOST(int_mode_ == src.int_mode_); |
408 | 2.51G | if (int_mode_) { |
409 | 2.51G | memcpy(i_[dest_t] + dest_offset, src.i_[src_t] + src_offset, num_features * sizeof(i_[0][0])); |
410 | 2.51G | } else { |
411 | 0 | memcpy(f_[dest_t] + dest_offset, src.f_[src_t] + src_offset, num_features * sizeof(f_[0][0])); |
412 | 0 | } |
413 | 2.51G | } |
414 | | |
415 | | // Sets the given range to random values. |
416 | 60.2M | void NetworkIO::Randomize(int t, int offset, int num_features, TRand *randomizer) { |
417 | 60.2M | if (int_mode_) { |
418 | 60.2M | int8_t *line = i_[t] + offset; |
419 | 147M | for (int i = 0; i < num_features; ++i) { |
420 | 87.2M | line[i] = IntCastRounded(randomizer->SignedRand(INT8_MAX)); |
421 | 87.2M | } |
422 | 60.2M | } else { |
423 | | // float mode. |
424 | 0 | float *line = f_[t] + offset; |
425 | 0 | for (int i = 0; i < num_features; ++i) { |
426 | 0 | line[i] = randomizer->SignedRand(1.0); |
427 | 0 | } |
428 | 0 | } |
429 | 60.2M | } |
430 | | |
431 | | // Helper returns the label and score of the best choice over a range. |
432 | | int NetworkIO::BestChoiceOverRange(int t_start, int t_end, int not_this, int null_ch, float *rating, |
433 | 0 | float *certainty) const { |
434 | 0 | if (t_end <= t_start) { |
435 | 0 | return -1; |
436 | 0 | } |
437 | 0 | int max_char = -1; |
438 | 0 | float min_score = 0.0f; |
439 | 0 | for (int c = 0; c < NumFeatures(); ++c) { |
440 | 0 | if (c == not_this || c == null_ch) { |
441 | 0 | continue; |
442 | 0 | } |
443 | 0 | ScoresOverRange(t_start, t_end, c, null_ch, rating, certainty); |
444 | 0 | if (max_char < 0 || *rating < min_score) { |
445 | 0 | min_score = *rating; |
446 | 0 | max_char = c; |
447 | 0 | } |
448 | 0 | } |
449 | 0 | ScoresOverRange(t_start, t_end, max_char, null_ch, rating, certainty); |
450 | 0 | return max_char; |
451 | 0 | } |
452 | | |
453 | | // Helper returns the rating and certainty of the choice over a range in output. |
454 | | void NetworkIO::ScoresOverRange(int t_start, int t_end, int choice, int null_ch, float *rating, |
455 | 0 | float *certainty) const { |
456 | 0 | ASSERT_HOST(!int_mode_); |
457 | 0 | *rating = 0.0f; |
458 | 0 | *certainty = 0.0f; |
459 | 0 | if (t_end <= t_start || t_end <= 0) { |
460 | 0 | return; |
461 | 0 | } |
462 | 0 | float ratings[3] = {0.0f, 0.0f, 0.0f}; |
463 | 0 | float certs[3] = {0.0f, 0.0f, 0.0f}; |
464 | 0 | for (int t = t_start; t < t_end; ++t) { |
465 | 0 | const float *line = f_[t]; |
466 | 0 | float score = ProbToCertainty(line[choice]); |
467 | 0 | float zero = ProbToCertainty(line[null_ch]); |
468 | 0 | if (t == t_start) { |
469 | 0 | ratings[2] = FLT_MAX; |
470 | 0 | ratings[1] = -score; |
471 | 0 | certs[1] = score; |
472 | 0 | } else { |
473 | 0 | for (int i = 2; i >= 1; --i) { |
474 | 0 | if (ratings[i] > ratings[i - 1]) { |
475 | 0 | ratings[i] = ratings[i - 1]; |
476 | 0 | certs[i] = certs[i - 1]; |
477 | 0 | } |
478 | 0 | } |
479 | 0 | ratings[2] -= zero; |
480 | 0 | if (zero < certs[2]) { |
481 | 0 | certs[2] = zero; |
482 | 0 | } |
483 | 0 | ratings[1] -= score; |
484 | 0 | if (score < certs[1]) { |
485 | 0 | certs[1] = score; |
486 | 0 | } |
487 | 0 | } |
488 | 0 | ratings[0] -= zero; |
489 | 0 | if (zero < certs[0]) { |
490 | 0 | certs[0] = zero; |
491 | 0 | } |
492 | 0 | } |
493 | 0 | int best_i = ratings[2] < ratings[1] ? 2 : 1; |
494 | 0 | *rating = ratings[best_i] + t_end - t_start; |
495 | 0 | *certainty = certs[best_i]; |
496 | 0 | } |
497 | | |
498 | | // Returns the index (label) of the best value at the given timestep, |
499 | | // excluding not_this and not_that, and if not null, sets the score to the |
500 | | // log of the corresponding value. |
501 | 2.57M | int NetworkIO::BestLabel(int t, int not_this, int not_that, float *score) const { |
502 | 2.57M | ASSERT_HOST(!int_mode_); |
503 | 2.57M | int best_index = -1; |
504 | 2.57M | float best_score = -FLT_MAX; |
505 | 2.57M | const float *line = f_[t]; |
506 | 288M | for (int i = 0; i < f_.dim2(); ++i) { |
507 | 285M | if (line[i] > best_score && i != not_this && i != not_that) { |
508 | 12.0M | best_score = line[i]; |
509 | 12.0M | best_index = i; |
510 | 12.0M | } |
511 | 285M | } |
512 | 2.57M | if (score != nullptr) { |
513 | 0 | *score = ProbToCertainty(best_score); |
514 | 0 | } |
515 | 2.57M | return best_index; |
516 | 2.57M | } |
517 | | |
518 | | // Returns the best start position out of [start, end) (into which all labels |
519 | | // must fit) to obtain the highest cumulative score for the given labels. |
520 | 0 | int NetworkIO::PositionOfBestMatch(const std::vector<int> &labels, int start, int end) const { |
521 | 0 | int length = labels.size(); |
522 | 0 | int last_start = end - length; |
523 | 0 | int best_start = -1; |
524 | 0 | TFloat best_score = 0; |
525 | 0 | for (int s = start; s <= last_start; ++s) { |
526 | 0 | TFloat score = ScoreOfLabels(labels, s); |
527 | 0 | if (score > best_score || best_start < 0) { |
528 | 0 | best_score = score; |
529 | 0 | best_start = s; |
530 | 0 | } |
531 | 0 | } |
532 | 0 | return best_start; |
533 | 0 | } |
534 | | |
535 | | // Returns the cumulative score of the given labels starting at start, and |
536 | | // using one label per time-step. |
537 | 0 | TFloat NetworkIO::ScoreOfLabels(const std::vector<int> &labels, int start) const { |
538 | 0 | int length = labels.size(); |
539 | 0 | TFloat score = 0; |
540 | 0 | for (int i = 0; i < length; ++i) { |
541 | 0 | score += f_(start + i, labels[i]); |
542 | 0 | } |
543 | 0 | return score; |
544 | 0 | } |
545 | | |
546 | | // Helper function sets all the outputs for a single timestep, such that |
547 | | // label has value ok_score, and the other labels share 1 - ok_score. |
548 | 0 | void NetworkIO::SetActivations(int t, int label, float ok_score) { |
549 | 0 | ASSERT_HOST(!int_mode_); |
550 | 0 | int num_classes = NumFeatures(); |
551 | 0 | float bad_score = (1.0f - ok_score) / (num_classes - 1); |
552 | 0 | float *targets = f_[t]; |
553 | 0 | for (int i = 0; i < num_classes; ++i) { |
554 | 0 | targets[i] = bad_score; |
555 | 0 | } |
556 | 0 | targets[label] = ok_score; |
557 | 0 | } |
558 | | |
559 | | // Modifies the values, only if needed, so that the given label is |
560 | | // the winner at the given time step t. |
561 | 0 | void NetworkIO::EnsureBestLabel(int t, int label) { |
562 | 0 | ASSERT_HOST(!int_mode_); |
563 | 0 | if (BestLabel(t, nullptr) != label) { |
564 | | // Output value needs enhancing. Third all the other elements and add the |
565 | | // remainder to best_label. |
566 | 0 | int num_classes = NumFeatures(); |
567 | 0 | float *targets = f_[t]; |
568 | 0 | for (int c = 0; c < num_classes; ++c) { |
569 | 0 | if (c == label) { |
570 | 0 | targets[c] += (1.0 - targets[c]) * (2 / 3.0); |
571 | 0 | } else { |
572 | 0 | targets[c] /= 3.0; |
573 | 0 | } |
574 | 0 | } |
575 | 0 | } |
576 | 0 | } |
577 | | |
578 | | // Helper function converts prob to certainty taking the minimum into account. |
579 | | /* static */ |
580 | 36.3M | float NetworkIO::ProbToCertainty(float prob) { |
581 | 36.3M | return prob > kMinProb ? std::log(prob) : kMinCertainty; |
582 | 36.3M | } |
583 | | |
584 | | // Returns true if there is any bad value that is suspiciously like a GT |
585 | | // error. Assuming that *this is the difference(gradient) between target |
586 | | // and forward output, returns true if there is a large negative value |
587 | | // (correcting a very confident output) for which there is no corresponding |
588 | | // positive value in an adjacent timestep for the same feature index. This |
589 | | // allows the box-truthed samples to make fine adjustments to position while |
590 | | // stopping other disagreements of confident output with ground truth. |
591 | 0 | bool NetworkIO::AnySuspiciousTruth(float confidence_thr) const { |
592 | 0 | int num_features = NumFeatures(); |
593 | 0 | for (int t = 0; t < Width(); ++t) { |
594 | 0 | const float *features = f_[t]; |
595 | 0 | for (int y = 0; y < num_features; ++y) { |
596 | 0 | float grad = features[y]; |
597 | 0 | if (grad < -confidence_thr) { |
598 | | // Correcting strong output. Check for movement. |
599 | 0 | if ((t == 0 || f_[t - 1][y] < confidence_thr / 2) && |
600 | 0 | (t + 1 == Width() || f_[t + 1][y] < confidence_thr / 2)) { |
601 | 0 | return true; // No strong positive on either side. |
602 | 0 | } |
603 | 0 | } |
604 | 0 | } |
605 | 0 | } |
606 | 0 | return false; |
607 | 0 | } |
608 | | |
609 | | // Reads a single timestep to floats in the range [-1, 1]. |
610 | 0 | void NetworkIO::ReadTimeStep(int t, TFloat *output) const { |
611 | 0 | if (int_mode_) { |
612 | 0 | const int8_t *line = i_[t]; |
613 | 0 | for (int i = 0; i < i_.dim2(); ++i) { |
614 | 0 | output[i] = static_cast<TFloat>(line[i]) / INT8_MAX; |
615 | 0 | } |
616 | 0 | } else { |
617 | 0 | const float *line = f_[t]; |
618 | 0 | for (int i = 0; i < f_.dim2(); ++i) { |
619 | 0 | output[i] = static_cast<TFloat>(line[i]); |
620 | 0 | } |
621 | 0 | } |
622 | 0 | } |
623 | | |
624 | | // Adds a single timestep to floats. |
625 | 0 | void NetworkIO::AddTimeStep(int t, TFloat *inout) const { |
626 | 0 | int num_features = NumFeatures(); |
627 | 0 | if (int_mode_) { |
628 | 0 | const int8_t *line = i_[t]; |
629 | 0 | for (int i = 0; i < num_features; ++i) { |
630 | 0 | inout[i] += static_cast<TFloat>(line[i]) / INT8_MAX; |
631 | 0 | } |
632 | 0 | } else { |
633 | 0 | const float *line = f_[t]; |
634 | 0 | for (int i = 0; i < num_features; ++i) { |
635 | 0 | inout[i] += line[i]; |
636 | 0 | } |
637 | 0 | } |
638 | 0 | } |
639 | | |
640 | | // Adds part of a single timestep to floats. |
641 | 0 | void NetworkIO::AddTimeStepPart(int t, int offset, int num_features, float *inout) const { |
642 | 0 | if (int_mode_) { |
643 | 0 | const int8_t *line = i_[t] + offset; |
644 | 0 | for (int i = 0; i < num_features; ++i) { |
645 | 0 | inout[i] += static_cast<float>(line[i]) / INT8_MAX; |
646 | 0 | } |
647 | 0 | } else { |
648 | 0 | const float *line = f_[t] + offset; |
649 | 0 | for (int i = 0; i < num_features; ++i) { |
650 | 0 | inout[i] += line[i]; |
651 | 0 | } |
652 | 0 | } |
653 | 0 | } |
654 | | |
655 | | // Writes a single timestep from floats in the range [-1, 1]. |
656 | 297M | void NetworkIO::WriteTimeStep(int t, const TFloat *input) { |
657 | 297M | WriteTimeStepPart(t, 0, NumFeatures(), input); |
658 | 297M | } |
659 | | |
660 | | // Writes a single timestep from floats in the range [-1, 1] writing only |
661 | | // num_features elements of input to (*this)[t], starting at offset. |
662 | 336M | void NetworkIO::WriteTimeStepPart(int t, int offset, int num_features, const TFloat *input) { |
663 | 336M | if (int_mode_) { |
664 | 333M | int8_t *line = i_[t] + offset; |
665 | 10.6G | for (int i = 0; i < num_features; ++i) { |
666 | 10.3G | line[i] = ClipToRange<int>(IntCastRounded(input[i] * INT8_MAX), -INT8_MAX, INT8_MAX); |
667 | 10.3G | } |
668 | 333M | } else { |
669 | 2.57M | float *line = f_[t] + offset; |
670 | 288M | for (int i = 0; i < num_features; ++i) { |
671 | 285M | line[i] = static_cast<float>(input[i]); |
672 | 285M | } |
673 | 2.57M | } |
674 | 336M | } |
675 | | |
676 | | // Maxpools a single time step from src. |
677 | 278M | void NetworkIO::MaxpoolTimeStep(int dest_t, const NetworkIO &src, int src_t, int *max_line) { |
678 | 278M | ASSERT_HOST(int_mode_ == src.int_mode_); |
679 | 278M | if (int_mode_) { |
680 | 278M | int dim = i_.dim2(); |
681 | 278M | int8_t *dest_line = i_[dest_t]; |
682 | 278M | const int8_t *src_line = src.i_[src_t]; |
683 | 4.72G | for (int i = 0; i < dim; ++i) { |
684 | 4.45G | if (dest_line[i] < src_line[i]) { |
685 | 811M | dest_line[i] = src_line[i]; |
686 | 811M | max_line[i] = src_t; |
687 | 811M | } |
688 | 4.45G | } |
689 | 278M | } else { |
690 | 0 | int dim = f_.dim2(); |
691 | 0 | float *dest_line = f_[dest_t]; |
692 | 0 | const float *src_line = src.f_[src_t]; |
693 | 0 | for (int i = 0; i < dim; ++i) { |
694 | 0 | if (dest_line[i] < src_line[i]) { |
695 | 0 | dest_line[i] = src_line[i]; |
696 | 0 | max_line[i] = src_t; |
697 | 0 | } |
698 | 0 | } |
699 | 0 | } |
700 | 278M | } |
701 | | |
702 | | // Runs maxpool backward, using maxes to index timesteps in *this. |
703 | 0 | void NetworkIO::MaxpoolBackward(const NetworkIO &fwd, const GENERIC_2D_ARRAY<int> &maxes) { |
704 | 0 | ASSERT_HOST(!int_mode_); |
705 | 0 | Zero(); |
706 | 0 | StrideMap::Index index(fwd.stride_map_); |
707 | 0 | do { |
708 | 0 | int t = index.t(); |
709 | 0 | const int *max_line = maxes[t]; |
710 | 0 | const float *fwd_line = fwd.f_[t]; |
711 | 0 | int num_features = fwd.f_.dim2(); |
712 | 0 | for (int i = 0; i < num_features; ++i) { |
713 | 0 | f_[max_line[i]][i] = fwd_line[i]; |
714 | 0 | } |
715 | 0 | } while (index.Increment()); |
716 | 0 | } |
717 | | |
718 | | // Returns the min over time of the maxes over features of the outputs. |
719 | 0 | float NetworkIO::MinOfMaxes() const { |
720 | 0 | float min_max = 0.0f; |
721 | 0 | int width = Width(); |
722 | 0 | int num_features = NumFeatures(); |
723 | 0 | for (int t = 0; t < width; ++t) { |
724 | 0 | float max_value = -FLT_MAX; |
725 | 0 | if (int_mode_) { |
726 | 0 | const int8_t *column = i_[t]; |
727 | 0 | for (int i = 0; i < num_features; ++i) { |
728 | 0 | if (column[i] > max_value) { |
729 | 0 | max_value = column[i]; |
730 | 0 | } |
731 | 0 | } |
732 | 0 | } else { |
733 | 0 | const float *column = f_[t]; |
734 | 0 | for (int i = 0; i < num_features; ++i) { |
735 | 0 | if (column[i] > max_value) { |
736 | 0 | max_value = column[i]; |
737 | 0 | } |
738 | 0 | } |
739 | 0 | } |
740 | 0 | if (t == 0 || max_value < min_max) { |
741 | 0 | min_max = max_value; |
742 | 0 | } |
743 | 0 | } |
744 | 0 | return min_max; |
745 | 0 | } |
746 | | |
747 | | // Computes combined results for a combiner that chooses between an existing |
748 | | // input and itself, with an additional output to indicate the choice. |
749 | 0 | void NetworkIO::CombineOutputs(const NetworkIO &base_output, const NetworkIO &combiner_output) { |
750 | 0 | int no = base_output.NumFeatures(); |
751 | 0 | ASSERT_HOST(combiner_output.NumFeatures() == no + 1); |
752 | 0 | Resize(base_output, no); |
753 | 0 | int width = Width(); |
754 | 0 | if (int_mode_) { |
755 | | // Number of outputs from base and final result. |
756 | 0 | for (int t = 0; t < width; ++t) { |
757 | 0 | int8_t *out_line = i_[t]; |
758 | 0 | const int8_t *base_line = base_output.i_[t]; |
759 | 0 | const int8_t *comb_line = combiner_output.i_[t]; |
760 | 0 | float base_weight = static_cast<float>(comb_line[no]) / INT8_MAX; |
761 | 0 | float boost_weight = 1.0f - base_weight; |
762 | 0 | for (int i = 0; i < no; ++i) { |
763 | 0 | out_line[i] = IntCastRounded(base_line[i] * base_weight + comb_line[i] * boost_weight); |
764 | 0 | } |
765 | 0 | } |
766 | 0 | } else { |
767 | 0 | for (int t = 0; t < width; ++t) { |
768 | 0 | float *out_line = f_[t]; |
769 | 0 | const float *base_line = base_output.f_[t]; |
770 | 0 | const float *comb_line = combiner_output.f_[t]; |
771 | 0 | float base_weight = comb_line[no]; |
772 | 0 | float boost_weight = 1.0f - base_weight; |
773 | 0 | for (int i = 0; i < no; ++i) { |
774 | 0 | out_line[i] = base_line[i] * base_weight + comb_line[i] * boost_weight; |
775 | 0 | } |
776 | 0 | } |
777 | 0 | } |
778 | 0 | } |
779 | | |
780 | | // Computes deltas for a combiner that chooses between 2 sets of inputs. |
// Computes deltas for a combiner that chooses between 2 sets of inputs.
// On entry *this holds the combiner's forward output (no features plus the
// choice feature at index no); on exit it holds the combiner's deltas,
// computed in place. fwd_deltas are the deltas fed back from above, and
// base_output is the output of the base network being combined with.
void NetworkIO::ComputeCombinerDeltas(const NetworkIO &fwd_deltas, const NetworkIO &base_output) {
  ASSERT_HOST(!int_mode_);
  // Compute the deltas for the combiner.
  int width = Width();
  // no = number of real outputs; feature no is the base/combiner choice.
  int no = NumFeatures() - 1;
  ASSERT_HOST(fwd_deltas.NumFeatures() == no);
  ASSERT_HOST(base_output.NumFeatures() == no);
  // Number of outputs from base and final result.
  for (int t = 0; t < width; ++t) {
    const float *delta_line = fwd_deltas.f_[t];
    const float *base_line = base_output.f_[t];
    // comb_line is both input (forward output) and output (delta) — it is
    // overwritten element-by-element below, so statement order matters.
    float *comb_line = f_[t];
    float base_weight = comb_line[no];
    float boost_weight = 1.0f - base_weight;
    // Largest |target - base| over features; decides which network was right.
    float max_base_delta = 0.0;
    for (int i = 0; i < no; ++i) {
      // What did the combiner actually produce?
      float output = base_line[i] * base_weight + comb_line[i] * boost_weight;
      // Reconstruct the target from the delta.
      float comb_target = delta_line[i] + output;
      comb_line[i] = comb_target - comb_line[i];
      float base_delta = std::fabs(comb_target - base_line[i]);
      if (base_delta > max_base_delta) {
        max_base_delta = base_delta;
      }
    }
    if (max_base_delta >= 0.5) {
      // The base network got it wrong. The combiner should output the right
      // answer and 0 for the base network.
      comb_line[no] = 0.0 - base_weight;
    } else {
      // The base network was right. The combiner should flag that.
      for (int i = 0; i < no; ++i) {
        // All other targets are 0.
        if (comb_line[i] > 0.0) {
          comb_line[i] -= 1.0;
        }
      }
      comb_line[no] = 1.0 - base_weight;
    }
  }
}
823 | | |
824 | | // Copies the array checking that the types match. |
825 | 0 | void NetworkIO::CopyAll(const NetworkIO &src) { |
826 | 0 | ASSERT_HOST(src.int_mode_ == int_mode_); |
827 | 0 | f_ = src.f_; |
828 | 0 | } |
829 | | |
830 | | // Checks that both are floats and adds the src array to *this. |
831 | 0 | void NetworkIO::AddAllToFloat(const NetworkIO &src) { |
832 | 0 | ASSERT_HOST(!int_mode_); |
833 | 0 | ASSERT_HOST(!src.int_mode_); |
834 | 0 | f_ += src.f_; |
835 | 0 | } |
836 | | |
837 | | // Subtracts the array from a float array. src must also be float. |
838 | 0 | void NetworkIO::SubtractAllFromFloat(const NetworkIO &src) { |
839 | 0 | ASSERT_HOST(!int_mode_); |
840 | 0 | ASSERT_HOST(!src.int_mode_); |
841 | 0 | f_ -= src.f_; |
842 | 0 | } |
843 | | |
844 | | // Copies src to *this, with maxabs normalization to match scale. |
845 | 0 | void NetworkIO::CopyWithNormalization(const NetworkIO &src, const NetworkIO &scale) { |
846 | 0 | ASSERT_HOST(!int_mode_); |
847 | 0 | ASSERT_HOST(!src.int_mode_); |
848 | 0 | ASSERT_HOST(!scale.int_mode_); |
849 | 0 | float src_max = src.f_.MaxAbs(); |
850 | 0 | ASSERT_HOST(std::isfinite(src_max)); |
851 | 0 | float scale_max = scale.f_.MaxAbs(); |
852 | 0 | ASSERT_HOST(std::isfinite(scale_max)); |
853 | 0 | if (src_max > 0.0f) { |
854 | 0 | float factor = scale_max / src_max; |
855 | 0 | for (int t = 0; t < src.Width(); ++t) { |
856 | 0 | const float *src_ptr = src.f_[t]; |
857 | 0 | float *dest_ptr = f_[t]; |
858 | 0 | for (int i = 0; i < src.f_.dim2(); ++i) { |
859 | 0 | dest_ptr[i] = src_ptr[i] * factor; |
860 | 0 | } |
861 | 0 | } |
862 | 0 | } else { |
863 | 0 | f_.Clear(); |
864 | 0 | } |
865 | 0 | } |
866 | | |
867 | | // Copies src to *this with independent reversal of the y dimension. |
// Copies src to *this with independent reversal of the y dimension.
// For each batch entry, row y of src is copied to row (height-1-y) of *this;
// x order within each row is preserved.
void NetworkIO::CopyWithYReversal(const NetworkIO &src) {
  int num_features = src.NumFeatures();
  Resize(src, num_features);
  StrideMap::Index b_index(src.stride_map_);
  do {
    // Number of timesteps (x positions) per row in this batch entry.
    int width = b_index.MaxIndexOfDim(FD_WIDTH) + 1;
    // fwd_index walks rows top-down; rev_index starts at the last row and
    // walks bottom-up, so each copy mirrors a whole row in y.
    StrideMap::Index fwd_index(b_index);
    StrideMap::Index rev_index(b_index);
    rev_index.AddOffset(rev_index.MaxIndexOfDim(FD_HEIGHT), FD_HEIGHT);
    do {
      int fwd_t = fwd_index.t();
      int rev_t = rev_index.t();
      // Copy the row timestep-by-timestep; x order is unchanged because
      // both t values advance forward.
      for (int x = 0; x < width; ++x) {
        CopyTimeStepFrom(rev_t++, src, fwd_t++);
      }
    } while (fwd_index.AddOffset(1, FD_HEIGHT) && rev_index.AddOffset(-1, FD_HEIGHT));
  } while (b_index.AddOffset(1, FD_BATCH));
}
886 | | |
887 | | // Copies src to *this with independent reversal of the x dimension. |
888 | 375k | void NetworkIO::CopyWithXReversal(const NetworkIO &src) { |
889 | 375k | int num_features = src.NumFeatures(); |
890 | 375k | Resize(src, num_features); |
891 | 375k | StrideMap::Index b_index(src.stride_map_); |
892 | 375k | do { |
893 | 375k | StrideMap::Index y_index(b_index); |
894 | 375k | do { |
895 | 375k | StrideMap::Index fwd_index(y_index); |
896 | 375k | StrideMap::Index rev_index(y_index); |
897 | 375k | rev_index.AddOffset(rev_index.MaxIndexOfDim(FD_WIDTH), FD_WIDTH); |
898 | 5.15M | do { |
899 | 5.15M | CopyTimeStepFrom(rev_index.t(), src, fwd_index.t()); |
900 | 5.15M | } while (fwd_index.AddOffset(1, FD_WIDTH) && rev_index.AddOffset(-1, FD_WIDTH)); |
901 | 375k | } while (y_index.AddOffset(1, FD_HEIGHT)); |
902 | 375k | } while (b_index.AddOffset(1, FD_BATCH)); |
903 | 375k | } |
904 | | |
905 | | // Copies src to *this with independent transpose of the x and y dimensions. |
906 | 375k | void NetworkIO::CopyWithXYTranspose(const NetworkIO &src) { |
907 | 375k | int num_features = src.NumFeatures(); |
908 | 375k | stride_map_ = src.stride_map_; |
909 | 375k | stride_map_.TransposeXY(); |
910 | 375k | ResizeToMap(src.int_mode(), stride_map_, num_features); |
911 | 375k | StrideMap::Index src_b_index(src.stride_map_); |
912 | 375k | StrideMap::Index dest_b_index(stride_map_); |
913 | 375k | do { |
914 | 375k | StrideMap::Index src_y_index(src_b_index); |
915 | 375k | StrideMap::Index dest_x_index(dest_b_index); |
916 | 4.82M | do { |
917 | 4.82M | StrideMap::Index src_x_index(src_y_index); |
918 | 4.82M | StrideMap::Index dest_y_index(dest_x_index); |
919 | 33.4M | do { |
920 | 33.4M | CopyTimeStepFrom(dest_y_index.t(), src, src_x_index.t()); |
921 | 33.4M | } while (src_x_index.AddOffset(1, FD_WIDTH) && dest_y_index.AddOffset(1, FD_HEIGHT)); |
922 | 4.82M | } while (src_y_index.AddOffset(1, FD_HEIGHT) && dest_x_index.AddOffset(1, FD_WIDTH)); |
923 | 375k | } while (src_b_index.AddOffset(1, FD_BATCH) && dest_b_index.AddOffset(1, FD_BATCH)); |
924 | 375k | } |
925 | | |
926 | | // Copies src to *this, at the given feature_offset, returning the total |
927 | | // feature offset after the copy. Multiple calls will stack outputs from |
928 | | // multiple sources in feature space. |
929 | 0 | int NetworkIO::CopyPacking(const NetworkIO &src, int feature_offset) { |
930 | 0 | ASSERT_HOST(int_mode_ == src.int_mode_); |
931 | 0 | int width = src.Width(); |
932 | 0 | ASSERT_HOST(width <= Width()); |
933 | 0 | int num_features = src.NumFeatures(); |
934 | 0 | ASSERT_HOST(num_features + feature_offset <= NumFeatures()); |
935 | 0 | if (int_mode_) { |
936 | 0 | for (int t = 0; t < width; ++t) { |
937 | 0 | memcpy(i_[t] + feature_offset, src.i_[t], num_features * sizeof(i_[t][0])); |
938 | 0 | } |
939 | 0 | for (int t = width; t < i_.dim1(); ++t) { |
940 | 0 | memset(i_[t], 0, num_features * sizeof(i_[t][0])); |
941 | 0 | } |
942 | 0 | } else { |
943 | 0 | for (int t = 0; t < width; ++t) { |
944 | 0 | memcpy(f_[t] + feature_offset, src.f_[t], num_features * sizeof(f_[t][0])); |
945 | 0 | } |
946 | 0 | for (int t = width; t < f_.dim1(); ++t) { |
947 | 0 | memset(f_[t], 0, num_features * sizeof(f_[t][0])); |
948 | 0 | } |
949 | 0 | } |
950 | 0 | return num_features + feature_offset; |
951 | 0 | } |
952 | | |
953 | | // Opposite of CopyPacking, fills *this with a part of src, starting at |
954 | | // feature_offset, and picking num_features. |
955 | 0 | void NetworkIO::CopyUnpacking(const NetworkIO &src, int feature_offset, int num_features) { |
956 | 0 | Resize(src, num_features); |
957 | 0 | int width = src.Width(); |
958 | 0 | ASSERT_HOST(num_features + feature_offset <= src.NumFeatures()); |
959 | 0 | if (int_mode_) { |
960 | 0 | for (int t = 0; t < width; ++t) { |
961 | 0 | memcpy(i_[t], src.i_[t] + feature_offset, num_features * sizeof(i_[t][0])); |
962 | 0 | } |
963 | 0 | } else { |
964 | 0 | for (int t = 0; t < width; ++t) { |
965 | 0 | memcpy(f_[t], src.f_[t] + feature_offset, num_features * sizeof(f_[t][0])); |
966 | 0 | } |
967 | 0 | } |
968 | 0 | } |
969 | | |
970 | | // Transposes the float part of *this into dest. |
971 | 0 | void NetworkIO::Transpose(TransposedArray *dest) const { |
972 | 0 | int width = Width(); |
973 | 0 | dest->ResizeNoInit(NumFeatures(), width); |
974 | 0 | for (int t = 0; t < width; ++t) { |
975 | 0 | dest->WriteStrided(t, f_[t]); |
976 | 0 | } |
977 | 0 | } |
978 | | |
979 | | // Clips the content of a single time-step to +/-range. |
980 | 0 | void NetworkIO::ClipVector(int t, float range) { |
981 | 0 | ASSERT_HOST(!int_mode_); |
982 | 0 | float *v = f_[t]; |
983 | 0 | int dim = f_.dim2(); |
984 | 0 | for (int i = 0; i < dim; ++i) { |
985 | 0 | v[i] = ClipToRange<float>(v[i], -range, range); |
986 | 0 | } |
987 | 0 | } |
988 | | |
989 | | // Returns the padding required for the given number of features in order |
990 | | // for the SIMD operations to be safe. |
991 | | /* static */ |
992 | 3.00M | int NetworkIO::GetPadding(int num_features) { |
993 | 3.00M | int padding = 0; |
994 | 3.00M | if (IntSimdMatrix::intSimdMatrix) { |
995 | 3.00M | padding = IntSimdMatrix::intSimdMatrix->RoundInputs(num_features) - num_features; |
996 | 3.00M | } |
997 | 3.00M | return padding; |
998 | 3.00M | } |
999 | | |
1000 | | } // namespace tesseract. |