/src/tesseract/src/lstm/networkio.cpp
Line  | Count  | Source  | 
1  |  | ///////////////////////////////////////////////////////////////////////  | 
2  |  | // File:        networkio.cpp  | 
3  |  | // Description: Network input/output data, allowing float/int implementations.  | 
4  |  | // Author:      Ray Smith  | 
5  |  | //  | 
6  |  | // (C) Copyright 2014, Google Inc.  | 
7  |  | // Licensed under the Apache License, Version 2.0 (the "License");  | 
8  |  | // you may not use this file except in compliance with the License.  | 
9  |  | // You may obtain a copy of the License at  | 
10  |  | // http://www.apache.org/licenses/LICENSE-2.0  | 
11  |  | // Unless required by applicable law or agreed to in writing, software  | 
12  |  | // distributed under the License is distributed on an "AS IS" BASIS,  | 
13  |  | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  | 
14  |  | // See the License for the specific language governing permissions and  | 
15  |  | // limitations under the License.  | 
16  |  | ///////////////////////////////////////////////////////////////////////  | 
17  |  |  | 
18  |  | #include "networkio.h"  | 
19  |  | #include <cfloat> // for FLT_MAX  | 
20  |  | #include <cmath>  | 
21  |  |  | 
22  |  | #include <allheaders.h>  | 
23  |  | #include "functions.h"  | 
24  |  | #include "statistc.h"  | 
25  |  | #include "tprintf.h"  | 
26  |  |  | 
27  |  | namespace tesseract { | 
28  |  |  | 
29  |  | // Minimum value to output for certainty.  | 
30  |  | const float kMinCertainty = -20.0f;  | 
31  |  | // Probability corresponding to kMinCertainty.  | 
32  |  | const float kMinProb = std::exp(kMinCertainty);  | 
33  |  |  | 
34  |  | // Resizes to a specific size as a 2-d temp buffer. No batches, no y-dim.  | 
35  | 0  | void NetworkIO::Resize2d(bool int_mode, int width, int num_features) { | 
36  | 0  |   stride_map_ = StrideMap();  | 
37  | 0  |   int_mode_ = int_mode;  | 
38  | 0  |   if (int_mode_) { | 
39  | 0  |     i_.ResizeNoInit(width, num_features, GetPadding(num_features));  | 
40  | 0  |   } else { | 
41  | 0  |     f_.ResizeNoInit(width, num_features);  | 
42  | 0  |   }  | 
43  | 0  | }  | 
44  |  |  | 
45  |  | // Resizes to a specific stride_map.  | 
46  | 3.19M  | void NetworkIO::ResizeToMap(bool int_mode, const StrideMap &stride_map, int num_features) { | 
47  |  |   // If this method crashes with this == nullptr,  | 
48  |  |   // it most likely got here through an uninitialized scratch element,  | 
49  |  |   // i.e. call NetworkScratch::IO::Resizexxx(), not NetworkIO::Resizexxx()!  | 
50  | 3.19M  |   stride_map_ = stride_map;  | 
51  | 3.19M  |   int_mode_ = int_mode;  | 
52  | 3.19M  |   if (int_mode_) { | 
53  | 3.00M  |     i_.ResizeNoInit(stride_map.Width(), num_features, GetPadding(num_features));  | 
54  | 3.00M  |   } else { | 
55  | 187k  |     f_.ResizeNoInit(stride_map.Width(), num_features);  | 
56  | 187k  |   }  | 
57  | 3.19M  |   ZeroInvalidElements();  | 
58  | 3.19M  | }  | 
59  |  |  | 
60  |  | // Shrinks the image size by x_scale,y_scale, and uses the given number of features.  | 
61  | 187k  | void NetworkIO::ResizeScaled(const NetworkIO &src, int x_scale, int y_scale, int num_features) { | 
62  | 187k  |   StrideMap stride_map = src.stride_map_;  | 
63  | 187k  |   stride_map.ScaleXY(x_scale, y_scale);  | 
64  | 187k  |   ResizeToMap(src.int_mode_, stride_map, num_features);  | 
65  | 187k  | }  | 
66  |  |  | 
67  |  | // Resizes to just 1 x-coord, whatever the input.  | 
68  | 187k  | void NetworkIO::ResizeXTo1(const NetworkIO &src, int num_features) { | 
69  | 187k  |   StrideMap stride_map = src.stride_map_;  | 
70  | 187k  |   stride_map.ReduceWidthTo1();  | 
71  | 187k  |   ResizeToMap(src.int_mode_, stride_map, num_features);  | 
72  | 187k  | }  | 
73  |  |  | 
74  |  | // Initializes the entire array to zero.  | 
75  | 0  | void NetworkIO::Zero() { | 
76  | 0  |   int width = Width();  | 
77  |  |   // Zero out everything, column by column, in case it is aligned.  | 
78  | 0  |   for (int t = 0; t < width; ++t) { | 
79  | 0  |     ZeroTimeStep(t);  | 
80  | 0  |   }  | 
81  | 0  | }  | 
82  |  |  | 
83  |  | // Initializes to zero all elements of the array that do not correspond to  | 
84  |  | // valid image positions. (If a batch of different-sized images is packed  | 
85  |  | // together, then there will be padding pixels.)  | 
86  | 3.56M  | void NetworkIO::ZeroInvalidElements() { | 
87  | 3.56M  |   int num_features = NumFeatures();  | 
88  | 3.56M  |   int full_width = stride_map_.Size(FD_WIDTH);  | 
89  | 3.56M  |   int full_height = stride_map_.Size(FD_HEIGHT);  | 
90  | 3.56M  |   StrideMap::Index b_index(stride_map_);  | 
91  | 3.56M  |   do { | 
92  | 3.56M  |     int end_x = b_index.MaxIndexOfDim(FD_WIDTH) + 1;  | 
93  | 3.56M  |     if (end_x < full_width) { | 
94  |  |       // The width is small, so fill for every valid y.  | 
95  | 0  |       StrideMap::Index y_index(b_index);  | 
96  | 0  |       int fill_size = num_features * (full_width - end_x);  | 
97  | 0  |       do { | 
98  | 0  |         StrideMap::Index z_index(y_index);  | 
99  | 0  |         z_index.AddOffset(end_x, FD_WIDTH);  | 
100  | 0  |         if (int_mode_) { | 
101  | 0  |           ZeroVector(fill_size, i_[z_index.t()]);  | 
102  | 0  |         } else { | 
103  | 0  |           ZeroVector(fill_size, f_[z_index.t()]);  | 
104  | 0  |         }  | 
105  | 0  |       } while (y_index.AddOffset(1, FD_HEIGHT));  | 
106  | 0  |     }  | 
107  | 3.56M  |     int end_y = b_index.MaxIndexOfDim(FD_HEIGHT) + 1;  | 
108  | 3.56M  |     if (end_y < full_height) { | 
109  |  |       // The height is small, so fill in the space in one go.  | 
110  | 0  |       StrideMap::Index y_index(b_index);  | 
111  | 0  |       y_index.AddOffset(end_y, FD_HEIGHT);  | 
112  | 0  |       int fill_size = num_features * full_width * (full_height - end_y);  | 
113  | 0  |       if (int_mode_) { | 
114  | 0  |         ZeroVector(fill_size, i_[y_index.t()]);  | 
115  | 0  |       } else { | 
116  | 0  |         ZeroVector(fill_size, f_[y_index.t()]);  | 
117  | 0  |       }  | 
118  | 0  |     }  | 
119  | 3.56M  |   } while (b_index.AddOffset(1, FD_BATCH));  | 
120  | 3.56M  | }  | 
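// Editor's note (illustrative example, not part of the original source): if a
// batch packs a 40-pixel-wide and a 60-pixel-wide line image into one stride
// map, full_width is 60, so for the narrower image end_x = 40 < 60 and the
// loop above zeroes num_features * 20 trailing values on each of its rows.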
121  |  |  | 
122  |  | // Helper computes a black point and white point to contrast-enhance an image.  | 
123  |  | // The computation assumes that the image is of a single line of text, so a  | 
124  |  | // horizontal scanline through the middle of the image passes through at least  | 
125  |  | // some of it, making local minima and maxima a good proxy for black and white  | 
126  |  | // pixel samples.  | 
127  | 187k  | static void ComputeBlackWhite(Image pix, float *black, float *white) { | 
128  | 187k  |   int width = pixGetWidth(pix);  | 
129  | 187k  |   int height = pixGetHeight(pix);  | 
130  | 187k  |   STATS mins(0, 255), maxes(0, 255);  | 
131  | 187k  |   if (width >= 3) { | 
132  | 187k  |     int y = height / 2;  | 
133  | 187k  |     l_uint32 *line = pixGetData(pix) + pixGetWpl(pix) * y;  | 
134  | 187k  |     int prev = GET_DATA_BYTE(line, 0);  | 
135  | 187k  |     int curr = GET_DATA_BYTE(line, 1);  | 
136  | 7.71M  |     for (int x = 1; x + 1 < width; ++x) { | 
137  | 7.53M  |       int next = GET_DATA_BYTE(line, x + 1);  | 
138  | 7.53M  |       if ((curr < prev && curr <= next) || (curr <= prev && curr < next)) { | 
139  |  |         // Local minimum.  | 
140  | 1.46M  |         mins.add(curr, 1);  | 
141  | 1.46M  |       }  | 
142  | 7.53M  |       if ((curr > prev && curr >= next) || (curr >= prev && curr > next)) { | 
143  |  |         // Local maximum.  | 
144  | 1.46M  |         maxes.add(curr, 1);  | 
145  | 1.46M  |       }  | 
146  | 7.53M  |       prev = curr;  | 
147  | 7.53M  |       curr = next;  | 
148  | 7.53M  |     }  | 
149  | 187k  |   }  | 
150  | 187k  |   if (mins.get_total() == 0) { | 
151  | 15.1k  |     mins.add(0, 1);  | 
152  | 15.1k  |   }  | 
153  | 187k  |   if (maxes.get_total() == 0) { | 
154  | 14.7k  |     maxes.add(255, 1);  | 
155  | 14.7k  |   }  | 
156  | 187k  |   *black = mins.ile(0.25);  | 
157  | 187k  |   *white = maxes.ile(0.75);  | 
158  | 187k  | }  | 
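// Editor's note: the function above samples only the middle scanline and takes
// the 25th percentile of the local minima as black and the 75th percentile of
// the local maxima as white. A minimal standalone sketch of the same idea on a
// plain byte buffer (hypothetical helper, assuming <vector> and <algorithm> are
// available and the row is 8-bit grayscale; the real code above uses Leptonica
// accessors and the STATS class instead):
static void SketchBlackWhite(const std::vector<uint8_t> &row, float *black, float *white) {
  std::vector<int> mins, maxes;
  for (size_t x = 1; x + 1 < row.size(); ++x) {
    int prev = row[x - 1], curr = row[x], next = row[x + 1];
    if ((curr < prev && curr <= next) || (curr <= prev && curr < next)) {
      mins.push_back(curr);   // local minimum: candidate black sample.
    }
    if ((curr > prev && curr >= next) || (curr >= prev && curr > next)) {
      maxes.push_back(curr);  // local maximum: candidate white sample.
    }
  }
  if (mins.empty()) mins.push_back(0);
  if (maxes.empty()) maxes.push_back(255);
  std::sort(mins.begin(), mins.end());
  std::sort(maxes.begin(), maxes.end());
  *black = mins[mins.size() / 4];          // ~25th percentile of the dark samples.
  *white = maxes[(maxes.size() * 3) / 4];  // ~75th percentile of the bright samples.
}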
159  |  |  | 
160  |  | // Sets up the array from the given image, using the currently set int_mode_.  | 
161  |  | // If the image width doesn't match the shape, the image is truncated or padded  | 
162  |  | // with noise to match.  | 
163  | 187k  | void NetworkIO::FromPix(const StaticShape &shape, const Image pix, TRand *randomizer) { | 
164  | 187k  |   std::vector<Image> pixes(1, pix);  | 
165  | 187k  |   FromPixes(shape, pixes, randomizer);  | 
166  | 187k  | }  | 
167  |  |  | 
168  |  | // Sets up the array from the given set of images, using the currently set  | 
169  |  | // int_mode_. If the image width doesn't match the shape, the images are  | 
170  |  | // truncated or padded with noise to match.  | 
171  |  | void NetworkIO::FromPixes(const StaticShape &shape, const std::vector<Image> &pixes,  | 
172  | 187k  |                           TRand *randomizer) { | 
173  | 187k  |   int target_height = shape.height();  | 
174  | 187k  |   int target_width = shape.width();  | 
175  | 187k  |   std::vector<std::pair<int, int>> h_w_pairs;  | 
176  | 187k  |   for (auto &&pix : pixes) { | 
177  | 187k  |     Image var_pix = pix;  | 
178  | 187k  |     int width = pixGetWidth(var_pix);  | 
179  | 187k  |     if (target_width != 0) { | 
180  | 0  |       width = target_width;  | 
181  | 0  |     }  | 
182  | 187k  |     int height = pixGetHeight(var_pix);  | 
183  | 187k  |     if (target_height != 0) { | 
184  | 187k  |       height = target_height;  | 
185  | 187k  |     }  | 
186  | 187k  |     h_w_pairs.emplace_back(height, width);  | 
187  | 187k  |   }  | 
188  | 187k  |   stride_map_.SetStride(h_w_pairs);  | 
189  | 187k  |   ResizeToMap(int_mode(), stride_map_, shape.depth());  | 
190  |  |   // Iterate over the images again to copy the data.  | 
191  | 375k  |   for (size_t b = 0; b < pixes.size(); ++b) { | 
192  | 187k  |     Image pix = pixes[b];  | 
193  | 187k  |     float black = 0.0f, white = 255.0f;  | 
194  | 187k  |     if (shape.depth() != 3) { | 
195  | 187k  |       ComputeBlackWhite(pix, &black, &white);  | 
196  | 187k  |     }  | 
197  | 187k  |     float contrast = (white - black) / 2.0f;  | 
198  | 187k  |     if (contrast <= 0.0f) { | 
199  | 17  |       contrast = 1.0f;  | 
200  | 17  |     }  | 
201  | 187k  |     if (shape.height() == 1) { | 
202  | 0  |       Copy1DGreyImage(b, pix, black, contrast, randomizer);  | 
203  | 187k  |     } else { | 
204  | 187k  |       Copy2DImage(b, pix, black, contrast, randomizer);  | 
205  | 187k  |     }  | 
206  | 187k  |   }  | 
207  | 187k  | }  | 
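// Editor's note (worked example with illustrative numbers): if ComputeBlackWhite
// returns black = 30 and white = 230, then contrast = (230 - 30) / 2 = 100, and
// the interval [black, black + 2 * contrast] = [30, 230] is mapped onto [-1, 1]
// by SetPixel below (with clipping to that range in int mode).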
208  |  |  | 
209  |  | // Copies the given pix to *this at the given batch index, stretching and  | 
210  |  | // clipping the pixel values so that [black, black + 2*contrast] maps to the  | 
211  |  | // dynamic range of *this, i.e. [-1, 1] for float and [-127, 127] for int.  | 
212  |  | // This is a 2-d operation in the sense that the output depth is the number  | 
213  |  | // of input channels, the height is the height of the image, and the width  | 
214  |  | // is the width of the image, or truncated/padded with noise if the width  | 
215  |  | // is a fixed size.  | 
216  | 187k  | void NetworkIO::Copy2DImage(int batch, Image pix, float black, float contrast, TRand *randomizer) { | 
217  | 187k  |   int width = pixGetWidth(pix);  | 
218  | 187k  |   int height = pixGetHeight(pix);  | 
219  | 187k  |   int wpl = pixGetWpl(pix);  | 
220  | 187k  |   StrideMap::Index index(stride_map_);  | 
221  | 187k  |   index.AddOffset(batch, FD_BATCH);  | 
222  | 187k  |   int t = index.t();  | 
223  | 187k  |   int target_height = stride_map_.Size(FD_HEIGHT);  | 
224  | 187k  |   int target_width = stride_map_.Size(FD_WIDTH);  | 
225  | 187k  |   int num_features = NumFeatures();  | 
226  | 187k  |   bool color = num_features == 3;  | 
227  | 187k  |   if (width > target_width) { | 
228  | 0  |     width = target_width;  | 
229  | 0  |   }  | 
230  | 187k  |   uint32_t *line = pixGetData(pix);  | 
231  | 6.94M  |   for (int y = 0; y < target_height; ++y, line += wpl) { | 
232  | 6.75M  |     int x = 0;  | 
233  | 6.75M  |     if (y < height) { | 
234  | 291M  |       for (x = 0; x < width; ++x, ++t) { | 
235  | 284M  |         if (color) { | 
236  | 0  |           int f = 0;  | 
237  | 0  |           for (int c = COLOR_RED; c <= COLOR_BLUE; ++c) { | 
238  | 0  |             int pixel = GET_DATA_BYTE(line + x, c);  | 
239  | 0  |             SetPixel(t, f++, pixel, black, contrast);  | 
240  | 0  |           }  | 
241  | 284M  |         } else { | 
242  | 284M  |           int pixel = GET_DATA_BYTE(line, x);  | 
243  | 284M  |           SetPixel(t, 0, pixel, black, contrast);  | 
244  | 284M  |         }  | 
245  | 284M  |       }  | 
246  | 6.75M  |     }  | 
247  | 6.75M  |     for (; x < target_width; ++x) { | 
248  | 0  |       Randomize(t++, 0, num_features, randomizer);  | 
249  | 0  |     }  | 
250  | 6.75M  |   }  | 
251  | 187k  | }  | 
252  |  |  | 
253  |  | // Copies the given pix to *this at the given batch index, as Copy2DImage  | 
254  |  | // above, except that the output depth is the height of the input image, the  | 
255  |  | // output height is 1, and the output width is as for Copy2DImage.  | 
256  |  | // The image is thus treated as a 1-d set of vertical pixel strips.  | 
257  |  | void NetworkIO::Copy1DGreyImage(int batch, Image pix, float black, float contrast,  | 
258  | 0  |                                 TRand *randomizer) { | 
259  | 0  |   int width = pixGetWidth(pix);  | 
260  | 0  |   int height = pixGetHeight(pix);  | 
261  | 0  |   ASSERT_HOST(height == NumFeatures());  | 
262  | 0  |   int wpl = pixGetWpl(pix);  | 
263  | 0  |   StrideMap::Index index(stride_map_);  | 
264  | 0  |   index.AddOffset(batch, FD_BATCH);  | 
265  | 0  |   int t = index.t();  | 
266  | 0  |   int target_width = stride_map_.Size(FD_WIDTH);  | 
267  | 0  |   if (width > target_width) { | 
268  | 0  |     width = target_width;  | 
269  | 0  |   }  | 
270  | 0  |   int x;  | 
271  | 0  |   for (x = 0; x < width; ++x, ++t) { | 
272  | 0  |     for (int y = 0; y < height; ++y) { | 
273  | 0  |       uint32_t *line = pixGetData(pix) + wpl * y;  | 
274  | 0  |       int pixel = GET_DATA_BYTE(line, x);  | 
275  | 0  |       SetPixel(t, y, pixel, black, contrast);  | 
276  | 0  |     }  | 
277  | 0  |   }  | 
278  | 0  |   for (; x < target_width; ++x) { | 
279  | 0  |     Randomize(t++, 0, height, randomizer);  | 
280  | 0  |   }  | 
281  | 0  | }  | 
282  |  |  | 
283  |  | // Helper stores the pixel value in i_ or f_ according to int_mode_.  | 
284  |  | // t: the index from the StrideMap corresponding to the current  | 
285  |  | //   [batch,y,x] position  | 
286  |  | // f: the index into the depth/channel  | 
287  |  | // pixel: the value of the pixel from the image (in one channel)  | 
288  |  | // black: the pixel value to map to the low end of the range of *this  | 
289  |  | // contrast: the range of pixel values to stretch to half the range of *this.  | 
290  | 284M  | void NetworkIO::SetPixel(int t, int f, int pixel, float black, float contrast) { | 
291  | 284M  |   float float_pixel = (pixel - black) / contrast - 1.0f;  | 
292  | 284M  |   if (int_mode_) { | 
293  | 284M  |     i_[t][f] = ClipToRange<int>(IntCastRounded((INT8_MAX + 1) * float_pixel), -INT8_MAX, INT8_MAX);  | 
294  | 284M  |   } else { | 
295  | 0  |     f_[t][f] = float_pixel;  | 
296  | 0  |   }  | 
297  | 284M  | }  | 
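// Editor's note (worked example, values illustrative): with black = 30 and
// contrast = 100, pixel = 130 gives float_pixel = (130 - 30) / 100 - 1 = 0.0 and
// an int value of ClipToRange(IntCastRounded(128 * 0.0), -127, 127) = 0, while
// pixel = 230 gives float_pixel = 1.0 and an int value of 127.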
298  |  |  | 
299  |  | // Converts the array to a Pix. Must be pixDestroyed after use.  | 
300  | 0  | Image NetworkIO::ToPix() const { | 
301  |  |   // Compute the image dimensions and the feature multiplication factor.  | 
302  | 0  |   int im_width = stride_map_.Size(FD_WIDTH);  | 
303  | 0  |   int im_height = stride_map_.Size(FD_HEIGHT);  | 
304  | 0  |   int num_features = NumFeatures();  | 
305  | 0  |   int feature_factor = 1;  | 
306  | 0  |   if (num_features == 3) { | 
307  |  |     // Special hack for color.  | 
308  | 0  |     num_features = 1;  | 
309  | 0  |     feature_factor = 3;  | 
310  | 0  |   }  | 
311  | 0  |   Image pix = pixCreate(im_width, im_height * num_features, 32);  | 
312  | 0  |   StrideMap::Index index(stride_map_);  | 
313  | 0  |   do { | 
314  | 0  |     int im_x = index.index(FD_WIDTH);  | 
315  | 0  |     int top_im_y = index.index(FD_HEIGHT);  | 
316  | 0  |     int im_y = top_im_y;  | 
317  | 0  |     int t = index.t();  | 
318  | 0  |     if (int_mode_) { | 
319  | 0  |       const int8_t *features = i_[t];  | 
320  | 0  |       for (int y = 0; y < num_features; ++y, im_y += im_height) { | 
321  | 0  |         int pixel = features[y * feature_factor];  | 
322  |  |         // 1 or 2 features use greyscale.  | 
323  | 0  |         int red = ClipToRange<int>(pixel + 128, 0, 255);  | 
324  | 0  |         int green = red, blue = red;  | 
325  | 0  |         if (feature_factor == 3) { | 
326  |  |           // With 3 features assume RGB color.  | 
327  | 0  |           green = ClipToRange<int>(features[y * feature_factor + 1] + 128, 0, 255);  | 
328  | 0  |           blue = ClipToRange<int>(features[y * feature_factor + 2] + 128, 0, 255);  | 
329  | 0  |         } else if (num_features > 3) { | 
330  |  |           // More than 3 features use false yellow/blue color, assuming a signed  | 
331  |  |           // input in the range [-1,1].  | 
332  | 0  |           red = abs(pixel) * 2;  | 
333  | 0  |           if (pixel >= 0) { | 
334  | 0  |             green = red;  | 
335  | 0  |             blue = 0;  | 
336  | 0  |           } else { | 
337  | 0  |             blue = red;  | 
338  | 0  |             green = red = 0;  | 
339  | 0  |           }  | 
340  | 0  |         }  | 
341  | 0  |         pixSetPixel(pix, im_x, im_y,  | 
342  | 0  |                     (red << L_RED_SHIFT) | (green << L_GREEN_SHIFT) | (blue << L_BLUE_SHIFT));  | 
343  | 0  |       }  | 
344  | 0  |     } else { | 
345  | 0  |       const float *features = f_[t];  | 
346  | 0  |       for (int y = 0; y < num_features; ++y, im_y += im_height) { | 
347  | 0  |         float pixel = features[y * feature_factor];  | 
348  |  |         // 1 or 2 features use greyscale.  | 
349  | 0  |         int red = ClipToRange<int>(IntCastRounded((pixel + 1.0f) * 127.5f), 0, 255);  | 
350  | 0  |         int green = red, blue = red;  | 
351  | 0  |         if (feature_factor == 3) { | 
352  |  |           // With 3 features assume RGB color.  | 
353  | 0  |           pixel = features[y * feature_factor + 1];  | 
354  | 0  |           green = ClipToRange<int>(IntCastRounded((pixel + 1.0f) * 127.5f), 0, 255);  | 
355  | 0  |           pixel = features[y * feature_factor + 2];  | 
356  | 0  |           blue = ClipToRange<int>(IntCastRounded((pixel + 1.0f) * 127.5f), 0, 255);  | 
357  | 0  |         } else if (num_features > 3) { | 
358  |  |           // More than 3 features use false yellow/blue color, assuming a signed  | 
359  |  |           // input in the range [-1,1].  | 
360  | 0  |           red = ClipToRange<int>(IntCastRounded(std::fabs(pixel) * 255), 0, 255);  | 
361  | 0  |           if (pixel >= 0) { | 
362  | 0  |             green = red;  | 
363  | 0  |             blue = 0;  | 
364  | 0  |           } else { | 
365  | 0  |             blue = red;  | 
366  | 0  |             green = red = 0;  | 
367  | 0  |           }  | 
368  | 0  |         }  | 
369  | 0  |         pixSetPixel(pix, im_x, im_y,  | 
370  | 0  |                     (red << L_RED_SHIFT) | (green << L_GREEN_SHIFT) | (blue << L_BLUE_SHIFT));  | 
371  | 0  |       }  | 
372  | 0  |     }  | 
373  | 0  |   } while (index.Increment());  | 
374  | 0  |   return pix;  | 
375  | 0  | }  | 
376  |  |  | 
377  |  | // Prints the first and last num timesteps of the array for each feature.  | 
378  | 0  | void NetworkIO::Print(int num) const { | 
379  | 0  |   int num_features = NumFeatures();  | 
380  | 0  |   for (int y = 0; y < num_features; ++y) { | 
381  | 0  |     for (int t = 0; t < Width(); ++t) { | 
382  | 0  |       if (num == 0 || t < num || t + num >= Width()) { | 
383  | 0  |         if (int_mode_) { | 
384  | 0  |           tprintf(" %g", static_cast<float>(i_[t][y]) / INT8_MAX); | 
385  | 0  |         } else { | 
386  | 0  |           tprintf(" %g", f_[t][y]); | 
387  | 0  |         }  | 
388  | 0  |       }  | 
389  | 0  |     }  | 
390  | 0  |     tprintf("\n"); | 
391  | 0  |   }  | 
392  | 0  | }  | 
393  |  |  | 
394  |  | // Copies a single time step from src.  | 
395  | 69.5M  | void NetworkIO::CopyTimeStepFrom(int dest_t, const NetworkIO &src, int src_t) { | 
396  | 69.5M  |   ASSERT_HOST(int_mode_ == src.int_mode_);  | 
397  | 69.5M  |   if (int_mode_) { | 
398  | 69.5M  |     memcpy(i_[dest_t], src.i_[src_t], i_.dim2() * sizeof(i_[0][0]));  | 
399  | 69.5M  |   } else { | 
400  | 0  |     memcpy(f_[dest_t], src.f_[src_t], f_.dim2() * sizeof(f_[0][0]));  | 
401  | 0  |   }  | 
402  | 69.5M  | }  | 
403  |  |  | 
404  |  | // Copies a part of a single time step from src.  | 
405  |  | void NetworkIO::CopyTimeStepGeneral(int dest_t, int dest_offset, int num_features,  | 
406  | 2.51G  |                                     const NetworkIO &src, int src_t, int src_offset) { | 
407  | 2.51G  |   ASSERT_HOST(int_mode_ == src.int_mode_);  | 
408  | 2.51G  |   if (int_mode_) { | 
409  | 2.51G  |     memcpy(i_[dest_t] + dest_offset, src.i_[src_t] + src_offset, num_features * sizeof(i_[0][0]));  | 
410  | 2.51G  |   } else { | 
411  | 0  |     memcpy(f_[dest_t] + dest_offset, src.f_[src_t] + src_offset, num_features * sizeof(f_[0][0]));  | 
412  | 0  |   }  | 
413  | 2.51G  | }  | 
414  |  |  | 
415  |  | // Sets the given range to random values.  | 
416  | 60.2M  | void NetworkIO::Randomize(int t, int offset, int num_features, TRand *randomizer) { | 
417  | 60.2M  |   if (int_mode_) { | 
418  | 60.2M  |     int8_t *line = i_[t] + offset;  | 
419  | 147M  |     for (int i = 0; i < num_features; ++i) { | 
420  | 87.2M  |       line[i] = IntCastRounded(randomizer->SignedRand(INT8_MAX));  | 
421  | 87.2M  |     }  | 
422  | 60.2M  |   } else { | 
423  |  |     // float mode.  | 
424  | 0  |     float *line = f_[t] + offset;  | 
425  | 0  |     for (int i = 0; i < num_features; ++i) { | 
426  | 0  |       line[i] = randomizer->SignedRand(1.0);  | 
427  | 0  |     }  | 
428  | 0  |   }  | 
429  | 60.2M  | }  | 
430  |  |  | 
431  |  | // Helper returns the label and score of the best choice over a range.  | 
432  |  | int NetworkIO::BestChoiceOverRange(int t_start, int t_end, int not_this, int null_ch, float *rating,  | 
433  | 0  |                                    float *certainty) const { | 
434  | 0  |   if (t_end <= t_start) { | 
435  | 0  |     return -1;  | 
436  | 0  |   }  | 
437  | 0  |   int max_char = -1;  | 
438  | 0  |   float min_score = 0.0f;  | 
439  | 0  |   for (int c = 0; c < NumFeatures(); ++c) { | 
440  | 0  |     if (c == not_this || c == null_ch) { | 
441  | 0  |       continue;  | 
442  | 0  |     }  | 
443  | 0  |     ScoresOverRange(t_start, t_end, c, null_ch, rating, certainty);  | 
444  | 0  |     if (max_char < 0 || *rating < min_score) { | 
445  | 0  |       min_score = *rating;  | 
446  | 0  |       max_char = c;  | 
447  | 0  |     }  | 
448  | 0  |   }  | 
449  | 0  |   ScoresOverRange(t_start, t_end, max_char, null_ch, rating, certainty);  | 
450  | 0  |   return max_char;  | 
451  | 0  | }  | 
452  |  |  | 
453  |  | // Helper returns the rating and certainty of the choice over a range in output.  | 
454  |  | void NetworkIO::ScoresOverRange(int t_start, int t_end, int choice, int null_ch, float *rating,  | 
455  | 0  |                                 float *certainty) const { | 
456  | 0  |   ASSERT_HOST(!int_mode_);  | 
457  | 0  |   *rating = 0.0f;  | 
458  | 0  |   *certainty = 0.0f;  | 
459  | 0  |   if (t_end <= t_start || t_end <= 0) { | 
460  | 0  |     return;  | 
461  | 0  |   }  | 
462  | 0  |   float ratings[3] = {0.0f, 0.0f, 0.0f}; | 
463  | 0  |   float certs[3] = {0.0f, 0.0f, 0.0f}; | 
464  | 0  |   for (int t = t_start; t < t_end; ++t) { | 
465  | 0  |     const float *line = f_[t];  | 
466  | 0  |     float score = ProbToCertainty(line[choice]);  | 
467  | 0  |     float zero = ProbToCertainty(line[null_ch]);  | 
468  | 0  |     if (t == t_start) { | 
469  | 0  |       ratings[2] = FLT_MAX;  | 
470  | 0  |       ratings[1] = -score;  | 
471  | 0  |       certs[1] = score;  | 
472  | 0  |     } else { | 
473  | 0  |       for (int i = 2; i >= 1; --i) { | 
474  | 0  |         if (ratings[i] > ratings[i - 1]) { | 
475  | 0  |           ratings[i] = ratings[i - 1];  | 
476  | 0  |           certs[i] = certs[i - 1];  | 
477  | 0  |         }  | 
478  | 0  |       }  | 
479  | 0  |       ratings[2] -= zero;  | 
480  | 0  |       if (zero < certs[2]) { | 
481  | 0  |         certs[2] = zero;  | 
482  | 0  |       }  | 
483  | 0  |       ratings[1] -= score;  | 
484  | 0  |       if (score < certs[1]) { | 
485  | 0  |         certs[1] = score;  | 
486  | 0  |       }  | 
487  | 0  |     }  | 
488  | 0  |     ratings[0] -= zero;  | 
489  | 0  |     if (zero < certs[0]) { | 
490  | 0  |       certs[0] = zero;  | 
491  | 0  |     }  | 
492  | 0  |   }  | 
493  | 0  |   int best_i = ratings[2] < ratings[1] ? 2 : 1;  | 
494  | 0  |   *rating = ratings[best_i] + t_end - t_start;  | 
495  | 0  |   *certainty = certs[best_i];  | 
496  | 0  | }  | 
497  |  |  | 
498  |  | // Returns the index (label) of the best value at the given timestep,  | 
499  |  | // excluding not_this and not_that, and if not null, sets the score to the  | 
500  |  | // log of the corresponding value.  | 
501  | 2.57M  | int NetworkIO::BestLabel(int t, int not_this, int not_that, float *score) const { | 
502  | 2.57M  |   ASSERT_HOST(!int_mode_);  | 
503  | 2.57M  |   int best_index = -1;  | 
504  | 2.57M  |   float best_score = -FLT_MAX;  | 
505  | 2.57M  |   const float *line = f_[t];  | 
506  | 288M  |   for (int i = 0; i < f_.dim2(); ++i) { | 
507  | 285M  |     if (line[i] > best_score && i != not_this && i != not_that) { | 
508  | 12.0M  |       best_score = line[i];  | 
509  | 12.0M  |       best_index = i;  | 
510  | 12.0M  |     }  | 
511  | 285M  |   }  | 
512  | 2.57M  |   if (score != nullptr) { | 
513  | 0  |     *score = ProbToCertainty(best_score);  | 
514  | 0  |   }  | 
515  | 2.57M  |   return best_index;  | 
516  | 2.57M  | }  | 
517  |  |  | 
518  |  | // Returns the best start position out of [start, end) (into which all labels  | 
519  |  | // must fit) to obtain the highest cumulative score for the given labels.  | 
520  | 0  | int NetworkIO::PositionOfBestMatch(const std::vector<int> &labels, int start, int end) const { | 
521  | 0  |   int length = labels.size();  | 
522  | 0  |   int last_start = end - length;  | 
523  | 0  |   int best_start = -1;  | 
524  | 0  |   TFloat best_score = 0;  | 
525  | 0  |   for (int s = start; s <= last_start; ++s) { | 
526  | 0  |     TFloat score = ScoreOfLabels(labels, s);  | 
527  | 0  |     if (score > best_score || best_start < 0) { | 
528  | 0  |       best_score = score;  | 
529  | 0  |       best_start = s;  | 
530  | 0  |     }  | 
531  | 0  |   }  | 
532  | 0  |   return best_start;  | 
533  | 0  | }  | 
534  |  |  | 
535  |  | // Returns the cumulative score of the given labels starting at start, and  | 
536  |  | // using one label per time-step.  | 
537  | 0  | TFloat NetworkIO::ScoreOfLabels(const std::vector<int> &labels, int start) const { | 
538  | 0  |   int length = labels.size();  | 
539  | 0  |   TFloat score = 0;  | 
540  | 0  |   for (int i = 0; i < length; ++i) { | 
541  | 0  |     score += f_(start + i, labels[i]);  | 
542  | 0  |   }  | 
543  | 0  |   return score;  | 
544  | 0  | }  | 
545  |  |  | 
546  |  | // Helper function sets all the outputs for a single timestep, such that  | 
547  |  | // label has value ok_score, and the other labels share 1 - ok_score.  | 
548  | 0  | void NetworkIO::SetActivations(int t, int label, float ok_score) { | 
549  | 0  |   ASSERT_HOST(!int_mode_);  | 
550  | 0  |   int num_classes = NumFeatures();  | 
551  | 0  |   float bad_score = (1.0f - ok_score) / (num_classes - 1);  | 
552  | 0  |   float *targets = f_[t];  | 
553  | 0  |   for (int i = 0; i < num_classes; ++i) { | 
554  | 0  |     targets[i] = bad_score;  | 
555  | 0  |   }  | 
556  | 0  |   targets[label] = ok_score;  | 
557  | 0  | }  | 
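// Editor's note (worked example): with 4 classes and ok_score = 0.9, the other
// three classes each get bad_score = (1 - 0.9) / 3 ≈ 0.0333, so the timestep
// still sums to 1 with the given label as the clear winner.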
558  |  |  | 
559  |  | // Modifies the values, only if needed, so that the given label is  | 
560  |  | // the winner at the given time step t.  | 
561  | 0  | void NetworkIO::EnsureBestLabel(int t, int label) { | 
562  | 0  |   ASSERT_HOST(!int_mode_);  | 
563  | 0  |   if (BestLabel(t, nullptr) != label) { | 
564  |  |     // Output value needs enhancing. Divide all the other elements by 3 and move  | 
565  |  |     // the given label two thirds of the way towards 1.  | 
566  | 0  |     int num_classes = NumFeatures();  | 
567  | 0  |     float *targets = f_[t];  | 
568  | 0  |     for (int c = 0; c < num_classes; ++c) { | 
569  | 0  |       if (c == label) { | 
570  | 0  |         targets[c] += (1.0 - targets[c]) * (2 / 3.0);  | 
571  | 0  |       } else { | 
572  | 0  |         targets[c] /= 3.0;  | 
573  | 0  |       }  | 
574  | 0  |     }  | 
575  | 0  |   }  | 
576  | 0  | }  | 
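// Editor's note (worked example): if targets = {0.6, 0.3, 0.1} and label = 2,
// the other entries become 0.2 and 0.1 and targets[2] becomes
// 0.1 + (1 - 0.1) * 2/3 = 0.7, so the requested label now wins and the row
// still sums to 1.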
577  |  |  | 
578  |  | // Helper function converts prob to certainty taking the minimum into account.  | 
579  |  | /* static */  | 
580  | 36.3M  | float NetworkIO::ProbToCertainty(float prob) { | 
581  | 36.3M  |   return prob > kMinProb ? std::log(prob) : kMinCertainty;  | 
582  | 36.3M  | }  | 
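// Editor's note (worked example): ProbToCertainty(0.5f) = log(0.5) ≈ -0.693,
// while any prob <= kMinProb = exp(-20) ≈ 2.06e-9 is floored to
// kMinCertainty = -20, which keeps certainties finite even for zero
// probabilities.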
583  |  |  | 
584  |  | // Returns true if there is any bad value that is suspiciously like a GT  | 
585  |  | // error. Assuming that *this is the difference(gradient) between target  | 
586  |  | // and forward output, returns true if there is a large negative value  | 
587  |  | // (correcting a very confident output) for which there is no corresponding  | 
588  |  | // positive value in an adjacent timestep for the same feature index. This  | 
589  |  | // allows the box-truthed samples to make fine adjustments to position while  | 
590  |  | // stopping other disagreements of confident output with ground truth.  | 
591  | 0  | bool NetworkIO::AnySuspiciousTruth(float confidence_thr) const { | 
592  | 0  |   int num_features = NumFeatures();  | 
593  | 0  |   for (int t = 0; t < Width(); ++t) { | 
594  | 0  |     const float *features = f_[t];  | 
595  | 0  |     for (int y = 0; y < num_features; ++y) { | 
596  | 0  |       float grad = features[y];  | 
597  | 0  |       if (grad < -confidence_thr) { | 
598  |  |         // Correcting strong output. Check for movement.  | 
599  | 0  |         if ((t == 0 || f_[t - 1][y] < confidence_thr / 2) &&  | 
600  | 0  |             (t + 1 == Width() || f_[t + 1][y] < confidence_thr / 2)) { | 
601  | 0  |           return true; // No strong positive on either side.  | 
602  | 0  |         }  | 
603  | 0  |       }  | 
604  | 0  |     }  | 
605  | 0  |   }  | 
606  | 0  |   return false;  | 
607  | 0  | }  | 
608  |  |  | 
609  |  | // Reads a single timestep to floats in the range [-1, 1].  | 
610  | 0  | void NetworkIO::ReadTimeStep(int t, TFloat *output) const { | 
611  | 0  |   if (int_mode_) { | 
612  | 0  |     const int8_t *line = i_[t];  | 
613  | 0  |     for (int i = 0; i < i_.dim2(); ++i) { | 
614  | 0  |       output[i] = static_cast<TFloat>(line[i]) / INT8_MAX;  | 
615  | 0  |     }  | 
616  | 0  |   } else { | 
617  | 0  |     const float *line = f_[t];  | 
618  | 0  |     for (int i = 0; i < f_.dim2(); ++i) { | 
619  | 0  |       output[i] = static_cast<TFloat>(line[i]);  | 
620  | 0  |     }  | 
621  | 0  |   }  | 
622  | 0  | }  | 
623  |  |  | 
624  |  | // Adds a single timestep to floats.  | 
625  | 0  | void NetworkIO::AddTimeStep(int t, TFloat *inout) const { | 
626  | 0  |   int num_features = NumFeatures();  | 
627  | 0  |   if (int_mode_) { | 
628  | 0  |     const int8_t *line = i_[t];  | 
629  | 0  |     for (int i = 0; i < num_features; ++i) { | 
630  | 0  |       inout[i] += static_cast<TFloat>(line[i]) / INT8_MAX;  | 
631  | 0  |     }  | 
632  | 0  |   } else { | 
633  | 0  |     const float *line = f_[t];  | 
634  | 0  |     for (int i = 0; i < num_features; ++i) { | 
635  | 0  |       inout[i] += line[i];  | 
636  | 0  |     }  | 
637  | 0  |   }  | 
638  | 0  | }  | 
639  |  |  | 
640  |  | // Adds part of a single timestep to floats.  | 
641  | 0  | void NetworkIO::AddTimeStepPart(int t, int offset, int num_features, float *inout) const { | 
642  | 0  |   if (int_mode_) { | 
643  | 0  |     const int8_t *line = i_[t] + offset;  | 
644  | 0  |     for (int i = 0; i < num_features; ++i) { | 
645  | 0  |       inout[i] += static_cast<float>(line[i]) / INT8_MAX;  | 
646  | 0  |     }  | 
647  | 0  |   } else { | 
648  | 0  |     const float *line = f_[t] + offset;  | 
649  | 0  |     for (int i = 0; i < num_features; ++i) { | 
650  | 0  |       inout[i] += line[i];  | 
651  | 0  |     }  | 
652  | 0  |   }  | 
653  | 0  | }  | 
654  |  |  | 
655  |  | // Writes a single timestep from floats in the range [-1, 1].  | 
656  | 297M  | void NetworkIO::WriteTimeStep(int t, const TFloat *input) { | 
657  | 297M  |   WriteTimeStepPart(t, 0, NumFeatures(), input);  | 
658  | 297M  | }  | 
659  |  |  | 
660  |  | // Writes a single timestep from floats in the range [-1, 1] writing only  | 
661  |  | // num_features elements of input to (*this)[t], starting at offset.  | 
662  | 336M  | void NetworkIO::WriteTimeStepPart(int t, int offset, int num_features, const TFloat *input) { | 
663  | 336M  |   if (int_mode_) { | 
664  | 333M  |     int8_t *line = i_[t] + offset;  | 
665  | 10.6G  |     for (int i = 0; i < num_features; ++i) { | 
666  | 10.3G  |       line[i] = ClipToRange<int>(IntCastRounded(input[i] * INT8_MAX), -INT8_MAX, INT8_MAX);  | 
667  | 10.3G  |     }  | 
668  | 333M  |   } else { | 
669  | 2.57M  |     float *line = f_[t] + offset;  | 
670  | 288M  |     for (int i = 0; i < num_features; ++i) { | 
671  | 285M  |       line[i] = static_cast<float>(input[i]);  | 
672  | 285M  |     }  | 
673  | 2.57M  |   }  | 
674  | 336M  | }  | 
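// Editor's note (worked example): in int mode an input of 0.5 is stored as
// IntCastRounded(0.5 * 127) = 64, an input of 1.2 is clipped to 127, and float
// mode stores the value unchanged (narrowed to float).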
675  |  |  | 
676  |  | // Maxpools a single time step from src.  | 
677  | 278M  | void NetworkIO::MaxpoolTimeStep(int dest_t, const NetworkIO &src, int src_t, int *max_line) { | 
678  | 278M  |   ASSERT_HOST(int_mode_ == src.int_mode_);  | 
679  | 278M  |   if (int_mode_) { | 
680  | 278M  |     int dim = i_.dim2();  | 
681  | 278M  |     int8_t *dest_line = i_[dest_t];  | 
682  | 278M  |     const int8_t *src_line = src.i_[src_t];  | 
683  | 4.72G  |     for (int i = 0; i < dim; ++i) { | 
684  | 4.45G  |       if (dest_line[i] < src_line[i]) { | 
685  | 811M  |         dest_line[i] = src_line[i];  | 
686  | 811M  |         max_line[i] = src_t;  | 
687  | 811M  |       }  | 
688  | 4.45G  |     }  | 
689  | 278M  |   } else { | 
690  | 0  |     int dim = f_.dim2();  | 
691  | 0  |     float *dest_line = f_[dest_t];  | 
692  | 0  |     const float *src_line = src.f_[src_t];  | 
693  | 0  |     for (int i = 0; i < dim; ++i) { | 
694  | 0  |       if (dest_line[i] < src_line[i]) { | 
695  | 0  |         dest_line[i] = src_line[i];  | 
696  | 0  |         max_line[i] = src_t;  | 
697  | 0  |       }  | 
698  | 0  |     }  | 
699  | 0  |   }  | 
700  | 278M  | }  | 
701  |  |  | 
702  |  | // Runs maxpool backward, using maxes to index timesteps in *this.  | 
703  | 0  | void NetworkIO::MaxpoolBackward(const NetworkIO &fwd, const GENERIC_2D_ARRAY<int> &maxes) { | 
704  | 0  |   ASSERT_HOST(!int_mode_);  | 
705  | 0  |   Zero();  | 
706  | 0  |   StrideMap::Index index(fwd.stride_map_);  | 
707  | 0  |   do { | 
708  | 0  |     int t = index.t();  | 
709  | 0  |     const int *max_line = maxes[t];  | 
710  | 0  |     const float *fwd_line = fwd.f_[t];  | 
711  | 0  |     int num_features = fwd.f_.dim2();  | 
712  | 0  |     for (int i = 0; i < num_features; ++i) { | 
713  | 0  |       f_[max_line[i]][i] = fwd_line[i];  | 
714  | 0  |     }  | 
715  | 0  |   } while (index.Increment());  | 
716  | 0  | }  | 
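// Editor's note: MaxpoolTimeStep above keeps an element-wise running maximum and
// records in max_line which source timestep supplied each winning value;
// MaxpoolBackward then routes each forward value back to exactly that timestep.
// A minimal sketch of the forward half on plain vectors (hypothetical helper,
// assuming <vector>; not part of the original file):
static void SketchMaxpoolForward(const std::vector<std::vector<float>> &src,
                                 std::vector<float> *pooled, std::vector<int> *argmax_t) {
  *pooled = src[0];                    // start from timestep 0
  argmax_t->assign(src[0].size(), 0);  // and remember it as the winner
  for (size_t t = 1; t < src.size(); ++t) {
    for (size_t i = 0; i < pooled->size(); ++i) {
      if ((*pooled)[i] < src[t][i]) {
        (*pooled)[i] = src[t][i];
        (*argmax_t)[i] = static_cast<int>(t);  // winning timestep per feature
      }
    }
  }
  // Backward (as in MaxpoolBackward above): scatter each pooled gradient back to
  // the timestep recorded in argmax_t, leaving all other timesteps at zero.
}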
717  |  |  | 
718  |  | // Returns the min over time of the maxes over features of the outputs.  | 
719  | 0  | float NetworkIO::MinOfMaxes() const { | 
720  | 0  |   float min_max = 0.0f;  | 
721  | 0  |   int width = Width();  | 
722  | 0  |   int num_features = NumFeatures();  | 
723  | 0  |   for (int t = 0; t < width; ++t) { | 
724  | 0  |     float max_value = -FLT_MAX;  | 
725  | 0  |     if (int_mode_) { | 
726  | 0  |       const int8_t *column = i_[t];  | 
727  | 0  |       for (int i = 0; i < num_features; ++i) { | 
728  | 0  |         if (column[i] > max_value) { | 
729  | 0  |           max_value = column[i];  | 
730  | 0  |         }  | 
731  | 0  |       }  | 
732  | 0  |     } else { | 
733  | 0  |       const float *column = f_[t];  | 
734  | 0  |       for (int i = 0; i < num_features; ++i) { | 
735  | 0  |         if (column[i] > max_value) { | 
736  | 0  |           max_value = column[i];  | 
737  | 0  |         }  | 
738  | 0  |       }  | 
739  | 0  |     }  | 
740  | 0  |     if (t == 0 || max_value < min_max) { | 
741  | 0  |       min_max = max_value;  | 
742  | 0  |     }  | 
743  | 0  |   }  | 
744  | 0  |   return min_max;  | 
745  | 0  | }  | 
746  |  |  | 
747  |  | // Computes combined results for a combiner that chooses between an existing  | 
748  |  | // input and itself, with an additional output to indicate the choice.  | 
749  | 0  | void NetworkIO::CombineOutputs(const NetworkIO &base_output, const NetworkIO &combiner_output) { | 
750  | 0  |   int no = base_output.NumFeatures();  | 
751  | 0  |   ASSERT_HOST(combiner_output.NumFeatures() == no + 1);  | 
752  | 0  |   Resize(base_output, no);  | 
753  | 0  |   int width = Width();  | 
754  | 0  |   if (int_mode_) { | 
755  |  |     // Number of outputs from base and final result.  | 
756  | 0  |     for (int t = 0; t < width; ++t) { | 
757  | 0  |       int8_t *out_line = i_[t];  | 
758  | 0  |       const int8_t *base_line = base_output.i_[t];  | 
759  | 0  |       const int8_t *comb_line = combiner_output.i_[t];  | 
760  | 0  |       float base_weight = static_cast<float>(comb_line[no]) / INT8_MAX;  | 
761  | 0  |       float boost_weight = 1.0f - base_weight;  | 
762  | 0  |       for (int i = 0; i < no; ++i) { | 
763  | 0  |         out_line[i] = IntCastRounded(base_line[i] * base_weight + comb_line[i] * boost_weight);  | 
764  | 0  |       }  | 
765  | 0  |     }  | 
766  | 0  |   } else { | 
767  | 0  |     for (int t = 0; t < width; ++t) { | 
768  | 0  |       float *out_line = f_[t];  | 
769  | 0  |       const float *base_line = base_output.f_[t];  | 
770  | 0  |       const float *comb_line = combiner_output.f_[t];  | 
771  | 0  |       float base_weight = comb_line[no];  | 
772  | 0  |       float boost_weight = 1.0f - base_weight;  | 
773  | 0  |       for (int i = 0; i < no; ++i) { | 
774  | 0  |         out_line[i] = base_line[i] * base_weight + comb_line[i] * boost_weight;  | 
775  | 0  |       }  | 
776  | 0  |     }  | 
777  | 0  |   }  | 
778  | 0  | }  | 
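// Editor's note (worked example, float mode): if base_line[i] = 0.8,
// comb_line[i] = 0.2 and the extra choice output comb_line[no] = 0.75, the
// combined output is 0.8 * 0.75 + 0.2 * 0.25 = 0.65, i.e. a weighted blend that
// leans towards the base network when the combiner votes for it.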
779  |  |  | 
780  |  | // Computes deltas for a combiner that chooses between 2 sets of inputs.  | 
781  | 0  | void NetworkIO::ComputeCombinerDeltas(const NetworkIO &fwd_deltas, const NetworkIO &base_output) { | 
782  | 0  |   ASSERT_HOST(!int_mode_);  | 
783  |  |   // Compute the deltas for the combiner.  | 
784  | 0  |   int width = Width();  | 
785  | 0  |   int no = NumFeatures() - 1;  | 
786  | 0  |   ASSERT_HOST(fwd_deltas.NumFeatures() == no);  | 
787  | 0  |   ASSERT_HOST(base_output.NumFeatures() == no);  | 
788  |  |   // Number of outputs from base and final result.  | 
789  | 0  |   for (int t = 0; t < width; ++t) { | 
790  | 0  |     const float *delta_line = fwd_deltas.f_[t];  | 
791  | 0  |     const float *base_line = base_output.f_[t];  | 
792  | 0  |     float *comb_line = f_[t];  | 
793  | 0  |     float base_weight = comb_line[no];  | 
794  | 0  |     float boost_weight = 1.0f - base_weight;  | 
795  | 0  |     float max_base_delta = 0.0;  | 
796  | 0  |     for (int i = 0; i < no; ++i) { | 
797  |  |       // What did the combiner actually produce?  | 
798  | 0  |       float output = base_line[i] * base_weight + comb_line[i] * boost_weight;  | 
799  |  |       // Reconstruct the target from the delta.  | 
800  | 0  |       float comb_target = delta_line[i] + output;  | 
801  | 0  |       comb_line[i] = comb_target - comb_line[i];  | 
802  | 0  |       float base_delta = std::fabs(comb_target - base_line[i]);  | 
803  | 0  |       if (base_delta > max_base_delta) { | 
804  | 0  |         max_base_delta = base_delta;  | 
805  | 0  |       }  | 
806  | 0  |     }  | 
807  | 0  |     if (max_base_delta >= 0.5) { | 
808  |  |       // The base network got it wrong. The combiner should output the right  | 
809  |  |       // answer and 0 for the base network.  | 
810  | 0  |       comb_line[no] = 0.0 - base_weight;  | 
811  | 0  |     } else { | 
812  |  |       // The base network was right. The combiner should flag that.  | 
813  | 0  |       for (int i = 0; i < no; ++i) { | 
814  |  |         // All other targets are 0.  | 
815  | 0  |         if (comb_line[i] > 0.0) { | 
816  | 0  |           comb_line[i] -= 1.0;  | 
817  | 0  |         }  | 
818  | 0  |       }  | 
819  | 0  |       comb_line[no] = 1.0 - base_weight;  | 
820  | 0  |     }  | 
821  | 0  |   }  | 
822  | 0  | }  | 
823  |  |  | 
824  |  | // Copies the array checking that the types match.  | 
825  | 0  | void NetworkIO::CopyAll(const NetworkIO &src) { | 
826  | 0  |   ASSERT_HOST(src.int_mode_ == int_mode_);  | 
827  | 0  |   f_ = src.f_;  | 
828  | 0  | }  | 
829  |  |  | 
830  |  | // Checks that both are floats and adds the src array to *this.  | 
831  | 0  | void NetworkIO::AddAllToFloat(const NetworkIO &src) { | 
832  | 0  |   ASSERT_HOST(!int_mode_);  | 
833  | 0  |   ASSERT_HOST(!src.int_mode_);  | 
834  | 0  |   f_ += src.f_;  | 
835  | 0  | }  | 
836  |  |  | 
837  |  | // Subtracts the array from a float array. src must also be float.  | 
838  | 0  | void NetworkIO::SubtractAllFromFloat(const NetworkIO &src) { | 
839  | 0  |   ASSERT_HOST(!int_mode_);  | 
840  | 0  |   ASSERT_HOST(!src.int_mode_);  | 
841  | 0  |   f_ -= src.f_;  | 
842  | 0  | }  | 
843  |  |  | 
844  |  | // Copies src to *this, with maxabs normalization to match scale.  | 
845  | 0  | void NetworkIO::CopyWithNormalization(const NetworkIO &src, const NetworkIO &scale) { | 
846  | 0  |   ASSERT_HOST(!int_mode_);  | 
847  | 0  |   ASSERT_HOST(!src.int_mode_);  | 
848  | 0  |   ASSERT_HOST(!scale.int_mode_);  | 
849  | 0  |   float src_max = src.f_.MaxAbs();  | 
850  | 0  |   ASSERT_HOST(std::isfinite(src_max));  | 
851  | 0  |   float scale_max = scale.f_.MaxAbs();  | 
852  | 0  |   ASSERT_HOST(std::isfinite(scale_max));  | 
853  | 0  |   if (src_max > 0.0f) { | 
854  | 0  |     float factor = scale_max / src_max;  | 
855  | 0  |     for (int t = 0; t < src.Width(); ++t) { | 
856  | 0  |       const float *src_ptr = src.f_[t];  | 
857  | 0  |       float *dest_ptr = f_[t];  | 
858  | 0  |       for (int i = 0; i < src.f_.dim2(); ++i) { | 
859  | 0  |         dest_ptr[i] = src_ptr[i] * factor;  | 
860  | 0  |       }  | 
861  | 0  |     }  | 
862  | 0  |   } else { | 
863  | 0  |     f_.Clear();  | 
864  | 0  |   }  | 
865  | 0  | }  | 
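// Editor's note (worked example): if src.f_ has a max-abs value of 0.5 and
// scale.f_ has a max-abs value of 2.0, every element of src is copied scaled by
// factor = 2.0 / 0.5 = 4.0, so the copy matches the magnitude of scale.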
866  |  |  | 
867  |  | // Copies src to *this with independent reversal of the y dimension.  | 
868  | 0  | void NetworkIO::CopyWithYReversal(const NetworkIO &src) { | 
869  | 0  |   int num_features = src.NumFeatures();  | 
870  | 0  |   Resize(src, num_features);  | 
871  | 0  |   StrideMap::Index b_index(src.stride_map_);  | 
872  | 0  |   do { | 
873  | 0  |     int width = b_index.MaxIndexOfDim(FD_WIDTH) + 1;  | 
874  | 0  |     StrideMap::Index fwd_index(b_index);  | 
875  | 0  |     StrideMap::Index rev_index(b_index);  | 
876  | 0  |     rev_index.AddOffset(rev_index.MaxIndexOfDim(FD_HEIGHT), FD_HEIGHT);  | 
877  | 0  |     do { | 
878  | 0  |       int fwd_t = fwd_index.t();  | 
879  | 0  |       int rev_t = rev_index.t();  | 
880  | 0  |       for (int x = 0; x < width; ++x) { | 
881  | 0  |         CopyTimeStepFrom(rev_t++, src, fwd_t++);  | 
882  | 0  |       }  | 
883  | 0  |     } while (fwd_index.AddOffset(1, FD_HEIGHT) && rev_index.AddOffset(-1, FD_HEIGHT));  | 
884  | 0  |   } while (b_index.AddOffset(1, FD_BATCH));  | 
885  | 0  | }  | 
886  |  |  | 
887  |  | // Copies src to *this with independent reversal of the x dimension.  | 
888  | 375k  | void NetworkIO::CopyWithXReversal(const NetworkIO &src) { | 
889  | 375k  |   int num_features = src.NumFeatures();  | 
890  | 375k  |   Resize(src, num_features);  | 
891  | 375k  |   StrideMap::Index b_index(src.stride_map_);  | 
892  | 375k  |   do { | 
893  | 375k  |     StrideMap::Index y_index(b_index);  | 
894  | 375k  |     do { | 
895  | 375k  |       StrideMap::Index fwd_index(y_index);  | 
896  | 375k  |       StrideMap::Index rev_index(y_index);  | 
897  | 375k  |       rev_index.AddOffset(rev_index.MaxIndexOfDim(FD_WIDTH), FD_WIDTH);  | 
898  | 5.15M  |       do { | 
899  | 5.15M  |         CopyTimeStepFrom(rev_index.t(), src, fwd_index.t());  | 
900  | 5.15M  |       } while (fwd_index.AddOffset(1, FD_WIDTH) && rev_index.AddOffset(-1, FD_WIDTH));  | 
901  | 375k  |     } while (y_index.AddOffset(1, FD_HEIGHT));  | 
902  | 375k  |   } while (b_index.AddOffset(1, FD_BATCH));  | 
903  | 375k  | }  | 
904  |  |  | 
905  |  | // Copies src to *this with independent transpose of the x and y dimensions.  | 
906  | 375k  | void NetworkIO::CopyWithXYTranspose(const NetworkIO &src) { | 
907  | 375k  |   int num_features = src.NumFeatures();  | 
908  | 375k  |   stride_map_ = src.stride_map_;  | 
909  | 375k  |   stride_map_.TransposeXY();  | 
910  | 375k  |   ResizeToMap(src.int_mode(), stride_map_, num_features);  | 
911  | 375k  |   StrideMap::Index src_b_index(src.stride_map_);  | 
912  | 375k  |   StrideMap::Index dest_b_index(stride_map_);  | 
913  | 375k  |   do { | 
914  | 375k  |     StrideMap::Index src_y_index(src_b_index);  | 
915  | 375k  |     StrideMap::Index dest_x_index(dest_b_index);  | 
916  | 4.82M  |     do { | 
917  | 4.82M  |       StrideMap::Index src_x_index(src_y_index);  | 
918  | 4.82M  |       StrideMap::Index dest_y_index(dest_x_index);  | 
919  | 33.4M  |       do { | 
920  | 33.4M  |         CopyTimeStepFrom(dest_y_index.t(), src, src_x_index.t());  | 
921  | 33.4M  |       } while (src_x_index.AddOffset(1, FD_WIDTH) && dest_y_index.AddOffset(1, FD_HEIGHT));  | 
922  | 4.82M  |     } while (src_y_index.AddOffset(1, FD_HEIGHT) && dest_x_index.AddOffset(1, FD_WIDTH));  | 
923  | 375k  |   } while (src_b_index.AddOffset(1, FD_BATCH) && dest_b_index.AddOffset(1, FD_BATCH));  | 
924  | 375k  | }  | 
925  |  |  | 
926  |  | // Copies src to *this, at the given feature_offset, returning the total  | 
927  |  | // feature offset after the copy. Multiple calls will stack outputs from  | 
928  |  | // multiple sources in feature space.  | 
929  | 0  | int NetworkIO::CopyPacking(const NetworkIO &src, int feature_offset) { | 
930  | 0  |   ASSERT_HOST(int_mode_ == src.int_mode_);  | 
931  | 0  |   int width = src.Width();  | 
932  | 0  |   ASSERT_HOST(width <= Width());  | 
933  | 0  |   int num_features = src.NumFeatures();  | 
934  | 0  |   ASSERT_HOST(num_features + feature_offset <= NumFeatures());  | 
935  | 0  |   if (int_mode_) { | 
936  | 0  |     for (int t = 0; t < width; ++t) { | 
937  | 0  |       memcpy(i_[t] + feature_offset, src.i_[t], num_features * sizeof(i_[t][0]));  | 
938  | 0  |     }  | 
939  | 0  |     for (int t = width; t < i_.dim1(); ++t) { | 
940  | 0  |       memset(i_[t], 0, num_features * sizeof(i_[t][0]));  | 
941  | 0  |     }  | 
942  | 0  |   } else { | 
943  | 0  |     for (int t = 0; t < width; ++t) { | 
944  | 0  |       memcpy(f_[t] + feature_offset, src.f_[t], num_features * sizeof(f_[t][0]));  | 
945  | 0  |     }  | 
946  | 0  |     for (int t = width; t < f_.dim1(); ++t) { | 
947  | 0  |       memset(f_[t], 0, num_features * sizeof(f_[t][0]));  | 
948  | 0  |     }  | 
949  | 0  |   }  | 
950  | 0  |   return num_features + feature_offset;  | 
951  | 0  | }  | 
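// Editor's note (illustrative usage): calling dest.CopyPacking(a, 0) and then
// dest.CopyPacking(b, a.NumFeatures()) stacks a's features at offsets
// [0, a.NumFeatures()) and b's at [a.NumFeatures(), a.NumFeatures() +
// b.NumFeatures()) within each timestep, since each call returns the next free
// feature offset.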
952  |  |  | 
953  |  | // Opposite of CopyPacking, fills *this with a part of src, starting at  | 
954  |  | // feature_offset, and picking num_features.  | 
955  | 0  | void NetworkIO::CopyUnpacking(const NetworkIO &src, int feature_offset, int num_features) { | 
956  | 0  |   Resize(src, num_features);  | 
957  | 0  |   int width = src.Width();  | 
958  | 0  |   ASSERT_HOST(num_features + feature_offset <= src.NumFeatures());  | 
959  | 0  |   if (int_mode_) { | 
960  | 0  |     for (int t = 0; t < width; ++t) { | 
961  | 0  |       memcpy(i_[t], src.i_[t] + feature_offset, num_features * sizeof(i_[t][0]));  | 
962  | 0  |     }  | 
963  | 0  |   } else { | 
964  | 0  |     for (int t = 0; t < width; ++t) { | 
965  | 0  |       memcpy(f_[t], src.f_[t] + feature_offset, num_features * sizeof(f_[t][0]));  | 
966  | 0  |     }  | 
967  | 0  |   }  | 
968  | 0  | }  | 
969  |  |  | 
970  |  | // Transposes the float part of *this into dest.  | 
971  | 0  | void NetworkIO::Transpose(TransposedArray *dest) const { | 
972  | 0  |   int width = Width();  | 
973  | 0  |   dest->ResizeNoInit(NumFeatures(), width);  | 
974  | 0  |   for (int t = 0; t < width; ++t) { | 
975  | 0  |     dest->WriteStrided(t, f_[t]);  | 
976  | 0  |   }  | 
977  | 0  | }  | 
978  |  |  | 
979  |  | // Clips the content of a single time-step to +/-range.  | 
980  | 0  | void NetworkIO::ClipVector(int t, float range) { | 
981  | 0  |   ASSERT_HOST(!int_mode_);  | 
982  | 0  |   float *v = f_[t];  | 
983  | 0  |   int dim = f_.dim2();  | 
984  | 0  |   for (int i = 0; i < dim; ++i) { | 
985  | 0  |     v[i] = ClipToRange<float>(v[i], -range, range);  | 
986  | 0  |   }  | 
987  | 0  | }  | 
988  |  |  | 
989  |  | // Returns the padding required for the given number of features in order  | 
990  |  | // for the SIMD operations to be safe.  | 
991  |  | /* static */  | 
992  | 3.00M  | int NetworkIO::GetPadding(int num_features) { | 
993  | 3.00M  |   int padding = 0;  | 
994  | 3.00M  |   if (IntSimdMatrix::intSimdMatrix) { | 
995  | 3.00M  |     padding = IntSimdMatrix::intSimdMatrix->RoundInputs(num_features) - num_features;  | 
996  | 3.00M  |   }  | 
997  | 3.00M  |   return padding;  | 
998  | 3.00M  | }  | 
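// Editor's note (illustrative, the actual rounding depends on the active SIMD
// implementation): if RoundInputs() rounds up to a multiple of 8, then
// num_features = 20 gives a padding of 24 - 20 = 4 extra int8_t columns per row
// of i_, so that SIMD code can safely read a full vector's width past the last
// real feature.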
999  |  |  | 
1000  |  | } // namespace tesseract.  |