/src/tesseract/src/lstm/parallel.cpp
///////////////////////////////////////////////////////////////////////////
// File:        parallel.cpp
// Description: Runs networks in parallel on the same input.
// Author:      Ray Smith
//
// (C) Copyright 2013, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
///////////////////////////////////////////////////////////////////////////

#ifdef HAVE_CONFIG_H
#  include "config_auto.h"
#endif

#include "parallel.h"

#ifdef _OPENMP
#  include <omp.h>
#endif

#include "functions.h" // For conditional undef of _OPENMP.
#include "networkscratch.h"

namespace tesseract {

// ni_ and no_ will be set by AddToStack.
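// The type distinguishes a generic parallel (NT_PARALLEL) from the special
// cases handled below: replicated networks (NT_REPLICATED) and the 2-d LSTM
// quad (NT_PAR_2D_LSTM).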
Parallel::Parallel(const std::string &name, NetworkType type) : Plumbing(name) {
  type_ = type;
}

// Returns the shape output from the network given an input shape (which may
// be partially unknown, i.e. zero).
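// All elements see the same input, so height and width are taken from
// stack_[0] and only the depths (feature counts) are summed. For example
// (hypothetical sizes), two LSTMs with 32 and 48 outputs stacked in a
// Parallel produce an output of depth 32 + 48 = 80.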
StaticShape Parallel::OutputShape(const StaticShape &input_shape) const {
  StaticShape result = stack_[0]->OutputShape(input_shape);
  int stack_size = stack_.size();
  for (int i = 1; i < stack_size; ++i) {
    StaticShape shape = stack_[i]->OutputShape(input_shape);
    result.set_depth(result.depth() + shape.depth());
  }
  return result;
}

// Runs forward propagation of activations on the input line.
// See NetworkCpp for a detailed discussion of the arguments.
void Parallel::Forward(bool debug, const NetworkIO &input, const TransposedArray *input_transpose,
                       NetworkScratch *scratch, NetworkIO *output) {
  bool parallel_debug = false;
  // If this parallel is a replicator of convolvers, or holds a 1-d LSTM pair,
  // or a 2-d LSTM quad, do debug locally, and don't pass the flag on.
  if (debug && type_ != NT_PARALLEL) {
    parallel_debug = true;
    debug = false;
  }
  int stack_size = stack_.size();
  if (type_ == NT_PAR_2D_LSTM) {
    // Special case, run parallel in parallel.
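    // Each element gets its own scratch result buffer, so the threads in the
    // OpenMP loop below can write their outputs without synchronization.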
    std::vector<NetworkScratch::IO> results(stack_size);
    for (int i = 0; i < stack_size; ++i) {
      results[i].Resize(input, stack_[i]->NumOutputs(), scratch);
    }
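    // Fan out: one thread per stack element, each running a full forward pass
    // over the shared input. Without OpenMP this degrades to a serial loop.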
#ifdef _OPENMP
#  pragma omp parallel for num_threads(stack_size)
#endif
    for (int i = 0; i < stack_size; ++i) {
      stack_[i]->Forward(debug, input, nullptr, scratch, results[i]);
    }
    // Now pack all the results (serially) into the output.
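    // CopyPacking concatenates a result into the output along the feature
    // dimension at out_offset and returns the offset for the next element,
    // so the packed depth matches what OutputShape() reports.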
    int out_offset = 0;
    output->Resize(*results[0], NumOutputs());
    for (int i = 0; i < stack_size; ++i) {
      out_offset = output->CopyPacking(*results[i], out_offset);
    }
  } else {
    // Revolving intermediate result.
    NetworkScratch::IO result(input, scratch);
    // Source for divided replicated.
    NetworkScratch::IO source_part;
    TransposedArray *src_transpose = nullptr;
    if (IsTraining() && type_ == NT_REPLICATED) {
      // Make a transposed copy of the input.
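      // The transpose is computed once and shared by every replica, which
      // (presumably) reuses it in Backward to form weight gradients without
      // re-transposing the input.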
      input.Transpose(&transposed_input_);
      src_transpose = &transposed_input_;
    }
    // Run each network, putting the outputs into result.
    int out_offset = 0;
    for (int i = 0; i < stack_size; ++i) {
      stack_[i]->Forward(debug, input, src_transpose, scratch, result);
      // All networks must have the same output width.
      if (i == 0) {
        output->Resize(*result, NumOutputs());
      } else {
        ASSERT_HOST(result->Width() == output->Width());
      }
      out_offset = output->CopyPacking(*result, out_offset);
    }
  }
#ifndef GRAPHICS_DISABLED
  if (parallel_debug) {
    DisplayForward(*output);
  }
#endif
}

// Runs backward propagation of errors on the deltas line.
// See NetworkCpp for a detailed discussion of the arguments.
bool Parallel::Backward(bool debug, const NetworkIO &fwd_deltas, NetworkScratch *scratch,
                        NetworkIO *back_deltas) {
  // If this parallel is a replicator of convolvers, or holds a 1-d LSTM pair,
  // or a 2-d LSTM quad, do debug locally, and don't pass the flag on.
  if (debug && type_ != NT_PARALLEL) {
#ifndef GRAPHICS_DISABLED
    DisplayBackward(fwd_deltas);
#endif
    debug = false;
  }
  auto stack_size = stack_.size();
  if (type_ == NT_PAR_2D_LSTM) {
    // Special case, run parallel in parallel.
    std::vector<NetworkScratch::IO> in_deltas(stack_size);
    std::vector<NetworkScratch::IO> out_deltas(stack_size);
    // Split the forward deltas for each stack element.
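    // CopyUnpacking is the inverse of the CopyPacking done in Forward: it
    // slices each element's num_features out of the packed fwd_deltas.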
    int feature_offset = 0;
    for (unsigned i = 0; i < stack_.size(); ++i) {
      int num_features = stack_[i]->NumOutputs();
      in_deltas[i].Resize(fwd_deltas, num_features, scratch);
      out_deltas[i].Resize(fwd_deltas, stack_[i]->NumInputs(), scratch);
      in_deltas[i]->CopyUnpacking(fwd_deltas, feature_offset, num_features);
      feature_offset += num_features;
    }
#ifdef _OPENMP
#  pragma omp parallel for num_threads(stack_size)
#endif
    for (unsigned i = 0; i < stack_size; ++i) {
      stack_[i]->Backward(debug, *in_deltas[i], scratch, i == 0 ? back_deltas : out_deltas[i]);
    }
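    // Element 0 wrote its back deltas directly into back_deltas; the other
    // elements wrote into their own out_deltas, which are summed in here.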
    if (needs_to_backprop_) {
      for (unsigned i = 1; i < stack_size; ++i) {
        back_deltas->AddAllToFloat(*out_deltas[i]);
      }
    }
  } else {
    // Revolving partial deltas.
    NetworkScratch::IO in_deltas(fwd_deltas, scratch);
    // The sum of deltas from different sources, which will eventually go into
    // back_deltas.
    NetworkScratch::IO out_deltas;
    int feature_offset = 0;
    for (unsigned i = 0; i < stack_.size(); ++i) {
      int num_features = stack_[i]->NumOutputs();
      in_deltas->CopyUnpacking(fwd_deltas, feature_offset, num_features);
      feature_offset += num_features;
      if (stack_[i]->Backward(debug, *in_deltas, scratch, back_deltas)) {
        if (i == 0) {
          out_deltas.ResizeFloat(*back_deltas, back_deltas->NumFeatures(), scratch);
          out_deltas->CopyAll(*back_deltas);
        } else if (back_deltas->NumFeatures() == out_deltas->NumFeatures()) {
          // Widths are allowed to be different going back, as we may have
          // input nets, so only accumulate the deltas if the widths are the
          // same.
          out_deltas->AddAllToFloat(*back_deltas);
        }
      }
    }
    if (needs_to_backprop_) {
      back_deltas->CopyAll(*out_deltas);
    }
  }
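  // Every element consumed the same input, so the accumulated deltas are
  // scaled by 1 / stack_size to average the contributions and keep the
  // gradient magnitude independent of how many elements ran in parallel.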
  if (needs_to_backprop_) {
    back_deltas->ScaleFloatBy(1.0f / stack_size);
  }
  return needs_to_backprop_;
}

} // namespace tesseract.