Coverage Report

Created: 2026-04-01 07:03

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/tesseract/src/lstm/parallel.cpp
Line
Count
Source
1
/////////////////////////////////////////////////////////////////////////
2
// File:        parallel.cpp
3
// Description: Runs networks in parallel on the same input.
4
// Author:      Ray Smith
5
//
6
// (C) Copyright 2013, Google Inc.
7
// Licensed under the Apache License, Version 2.0 (the "License");
8
// you may not use this file except in compliance with the License.
9
// You may obtain a copy of the License at
10
// http://www.apache.org/licenses/LICENSE-2.0
11
// Unless required by applicable law or agreed to in writing, software
12
// distributed under the License is distributed on an "AS IS" BASIS,
13
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
// See the License for the specific language governing permissions and
15
// limitations under the License.
16
///////////////////////////////////////////////////////////////////////
17
18
#ifdef HAVE_CONFIG_H
19
#  include "config_auto.h"
20
#endif
21
22
#include "parallel.h"
23
24
#ifdef _OPENMP
25
#  include <omp.h>
26
#endif
27
28
#include "functions.h" // For conditional undef of _OPENMP.
29
#include "networkscratch.h"
30
31
namespace tesseract {
32
33
// ni_ and no_ will be set by AddToStack.
34
0
Parallel::Parallel(const std::string &name, NetworkType type) : Plumbing(name) {
35
0
  type_ = type;
36
0
}
37
38
// Returns the shape output from the network given an input shape (which may
39
// be partially unknown ie zero).
40
0
StaticShape Parallel::OutputShape(const StaticShape &input_shape) const {
41
0
  StaticShape result = stack_[0]->OutputShape(input_shape);
42
0
  int stack_size = stack_.size();
43
0
  for (int i = 1; i < stack_size; ++i) {
44
0
    StaticShape shape = stack_[i]->OutputShape(input_shape);
45
0
    result.set_depth(result.depth() + shape.depth());
46
0
  }
47
0
  return result;
48
0
}
49
50
// Runs forward propagation of activations on the input line.
51
// See NetworkCpp for a detailed discussion of the arguments.
52
void Parallel::Forward(bool debug, const NetworkIO &input, const TransposedArray *input_transpose,
53
0
                       NetworkScratch *scratch, NetworkIO *output) {
54
0
  bool parallel_debug = false;
55
  // If this parallel is a replicator of convolvers, or holds a 1-d LSTM pair,
56
  // or a 2-d LSTM quad, do debug locally, and don't pass the flag on.
57
0
  if (debug && type_ != NT_PARALLEL) {
58
0
    parallel_debug = true;
59
0
    debug = false;
60
0
  }
61
0
  int stack_size = stack_.size();
62
0
  if (type_ == NT_PAR_2D_LSTM) {
63
    // Special case, run parallel in parallel.
64
0
    std::vector<NetworkScratch::IO> results(stack_size);
65
0
    for (int i = 0; i < stack_size; ++i) {
66
0
      results[i].Resize(input, stack_[i]->NumOutputs(), scratch);
67
0
    }
68
#ifdef _OPENMP
69
#  pragma omp parallel for num_threads(stack_size)
70
#endif
71
0
    for (int i = 0; i < stack_size; ++i) {
72
0
      stack_[i]->Forward(debug, input, nullptr, scratch, results[i]);
73
0
    }
74
    // Now pack all the results (serially) into the output.
75
0
    int out_offset = 0;
76
0
    output->Resize(*results[0], NumOutputs());
77
0
    for (int i = 0; i < stack_size; ++i) {
78
0
      out_offset = output->CopyPacking(*results[i], out_offset);
79
0
    }
80
0
  } else {
81
    // Revolving intermediate result.
82
0
    NetworkScratch::IO result(input, scratch);
83
    // Source for divided replicated.
84
0
    NetworkScratch::IO source_part;
85
0
    TransposedArray *src_transpose = nullptr;
86
0
    if (IsTraining() && type_ == NT_REPLICATED) {
87
      // Make a transposed copy of the input.
88
0
      input.Transpose(&transposed_input_);
89
0
      src_transpose = &transposed_input_;
90
0
    }
91
    // Run each network, putting the outputs into result.
92
0
    int out_offset = 0;
93
0
    for (int i = 0; i < stack_size; ++i) {
94
0
      stack_[i]->Forward(debug, input, src_transpose, scratch, result);
95
      // All networks must have the same output width
96
0
      if (i == 0) {
97
0
        output->Resize(*result, NumOutputs());
98
0
      } else {
99
0
        ASSERT_HOST(result->Width() == output->Width());
100
0
      }
101
0
      out_offset = output->CopyPacking(*result, out_offset);
102
0
    }
103
0
  }
104
#ifndef GRAPHICS_DISABLED
105
  if (parallel_debug) {
106
    DisplayForward(*output);
107
  }
108
#endif
109
0
}
110
111
// Runs backward propagation of errors on the deltas line.
112
// See NetworkCpp for a detailed discussion of the arguments.
113
bool Parallel::Backward(bool debug, const NetworkIO &fwd_deltas, NetworkScratch *scratch,
114
0
                        NetworkIO *back_deltas) {
115
  // If this parallel is a replicator of convolvers, or holds a 1-d LSTM pair,
116
  // or a 2-d LSTM quad, do debug locally, and don't pass the flag on.
117
0
  if (debug && type_ != NT_PARALLEL) {
118
#ifndef GRAPHICS_DISABLED
119
    DisplayBackward(fwd_deltas);
120
#endif
121
0
    debug = false;
122
0
  }
123
0
  auto stack_size = stack_.size();
124
0
  if (type_ == NT_PAR_2D_LSTM) {
125
    // Special case, run parallel in parallel.
126
0
    std::vector<NetworkScratch::IO> in_deltas(stack_size);
127
0
    std::vector<NetworkScratch::IO> out_deltas(stack_size);
128
    // Split the forward deltas for each stack element.
129
0
    int feature_offset = 0;
130
0
    for (unsigned i = 0; i < stack_.size(); ++i) {
131
0
      int num_features = stack_[i]->NumOutputs();
132
0
      in_deltas[i].Resize(fwd_deltas, num_features, scratch);
133
0
      out_deltas[i].Resize(fwd_deltas, stack_[i]->NumInputs(), scratch);
134
0
      in_deltas[i]->CopyUnpacking(fwd_deltas, feature_offset, num_features);
135
0
      feature_offset += num_features;
136
0
    }
137
#ifdef _OPENMP
138
#  pragma omp parallel for num_threads(stack_size)
139
#endif
140
0
    for (unsigned i = 0; i < stack_size; ++i) {
141
0
      stack_[i]->Backward(debug, *in_deltas[i], scratch, i == 0 ? back_deltas : out_deltas[i]);
142
0
    }
143
0
    if (needs_to_backprop_) {
144
0
      for (unsigned i = 1; i < stack_size; ++i) {
145
0
        back_deltas->AddAllToFloat(*out_deltas[i]);
146
0
      }
147
0
    }
148
0
  } else {
149
    // Revolving partial deltas.
150
0
    NetworkScratch::IO in_deltas(fwd_deltas, scratch);
151
    // The sum of deltas from different sources, which will eventually go into
152
    // back_deltas.
153
0
    NetworkScratch::IO out_deltas;
154
0
    int feature_offset = 0;
155
0
    for (unsigned i = 0; i < stack_.size(); ++i) {
156
0
      int num_features = stack_[i]->NumOutputs();
157
0
      in_deltas->CopyUnpacking(fwd_deltas, feature_offset, num_features);
158
0
      feature_offset += num_features;
159
0
      if (stack_[i]->Backward(debug, *in_deltas, scratch, back_deltas)) {
160
0
        if (i == 0) {
161
0
          out_deltas.ResizeFloat(*back_deltas, back_deltas->NumFeatures(), scratch);
162
0
          out_deltas->CopyAll(*back_deltas);
163
0
        } else if (back_deltas->NumFeatures() == out_deltas->NumFeatures()) {
164
          // Widths are allowed to be different going back, as we may have
165
          // input nets, so only accumulate the deltas if the widths are the
166
          // same.
167
0
          out_deltas->AddAllToFloat(*back_deltas);
168
0
        }
169
0
      }
170
0
    }
171
0
    if (needs_to_backprop_) {
172
0
      back_deltas->CopyAll(*out_deltas);
173
0
    }
174
0
  }
175
0
  if (needs_to_backprop_) {
176
0
    back_deltas->ScaleFloatBy(1.0f / stack_size);
177
0
  }
178
0
  return needs_to_backprop_;
179
0
}
180
181
} // namespace tesseract.