/src/tesseract/src/lstm/parallel.cpp
///////////////////////////////////////////////////////////////////////////
// File:        parallel.cpp
// Description: Runs networks in parallel on the same input.
// Author:      Ray Smith
//
// (C) Copyright 2013, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
///////////////////////////////////////////////////////////////////////////

#ifdef HAVE_CONFIG_H
#  include "config_auto.h"
#endif

#include "parallel.h"

#ifdef _OPENMP
#  include <omp.h>
#endif

#include "functions.h" // For conditional undef of _OPENMP.
#include "networkscratch.h"

namespace tesseract {

// ni_ and no_ will be set by AddToStack.
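// The type distinguishes a generic parallel (NT_PARALLEL) from the special
// cases handled below: replicated networks (NT_REPLICATED) and the 2-d LSTM
// quad (NT_PAR_2D_LSTM).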
Parallel::Parallel(const std::string &name, NetworkType type) : Plumbing(name) {
  type_ = type;
}

// Returns the shape output from the network given an input shape (which may
// be partially unknown, i.e. zero).
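// All elements see the same input, so height and width are taken from
// stack_[0] and only the depths (feature counts) are summed. For example
// (hypothetical sizes), two LSTMs with 32 and 48 outputs stacked in a
// Parallel produce an output of depth 32 + 48 = 80.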
StaticShape Parallel::OutputShape(const StaticShape &input_shape) const {
  StaticShape result = stack_[0]->OutputShape(input_shape);
  int stack_size = stack_.size();
  for (int i = 1; i < stack_size; ++i) {
    StaticShape shape = stack_[i]->OutputShape(input_shape);
    result.set_depth(result.depth() + shape.depth());
  }
  return result;
}

// Runs forward propagation of activations on the input line.
// See NetworkCpp for a detailed discussion of the arguments.
void Parallel::Forward(bool debug, const NetworkIO &input, const TransposedArray *input_transpose,
                       NetworkScratch *scratch, NetworkIO *output) {
  bool parallel_debug = false;
  // If this parallel is a replicator of convolvers, or holds a 1-d LSTM pair,
  // or a 2-d LSTM quad, do debug locally, and don't pass the flag on.
  if (debug && type_ != NT_PARALLEL) {
    parallel_debug = true;
    debug = false;
  }
  int stack_size = stack_.size();
  if (type_ == NT_PAR_2D_LSTM) {
    // Special case, run parallel in parallel.
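    // Each element gets its own scratch result buffer, so the threads in the
    // OpenMP loop below can write their outputs without synchronization.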
    std::vector<NetworkScratch::IO> results(stack_size);
    for (int i = 0; i < stack_size; ++i) {
      results[i].Resize(input, stack_[i]->NumOutputs(), scratch);
    }
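    // Fan out: one thread per stack element, each running a full forward pass
    // over the shared input. Without OpenMP this degrades to a serial loop.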
#ifdef _OPENMP
#  pragma omp parallel for num_threads(stack_size)
#endif
    for (int i = 0; i < stack_size; ++i) {
      stack_[i]->Forward(debug, input, nullptr, scratch, results[i]);
    }
    // Now pack all the results (serially) into the output.
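    // CopyPacking concatenates a result into the output along the feature
    // dimension at out_offset and returns the offset for the next element,
    // so the packed depth matches what OutputShape() reports.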
    int out_offset = 0;
    output->Resize(*results[0], NumOutputs());
    for (int i = 0; i < stack_size; ++i) {
      out_offset = output->CopyPacking(*results[i], out_offset);
    }
  } else {
    // Revolving intermediate result.
    NetworkScratch::IO result(input, scratch);
    // Source for divided replicated.
    NetworkScratch::IO source_part;
    TransposedArray *src_transpose = nullptr;
    if (IsTraining() && type_ == NT_REPLICATED) {
      // Make a transposed copy of the input.
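      // The transpose is computed once and shared by every replica, which
      // (presumably) reuses it in Backward to form weight gradients without
      // re-transposing the input.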
      input.Transpose(&transposed_input_);
      src_transpose = &transposed_input_;
    }
    // Run each network, putting the outputs into result.
    int out_offset = 0;
    for (int i = 0; i < stack_size; ++i) {
      stack_[i]->Forward(debug, input, src_transpose, scratch, result);
      // All networks must have the same output width.
      if (i == 0) {
        output->Resize(*result, NumOutputs());
      } else {
        ASSERT_HOST(result->Width() == output->Width());
      }
      out_offset = output->CopyPacking(*result, out_offset);
    }
  }
#ifndef GRAPHICS_DISABLED
  if (parallel_debug) {
    DisplayForward(*output);
  }
#endif
}

// Runs backward propagation of errors on the deltas line.
// See NetworkCpp for a detailed discussion of the arguments.
bool Parallel::Backward(bool debug, const NetworkIO &fwd_deltas, NetworkScratch *scratch,
                        NetworkIO *back_deltas) {
  // If this parallel is a replicator of convolvers, or holds a 1-d LSTM pair,
  // or a 2-d LSTM quad, do debug locally, and don't pass the flag on.
  if (debug && type_ != NT_PARALLEL) {
#ifndef GRAPHICS_DISABLED
    DisplayBackward(fwd_deltas);
#endif
    debug = false;
  }
  auto stack_size = stack_.size();
  if (type_ == NT_PAR_2D_LSTM) {
    // Special case, run parallel in parallel.
    std::vector<NetworkScratch::IO> in_deltas(stack_size);
    std::vector<NetworkScratch::IO> out_deltas(stack_size);
    // Split the forward deltas for each stack element.
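    // CopyUnpacking is the inverse of the CopyPacking done in Forward: it
    // slices each element's num_features out of the packed fwd_deltas.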
    int feature_offset = 0;
    for (unsigned i = 0; i < stack_.size(); ++i) {
      int num_features = stack_[i]->NumOutputs();
      in_deltas[i].Resize(fwd_deltas, num_features, scratch);
      out_deltas[i].Resize(fwd_deltas, stack_[i]->NumInputs(), scratch);
      in_deltas[i]->CopyUnpacking(fwd_deltas, feature_offset, num_features);
      feature_offset += num_features;
    }
#ifdef _OPENMP
#  pragma omp parallel for num_threads(stack_size)
#endif
    for (unsigned i = 0; i < stack_size; ++i) {
      stack_[i]->Backward(debug, *in_deltas[i], scratch, i == 0 ? back_deltas : out_deltas[i]);
    }
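    // Element 0 wrote its back deltas directly into back_deltas; the other
    // elements wrote into their own out_deltas, which are summed in here.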
    if (needs_to_backprop_) {
      for (unsigned i = 1; i < stack_size; ++i) {
        back_deltas->AddAllToFloat(*out_deltas[i]);
      }
    }
  } else {
    // Revolving partial deltas.
    NetworkScratch::IO in_deltas(fwd_deltas, scratch);
    // The sum of deltas from different sources, which will eventually go into
    // back_deltas.
    NetworkScratch::IO out_deltas;
    int feature_offset = 0;
    for (unsigned i = 0; i < stack_.size(); ++i) {
      int num_features = stack_[i]->NumOutputs();
      in_deltas->CopyUnpacking(fwd_deltas, feature_offset, num_features);
      feature_offset += num_features;
      if (stack_[i]->Backward(debug, *in_deltas, scratch, back_deltas)) {
        if (i == 0) {
          out_deltas.ResizeFloat(*back_deltas, back_deltas->NumFeatures(), scratch);
          out_deltas->CopyAll(*back_deltas);
        } else if (back_deltas->NumFeatures() == out_deltas->NumFeatures()) {
          // Widths are allowed to be different going back, as we may have
          // input nets, so only accumulate the deltas if the widths are the
          // same.
          out_deltas->AddAllToFloat(*back_deltas);
        }
      }
    }
    if (needs_to_backprop_) {
      back_deltas->CopyAll(*out_deltas);
    }
  }
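  // Every element consumed the same input, so the accumulated deltas are
  // scaled by 1 / stack_size to average the contributions and keep the
  // gradient magnitude independent of how many elements ran in parallel.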
  if (needs_to_backprop_) {
    back_deltas->ScaleFloatBy(1.0f / stack_size);
  }
  return needs_to_backprop_;
}

} // namespace tesseract.