/proc/self/cwd/external/gemmlowp/profiling/instrumentation.h
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright 2015 The Gemmlowp Authors. All Rights Reserved. |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | | // you may not use this file except in compliance with the License. |
5 | | // You may obtain a copy of the License at |
6 | | // |
7 | | // http://www.apache.org/licenses/LICENSE-2.0 |
8 | | // |
9 | | // Unless required by applicable law or agreed to in writing, software |
10 | | // distributed under the License is distributed on an "AS IS" BASIS, |
11 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
12 | | // See the License for the specific language governing permissions and |
13 | | // limitations under the License. |
14 | | |
15 | | // instrumentation.h: contains the definitions needed to |
16 | | // instrument code for profiling: |
17 | | // ScopedProfilingLabel, RegisterCurrentThreadForProfiling. |
18 | | // |
19 | | // profiler.h is only needed to drive the profiler: |
20 | | // StartProfiling, FinishProfiling. |
21 | | // |
22 | | // See the usage example in profiler.h. |
23 | | |
24 | | #ifndef GEMMLOWP_PROFILING_INSTRUMENTATION_H_ |
25 | | #define GEMMLOWP_PROFILING_INSTRUMENTATION_H_ |
26 | | |
27 | | #include <cstdio> |
28 | | |
29 | | #ifndef GEMMLOWP_USE_STLPORT |
30 | | #include <cstdint> |
31 | | #else |
32 | | #include <stdint.h> |
33 | | namespace std { |
34 | | using ::int16_t; |
35 | | using ::int32_t; |
36 | | using ::int8_t; |
37 | | using ::size_t; |
38 | | using ::uint16_t; |
39 | | using ::uint32_t; |
40 | | using ::uint8_t; |
41 | | using ::uintptr_t; |
42 | | } // namespace std |
43 | | #endif |
44 | | |
45 | | #include <algorithm> |
46 | | #include <cassert> |
47 | | #include <cstdlib> |
48 | | |
49 | | #ifdef GEMMLOWP_PROFILING |
50 | | #include <cstring> |
51 | | #include <set> |
52 | | #endif |
53 | | |
54 | | #include "./pthread_everywhere.h" |
55 | | |
56 | | namespace gemmlowp { |
57 | | |
// Aborts the process with a diagnostic on stderr when `condition` is false.
// Unlike assert(), this check stays active in release builds.
inline void ReleaseBuildAssertion(bool condition, const char* msg) {
  if (condition) {
    return;
  }
  fprintf(stderr, "gemmlowp error: %s\n", msg);
  abort();
}
64 | | |
// Thin wrapper over a pthread mutex. Non-copyable, because a
// pthread_mutex_t must never be duplicated.
class Mutex {
 public:
  Mutex(const Mutex&) = delete;
  Mutex& operator=(const Mutex&) = delete;

  // Initializes the underlying mutex with default attributes.
  Mutex() { pthread_mutex_init(&mutex_, nullptr); }
  ~Mutex() { pthread_mutex_destroy(&mutex_); }

  void Lock() { pthread_mutex_lock(&mutex_); }
  void Unlock() { pthread_mutex_unlock(&mutex_); }

 private:
  pthread_mutex_t mutex_;
};
79 | | |
80 | | class GlobalMutexes { |
81 | | public: |
82 | 0 | static Mutex* Profiler() { |
83 | 0 | static Mutex m; |
84 | 0 | return &m; |
85 | 0 | } |
86 | | |
87 | 0 | static Mutex* EightBitIntGemm() { |
88 | 0 | static Mutex m; |
89 | 0 | return &m; |
90 | 0 | } |
91 | | }; |
92 | | |
93 | | // A very simple RAII helper to lock and unlock a Mutex |
94 | | struct ScopedLock { |
95 | 0 | ScopedLock(Mutex* m) : _m(m) { _m->Lock(); } |
96 | 0 | ~ScopedLock() { _m->Unlock(); } |
97 | | |
98 | | private: |
99 | | Mutex* _m; |
100 | | }; |
101 | | |
102 | | // Profiling definitions. Two paths: when profiling is enabled, |
103 | | // and when profiling is disabled. |
104 | | #ifdef GEMMLOWP_PROFILING |
105 | | // This code path is when profiling is enabled. |
106 | | |
107 | | // A pseudo-call-stack. Contrary to a real call-stack, this only |
108 | | // contains pointers to literal strings that were manually entered |
109 | | // in the instrumented code (see ScopedProfilingLabel). |
struct ProfilingStack {
  // Maximum depth of the pseudo-call-stack.
  static const std::size_t kMaxSize = 30;
  typedef const char* LabelsArrayType[kMaxSize];
  // Pointers to string literals supplied by ScopedProfilingLabel;
  // only entries [0, size) are meaningful.
  LabelsArrayType labels;
  std::size_t size;
  // Guards all mutating accesses. Allocated externally (see ThreadInfo's
  // constructor) and deleted by this destructor, so this object owns it.
  Mutex* lock;

  // Zero-fills the whole object, including the `lock` pointer.
  // NOTE(review): memset/memcpy/memcmp below treat this struct as raw
  // bytes; this works only as long as all members stay trivial.
  ProfilingStack() { memset(this, 0, sizeof(ProfilingStack)); }
  ~ProfilingStack() { delete lock; }

  // Pushes a label; aborts (even in release builds) on overflow.
  void Push(const char* label) {
    ScopedLock sl(lock);
    ReleaseBuildAssertion(size < kMaxSize, "ProfilingStack overflow");
    labels[size] = label;
    size++;
  }

  // Pops the top label; aborts (even in release builds) on underflow.
  void Pop() {
    ScopedLock sl(lock);
    ReleaseBuildAssertion(size > 0, "ProfilingStack underflow");
    size--;
  }

  // Replaces the top-of-stack label in place (debug-asserts non-empty).
  void UpdateTop(const char* new_label) {
    ScopedLock sl(lock);
    assert(size);
    labels[size - 1] = new_label;
  }

  // Byte-wise copy used by the sampling profiler to snapshot a stack.
  // NOTE(review): this also copies the `lock` pointer, so a snapshot shares
  // the source's mutex; if both the snapshot and the source are eventually
  // destroyed, `lock` is deleted twice — presumably snapshots reset or
  // never destroy their copied pointer. TODO: confirm against profiler.h.
  ProfilingStack& operator=(const ProfilingStack& other) {
    memcpy(this, &other, sizeof(ProfilingStack));
    return *this;
  }

  // Byte-wise equality; compares labels, size, and the lock pointer alike.
  bool operator==(const ProfilingStack& other) const {
    return !memcmp(this, &other, sizeof(ProfilingStack));
  }
};
148 | | |
// A power-of-two size means an array of ProfilingStacks packs without
// straddling cache-line boundaries (given suitable alignment).
static_assert(
    !(sizeof(ProfilingStack) & (sizeof(ProfilingStack) - 1)),
    "ProfilingStack should have power-of-two size to fit in cache lines");
152 | | |
struct ThreadInfo;

// Returns the global registry of threads currently being profiled.
// Callers in this file take GlobalMutexes::Profiler() before touching it.
inline std::set<ThreadInfo*>& ThreadsUnderProfiling() {
  static std::set<ThreadInfo*> threads;
  return threads;
}
160 | | |
// Per-thread profiling state: the thread's pseudo-call-stack plus a
// pthread key whose sole purpose is a thread-exit callback.
struct ThreadInfo {
  pthread_key_t key;  // used only to get a callback at thread exit.
  ProfilingStack stack;

  ThreadInfo() {
    // Register ThreadExitCallback to run with `this` when the thread exits,
    // so the thread unregisters itself from the profiling set.
    pthread_key_create(&key, ThreadExitCallback);
    pthread_setspecific(key, this);
    // The stack's constructor memset `lock` to null; give it its mutex here.
    stack.lock = new Mutex();
  }

  // Runs at thread exit (via the pthread key destructor). Removes the
  // thread from the global set under the profiler mutex so a concurrent
  // sampler never sees a dying thread's stack.
  static void ThreadExitCallback(void* ptr) {
    ScopedLock sl(GlobalMutexes::Profiler());
    ThreadInfo* self = static_cast<ThreadInfo*>(ptr);
    ThreadsUnderProfiling().erase(self);
  }
};
177 | | |
// Returns the calling thread's ThreadInfo, creating it on first use.
// Storage is per-thread via a pthread key; DeleteThreadInfo reclaims the
// heap object when the thread exits.
inline ThreadInfo& ThreadLocalThreadInfo() {
  static pthread_key_t key;
  static auto DeleteThreadInfo = [](void* threadInfoPtr) {
    ThreadInfo* threadInfo = static_cast<ThreadInfo*>(threadInfoPtr);
    if (threadInfo) {
      delete threadInfo;
    }
  };

  // key_result is unused. The purpose of this 'static' local object is
  // to have its initializer (the pthread_key_create call) performed exactly
  // once, in a way that is guaranteed (since C++11) to be reentrant.
  static const int key_result = pthread_key_create(&key, DeleteThreadInfo);
  (void)key_result;

  ThreadInfo* threadInfo = static_cast<ThreadInfo*>(pthread_getspecific(key));
  if (!threadInfo) {
    // First call on this thread: allocate and stash in thread-local storage
    // so DeleteThreadInfo can free it at thread exit.
    threadInfo = new ThreadInfo();
    pthread_setspecific(key, threadInfo);
  }
  return *threadInfo;
}
200 | | |
201 | | // ScopedProfilingLabel is how one instruments code for profiling |
202 | | // with this profiler. Construct local ScopedProfilingLabel variables, |
203 | | // passing a literal string describing the local code. Profile |
204 | | // samples will then be annotated with this label, while it is in scope |
205 | | // (whence the name --- also known as RAII). |
206 | | // See the example in profiler.h. |
207 | | class ScopedProfilingLabel { |
208 | | ProfilingStack* profiling_stack_; |
209 | | |
210 | | public: |
211 | | explicit ScopedProfilingLabel(const char* label) |
212 | | : profiling_stack_(&ThreadLocalThreadInfo().stack) { |
213 | | profiling_stack_->Push(label); |
214 | | } |
215 | | |
216 | | ~ScopedProfilingLabel() { profiling_stack_->Pop(); } |
217 | | |
218 | | void Update(const char* new_label) { profiling_stack_->UpdateTop(new_label); } |
219 | | }; |
220 | | |
221 | | // To be called once on each thread to be profiled. |
222 | | inline void RegisterCurrentThreadForProfiling() { |
223 | | ScopedLock sl(GlobalMutexes::Profiler()); |
224 | | ThreadsUnderProfiling().insert(&ThreadLocalThreadInfo()); |
225 | | } |
226 | | |
227 | | #else // not GEMMLOWP_PROFILING |
228 | | // This code path is when profiling is disabled. |
229 | | |
230 | | // This empty definition of ScopedProfilingLabel ensures that |
231 | | // it has zero runtime overhead when profiling is disabled. |
struct ScopedProfilingLabel {
  // Accepts and discards the label; compiles away entirely.
  explicit ScopedProfilingLabel(const char*) {}
  // Accepts and discards the replacement label.
  void Update(const char*) {}
};
236 | | |
237 | 0 | inline void RegisterCurrentThreadForProfiling() {} |
238 | | |
239 | | #endif |
240 | | |
241 | | } // end namespace gemmlowp |
242 | | |
243 | | #endif // GEMMLOWP_PROFILING_INSTRUMENTATION_H_ |