Coverage Report

Created: 2024-09-03 06:23

/src/brpc/src/bthread/mutex.cpp
Line
Count
Source
1
// Licensed to the Apache Software Foundation (ASF) under one
2
// or more contributor license agreements.  See the NOTICE file
3
// distributed with this work for additional information
4
// regarding copyright ownership.  The ASF licenses this file
5
// to you under the Apache License, Version 2.0 (the
6
// "License"); you may not use this file except in compliance
7
// with the License.  You may obtain a copy of the License at
8
//
9
//   http://www.apache.org/licenses/LICENSE-2.0
10
//
11
// Unless required by applicable law or agreed to in writing,
12
// software distributed under the License is distributed on an
13
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14
// KIND, either express or implied.  See the License for the
15
// specific language governing permissions and limitations
16
// under the License.
17
18
// bthread - An M:N threading library to make applications more concurrent.
19
20
// Date: Sun Aug  3 12:46:15 CST 2014
21
22
#include <pthread.h>
23
#include <dlfcn.h>                               // dlsym
24
#include <fcntl.h>                               // O_RDONLY
25
#include "butil/atomicops.h"
26
#include "bvar/bvar.h"
27
#include "bvar/collector.h"
28
#include "butil/macros.h"                         // BAIDU_CASSERT
29
#include "butil/containers/flat_map.h"
30
#include "butil/iobuf.h"
31
#include "butil/fd_guard.h"
32
#include "butil/files/file.h"
33
#include "butil/files/file_path.h"
34
#include "butil/file_util.h"
35
#include "butil/unique_ptr.h"
36
#include "butil/memory/scope_guard.h"
37
#include "butil/third_party/murmurhash3/murmurhash3.h"
38
#include "butil/third_party/symbolize/symbolize.h"
39
#include "butil/logging.h"
40
#include "butil/object_pool.h"
41
#include "butil/debug/stack_trace.h"
42
#include "bthread/butex.h"                       // butex_*
43
#include "bthread/mutex.h"                       // bthread_mutex_t
44
#include "bthread/sys_futex.h"
45
#include "bthread/log.h"
46
#include "butil/debug/stack_trace.h"
47
48
extern "C" {
49
extern void* BAIDU_WEAK _dl_sym(void* handle, const char* symbol, void* caller);
50
}
51
52
namespace bthread {
53
// Warm up backtrace before main().
54
const butil::debug::StackTrace ALLOW_UNUSED dummy_bt;
55
56
// For controlling contentions collected per second.
57
static bvar::CollectorSpeedLimit g_cp_sl = BVAR_COLLECTOR_SPEED_LIMIT_INITIALIZER;
58
59
const size_t MAX_CACHED_CONTENTIONS = 512;
60
// Skip frames which are always the same: the unlock function and submit_contention()
61
const int SKIPPED_STACK_FRAMES = 2;
62
63
struct SampledContention : public bvar::Collected {
64
    // time taken by lock and unlock, normalized according to sampling_range
65
    int64_t duration_ns;
66
    // number of samples, normalized according to sampling_range
67
    double count;
68
    void* stack[26];      // backtrace.
69
    int nframes;          // #elements in stack
70
71
    // Implement bvar::Collected
72
    void dump_and_destroy(size_t round) override;
73
    void destroy() override;
74
0
    bvar::CollectorSpeedLimit* speed_limit() override { return &g_cp_sl; }
75
76
0
    size_t hash_code() const {
77
0
        if (nframes == 0) {
78
0
            return 0;
79
0
        }
80
0
        if (_hash_code == 0) {
81
0
            _hash_code = 1;
82
0
            uint32_t seed = nframes;
83
0
            butil::MurmurHash3_x86_32(stack, sizeof(void*) * nframes, seed, &_hash_code);
84
0
        }
85
0
        return _hash_code;
86
0
    }
87
private:
88
friend butil::ObjectPool<SampledContention>;
89
    SampledContention()
90
0
        : duration_ns(0), count(0), stack{NULL}, nframes(0), _hash_code(0) {}
91
    ~SampledContention() override = default;
92
93
    mutable uint32_t _hash_code; // For combining samples with hashmap.
94
};
95
96
BAIDU_CASSERT(sizeof(SampledContention) == 256, be_friendly_to_allocator);
97
98
// Functor to compare contentions.
99
struct ContentionEqual {
100
    bool operator()(const SampledContention* c1,
101
0
                    const SampledContention* c2) const {
102
0
        return c1->hash_code() == c2->hash_code() &&
103
0
            c1->nframes == c2->nframes &&
104
0
            memcmp(c1->stack, c2->stack, sizeof(void*) * c1->nframes) == 0;
105
0
    }
106
};
107
108
// Functor to hash contentions.
109
struct ContentionHash {
110
0
    size_t operator()(const SampledContention* c) const {
111
0
        return c->hash_code();
112
0
    }
113
};
114
115
// The global context for contention profiler.
116
class ContentionProfiler {
117
public:
118
    typedef butil::FlatMap<SampledContention*, SampledContention*,
119
                          ContentionHash, ContentionEqual> ContentionMap;
120
121
    explicit ContentionProfiler(const char* name);
122
    ~ContentionProfiler();
123
    
124
    void dump_and_destroy(SampledContention* c);
125
126
    // Write buffered data into resulting file. If `ending' is true, append
127
    // content of /proc/self/maps and retry writing until buffer is empty.
128
    void flush_to_disk(bool ending);
129
130
    void init_if_needed();
131
private:
132
    bool _init;  // false before first dump_and_destroy is called
133
    bool _first_write;      // true if buffer was not written to file yet.
134
    std::string _filename;  // the file storing profiling result.
135
    butil::IOBuf _disk_buf;  // temp buf before saving the file.
136
    ContentionMap _dedup_map; // combining same samples to make result smaller.
137
};
138
139
ContentionProfiler::ContentionProfiler(const char* name)
140
    : _init(false)
141
    , _first_write(true)
142
0
    , _filename(name) {
143
0
}
144
145
0
ContentionProfiler::~ContentionProfiler() {
146
0
    if (!_init) {
147
        // Don't write file if dump_and_destroy was never called. We may create
148
        // such instances in ContentionProfilerStart.
149
0
        return;
150
0
    }
151
0
    flush_to_disk(true);
152
0
}
153
154
0
void ContentionProfiler::init_if_needed() {
155
0
    if (!_init) {
156
        // We already output nanoseconds, so always set cycles/second to 1000000000.
157
0
        _disk_buf.append("--- contention\ncycles/second=1000000000\n");
158
0
        CHECK_EQ(0, _dedup_map.init(1024, 60));
159
0
        _init = true;
160
0
    }
161
0
}
162
    
163
0
void ContentionProfiler::dump_and_destroy(SampledContention* c) {
164
0
    init_if_needed();
165
    // Categorize the contention.
166
0
    SampledContention** p_c2 = _dedup_map.seek(c);
167
0
    if (p_c2) {
168
        // Most contentions are caused by a few hotspots, so this should be
169
        // the common branch.
170
0
        SampledContention* c2 = *p_c2;
171
0
        c2->duration_ns += c->duration_ns;
172
0
        c2->count += c->count;
173
0
        c->destroy();
174
0
    } else {
175
0
        _dedup_map.insert(c, c);
176
0
    }
177
0
    if (_dedup_map.size() > MAX_CACHED_CONTENTIONS) {
178
0
        flush_to_disk(false);
179
0
    }
180
0
}
181
182
0
void ContentionProfiler::flush_to_disk(bool ending) {
183
0
    BT_VLOG << "flush_to_disk(ending=" << ending << ")";
184
    
185
    // Serialize contentions in _dedup_map into _disk_buf.
186
0
    if (!_dedup_map.empty()) {
187
0
        BT_VLOG << "dedup_map=" << _dedup_map.size();
188
0
        butil::IOBufBuilder os;
189
0
        for (ContentionMap::const_iterator
190
0
                 it = _dedup_map.begin(); it != _dedup_map.end(); ++it) {
191
0
            SampledContention* c = it->second;
192
0
            os << c->duration_ns << ' ' << (size_t)ceil(c->count) << " @";
193
0
            for (int i = SKIPPED_STACK_FRAMES; i < c->nframes; ++i) {
194
0
                os << ' ' << (void*)c->stack[i];
195
0
            }
196
0
            os << '\n';
197
0
            c->destroy();
198
0
        }
199
0
        _dedup_map.clear();
200
0
        _disk_buf.append(os.buf());
201
0
    }
202
203
    // Append /proc/self/maps to the end of the contention file, required by
204
    // pprof.pl; otherwise the functions in sys libs are not interpreted.
205
0
    if (ending) {
206
0
        BT_VLOG << "Append /proc/self/maps";
207
        // Failures are not critical; don't return directly.
208
0
        butil::IOPortal mem_maps;
209
0
        const butil::fd_guard fd(open("/proc/self/maps", O_RDONLY));
210
0
        if (fd >= 0) {
211
0
            while (true) {
212
0
                ssize_t nr = mem_maps.append_from_file_descriptor(fd, 8192);
213
0
                if (nr < 0) {
214
0
                    if (errno == EINTR) {
215
0
                        continue;
216
0
                    }
217
0
                    PLOG(ERROR) << "Fail to read /proc/self/maps";
218
0
                    break;
219
0
                }
220
0
                if (nr == 0) {
221
0
                    _disk_buf.append(mem_maps);
222
0
                    break;
223
0
                }
224
0
            }
225
0
        } else {
226
0
            PLOG(ERROR) << "Fail to open /proc/self/maps";
227
0
        }
228
0
    }
229
    // Write _disk_buf into _filename
230
0
    butil::File::Error error;
231
0
    butil::FilePath path(_filename);
232
0
    butil::FilePath dir = path.DirName();
233
0
    if (!butil::CreateDirectoryAndGetError(dir, &error)) {
234
0
        LOG(ERROR) << "Fail to create directory=`" << dir.value()
235
0
                   << "', " << error;
236
0
        return;
237
0
    }
238
    // Truncate on first write, append on later writes.
239
0
    int flag = O_APPEND;
240
0
    if (_first_write) {
241
0
        _first_write = false;
242
0
        flag = O_TRUNC;
243
0
    }
244
0
    butil::fd_guard fd(open(_filename.c_str(), O_WRONLY|O_CREAT|flag, 0666));
245
0
    if (fd < 0) {
246
0
        PLOG(ERROR) << "Fail to open " << _filename;
247
0
        return;
248
0
    }
249
    // Write once normally; at the end, write until the buffer is empty.
250
0
    do {
251
0
        ssize_t nw = _disk_buf.cut_into_file_descriptor(fd);
252
0
        if (nw < 0) {
253
0
            if (errno == EINTR) {
254
0
                continue;
255
0
            }
256
0
            PLOG(ERROR) << "Fail to write into " << _filename;
257
0
            return;
258
0
        }
259
0
        BT_VLOG << "Write " << nw << " bytes into " << _filename;
260
0
    } while (!_disk_buf.empty() && ending);
261
0
}
262
263
// If the contention profiler is on, this variable will be set to a valid
264
// instance. NULL otherwise.
265
BAIDU_CACHELINE_ALIGNMENT static ContentionProfiler* g_cp = NULL;
266
// This version is needed so that non-empty entries left by
267
// previous contention profilers can be detected and overwritten.
268
static uint64_t g_cp_version = 0;
269
// Protecting accesses to g_cp.
270
static pthread_mutex_t g_cp_mutex = PTHREAD_MUTEX_INITIALIZER;
271
272
// The map storing information for profiling pthread_mutex. Different from
273
// bthread_mutex, we can't save stuff into pthread_mutex, nor can we
274
// save the info in TLS reliably, since a mutex can be unlocked in a different
275
// thread from the one that locked it (although rare).
276
// This map must be very fast, since it's accessed inside the lock.
277
// Layout of the map:
278
//  * Align each entry by cacheline so that different threads do not collide.
279
//  * Hash the mutex into the map by its address. If the entry is occupied,
280
//    cancel sampling.
281
// The canceling rate should be small provided that programs are unlikely to
282
// lock a lot of mutexes simultaneously.
283
const size_t MUTEX_MAP_SIZE = 1024;
284
BAIDU_CASSERT((MUTEX_MAP_SIZE & (MUTEX_MAP_SIZE - 1)) == 0, must_be_power_of_2);
285
struct BAIDU_CACHELINE_ALIGNMENT MutexMapEntry {
286
    butil::static_atomic<uint64_t> versioned_mutex;
287
    bthread_contention_site_t csite;
288
};
289
static MutexMapEntry g_mutex_map[MUTEX_MAP_SIZE] = {}; // zero-initialize
290
291
0
void SampledContention::dump_and_destroy(size_t /*round*/) {
292
0
    if (g_cp) {
293
        // Must be protected with mutex to avoid race with deletion of ctx.
294
        // dump_and_destroy is called from dumping thread only so this mutex
295
        // is not contended most of the time.
296
0
        BAIDU_SCOPED_LOCK(g_cp_mutex);
297
0
        if (g_cp) {
298
0
            g_cp->dump_and_destroy(this);
299
0
            return;
300
0
        }
301
0
    }
302
0
    destroy();
303
0
}
304
305
0
void SampledContention::destroy() {
306
0
    _hash_code = 0;
307
0
    butil::return_object(this);
308
0
}
309
310
// Remember the conflict hashes for troubleshooting; should be 0 most of the time.
311
static butil::static_atomic<int64_t> g_nconflicthash = BUTIL_STATIC_ATOMIC_INIT(0);
312
0
static int64_t get_nconflicthash(void*) {
313
0
    return g_nconflicthash.load(butil::memory_order_relaxed);
314
0
}
315
316
// Start profiling contention.
317
0
bool ContentionProfilerStart(const char* filename) {
318
0
    if (filename == NULL) {
319
0
        LOG(ERROR) << "Parameter [filename] is NULL";
320
0
        return false;
321
0
    }
322
    // g_cp is also the flag marking start/stop.
323
0
    if (g_cp) {
324
0
        return false;
325
0
    }
326
327
    // Create related global bvar lazily.
328
0
    static bvar::PassiveStatus<int64_t> g_nconflicthash_var
329
0
        ("contention_profiler_conflict_hash", get_nconflicthash, NULL);
330
0
    static bvar::DisplaySamplingRatio g_sampling_ratio_var(
331
0
        "contention_profiler_sampling_ratio", &g_cp_sl);
332
    
333
    // Optimistic locking. An unused ContentionProfiler does not write a file.
334
0
    std::unique_ptr<ContentionProfiler> ctx(new ContentionProfiler(filename));
335
0
    {
336
0
        BAIDU_SCOPED_LOCK(g_cp_mutex);
337
0
        if (g_cp) {
338
0
            return false;
339
0
        }
340
0
        g_cp = ctx.release();
341
0
        ++g_cp_version;  // invalidate non-empty entries that may exist.
342
0
    }
343
0
    return true;
344
0
}
345
346
// Stop contention profiler.
347
0
void ContentionProfilerStop() {
348
0
    ContentionProfiler* ctx = NULL;
349
0
    if (g_cp) {
350
0
        std::unique_lock<pthread_mutex_t> mu(g_cp_mutex);
351
0
        if (g_cp) {
352
0
            ctx = g_cp;
353
0
            g_cp = NULL;
354
0
            mu.unlock();
355
356
            // make sure it's initialized in case no sample was gathered,
357
            // otherwise nothing will be written and the subsequent pprof run will fail.
358
0
            ctx->init_if_needed();
359
            // Deletion is safe because usages of g_cp are inside g_cp_mutex.
360
0
            delete ctx;
361
0
            return;
362
0
        }
363
0
    }
364
0
    LOG(ERROR) << "Contention profiler is not started!";
365
0
}
366
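
A minimal usage sketch of the two entry points above; the output path and run_workload() are hypothetical:

    // Hypothetical usage of the profiler entry points defined above.
    if (bthread::ContentionProfilerStart("/tmp/contention.prof")) {
        run_workload();                     // hypothetical workload
        bthread::ContentionProfilerStop();  // flushes samples and /proc/self/maps
    }
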
367
BUTIL_FORCE_INLINE bool
368
0
is_contention_site_valid(const bthread_contention_site_t& cs) {
369
0
    return cs.sampling_range;
370
0
}
371
372
BUTIL_FORCE_INLINE void
373
0
make_contention_site_invalid(bthread_contention_site_t* cs) {
374
0
    cs->sampling_range = 0;
375
0
}
376
377
#ifndef NO_PTHREAD_MUTEX_HOOK
378
// Replace pthread_mutex_lock and pthread_mutex_unlock:
379
// First call to sys_pthread_mutex_lock sets sys_pthread_mutex_lock to the
380
// real function so that next calls go to the real function directly. This
381
// technique avoids calling pthread_once each time.
382
typedef int (*MutexOp)(pthread_mutex_t*);
383
int first_sys_pthread_mutex_lock(pthread_mutex_t* mutex);
384
int first_sys_pthread_mutex_trylock(pthread_mutex_t* mutex);
385
int first_sys_pthread_mutex_unlock(pthread_mutex_t* mutex);
386
static MutexOp sys_pthread_mutex_lock = first_sys_pthread_mutex_lock;
387
static MutexOp sys_pthread_mutex_trylock = first_sys_pthread_mutex_trylock;
388
static MutexOp sys_pthread_mutex_unlock = first_sys_pthread_mutex_unlock;
389
static pthread_once_t init_sys_mutex_lock_once = PTHREAD_ONCE_INIT;
390
391
// dlsym may call malloc to allocate space for dlerror and cause the contention
392
// profiler to deadlock at bootstrapping when the program is linked with
393
// libunwind. The deadlock bt:
394
//   #0  0x00007effddc99b80 in __nanosleep_nocancel () at ../sysdeps/unix/syscall-template.S:81
395
//   #1  0x00000000004b4df7 in butil::internal::SpinLockDelay(int volatile*, int, int) ()
396
//   #2  0x00000000004b4d57 in SpinLock::SlowLock() ()
397
//   #3  0x00000000004b4a63 in tcmalloc::ThreadCache::InitModule() ()
398
//   #4  0x00000000004aa2b5 in tcmalloc::ThreadCache::GetCache() ()
399
//   #5  0x000000000040c6c5 in (anonymous namespace)::do_malloc_no_errno(unsigned long) [clone.part.16] ()
400
//   #6  0x00000000006fc125 in tc_calloc ()
401
//   #7  0x00007effdd245690 in _dlerror_run (operate=operate@entry=0x7effdd245130 <dlsym_doit>, args=args@entry=0x7fff483dedf0) at dlerror.c:141
402
//   #8  0x00007effdd245198 in __dlsym (handle=<optimized out>, name=<optimized out>) at dlsym.c:70
403
//   #9  0x0000000000666517 in bthread::init_sys_mutex_lock () at bthread/mutex.cpp:358
404
//   #10 0x00007effddc97a90 in pthread_once () at ../nptl/sysdeps/unix/sysv/linux/x86_64/pthread_once.S:103
405
//   #11 0x000000000066649f in bthread::first_sys_pthread_mutex_lock (mutex=0xbaf880 <_ULx86_64_lock>) at bthread/mutex.cpp:366
406
//   #12 0x00000000006678bc in pthread_mutex_lock_impl (mutex=0xbaf880 <_ULx86_64_lock>) at bthread/mutex.cpp:489
407
//   #13 pthread_mutex_lock (__mutex=__mutex@entry=0xbaf880 <_ULx86_64_lock>) at bthread/mutex.cpp:751
408
//   #14 0x00000000004c6ea1 in _ULx86_64_init () at x86_64/Gglobal.c:83
409
//   #15 0x00000000004c44fb in _ULx86_64_init_local (cursor=0x7fff483df340, uc=0x7fff483def90) at x86_64/Ginit_local.c:47
410
//   #16 0x00000000004b5012 in GetStackTrace(void**, int, int) ()
411
//   #17 0x00000000004b2095 in tcmalloc::PageHeap::GrowHeap(unsigned long) ()
412
//   #18 0x00000000004b23a3 in tcmalloc::PageHeap::New(unsigned long) ()
413
//   #19 0x00000000004ad457 in tcmalloc::CentralFreeList::Populate() ()
414
//   #20 0x00000000004ad628 in tcmalloc::CentralFreeList::FetchFromSpansSafe() ()
415
//   #21 0x00000000004ad6a3 in tcmalloc::CentralFreeList::RemoveRange(void**, void**, int) ()
416
//   #22 0x00000000004b3ed3 in tcmalloc::ThreadCache::FetchFromCentralCache(unsigned long, unsigned long) ()
417
//   #23 0x00000000006fbb9a in tc_malloc ()
418
// Call _dl_sym, a private function in glibc, to temporarily work around the
419
// malloc-induced deadlock. This fix is hardly portable.
420
421
20
static void init_sys_mutex_lock() {
422
// When the bRPC library is linked as a shared library, make sure it is
423
// loaded before the pthread shared library. Otherwise, it may cause a
424
// runtime error: undefined symbol: pthread_mutex_xxx.
425
// Alternatively, static linking can also avoid this problem.
426
20
#if defined(OS_LINUX)
427
    // TODO: may need dlvsym when GLIBC has multiple versions of the same symbol.
428
    // http://blog.fesnel.com/blog/2009/08/25/preloading-with-multiple-symbol-versions
429
20
    if (_dl_sym) {
430
20
        sys_pthread_mutex_lock = (MutexOp)_dl_sym(
431
20
            RTLD_NEXT, "pthread_mutex_lock", (void*)init_sys_mutex_lock);
432
20
        sys_pthread_mutex_unlock = (MutexOp)_dl_sym(
433
20
            RTLD_NEXT, "pthread_mutex_unlock", (void*)init_sys_mutex_lock);
434
20
        sys_pthread_mutex_trylock = (MutexOp)_dl_sym(
435
20
            RTLD_NEXT, "pthread_mutex_trylock", (void*)init_sys_mutex_lock);
436
20
    } else {
437
        // _dl_sym may be an undefined reference on some systems; fall back to dlsym
438
0
        sys_pthread_mutex_lock = (MutexOp)dlsym(RTLD_NEXT, "pthread_mutex_lock");
439
0
        sys_pthread_mutex_unlock = (MutexOp)dlsym(RTLD_NEXT, "pthread_mutex_unlock");
440
0
        sys_pthread_mutex_trylock = (MutexOp)dlsym(RTLD_NEXT, "pthread_mutex_trylock");
441
0
    }
442
#elif defined(OS_MACOSX)
443
    // TODO: look for a workaround for dlsym on mac
444
    sys_pthread_mutex_lock = (MutexOp)dlsym(RTLD_NEXT, "pthread_mutex_lock");
445
    sys_pthread_mutex_trylock = (MutexOp)dlsym(RTLD_NEXT, "pthread_mutex_trylock");
446
    sys_pthread_mutex_unlock = (MutexOp)dlsym(RTLD_NEXT, "pthread_mutex_unlock");
447
#endif
448
20
}
449
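
The hook machinery above resolves the real pthread symbols once via _dl_sym/dlsym(RTLD_NEXT, ...). A standalone sketch of the same interposition technique, assuming glibc on Linux; the file name and real_lock are illustrative:

    // interpose.cpp -- g++ -shared -fPIC interpose.cpp -ldl -o libinterpose.so
    #include <dlfcn.h>
    #include <pthread.h>
    extern "C" int pthread_mutex_lock(pthread_mutex_t* m) {
        // Resolve the next (real) definition of the symbol exactly once.
        static int (*real_lock)(pthread_mutex_t*) =
            (int (*)(pthread_mutex_t*))dlsym(RTLD_NEXT, "pthread_mutex_lock");
        // ... instrumentation would go here ...
        return real_lock(m);
    }
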
450
// Make sure pthread functions are ready before main().
451
const int ALLOW_UNUSED dummy = pthread_once(&init_sys_mutex_lock_once, init_sys_mutex_lock);
452
453
20
int first_sys_pthread_mutex_lock(pthread_mutex_t* mutex) {
454
20
    pthread_once(&init_sys_mutex_lock_once, init_sys_mutex_lock);
455
20
    return sys_pthread_mutex_lock(mutex);
456
20
}
457
458
0
int first_sys_pthread_mutex_trylock(pthread_mutex_t* mutex) {
459
0
    pthread_once(&init_sys_mutex_lock_once, init_sys_mutex_lock);
460
0
    return sys_pthread_mutex_trylock(mutex);
461
0
}
462
463
0
int first_sys_pthread_mutex_unlock(pthread_mutex_t* mutex) {
464
0
    pthread_once(&init_sys_mutex_lock_once, init_sys_mutex_lock);
465
0
    return sys_pthread_mutex_unlock(mutex);
466
0
}
467
#endif
468
469
template <typename Mutex>
470
0
inline uint64_t hash_mutex_ptr(const Mutex* m) {
471
0
    return butil::fmix64((uint64_t)m);
472
0
}
Unexecuted instantiation: unsigned long bthread::hash_mutex_ptr<pthread_mutex_t>(pthread_mutex_t const*)
Unexecuted instantiation: unsigned long bthread::hash_mutex_ptr<bthread::internal::FastPthreadMutex>(bthread::internal::FastPthreadMutex const*)
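
Since MUTEX_MAP_SIZE (1024) is a power of two, the callers below pick a map slot by masking the mixed pointer:

    // hash_mutex_ptr(m) & (MUTEX_MAP_SIZE - 1) keeps the low 10 bits of
    // butil::fmix64((uint64_t)m), i.e. a roughly uniform slot in [0, 1023].
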
473
474
// Mark that we are inside locking so that pthread_mutex calls inside
475
// collecting code are never sampled; otherwise deadlock may occur.
476
static __thread bool tls_inside_lock = false;
477
478
// Warm up some singleton objects used in the contention profiler
479
// to avoid deadlock in the malloc call stack.
480
static __thread bool tls_warn_up = false;
481
482
// ++tls_pthread_lock_count when pthread locking,
483
// --tls_pthread_lock_count when pthread unlocking.
484
// Only when it equals 0 is it safe for the bthread to be scheduled.
485
static __thread int tls_pthread_lock_count = 0;
486
487
0
void CheckBthreadScheSafety() {
488
0
    if (BAIDU_LIKELY(0 == tls_pthread_lock_count)) {
489
0
        return;
490
0
    }
491
492
0
    static butil::atomic<bool> b_sched_in_p_lock_logged{false};
493
0
    if (BAIDU_UNLIKELY(!b_sched_in_p_lock_logged.exchange(
494
0
        true, butil::memory_order_relaxed))) {
495
0
        butil::debug::StackTrace trace(true);
496
        // It can only be checked once because the counter is messed up.
497
0
        LOG(ERROR) << "bthread is suspended while holding "
498
0
                   << tls_pthread_lock_count << " pthread locks."
499
0
                   << std::endl << trace.ToString();
500
0
    }
501
0
}
502
503
// Speed up with TLS:
504
//   Most pthread_mutex are locked and unlocked in the same thread. Putting
505
//   contention information in TLS avoids collisions that may occur in
506
//   g_mutex_map. However when user unlocks in another thread, the info cached
507
//   in the locking thread is not removed, making the space bloated. We use a
508
//   simple strategy to solve the issue: If a thread has enough thread-local
509
//   space to store the info, save it, otherwise save it in g_mutex_map. For
510
//   a program that locks and unlocks in the same thread and does not lock a
511
//   lot of mutexes simultaneously, this strategy always uses the TLS.
512
#ifndef DONT_SPEEDUP_PTHREAD_CONTENTION_PROFILER_WITH_TLS
513
const int TLS_MAX_COUNT = 3;
514
struct MutexAndContentionSite {
515
    void* mutex;
516
    bthread_contention_site_t csite;
517
};
518
struct TLSPthreadContentionSites {
519
    int count;
520
    uint64_t cp_version;
521
    MutexAndContentionSite list[TLS_MAX_COUNT];
522
};
523
static __thread TLSPthreadContentionSites tls_csites = {0,0,{}};
524
#endif  // DONT_SPEEDUP_PTHREAD_CONTENTION_PROFILER_WITH_TLS
525
526
// 48-bit user-space pointers are guaranteed on linux/win.
527
const int PTR_BITS = 48;
528
529
template <typename Mutex>
530
inline bthread_contention_site_t*
531
0
add_pthread_contention_site(const Mutex* mutex) {
532
0
    MutexMapEntry& entry = g_mutex_map[hash_mutex_ptr(mutex) & (MUTEX_MAP_SIZE - 1)];
533
0
    butil::static_atomic<uint64_t>& m = entry.versioned_mutex;
534
0
    uint64_t expected = m.load(butil::memory_order_relaxed);
535
    // If the entry is not used or was used by a previous profiler, try to CAS it.
536
0
    if (expected == 0 ||
537
0
        (expected >> PTR_BITS) != (g_cp_version & ((1 << (64 - PTR_BITS)) - 1))) {
538
0
        uint64_t desired = (g_cp_version << PTR_BITS) | (uint64_t)mutex;
539
0
        if (m.compare_exchange_strong(
540
0
                expected, desired, butil::memory_order_acquire)) {
541
0
            return &entry.csite;
542
0
        }
543
0
    }
544
0
    g_nconflicthash.fetch_add(1, butil::memory_order_relaxed);
545
0
    return NULL;
546
0
}
Unexecuted instantiation: bthread_contention_site_t* bthread::add_pthread_contention_site<pthread_mutex_t>(pthread_mutex_t const*)
Unexecuted instantiation: bthread_contention_site_t* bthread::add_pthread_contention_site<bthread::internal::FastPthreadMutex>(bthread::internal::FastPthreadMutex const*)
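
Each map entry packs a 16-bit profiler version and the 48-bit mutex address into one atomic 64-bit word. A sketch of the packing arithmetic used above; the helper names are illustrative:

    // The version occupies the top 64 - PTR_BITS = 16 bits, the pointer the low 48.
    inline uint64_t pack(uint64_t version, const void* mutex) {
        return (version << PTR_BITS) | (uint64_t)mutex;
    }
    inline uint64_t version_of(uint64_t word) { return word >> PTR_BITS; }
    inline uint64_t mutex_of(uint64_t word)   { return word & ((1ULL << PTR_BITS) - 1); }
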
547
548
template <typename Mutex>
549
inline bool remove_pthread_contention_site(const Mutex* mutex,
550
0
                                           bthread_contention_site_t* saved_csite) {
551
0
    MutexMapEntry& entry = g_mutex_map[hash_mutex_ptr(mutex) & (MUTEX_MAP_SIZE - 1)];
552
0
    butil::static_atomic<uint64_t>& m = entry.versioned_mutex;
553
0
    if ((m.load(butil::memory_order_relaxed) & ((((uint64_t)1) << PTR_BITS) - 1))
554
0
        != (uint64_t)mutex) {
555
        // This branch should be the most common case since most locks are
556
        // neither contended nor sampled. We have one memory indirection and
557
        // several bitwise operations here; the cost should be ~5-50ns.
558
0
        return false;
559
0
    }
560
    // Although this branch is inside a contended lock, we should also make it
561
    // as simple as possible because altering the critical section too much
562
    // may have an unpredictable impact on thread interleaving, which
563
    // makes the profiling result less accurate.
564
0
    *saved_csite = entry.csite;
565
0
    make_contention_site_invalid(&entry.csite);
566
0
    m.store(0, butil::memory_order_release);
567
0
    return true;
568
0
}
Unexecuted instantiation: bool bthread::remove_pthread_contention_site<pthread_mutex_t>(pthread_mutex_t const*, bthread_contention_site_t*)
Unexecuted instantiation: bool bthread::remove_pthread_contention_site<bthread::internal::FastPthreadMutex>(bthread::internal::FastPthreadMutex const*, bthread_contention_site_t*)
569
570
// Submit the contention along with the callsite('s stacktrace)
571
0
void submit_contention(const bthread_contention_site_t& csite, int64_t now_ns) {
572
0
    tls_inside_lock = true;
573
0
    BRPC_SCOPE_EXIT {
574
0
        tls_inside_lock = false;
575
0
    };
576
577
0
    butil::debug::StackTrace stack(true); // May lock.
578
0
    if (0 == stack.FrameCount()) {
579
0
        return;
580
0
    }
581
    // There are two situations where we need to check whether we are in the
582
    // malloc call stack:
583
    // 1. Warm up some singleton objects used in `submit_contention'
584
    // to avoid deadlock in the malloc call stack.
585
    // 2. If LocalPool is empty, GlobalPool may allocate memory via malloc.
586
0
    if (!tls_warn_up || butil::local_pool_free_empty<SampledContention>()) {
587
        // Inside the malloc call stack, we cannot submit the contention.
588
0
        if (stack.FindSymbol((void*)malloc)) {
589
0
            return;
590
0
        }
591
0
    }
592
593
0
    auto sc = butil::get_object<SampledContention>();
594
    // Normalize duration_ns and count so that they're addable in later
595
    // processing. Notice that sampling_range is adjusted periodically by the
596
    // collecting thread.
597
0
    sc->duration_ns = csite.duration_ns * bvar::COLLECTOR_SAMPLING_BASE
598
0
        / csite.sampling_range;
599
0
    sc->count = bvar::COLLECTOR_SAMPLING_BASE / (double)csite.sampling_range;
600
0
    sc->nframes = stack.CopyAddressTo(sc->stack, arraysize(sc->stack));
601
0
    sc->submit(now_ns / 1000);  // may lock
602
    // Once a contention is submitted, warm-up is complete.
603
0
    tls_warn_up = true;
604
0
}
605
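
A worked example of the normalization above, with hypothetical numbers: if bvar::COLLECTOR_SAMPLING_BASE were 1000 and csite.sampling_range were 100 (roughly 1 in 10 events sampled), a sample with csite.duration_ns == 500 would be scaled to

    sc->duration_ns = 500 * 1000 / 100 = 5000
    sc->count       = 1000 / 100.0     = 10

so summing normalized samples estimates totals over all contention events.
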
606
namespace internal {
607
#ifndef NO_PTHREAD_MUTEX_HOOK
608
32.5k
BUTIL_FORCE_INLINE int pthread_mutex_lock_internal(pthread_mutex_t* mutex) {
609
32.5k
    ++bthread::tls_pthread_lock_count;
610
32.5k
    return sys_pthread_mutex_lock(mutex);
611
32.5k
}
612
613
0
BUTIL_FORCE_INLINE int pthread_mutex_trylock_internal(pthread_mutex_t* mutex) {
614
0
    int rc = sys_pthread_mutex_trylock(mutex);
615
0
    if (0 == rc) {
616
0
        ++tls_pthread_lock_count;
617
0
    }
618
0
    return rc;
619
0
}
620
621
32.5k
BUTIL_FORCE_INLINE int pthread_mutex_unlock_internal(pthread_mutex_t* mutex) {
622
32.5k
    --tls_pthread_lock_count;
623
32.5k
    return sys_pthread_mutex_unlock(mutex);
624
32.5k
}
625
#endif
626
627
0
BUTIL_FORCE_INLINE int pthread_mutex_lock_internal(FastPthreadMutex* mutex) {
628
0
    mutex->lock();
629
0
    return 0;
630
0
}
631
632
0
BUTIL_FORCE_INLINE int pthread_mutex_trylock_internal(FastPthreadMutex* mutex) {
633
0
    return mutex->try_lock() ? 0 : EBUSY;
634
0
}
635
636
0
BUTIL_FORCE_INLINE int pthread_mutex_unlock_internal(FastPthreadMutex* mutex) {
637
0
    mutex->unlock();
638
0
    return 0;
639
0
}
640
641
template <typename Mutex>
642
32.5k
BUTIL_FORCE_INLINE int pthread_mutex_lock_impl(Mutex* mutex) {
643
    // Don't change behavior of lock when profiler is off.
644
32.5k
    if (!g_cp ||
645
        // collecting code including backtrace() and submit() may call
646
        // pthread_mutex_lock and cause deadlock. Don't sample.
647
32.5k
        tls_inside_lock) {
648
32.5k
        return pthread_mutex_lock_internal(mutex);
649
32.5k
    }
650
    // Don't slow down non-contended locks.
651
0
    int rc = pthread_mutex_trylock_internal(mutex);
652
0
    if (rc != EBUSY) {
653
0
        return rc;
654
0
    }
655
    // Ask bvar::Collector if this (contended) locking should be sampled
656
0
    const size_t sampling_range = bvar::is_collectable(&g_cp_sl);
657
658
0
    bthread_contention_site_t* csite = NULL;
659
0
#ifndef DONT_SPEEDUP_PTHREAD_CONTENTION_PROFILER_WITH_TLS
660
0
    TLSPthreadContentionSites& fast_alt = tls_csites;
661
0
    if (fast_alt.cp_version != g_cp_version) {
662
0
        fast_alt.cp_version = g_cp_version;
663
0
        fast_alt.count = 0;
664
0
    }
665
0
    if (fast_alt.count < TLS_MAX_COUNT) {
666
0
        MutexAndContentionSite& entry = fast_alt.list[fast_alt.count++];
667
0
        entry.mutex = mutex;
668
0
        csite = &entry.csite;
669
0
        if (!sampling_range) {
670
0
            make_contention_site_invalid(&entry.csite);
671
0
            return pthread_mutex_lock_internal(mutex);
672
0
        }
673
0
    }
674
0
#endif
675
0
    if (!sampling_range) {  // don't sample
676
0
        return pthread_mutex_lock_internal(mutex);
677
0
    }
678
    // Lock and monitor the waiting time.
679
0
    const int64_t start_ns = butil::cpuwide_time_ns();
680
0
    rc = pthread_mutex_lock_internal(mutex);
681
0
    if (!rc) { // Inside lock
682
0
        if (!csite) {
683
0
            csite = add_pthread_contention_site(mutex);
684
0
            if (csite == NULL) {
685
0
                return rc;
686
0
            }
687
0
        }
688
0
        csite->duration_ns = butil::cpuwide_time_ns() - start_ns;
689
0
        csite->sampling_range = sampling_range;
690
0
    } // else rare
691
0
    return rc;
692
0
}
int bthread::internal::pthread_mutex_lock_impl<pthread_mutex_t>(pthread_mutex_t*)
Unexecuted instantiation: int bthread::internal::pthread_mutex_lock_impl<bthread::internal::FastPthreadMutex>(bthread::internal::FastPthreadMutex*)
693
694
template <typename Mutex>
695
0
BUTIL_FORCE_INLINE int pthread_mutex_trylock_impl(Mutex* mutex) {
696
0
    return pthread_mutex_trylock_internal(mutex);
697
0
}
698
699
template <typename Mutex>
700
32.5k
BUTIL_FORCE_INLINE int pthread_mutex_unlock_impl(Mutex* mutex) {
701
    // Don't change behavior of unlock when profiler is off.
702
32.5k
    if (!g_cp || tls_inside_lock) {
703
        // This branch brings an issue that an entry created by
704
        // add_pthread_contention_site may not be cleared. Thus we add a
705
        // 16-bit rolling version in the entry to detect such entries.
706
32.5k
        return pthread_mutex_unlock_internal(mutex);
707
32.5k
    }
708
0
    int64_t unlock_start_ns = 0;
709
0
    bool miss_in_tls = true;
710
0
    bthread_contention_site_t saved_csite = {0,0};
711
0
#ifndef DONT_SPEEDUP_PTHREAD_CONTENTION_PROFILER_WITH_TLS
712
0
    TLSPthreadContentionSites& fast_alt = tls_csites;
713
0
    for (int i = fast_alt.count - 1; i >= 0; --i) {
714
0
        if (fast_alt.list[i].mutex == mutex) {
715
0
            if (is_contention_site_valid(fast_alt.list[i].csite)) {
716
0
                saved_csite = fast_alt.list[i].csite;
717
0
                unlock_start_ns = butil::cpuwide_time_ns();
718
0
            }
719
0
            fast_alt.list[i] = fast_alt.list[--fast_alt.count];
720
0
            miss_in_tls = false;
721
0
            break;
722
0
        }
723
0
    }
724
0
#endif
725
    // Check the map to see if the lock is sampled. Notice that we're still
726
    // inside the critical section.
727
0
    if (miss_in_tls) {
728
0
        if (remove_pthread_contention_site(mutex, &saved_csite)) {
729
0
            unlock_start_ns = butil::cpuwide_time_ns();
730
0
        }
731
0
    }
732
0
    const int rc = pthread_mutex_unlock_internal(mutex);
733
    // [Outside lock]
734
0
    if (unlock_start_ns) {
735
0
        const int64_t unlock_end_ns = butil::cpuwide_time_ns();
736
0
        saved_csite.duration_ns += unlock_end_ns - unlock_start_ns;
737
0
        submit_contention(saved_csite, unlock_end_ns);
738
0
    }
739
0
    return rc;
740
32.5k
}
int bthread::internal::pthread_mutex_unlock_impl<pthread_mutex_t>(pthread_mutex_t*)
Unexecuted instantiation: int bthread::internal::pthread_mutex_unlock_impl<bthread::internal::FastPthreadMutex>(bthread::internal::FastPthreadMutex*)
741
742
}
743
744
#ifndef NO_PTHREAD_MUTEX_HOOK
745
32.5k
BUTIL_FORCE_INLINE int pthread_mutex_lock_impl(pthread_mutex_t* mutex) {
746
32.5k
    return internal::pthread_mutex_lock_impl(mutex);
747
32.5k
}
748
749
0
BUTIL_FORCE_INLINE int pthread_mutex_trylock_impl(pthread_mutex_t* mutex) {
750
0
    return internal::pthread_mutex_trylock_impl(mutex);
751
0
}
752
753
32.5k
BUTIL_FORCE_INLINE int pthread_mutex_unlock_impl(pthread_mutex_t* mutex) {
754
32.5k
    return internal::pthread_mutex_unlock_impl(mutex);
755
32.5k
}
756
#endif
757
758
// Implement bthread_mutex_t related functions
759
struct MutexInternal {
760
    butil::static_atomic<unsigned char> locked;
761
    butil::static_atomic<unsigned char> contended;
762
    unsigned short padding;
763
};
764
765
const MutexInternal MUTEX_CONTENDED_RAW = {{1},{1},0};
766
const MutexInternal MUTEX_LOCKED_RAW = {{1},{0},0};
767
// Define as macros rather than constants, which can't be put in a read-only
768
// section and are affected by the initialization-order fiasco.
769
0
#define BTHREAD_MUTEX_CONTENDED (*(const unsigned*)&bthread::MUTEX_CONTENDED_RAW)
770
0
#define BTHREAD_MUTEX_LOCKED (*(const unsigned*)&bthread::MUTEX_LOCKED_RAW)
771
772
BAIDU_CASSERT(sizeof(unsigned) == sizeof(MutexInternal),
773
              sizeof_mutex_internal_must_equal_unsigned);
774
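
Assuming a little-endian machine (an assumption of this sketch), `locked' is the low byte of the word and `contended' the second byte, so the macros above read as:

    // MUTEX_LOCKED_RAW    = {{1},{0},0} -> bytes 01 00 00 00 -> unsigned 0x0001
    // MUTEX_CONTENDED_RAW = {{1},{1},0} -> bytes 01 01 00 00 -> unsigned 0x0101
    // Exchanging the whole word with BTHREAD_MUTEX_CONTENDED both acquires the
    // lock byte and records a waiter; the previous value's LOCKED bit tells
    // whether the lock was already held.
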
775
0
inline int mutex_lock_contended(bthread_mutex_t* m) {
776
0
    butil::atomic<unsigned>* whole = (butil::atomic<unsigned>*)m->butex;
777
0
    while (whole->exchange(BTHREAD_MUTEX_CONTENDED) & BTHREAD_MUTEX_LOCKED) {
778
0
        if (bthread::butex_wait(whole, BTHREAD_MUTEX_CONTENDED, NULL) < 0 &&
779
0
            errno != EWOULDBLOCK && errno != EINTR/*note*/) {
780
            // a mutex lock should ignore interruptions in general since
781
            // user code is unlikely to check the return value.
782
0
            return errno;
783
0
        }
784
0
    }
785
0
    return 0;
786
0
}
787
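
A brief trace of the protocol in mutex_lock_contended, using the little-endian word values sketched above:

    // T1 locks the fast path: locked byte 0 -> 1, word becomes 0x0001.
    // T2 locks: exchange(0x0101) returns 0x0001 (LOCKED bit set), so T2 waits
    //           on the butex while the word reads 0x0101 (CONTENDED).
    // T1 unlocks: exchange(0) returns 0x0101 != LOCKED, so T1 wakes a waiter.
    // T2 retries: exchange(0x0101) returns 0, no LOCKED bit, so T2 owns the
    //             lock (the word is left CONTENDED, which is merely conservative).
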
788
inline int mutex_timedlock_contended(
789
0
    bthread_mutex_t* m, const struct timespec* __restrict abstime) {
790
0
    butil::atomic<unsigned>* whole = (butil::atomic<unsigned>*)m->butex;
791
0
    while (whole->exchange(BTHREAD_MUTEX_CONTENDED) & BTHREAD_MUTEX_LOCKED) {
792
0
        if (bthread::butex_wait(whole, BTHREAD_MUTEX_CONTENDED, abstime) < 0 &&
793
0
            errno != EWOULDBLOCK && errno != EINTR/*note*/) {
794
            // a mutex lock should ignore interruptions in general since
795
            // user code is unlikely to check the return value.
796
0
            return errno;
797
0
        }
798
0
    }
799
0
    return 0;
800
0
}
801
802
#ifdef BTHREAD_USE_FAST_PTHREAD_MUTEX
803
namespace internal {
804
805
0
int FastPthreadMutex::lock_contended() {
806
0
    butil::atomic<unsigned>* whole = (butil::atomic<unsigned>*)&_futex;
807
0
    while (whole->exchange(BTHREAD_MUTEX_CONTENDED) & BTHREAD_MUTEX_LOCKED) {
808
0
        if (futex_wait_private(whole, BTHREAD_MUTEX_CONTENDED, NULL) < 0
809
0
            && errno != EWOULDBLOCK) {
810
0
            return errno;
811
0
        }
812
0
    }
813
0
    return 0;
814
0
}
815
816
0
void FastPthreadMutex::lock() {
817
0
    auto split = (bthread::MutexInternal*)&_futex;
818
0
    if (split->locked.exchange(1, butil::memory_order_acquire)) {
819
0
        (void)lock_contended();
820
0
    }
821
0
    ++tls_pthread_lock_count;
822
0
}
823
824
0
bool FastPthreadMutex::try_lock() {
825
0
    auto split = (bthread::MutexInternal*)&_futex;
826
0
    bool lock = !split->locked.exchange(1, butil::memory_order_acquire);
827
0
    if (lock) {
828
0
        ++tls_pthread_lock_count;
829
0
    }
830
0
    return lock;
831
0
}
832
833
0
void FastPthreadMutex::unlock() {
834
0
    auto whole = (butil::atomic<unsigned>*)&_futex;
835
0
    const unsigned prev = whole->exchange(0, butil::memory_order_release);
836
    // CAUTION: the mutex may be destroyed, check comments before butex_create
837
0
    if (prev != BTHREAD_MUTEX_LOCKED) {
838
0
        futex_wake_private(whole, 1);
839
0
    }
840
0
    --tls_pthread_lock_count;
841
0
}
842
843
} // namespace internal
844
#endif // BTHREAD_USE_FAST_PTHREAD_MUTEX
845
846
0
void FastPthreadMutex::lock() {
847
0
    internal::pthread_mutex_lock_impl(&_mutex);
848
0
}
849
850
0
void FastPthreadMutex::unlock() {
851
0
    internal::pthread_mutex_unlock_impl(&_mutex);
852
0
}
853
854
} // namespace bthread
855
856
extern "C" {
857
858
int bthread_mutex_init(bthread_mutex_t* __restrict m,
859
0
                       const bthread_mutexattr_t* __restrict) {
860
0
    bthread::make_contention_site_invalid(&m->csite);
861
0
    m->butex = bthread::butex_create_checked<unsigned>();
862
0
    if (!m->butex) {
863
0
        return ENOMEM;
864
0
    }
865
0
    *m->butex = 0;
866
0
    return 0;
867
0
}
868
869
0
int bthread_mutex_destroy(bthread_mutex_t* m) {
870
0
    bthread::butex_destroy(m->butex);
871
0
    return 0;
872
0
}
873
874
0
int bthread_mutex_trylock(bthread_mutex_t* m) {
875
0
    bthread::MutexInternal* split = (bthread::MutexInternal*)m->butex;
876
0
    if (!split->locked.exchange(1, butil::memory_order_acquire)) {
877
0
        return 0;
878
0
    }
879
0
    return EBUSY;
880
0
}
881
882
0
int bthread_mutex_lock_contended(bthread_mutex_t* m) {
883
0
    return bthread::mutex_lock_contended(m);
884
0
}
885
886
0
int bthread_mutex_lock(bthread_mutex_t* m) {
887
0
    bthread::MutexInternal* split = (bthread::MutexInternal*)m->butex;
888
0
    if (!split->locked.exchange(1, butil::memory_order_acquire)) {
889
0
        return 0;
890
0
    }
891
    // Don't sample when contention profiler is off.
892
0
    if (!bthread::g_cp) {
893
0
        return bthread::mutex_lock_contended(m);
894
0
    }
895
    // Ask Collector if this (contended) locking should be sampled.
896
0
    const size_t sampling_range = bvar::is_collectable(&bthread::g_cp_sl);
897
0
    if (!sampling_range) { // Don't sample
898
0
        return bthread::mutex_lock_contended(m);
899
0
    }
900
    // Start sampling.
901
0
    const int64_t start_ns = butil::cpuwide_time_ns();
902
    // NOTE: Don't modify m->csite outside lock since multiple threads are
903
    // still contending with each other.
904
0
    const int rc = bthread::mutex_lock_contended(m);
905
0
    if (!rc) { // Inside lock
906
0
        m->csite.duration_ns = butil::cpuwide_time_ns() - start_ns;
907
0
        m->csite.sampling_range = sampling_range;
908
0
    } // else rare
909
0
    return rc;
910
0
}
911
912
int bthread_mutex_timedlock(bthread_mutex_t* __restrict m,
913
0
                            const struct timespec* __restrict abstime) {
914
0
    bthread::MutexInternal* split = (bthread::MutexInternal*)m->butex;
915
0
    if (!split->locked.exchange(1, butil::memory_order_acquire)) {
916
0
        return 0;
917
0
    }
918
    // Don't sample when contention profiler is off.
919
0
    if (!bthread::g_cp) {
920
0
        return bthread::mutex_timedlock_contended(m, abstime);
921
0
    }
922
    // Ask Collector if this (contended) locking should be sampled.
923
0
    const size_t sampling_range = bvar::is_collectable(&bthread::g_cp_sl);
924
0
    if (!sampling_range) { // Don't sample
925
0
        return bthread::mutex_timedlock_contended(m, abstime);
926
0
    }
927
    // Start sampling.
928
0
    const int64_t start_ns = butil::cpuwide_time_ns();
929
    // NOTE: Don't modify m->csite outside lock since multiple threads are
930
    // still contending with each other.
931
0
    const int rc = bthread::mutex_timedlock_contended(m, abstime);
932
0
    if (!rc) { // Inside lock
933
0
        m->csite.duration_ns = butil::cpuwide_time_ns() - start_ns;
934
0
        m->csite.sampling_range = sampling_range;
935
0
    } else if (rc == ETIMEDOUT) {
936
        // Failed to lock due to ETIMEDOUT; submit the elapsed time directly.
937
0
        const int64_t end_ns = butil::cpuwide_time_ns();
938
0
        const bthread_contention_site_t csite = {end_ns - start_ns, sampling_range};
939
0
        bthread::submit_contention(csite, end_ns);
940
0
    }
941
0
    return rc;
942
0
}
943
944
0
int bthread_mutex_unlock(bthread_mutex_t* m) {
945
0
    butil::atomic<unsigned>* whole = (butil::atomic<unsigned>*)m->butex;
946
0
    bthread_contention_site_t saved_csite = {0, 0};
947
0
    if (bthread::is_contention_site_valid(m->csite)) {
948
0
        saved_csite = m->csite;
949
0
        bthread::make_contention_site_invalid(&m->csite);
950
0
    }
951
0
    const unsigned prev = whole->exchange(0, butil::memory_order_release);
952
    // CAUTION: the mutex may be destroyed, check comments before butex_create
953
0
    if (prev == BTHREAD_MUTEX_LOCKED) {
954
0
        return 0;
955
0
    }
956
    // Wake up one waiter
957
0
    if (!bthread::is_contention_site_valid(saved_csite)) {
958
0
        bthread::butex_wake(whole);
959
0
        return 0;
960
0
    }
961
0
    const int64_t unlock_start_ns = butil::cpuwide_time_ns();
962
0
    bthread::butex_wake(whole);
963
0
    const int64_t unlock_end_ns = butil::cpuwide_time_ns();
964
0
    saved_csite.duration_ns += unlock_end_ns - unlock_start_ns;
965
0
    bthread::submit_contention(saved_csite, unlock_end_ns);
966
0
    return 0;
967
0
}
968
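
For reference, a minimal usage sketch of the bthread_mutex_t API implemented above (error handling elided):

    bthread_mutex_t m;
    bthread_mutex_init(&m, NULL);   // allocates the underlying butex
    bthread_mutex_lock(&m);         // may sample contention when the profiler is on
    /* ... critical section ... */
    bthread_mutex_unlock(&m);       // wakes one waiter, may submit contention
    bthread_mutex_destroy(&m);      // frees the butex
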
969
#ifndef NO_PTHREAD_MUTEX_HOOK
970
32.5k
int pthread_mutex_lock(pthread_mutex_t* __mutex) {
971
32.5k
    return bthread::pthread_mutex_lock_impl(__mutex);
972
32.5k
}
973
0
int pthread_mutex_trylock(pthread_mutex_t* __mutex) {
974
0
    return bthread::pthread_mutex_trylock_impl(__mutex);
975
0
}
976
32.5k
int pthread_mutex_unlock(pthread_mutex_t* __mutex) {
977
32.5k
    return bthread::pthread_mutex_unlock_impl(__mutex);
978
32.5k
}
979
#endif
980
981
}  // extern "C"