Coverage Report

Created: 2018-09-25 14:53

/src/mozilla-central/dom/media/encoder/OpusTrackEncoder.cpp
Line
Count
Source (jump to first uncovered line)
1
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
2
/* This Source Code Form is subject to the terms of the Mozilla Public
3
 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
4
 * You can obtain one at http://mozilla.org/MPL/2.0/. */
5
#include "OpusTrackEncoder.h"
6
#include "nsString.h"
7
#include "GeckoProfiler.h"
8
#include "mozilla/CheckedInt.h"
9
#include "VideoUtils.h"
10
11
#include <opus/opus.h>
12
13
#undef LOG
14
#define LOG(args, ...)
15
16
namespace mozilla {
17
18
// Opus itself can carry up to 8 channels (and up to 255 with multitrack
// audio). This encoder accepts up to 8 input channels but only encodes mono
// or stereo; anything above that is downmixed.
static constexpr int MAX_SUPPORTED_AUDIO_CHANNELS = 8;

// http://www.opus-codec.org/docs/html_api-1.0.2/group__opus__encoder.html
// Per the "opus_encoder_init" section, the channel count of the input signal
// must be 1 or 2.
static constexpr int MAX_CHANNELS = 2;

// Size in bytes of the output buffer handed to Opus for one encoded packet.
static constexpr int MAX_DATA_BYTES = 4096;

// http://tools.ietf.org/html/draft-ietf-codec-oggopus-00#section-4
// Second paragraph: "The granule position of an audio data page is in units
// of PCM audio samples at a fixed rate of 48 kHz."
static constexpr int kOpusSamplingRate = 48000;

// Duration of one Opus frame; must be 2.5, 5, 10, 20, 40 or 60 ms.
static constexpr int kFrameDurationMs = 20;

// Input sampling rates (Hz) that are fed to Opus directly. Input at any other
// rate is resampled to 48kHz first.
static constexpr int kOpusSupportedInputSamplingRates[] =
  { 8000, 12000, 16000, 24000, 48000 };
42
43
namespace {
44
45
// An endian-neutral serialization of integers. Serializing T in little endian
46
// format to aOutput, where T is a 16 bits or 32 bits integer.
47
template<typename T>
48
static void
49
SerializeToBuffer(T aValue, nsTArray<uint8_t>* aOutput)
50
0
{
51
0
  for (uint32_t i = 0; i < sizeof(T); i++) {
52
0
    aOutput->AppendElement((uint8_t)(0x000000ff & (aValue >> (i * 8))));
53
0
  }
54
0
}
Unexecuted instantiation: Unified_cpp_dom_media_encoder0.cpp:void mozilla::(anonymous namespace)::SerializeToBuffer<unsigned short>(unsigned short, nsTArray<unsigned char>*)
Unexecuted instantiation: Unified_cpp_dom_media_encoder0.cpp:void mozilla::(anonymous namespace)::SerializeToBuffer<unsigned int>(unsigned int, nsTArray<unsigned char>*)
Unexecuted instantiation: Unified_cpp_dom_media_encoder0.cpp:void mozilla::(anonymous namespace)::SerializeToBuffer<short>(short, nsTArray<unsigned char>*)
55
56
static inline void
57
SerializeToBuffer(const nsCString& aComment, nsTArray<uint8_t>* aOutput)
58
0
{
59
0
  // Format of serializing a string to buffer is, the length of string (32 bits,
60
0
  // little endian), and the string.
61
0
  SerializeToBuffer((uint32_t)(aComment.Length()), aOutput);
62
0
  aOutput->AppendElements(aComment.get(), aComment.Length());
63
0
}
64
65
66
static void
67
SerializeOpusIdHeader(uint8_t aChannelCount, uint16_t aPreskip,
68
                      uint32_t aInputSampleRate, nsTArray<uint8_t>* aOutput)
69
0
{
70
0
  // The magic signature, null terminator has to be stripped off from strings.
71
0
  static const uint8_t magic[] = "OpusHead";
72
0
  aOutput->AppendElements(magic, sizeof(magic) - 1);
73
0
74
0
  // The version must always be 1 (8 bits, unsigned).
75
0
  aOutput->AppendElement(1);
76
0
77
0
  // Number of output channels (8 bits, unsigned).
78
0
  aOutput->AppendElement(aChannelCount);
79
0
80
0
  // Number of samples (at 48 kHz) to discard from the decoder output when
81
0
  // starting playback (16 bits, unsigned, little endian).
82
0
  SerializeToBuffer(aPreskip, aOutput);
83
0
84
0
  // The sampling rate of input source (32 bits, unsigned, little endian).
85
0
  SerializeToBuffer(aInputSampleRate, aOutput);
86
0
87
0
  // Output gain, an encoder should set this field to zero (16 bits, signed,
88
0
  // little endian).
89
0
  SerializeToBuffer((int16_t)0, aOutput);
90
0
91
0
  // Channel mapping family. Family 0 allows only 1 or 2 channels (8 bits,
92
0
  // unsigned).
93
0
  aOutput->AppendElement(0);
94
0
}
95
96
static void
97
SerializeOpusCommentHeader(const nsCString& aVendor,
98
                           const nsTArray<nsCString>& aComments,
99
                           nsTArray<uint8_t>* aOutput)
100
0
{
101
0
  // The magic signature, null terminator has to be stripped off.
102
0
  static const uint8_t magic[] = "OpusTags";
103
0
  aOutput->AppendElements(magic, sizeof(magic) - 1);
104
0
105
0
  // The vendor; Should append in the following order:
106
0
  // vendor string length (32 bits, unsigned, little endian)
107
0
  // vendor string.
108
0
  SerializeToBuffer(aVendor, aOutput);
109
0
110
0
  // Add comments; Should append in the following order:
111
0
  // comment list length (32 bits, unsigned, little endian)
112
0
  // comment #0 string length (32 bits, unsigned, little endian)
113
0
  // comment #0 string
114
0
  // comment #1 string length (32 bits, unsigned, little endian)
115
0
  // comment #1 string ...
116
0
  SerializeToBuffer((uint32_t)aComments.Length(), aOutput);
117
0
  for (uint32_t i = 0; i < aComments.Length(); ++i) {
118
0
    SerializeToBuffer(aComments[i], aOutput);
119
0
  }
120
0
}
121
122
}  // Anonymous namespace.
123
124
// Constructs an encoder with no Opus encoder or resampler yet; both are
// created later in Init().
OpusTrackEncoder::OpusTrackEncoder(TrackRate aTrackRate)
  : AudioTrackEncoder(aTrackRate),
    mEncoder(nullptr),
    mLookahead(0),
    mResampler(nullptr),
    mOutputTimeStamp(0)
{}
132
133
// Releases the Opus encoder and the Speex resampler, if they were created.
OpusTrackEncoder::~OpusTrackEncoder()
{
  // Either handle may still be null if Init() never ran or failed.
  if (mEncoder != nullptr) {
    opus_encoder_destroy(mEncoder);
  }

  if (mResampler != nullptr) {
    speex_resampler_destroy(mResampler);
    mResampler = nullptr;
  }
}
143
144
// Configures the encoder for the given input format.
//
// aChannels must be in [1, MAX_SUPPORTED_AUDIO_CHANNELS]; more than
// MAX_CHANNELS channels are accepted but encoded as MAX_CHANNELS.
// aSamplingRate must be in [8000, 192000] Hz. If it is not one of the rates
// in kOpusSupportedInputSamplingRates, a resampler to kOpusSamplingRate is
// created.
//
// Returns NS_OK on success, NS_ERROR_INVALID_ARG for an out-of-range sampling
// rate, and NS_ERROR_FAILURE for a bad channel count or when the resampler or
// the Opus encoder cannot be created.
nsresult
OpusTrackEncoder::Init(int aChannels, int aSamplingRate)
{
  NS_ENSURE_TRUE((aChannels <= MAX_SUPPORTED_AUDIO_CHANNELS) && (aChannels > 0),
                 NS_ERROR_FAILURE);

  // This version of the encoder API only supports 1 or 2 channels, so clamp
  // mChannels to that and let InterleaveTrackData downmix the pcm data.
  mChannels = aChannels > MAX_CHANNELS ? MAX_CHANNELS : aChannels;

  // Reject non-audio sample rates.
  NS_ENSURE_TRUE(aSamplingRate >= 8000, NS_ERROR_INVALID_ARG);
  NS_ENSURE_TRUE(aSamplingRate <= 192000, NS_ERROR_INVALID_ARG);

  // According to www.opus-codec.org, creating an opus encoder requires the
  // sampling rate of the source signal to be one of 8000, 12000, 16000, 24000
  // or 48000. If this constraint is not satisfied, we resample the input to
  // 48kHz. The table is scanned in place; no temporary array is needed.
  bool isSupportedRate = false;
  for (const int supportedRate : kOpusSupportedInputSamplingRates) {
    if (supportedRate == aSamplingRate) {
      isSupportedRate = true;
      break;
    }
  }

  if (!isSupportedRate) {
    int resamplerError = RESAMPLER_ERR_SUCCESS;
    mResampler = speex_resampler_init(mChannels,
                                      aSamplingRate,
                                      kOpusSamplingRate,
                                      SPEEX_RESAMPLER_QUALITY_DEFAULT,
                                      &resamplerError);

    if (resamplerError != RESAMPLER_ERR_SUCCESS || !mResampler) {
      return NS_ERROR_FAILURE;
    }
  }
  // The range checks above already guarantee a positive rate.
  mSamplingRate = aSamplingRate;

  int error = 0;
  mEncoder = opus_encoder_create(GetOutputSampleRate(), mChannels,
                                 OPUS_APPLICATION_AUDIO, &error);

  // Bail out before touching mEncoder: on failure it is not a usable encoder
  // and must not be passed to opus_encoder_ctl().
  if (error != OPUS_OK || !mEncoder) {
    return NS_ERROR_FAILURE;
  }

  SetInitialized();

  if (mAudioBitrate) {
    opus_encoder_ctl(mEncoder, OPUS_SET_BITRATE(static_cast<int>(mAudioBitrate)));
  }

  return NS_OK;
}
195
196
int
197
OpusTrackEncoder::GetOutputSampleRate()
198
0
{
199
0
  return mResampler ? kOpusSamplingRate : mSamplingRate;
200
0
}
201
202
int
203
OpusTrackEncoder::GetPacketDuration()
204
0
{
205
0
  return GetOutputSampleRate() * kFrameDurationMs / 1000;
206
0
}
207
208
// Builds the Opus metadata (identification header and comment header) for
// this track. Returns nullptr if the encoder was canceled, has already
// finished encoding, or is not initialized.
already_AddRefed<TrackMetadataBase>
OpusTrackEncoder::GetMetadata()
{
  AUTO_PROFILER_LABEL("OpusTrackEncoder::GetMetadata", OTHER);

  MOZ_ASSERT(mInitialized || mCanceled);

  if (mCanceled || mEncodingComplete || !mInitialized) {
    return nullptr;
  }

  RefPtr<OpusMetadata> metadata = new OpusMetadata();
  metadata->mChannels = mChannels;
  metadata->mSamplingFrequency = mSamplingRate;

  // Ask the encoder for its lookahead; fall back to zero if the query fails.
  mLookahead = 0;
  if (opus_encoder_ctl(mEncoder, OPUS_GET_LOOKAHEAD(&mLookahead)) != OPUS_OK) {
    mLookahead = 0;
  }

  // The ogg time stamping and pre-skip is always timed at 48000, so scale the
  // lookahead from the encoder's rate up to 48 kHz.
  const int rateScale = kOpusSamplingRate / GetOutputSampleRate();
  SerializeOpusIdHeader(mChannels, mLookahead * rateScale, mSamplingRate,
                        &metadata->mIdHeader);

  nsCString vendor;
  vendor.AppendASCII(opus_get_version_string());

  nsTArray<nsCString> comments;
  comments.AppendElement(NS_LITERAL_CSTRING("ENCODER=Mozilla" MOZ_APP_UA_VERSION));

  SerializeOpusCommentHeader(vendor, comments, &metadata->mCommentHeader);

  return metadata.forget();
}
249
250
// Pulls queued source audio, encodes as many whole Opus packets as the
// available data allows, and appends one EncodedFrame per packet to aData.
//
// Returns NS_ERROR_FAILURE if the encoder is canceled, already complete, not
// initialized, or if Opus reports an encoding error; NS_OK otherwise
// (including when there is not yet enough input for a packet).
nsresult
OpusTrackEncoder::GetEncodedTrack(EncodedFrameContainer& aData)
{
  AUTO_PROFILER_LABEL("OpusTrackEncoder::GetEncodedTrack", OTHER);

  MOZ_ASSERT(mInitialized || mCanceled);

  if (mCanceled || mEncodingComplete) {
    return NS_ERROR_FAILURE;
  }

  if (!mInitialized) {
    // The calculations below depend on mInitialized being true.
    return NS_ERROR_FAILURE;
  }

  TakeTrackData(mSourceSegment);

  int result = 0;
  // Loop until we run out of packets of input data.
  while (result >= 0 && !mEncodingComplete) {
    // Re-sampled frames left over from last time which didn't fit into an
    // Opus packet duration.
    const int framesLeft = mResampledLeftover.Length() / mChannels;
    // The integer division used for framesToFetch truncates, so when there
    // are leftover frames we fetch one extra input frame to make sure enough
    // frames come out of the resampler.
    const int frameRoundUp = framesLeft ? 1 : 0;

    MOZ_ASSERT(GetPacketDuration() >= framesLeft);
    // Try to fetch m frames such that there will be n frames
    // where (n + framesLeft) >= GetPacketDuration() after re-sampling.
    const int framesToFetch = !mResampler ? GetPacketDuration()
                              : (GetPacketDuration() - framesLeft) * mSamplingRate / kOpusSamplingRate
                              + frameRoundUp;

    if (!mEndOfStream && mSourceSegment.GetDuration() < framesToFetch) {
      // Not enough raw data.
      return NS_OK;
    }

    // Pad |mLookahead| samples to the end of the source stream to prevent
    // loss of original data; the pcm duration will be calculated at rate 48K
    // later.
    if (mEndOfStream && !mEosSetInEncoder) {
      mEosSetInEncoder = true;
      mSourceSegment.AppendNullData(mLookahead);
    }

    // Start encoding data.
    // The buffer first receives up to framesToFetch frames of raw input and
    // later holds one packet of encoder input. When resampling down from a
    // rate above 48 kHz, framesToFetch exceeds GetPacketDuration(), so size
    // the buffer for whichever is larger instead of relying on spare inline
    // capacity past Length().
    AutoTArray<AudioDataValue, 9600> pcm;
    pcm.SetLength(std::max(GetPacketDuration(), framesToFetch) * mChannels);

    int frameCopied = 0;

    for (AudioSegment::ChunkIterator iter(mSourceSegment);
         !iter.IsEnded() && frameCopied < framesToFetch;
         iter.Next()) {
      AudioChunk chunk = *iter;

      // Chunk to the required frame size.
      StreamTime frameToCopy = chunk.GetDuration();
      if (frameToCopy > framesToFetch - frameCopied) {
        frameToCopy = framesToFetch - frameCopied;
      }
      // Possible greatest value of framesToFetch = 3844: see
      // https://bugzilla.mozilla.org/show_bug.cgi?id=1349421#c8. frameToCopy
      // should not be able to exceed this value.
      MOZ_ASSERT(frameToCopy <= 3844, "frameToCopy exceeded expected range");

      if (!chunk.IsNull()) {
        // Append the interleaved data to the end of pcm buffer.
        AudioTrackEncoder::InterleaveTrackData(chunk, frameToCopy, mChannels,
                                               pcm.Elements() + frameCopied * mChannels);
      } else {
        CheckedInt<int> memsetLength = CheckedInt<int>(frameToCopy) *
                                       mChannels *
                                       sizeof(AudioDataValue);
        if (!memsetLength.isValid()) {
          // This should never happen, but we use a defensive check because
          // we really don't want a bad memset.
          MOZ_ASSERT_UNREACHABLE("memsetLength invalid!");
          return NS_ERROR_FAILURE;
        }
        memset(pcm.Elements() + frameCopied * mChannels, 0,
               memsetLength.value());
      }

      frameCopied += frameToCopy;
    }

    // Possible greatest value of framesToFetch = 3844: see
    // https://bugzilla.mozilla.org/show_bug.cgi?id=1349421#c8. frameCopied
    // should not be able to exceed this value.
    MOZ_ASSERT(frameCopied <= 3844, "frameCopied exceeded expected range");

    RefPtr<EncodedFrame> audiodata = new EncodedFrame();
    audiodata->SetFrameType(EncodedFrame::OPUS_AUDIO_FRAME);
    int framesInPCM = frameCopied;
    if (mResampler) {
      AutoTArray<AudioDataValue, 9600> resamplingDest;
      // We want to consume all the input data, so we slightly oversize the
      // resampled data buffer so we can fit the output data in. We cannot
      // really predict the output frame count at each call.
      uint32_t outframes = frameCopied * kOpusSamplingRate / mSamplingRate + 1;
      uint32_t inframes = frameCopied;

      resamplingDest.SetLength(outframes * mChannels);

#ifdef MOZ_SAMPLE_TYPE_S16
      short* in = reinterpret_cast<short*>(pcm.Elements());
      short* out = reinterpret_cast<short*>(resamplingDest.Elements());
      speex_resampler_process_interleaved_int(mResampler, in, &inframes,
                                              out, &outframes);
#else
      float* in = reinterpret_cast<float*>(pcm.Elements());
      float* out = reinterpret_cast<float*>(resamplingDest.Elements());
      speex_resampler_process_interleaved_float(mResampler, in, &inframes,
                                                out, &outframes);
#endif

      // The raw input in pcm has been consumed by the resampler; rebuild pcm
      // as: previous leftover frames, then the freshly resampled frames.
      MOZ_ASSERT(pcm.Length() >= mResampledLeftover.Length());
      PodCopy(pcm.Elements(), mResampledLeftover.Elements(),
              mResampledLeftover.Length());

      uint32_t outframesToCopy = std::min(outframes,
                                          static_cast<uint32_t>(GetPacketDuration() - framesLeft));

      MOZ_ASSERT(pcm.Length() - mResampledLeftover.Length() >=
                 outframesToCopy * mChannels);
      PodCopy(pcm.Elements() + mResampledLeftover.Length(),
              resamplingDest.Elements(), outframesToCopy * mChannels);
      // Stash whatever did not fit into this packet for the next iteration.
      int frameLeftover = outframes - outframesToCopy;
      mResampledLeftover.SetLength(frameLeftover * mChannels);
      PodCopy(mResampledLeftover.Elements(),
              resamplingDest.Elements() + outframesToCopy * mChannels,
              mResampledLeftover.Length());
      // This is always at 48000Hz.
      framesInPCM = framesLeft + outframesToCopy;
      audiodata->SetDuration(framesInPCM);
    } else {
      // The ogg time stamping and pre-skip is always timed at 48000.
      audiodata->SetDuration(frameCopied * (kOpusSamplingRate / mSamplingRate));
    }

    // Remove the raw data which has been pulled to pcm buffer.
    // The value of frameCopied should be equal to (or smaller than, if eos)
    // framesToFetch.
    mSourceSegment.RemoveLeading(frameCopied);

    // Has reached the end of input stream and all queued data has been pulled
    // for encoding.
    if (mSourceSegment.GetDuration() == 0 && mEosSetInEncoder) {
      mEncodingComplete = true;
      LOG("[Opus] Done encoding.");
    }

    MOZ_ASSERT(mEosSetInEncoder || framesInPCM == GetPacketDuration());

    // Append null data to pcm buffer if the leftover data is not enough for
    // opus encoder.
    if (framesInPCM < GetPacketDuration() && mEosSetInEncoder) {
      PodZero(pcm.Elements() + framesInPCM * mChannels,
              (GetPacketDuration() - framesInPCM) * mChannels);
    }
    nsTArray<uint8_t> frameData;
    // Encode the data with Opus Encoder.
    frameData.SetLength(MAX_DATA_BYTES);
    // result is returned as opus error code if it is negative.
    result = 0;
#ifdef MOZ_SAMPLE_TYPE_S16
    const opus_int16* pcmBuf = static_cast<opus_int16*>(pcm.Elements());
    result = opus_encode(mEncoder, pcmBuf, GetPacketDuration(),
                         frameData.Elements(), MAX_DATA_BYTES);
#else
    const float* pcmBuf = static_cast<float*>(pcm.Elements());
    result = opus_encode_float(mEncoder, pcmBuf, GetPacketDuration(),
                               frameData.Elements(), MAX_DATA_BYTES);
#endif
    // Shrink to the number of bytes actually produced (none on error).
    frameData.SetLength(result >= 0 ? result : 0);

    if (result < 0) {
      LOG("[Opus] Fail to encode data! Result: %s.", opus_strerror(result));
    }
    if (mEncodingComplete) {
      if (mResampler) {
        speex_resampler_destroy(mResampler);
        mResampler = nullptr;
      }
      mResampledLeftover.SetLength(0);
    }

    audiodata->SwapInFrameData(frameData);
    // timestamp should be the time of the first sample
    audiodata->SetTimeStamp(mOutputTimeStamp);
    mOutputTimeStamp += FramesToUsecs(GetPacketDuration(), kOpusSamplingRate).value();
    LOG("[Opus] mOutputTimeStamp %lld.",mOutputTimeStamp);
    aData.AppendEncodedFrame(audiodata);
  }

  return result >= 0 ? NS_OK : NS_ERROR_FAILURE;
}
451
452
} // namespace mozilla