/src/mozilla-central/dom/media/encoder/OpusTrackEncoder.cpp

Source (jump to first uncovered line)
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-*/
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this file,
 * You can obtain one at http://mozilla.org/MPL/2.0/. */
#include "OpusTrackEncoder.h"
#include "nsString.h"
#include "GeckoProfiler.h"
#include "mozilla/CheckedInt.h"
#include "VideoUtils.h"

#include <opus/opus.h>

#undef LOG
#define LOG(args, ...)

namespace mozilla {

// Largest input channel count Init() accepts. The Opus format supports up to
// 8 channels (and multitrack audio up to 255 channels), but this
// implementation only encodes mono or stereo and downmixes anything above
// that.
static const int MAX_SUPPORTED_AUDIO_CHANNELS = 8;

// http://www.opus-codec.org/docs/html_api-1.0.2/group__opus__encoder.html
// In section "opus_encoder_init", the channel count of the input signal must
// be 1 or 2. Input with more channels is reduced to this many before encoding.
static const int MAX_CHANNELS = 2;

// Size, in bytes, of the output buffer handed to the Opus encoder for each
// packet; a single encoded packet cannot be larger than this.
static const int MAX_DATA_BYTES = 4096;

// http://tools.ietf.org/html/draft-ietf-codec-oggopus-00#section-4
// Second paragraph, " The granule position of an audio data page is in units
// of PCM audio samples at a fixed rate of 48 kHz."
static const int kOpusSamplingRate = 48000;

// The duration of one Opus frame in milliseconds. It must be 2.5, 5, 10, 20,
// 40 or 60 ms.
static const int kFrameDurationMs  = 20;

// The input sampling rates (Hz) that are passed to the encoder unchanged.
// Input at any other rate is resampled to 48 kHz first.
static const int kOpusSupportedInputSamplingRates[] =
                   {8000, 12000, 16000, 24000, 48000};

namespace {

// Appends aValue to aOutput one byte at a time, least-significant byte first,
// so the stored form is little endian whatever the host byte order is. T is
// expected to be a 16-bit or 32-bit integer type.
template<typename T>
static void
SerializeToBuffer(T aValue, nsTArray<uint8_t>* aOutput)
{
  uint32_t shift = 0;
  for (uint32_t remaining = sizeof(T); remaining > 0; --remaining) {
    const uint8_t lowByte = (uint8_t)((aValue >> shift) & 0x000000ff);
    aOutput->AppendElement(lowByte);
    shift += 8;
  }
}

// Appends aComment to aOutput as a length-prefixed string: the byte count as
// a 32-bit little-endian integer, followed by exactly that many bytes of
// string data (no terminating null is written).
static inline void
SerializeToBuffer(const nsCString& aComment, nsTArray<uint8_t>* aOutput)
{
  const uint32_t byteCount = (uint32_t)(aComment.Length());
  SerializeToBuffer(byteCount, aOutput);

  aOutput->AppendElements(aComment.get(), aComment.Length());
}


// Appends an Opus identification header to aOutput. The fields are written in
// this order: "OpusHead" signature, version, channel count, pre-skip, input
// sample rate, output gain and channel mapping family.
static void
SerializeOpusIdHeader(uint8_t aChannelCount, uint16_t aPreskip,
                      uint32_t aInputSampleRate, nsTArray<uint8_t>* aOutput)
{
  // Signature bytes; the literal's trailing null is deliberately not written.
  static const uint8_t kSignature[] = "OpusHead";
  aOutput->AppendElements(kSignature, sizeof(kSignature) - 1);

  // Version field: always 1 (8 bits, unsigned).
  aOutput->AppendElement(1);

  // Output channel count (8 bits, unsigned).
  aOutput->AppendElement(aChannelCount);

  // Pre-skip: how many samples (at 48 kHz) the decoder should drop from its
  // output when playback starts (16 bits, unsigned, little endian).
  SerializeToBuffer(aPreskip, aOutput);

  // Sampling rate of the original input (32 bits, unsigned, little endian).
  SerializeToBuffer(aInputSampleRate, aOutput);

  // Output gain: encoders are expected to write zero here (16 bits, signed,
  // little endian).
  const int16_t outputGain = 0;
  SerializeToBuffer(outputGain, aOutput);

  // Channel mapping family (8 bits, unsigned). Family 0 covers only mono and
  // stereo streams.
  aOutput->AppendElement(0);
}

static void
SerializeOpusCommentHeader(const nsCString& aVendor,
                           const nsTArray<nsCString>& aComments,
                           nsTArray<uint8_t>* aOutput)
{
  // The magic signature, null terminator has to be stripped off.
  static const uint8_t magic[] = "OpusTags";
  aOutput->AppendElements(magic, sizeof(magic) - 1);

  // The vendor; Should append in the following order:
  // vendor string length (32 bits, unsigned, little endian)
  // vendor string.
  SerializeToBuffer(aVendor, aOutput);

  // Add comments; Should append in the following order:
  // comment list length (32 bits, unsigned, little endian)
  // comment #0 string length (32 bits, unsigned, little endian)
  // comment #0 string
  // comment #1 string length (32 bits, unsigned, little endian)
  // comment #1 string ...
  SerializeToBuffer((uint32_t)aComments.Length(), aOutput);
  for (uint32_t i = 0; i < aComments.Length(); ++i) {
    SerializeToBuffer(aComments[i], aOutput);
  }
}

}  // Anonymous namespace.

// Constructs an encoder for a track running at aTrackRate, which is forwarded
// to the AudioTrackEncoder base. No Opus encoder or resampler exists yet (both
// pointers start out null); they are created later by Init(). The lookahead
// and the output timestamp both start at zero.
OpusTrackEncoder::OpusTrackEncoder(TrackRate aTrackRate)
  : AudioTrackEncoder(aTrackRate)
  , mEncoder(nullptr)
  , mLookahead(0)
  , mResampler(nullptr)
  , mOutputTimeStamp(0)
{
}

// Releases the Opus encoder and, if one is still alive, the speex resampler.
OpusTrackEncoder::~OpusTrackEncoder()
{
  // The encoder only exists if Init() managed to create it.
  if (mEncoder != nullptr) {
    opus_encoder_destroy(mEncoder);
  }

  // The resampler may be absent: it is only created for some input rates and
  // is already torn down once encoding has completed.
  if (mResampler != nullptr) {
    speex_resampler_destroy(mResampler);
    mResampler = nullptr;
  }
}

// Prepares the encoder for input with aChannels channels sampled at
// aSamplingRate Hz. Creates the Opus encoder and, when the input rate is not
// one of kOpusSupportedInputSamplingRates, a speex resampler that converts the
// input to 48 kHz.
//
// Returns NS_OK on success (the encoder is then marked initialized),
// NS_ERROR_INVALID_ARG when aSamplingRate is outside [8000, 192000], and
// NS_ERROR_FAILURE when aChannels is outside [1, MAX_SUPPORTED_AUDIO_CHANNELS]
// or when the resampler or the Opus encoder cannot be created.
nsresult
OpusTrackEncoder::Init(int aChannels, int aSamplingRate)
{
  NS_ENSURE_TRUE((aChannels <= MAX_SUPPORTED_AUDIO_CHANNELS) && (aChannels > 0),
                 NS_ERROR_FAILURE);

  // This version of encoder API only support 1 or 2 channels,
  // So set the mChannels less or equal 2 and
  // let InterleaveTrackData downmix pcm data.
  mChannels = aChannels > MAX_CHANNELS ? MAX_CHANNELS : aChannels;

  // Reject non-audio sample rates.
  NS_ENSURE_TRUE(aSamplingRate >= 8000, NS_ERROR_INVALID_ARG);
  NS_ENSURE_TRUE(aSamplingRate <= 192000, NS_ERROR_INVALID_ARG);

  // According to www.opus-codec.org, creating an opus encoder requires the
  // sampling rate of source signal be one of 8000, 12000, 16000, 24000, or
  // 48000. If this constraint is not satisfied, we resample the input to 48kHz.
  bool needsResampling = true;
  for (int supportedRate : kOpusSupportedInputSamplingRates) {
    if (supportedRate == aSamplingRate) {
      needsResampling = false;
      break;
    }
  }

  if (needsResampling) {
    int resamplerError = RESAMPLER_ERR_SUCCESS;
    mResampler = speex_resampler_init(mChannels,
                                      aSamplingRate,
                                      kOpusSamplingRate,
                                      SPEEX_RESAMPLER_QUALITY_DEFAULT,
                                      &resamplerError);

    if (resamplerError != RESAMPLER_ERR_SUCCESS || !mResampler) {
      return NS_ERROR_FAILURE;
    }
  }
  mSamplingRate = aSamplingRate;

  int opusError = 0;
  mEncoder = opus_encoder_create(GetOutputSampleRate(), mChannels,
                                 OPUS_APPLICATION_AUDIO, &opusError);

  // Stop here on failure: there is no usable encoder, so it must not be
  // handed to opus_encoder_ctl() below.
  if (opusError != OPUS_OK || !mEncoder) {
    return NS_ERROR_FAILURE;
  }

  SetInitialized();

  if (mAudioBitrate) {
    opus_encoder_ctl(mEncoder, OPUS_SET_BITRATE(static_cast<int>(mAudioBitrate)));
  }

  return NS_OK;
}

// Returns the sample rate of the audio actually fed to the Opus encoder:
// 48 kHz when a resampler is active, otherwise the input sampling rate.
int
OpusTrackEncoder::GetOutputSampleRate()
{
  if (mResampler) {
    return kOpusSamplingRate;
  }
  return mSamplingRate;
}

// Returns the number of frames (at the encoder's output sample rate) that
// make up one Opus packet of kFrameDurationMs milliseconds.
int
OpusTrackEncoder::GetPacketDuration()
{
  const int outputRate = GetOutputSampleRate();
  return outputRate * kFrameDurationMs / 1000;
}

// Builds the Opus stream metadata (identification header and comment header)
// for this track. Returns nullptr when the encoder has been canceled, has
// already finished encoding, or has not been initialized.
already_AddRefed<TrackMetadataBase>
OpusTrackEncoder::GetMetadata()
{
  AUTO_PROFILER_LABEL("OpusTrackEncoder::GetMetadata", OTHER);

  MOZ_ASSERT(mInitialized || mCanceled);

  const bool metadataUnavailable =
    mCanceled || mEncodingComplete || !mInitialized;
  if (metadataUnavailable) {
    return nullptr;
  }

  RefPtr<OpusMetadata> meta = new OpusMetadata();
  meta->mChannels = mChannels;
  meta->mSamplingFrequency = mSamplingRate;

  // Ask the encoder for its lookahead; treat a failed query as no lookahead.
  mLookahead = 0;
  if (opus_encoder_ctl(mEncoder, OPUS_GET_LOOKAHEAD(&mLookahead)) != OPUS_OK) {
    mLookahead = 0;
  }

  // Pre-skip is expressed at 48000 Hz in the Ogg container, so the lookahead
  // is scaled from the encoder's rate up to that rate.
  const int scaleTo48k = kOpusSamplingRate / GetOutputSampleRate();
  SerializeOpusIdHeader(mChannels, mLookahead * scaleTo48k, mSamplingRate,
                        &meta->mIdHeader);

  // The vendor string is whatever version libopus reports about itself.
  nsCString vendor;
  vendor.AppendASCII(opus_get_version_string());

  // A single comment identifying this encoder.
  nsTArray<nsCString> comments;
  comments.AppendElement(NS_LITERAL_CSTRING("ENCODER=Mozilla" MOZ_APP_UA_VERSION));

  SerializeOpusCommentHeader(vendor, comments, &meta->mCommentHeader);

  return meta.forget();
}

// Pulls the queued source audio, encodes as many complete Opus packets as the
// available data allows, and appends each one to aData as an EncodedFrame.
//
// Each loop iteration produces one packet of GetPacketDuration() frames:
//   1. work out how many source frames are needed (fewer when resampled
//      frames are left over from the previous packet);
//   2. interleave (or zero-fill for null chunks) those frames into pcm;
//   3. if a resampler is active, resample to 48 kHz and combine the result
//      with the leftover frames, keeping any surplus for the next packet;
//   4. at end of stream, zero-pad a short final packet;
//   5. encode the packet and stamp it with the running output timestamp.
//
// Returns NS_OK when the loop stops because more input is needed or encoding
// has completed. Returns NS_ERROR_FAILURE when the encoder is canceled,
// already complete or not initialized, or when the Opus encoder reports an
// error.
nsresult
OpusTrackEncoder::GetEncodedTrack(EncodedFrameContainer& aData)
{
  AUTO_PROFILER_LABEL("OpusTrackEncoder::GetEncodedTrack", OTHER);

  MOZ_ASSERT(mInitialized || mCanceled);

  if (mCanceled || mEncodingComplete) {
    return NS_ERROR_FAILURE;
  }

  if (!mInitialized) {
    // calculation below depends on the truth that mInitialized is true.
    return NS_ERROR_FAILURE;
  }

  TakeTrackData(mSourceSegment);

  int result = 0;
  // Loop until we run out of packets of input data
  while (result >= 0 && !mEncodingComplete) {
    // re-sampled frames left last time which didn't fit into an Opus packet duration.
    const int framesLeft = mResampledLeftover.Length() / mChannels;
    // When framesLeft is 0, (GetPacketDuration() - framesLeft) is a multiple
    // of kOpusSamplingRate. There is no precision loss in the integer division
    // in computing framesToFetch. If framesLeft > 0, we need to add 1 to
    // framesToFetch to ensure there will be at least n frames after re-sampling.
    const int frameRoundUp = framesLeft ? 1 : 0;

    MOZ_ASSERT(GetPacketDuration() >= framesLeft);
    // Try to fetch m frames such that there will be n frames
    // where (n + framesLeft) >= GetPacketDuration() after re-sampling.
    const int framesToFetch = !mResampler ? GetPacketDuration()
                              : (GetPacketDuration() - framesLeft) * mSamplingRate / kOpusSamplingRate
                              + frameRoundUp;

    if (!mEndOfStream && mSourceSegment.GetDuration() < framesToFetch) {
      // Not enough raw data
      return NS_OK;
    }

    // Pad |mLookahead| samples to the end of source stream to prevent loss of
    // original data, the pcm duration will be calculated at rate 48K later.
    if (mEndOfStream && !mEosSetInEncoder) {
      mEosSetInEncoder = true;
      mSourceSegment.AppendNullData(mLookahead);
    }

    // Start encoding data.
    // pcm is used twice: first for the raw input (up to framesToFetch frames
    // at the source rate) and afterwards for the packet given to Opus
    // (GetPacketDuration() frames). When the source rate is above the encoder
    // rate the raw input is the larger of the two, so size the array for
    // whichever is bigger; that keeps every write below inside its length.
    AutoTArray<AudioDataValue, 9600> pcm;
    const int pcmFrames = framesToFetch > GetPacketDuration()
                          ? framesToFetch : GetPacketDuration();
    pcm.SetLength(pcmFrames * mChannels);

    int frameCopied = 0;

    for (AudioSegment::ChunkIterator iter(mSourceSegment);
         !iter.IsEnded() && frameCopied < framesToFetch;
         iter.Next()) {
      AudioChunk chunk = *iter;

      // Chunk to the required frame size.
      StreamTime frameToCopy = chunk.GetDuration();
      if (frameToCopy > framesToFetch - frameCopied) {
        frameToCopy = framesToFetch - frameCopied;
      }
      // Possible greatest value of framesToFetch = 3844: see
      // https://bugzilla.mozilla.org/show_bug.cgi?id=1349421#c8. frameToCopy
      // should not be able to exceed this value.
      MOZ_ASSERT(frameToCopy <= 3844, "frameToCopy exceeded expected range");

      if (!chunk.IsNull()) {
        // Append the interleaved data to the end of pcm buffer.
        AudioTrackEncoder::InterleaveTrackData(chunk, frameToCopy, mChannels,
                                               pcm.Elements() + frameCopied * mChannels);
      } else {
        // A null chunk carries no samples; write silence for its duration.
        CheckedInt<int> memsetLength = CheckedInt<int>(frameToCopy) *
                                       mChannels *
                                       sizeof(AudioDataValue);
        if (!memsetLength.isValid()) {
          // This should never happen, but we use a defensive check because
          // we really don't want a bad memset
          MOZ_ASSERT_UNREACHABLE("memsetLength invalid!");
          return NS_ERROR_FAILURE;
        }
        memset(pcm.Elements() + frameCopied * mChannels, 0,
               memsetLength.value());
      }

      frameCopied += frameToCopy;
    }

    // Possible greatest value of framesToFetch = 3844: see
    // https://bugzilla.mozilla.org/show_bug.cgi?id=1349421#c8. frameCopied
    // should not be able to exceed this value.
    MOZ_ASSERT(frameCopied <= 3844, "frameCopied exceeded expected range");

    RefPtr<EncodedFrame> audiodata = new EncodedFrame();
    audiodata->SetFrameType(EncodedFrame::OPUS_AUDIO_FRAME);
    int framesInPCM = frameCopied;
    if (mResampler) {
      AutoTArray<AudioDataValue, 9600> resamplingDest;
      // We want to consume all the input data, so we slightly oversize the
      // resampled data buffer so we can fit the output data in. We cannot really
      // predict the output frame count at each call.
      uint32_t outframes = frameCopied * kOpusSamplingRate / mSamplingRate + 1;
      uint32_t inframes = frameCopied;

      resamplingDest.SetLength(outframes * mChannels);

#ifdef MOZ_SAMPLE_TYPE_S16
      short* in = reinterpret_cast<short*>(pcm.Elements());
      short* out = reinterpret_cast<short*>(resamplingDest.Elements());
      speex_resampler_process_interleaved_int(mResampler, in, &inframes,
                                              out, &outframes);
#else
      float* in = reinterpret_cast<float*>(pcm.Elements());
      float* out = reinterpret_cast<float*>(resamplingDest.Elements());
      speex_resampler_process_interleaved_float(mResampler, in, &inframes,
                                                out, &outframes);
#endif

      // The raw input in pcm has been consumed by the resampler. Rebuild pcm
      // as: leftover frames from the previous packet, then the newly
      // resampled frames.
      MOZ_ASSERT(pcm.Length() >= mResampledLeftover.Length());
      PodCopy(pcm.Elements(), mResampledLeftover.Elements(),
              mResampledLeftover.Length());

      // Never take more resampled frames than are needed to fill the packet.
      uint32_t outframesToCopy = std::min(outframes,
                                          static_cast<uint32_t>(GetPacketDuration() - framesLeft));

      MOZ_ASSERT(pcm.Length() - mResampledLeftover.Length() >=
                 outframesToCopy * mChannels);
      PodCopy(pcm.Elements() + mResampledLeftover.Length(),
              resamplingDest.Elements(), outframesToCopy * mChannels);
      // Keep whatever did not fit for the next packet.
      int frameLeftover = outframes - outframesToCopy;
      mResampledLeftover.SetLength(frameLeftover * mChannels);
      PodCopy(mResampledLeftover.Elements(),
              resamplingDest.Elements() + outframesToCopy * mChannels,
              mResampledLeftover.Length());
      // This is always at 48000Hz.
      framesInPCM = framesLeft + outframesToCopy;
      audiodata->SetDuration(framesInPCM);
    } else {
      // The ogg time stamping and pre-skip is always timed at 48000.
      audiodata->SetDuration(frameCopied * (kOpusSamplingRate / mSamplingRate));
    }

    // Remove the raw data which has been pulled to pcm buffer.
    // The value of frameCopied should be equal to (or smaller than, if eos)
    // framesToFetch.
    mSourceSegment.RemoveLeading(frameCopied);

    // Has reached the end of input stream and all queued data has been pulled
    // for encoding.
    if (mSourceSegment.GetDuration() == 0 && mEosSetInEncoder) {
      mEncodingComplete = true;
      LOG("[Opus] Done encoding.");
    }

    MOZ_ASSERT(mEosSetInEncoder || framesInPCM == GetPacketDuration());

    // Append null data to pcm buffer if the leftover data is not enough for
    // opus encoder.
    if (framesInPCM < GetPacketDuration() && mEosSetInEncoder) {
      PodZero(pcm.Elements() + framesInPCM * mChannels,
              (GetPacketDuration() - framesInPCM) * mChannels);
    }
    nsTArray<uint8_t> frameData;
    // Encode the data with Opus Encoder.
    frameData.SetLength(MAX_DATA_BYTES);
    // result is returned as opus error code if it is negative.
    result = 0;
#ifdef MOZ_SAMPLE_TYPE_S16
    const opus_int16* pcmBuf = static_cast<opus_int16*>(pcm.Elements());
    result = opus_encode(mEncoder, pcmBuf, GetPacketDuration(),
                         frameData.Elements(), MAX_DATA_BYTES);
#else
    const float* pcmBuf = static_cast<float*>(pcm.Elements());
    result = opus_encode_float(mEncoder, pcmBuf, GetPacketDuration(),
                               frameData.Elements(), MAX_DATA_BYTES);
#endif
    // A non-negative result is the encoded size; shrink the buffer to it.
    frameData.SetLength(result >= 0 ? result : 0);

    if (result < 0) {
      LOG("[Opus] Fail to encode data! Result: %s.", opus_strerror(result));
    }
    if (mEncodingComplete) {
      // Nothing more will be resampled; drop the resampler and its leftovers.
      if (mResampler) {
        speex_resampler_destroy(mResampler);
        mResampler = nullptr;
      }
      mResampledLeftover.SetLength(0);
    }

    audiodata->SwapInFrameData(frameData);
    // timestamp should be the time of the first sample
    audiodata->SetTimeStamp(mOutputTimeStamp);
    mOutputTimeStamp += FramesToUsecs(GetPacketDuration(), kOpusSamplingRate).value();
    LOG("[Opus] mOutputTimeStamp %lld.",mOutputTimeStamp);
    aData.AppendEncodedFrame(audiodata);
  }

  return result >= 0 ? NS_OK : NS_ERROR_FAILURE;
}

} // namespace mozilla

Coverage Report

Created: 2018-09-25 14:53

Line	Count	Source (jump to first uncovered line)
1		/* -- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 --*/
2		/* This Source Code Form is subject to the terms of the Mozilla Public
3		* License, v. 2.0. If a copy of the MPL was not distributed with this file,
4		* You can obtain one at http://mozilla.org/MPL/2.0/. */
5		#include "OpusTrackEncoder.h"
6		#include "nsString.h"
7		#include "GeckoProfiler.h"
8		#include "mozilla/CheckedInt.h"
9		#include "VideoUtils.h"
10
11		#include <opus/opus.h>
12
13		#undef LOG
14		#define LOG(args, ...)
15
16		namespace mozilla {
17
18		// The Opus format supports up to 8 channels, and supports multitrack audio up
19		// to 255 channels, but the current implementation supports only mono and
20		// stereo, and downmixes any more than that.
21		static const int MAX_SUPPORTED_AUDIO_CHANNELS = 8;
22
23		// http://www.opus-codec.org/docs/html_api-1.0.2/group__opus__encoder.html
24		// In section "opus_encoder_init", channels must be 1 or 2 of input signal.
25		static const int MAX_CHANNELS = 2;
26
27		// A maximum data bytes for Opus to encode.
28		static const int MAX_DATA_BYTES = 4096;
29
30		// http://tools.ietf.org/html/draft-ietf-codec-oggopus-00#section-4
31		// Second paragraph, " The granule position of an audio data page is in units
32		// of PCM audio samples at a fixed rate of 48 kHz."
33		static const int kOpusSamplingRate = 48000;
34
35		// The duration of an Opus frame, and it must be 2.5, 5, 10, 20, 40 or 60 ms.
36		static const int kFrameDurationMs = 20;
37
38		// The supported sampling rate of input signal (Hz),
39		// must be one of the following. Will resampled to 48kHz otherwise.
40		static const int kOpusSupportedInputSamplingRates[] =
41		{8000, 12000, 16000, 24000, 48000};
42
43		namespace {
44
45		// An endian-neutral serialization of integers. Serializing T in little endian
46		// format to aOutput, where T is a 16 bits or 32 bits integer.
47		template<typename T>
48		static void
49		SerializeToBuffer(T aValue, nsTArray<uint8_t>* aOutput)
50	0	{
51	0	for (uint32_t i = 0; i < sizeof(T); i++) {
52	0	aOutput->AppendElement((uint8_t)(0x000000ff & (aValue >> (i * 8))));
53	0	}
54	0	} Unexecuted instantiation: Unified_cpp_dom_media_encoder0.cpp:void mozilla::(anonymous namespace)::SerializeToBuffer<unsigned short>(unsigned short, nsTArray<unsigned char>) Unexecuted instantiation: Unified_cpp_dom_media_encoder0.cpp:void mozilla::(anonymous namespace)::SerializeToBuffer<unsigned int>(unsigned int, nsTArray<unsigned char>) Unexecuted instantiation: Unified_cpp_dom_media_encoder0.cpp:void mozilla::(anonymous namespace)::SerializeToBuffer<short>(short, nsTArray<unsigned char>*)
55
56		static inline void
57		SerializeToBuffer(const nsCString& aComment, nsTArray<uint8_t>* aOutput)
58	0	{
59	0	// Format of serializing a string to buffer is, the length of string (32 bits,
60	0	// little endian), and the string.
61	0	SerializeToBuffer((uint32_t)(aComment.Length()), aOutput);
62	0	aOutput->AppendElements(aComment.get(), aComment.Length());
63	0	}
64
65
66		static void
67		SerializeOpusIdHeader(uint8_t aChannelCount, uint16_t aPreskip,
68		uint32_t aInputSampleRate, nsTArray<uint8_t>* aOutput)
69	0	{
70	0	// The magic signature, null terminator has to be stripped off from strings.
71	0	static const uint8_t magic[] = "OpusHead";
72	0	aOutput->AppendElements(magic, sizeof(magic) - 1);
73	0
74	0	// The version must always be 1 (8 bits, unsigned).
75	0	aOutput->AppendElement(1);
76	0
77	0	// Number of output channels (8 bits, unsigned).
78	0	aOutput->AppendElement(aChannelCount);
79	0
80	0	// Number of samples (at 48 kHz) to discard from the decoder output when
81	0	// starting playback (16 bits, unsigned, little endian).
82	0	SerializeToBuffer(aPreskip, aOutput);
83	0
84	0	// The sampling rate of input source (32 bits, unsigned, little endian).
85	0	SerializeToBuffer(aInputSampleRate, aOutput);
86	0
87	0	// Output gain, an encoder should set this field to zero (16 bits, signed,
88	0	// little endian).
89	0	SerializeToBuffer((int16_t)0, aOutput);
90	0
91	0	// Channel mapping family. Family 0 allows only 1 or 2 channels (8 bits,
92	0	// unsigned).
93	0	aOutput->AppendElement(0);
94	0	}
95
96		static void
97		SerializeOpusCommentHeader(const nsCString& aVendor,
98		const nsTArray<nsCString>& aComments,
99		nsTArray<uint8_t>* aOutput)
100	0	{
101	0	// The magic signature, null terminator has to be stripped off.
102	0	static const uint8_t magic[] = "OpusTags";
103	0	aOutput->AppendElements(magic, sizeof(magic) - 1);
104	0
105	0	// The vendor; Should append in the following order:
106	0	// vendor string length (32 bits, unsigned, little endian)
107	0	// vendor string.
108	0	SerializeToBuffer(aVendor, aOutput);
109	0
110	0	// Add comments; Should append in the following order:
111	0	// comment list length (32 bits, unsigned, little endian)
112	0	// comment #0 string length (32 bits, unsigned, little endian)
113	0	// comment #0 string
114	0	// comment #1 string length (32 bits, unsigned, little endian)
115	0	// comment #1 string ...
116	0	SerializeToBuffer((uint32_t)aComments.Length(), aOutput);
117	0	for (uint32_t i = 0; i < aComments.Length(); ++i) {
118	0	SerializeToBuffer(aComments[i], aOutput);
119	0	}
120	0	}
121
122		} // Anonymous namespace.
123
124		OpusTrackEncoder::OpusTrackEncoder(TrackRate aTrackRate)
125		: AudioTrackEncoder(aTrackRate)
126		, mEncoder(nullptr)
127		, mLookahead(0)
128		, mResampler(nullptr)
129		, mOutputTimeStamp(0)
130	0	{
131	0	}
132
133		OpusTrackEncoder::~OpusTrackEncoder()
134	0	{
135	0	if (mEncoder) {
136	0	opus_encoder_destroy(mEncoder);
137	0	}
138	0	if (mResampler) {
139	0	speex_resampler_destroy(mResampler);
140	0	mResampler = nullptr;
141	0	}
142	0	}
143
144		nsresult
145		OpusTrackEncoder::Init(int aChannels, int aSamplingRate)
146	0	{
147	0	NS_ENSURE_TRUE((aChannels <= MAX_SUPPORTED_AUDIO_CHANNELS) && (aChannels > 0),
148	0	NS_ERROR_FAILURE);
149	0
150	0	// This version of encoder API only support 1 or 2 channels,
151	0	// So set the mChannels less or equal 2 and
152	0	// let InterleaveTrackData downmix pcm data.
153	0	mChannels = aChannels > MAX_CHANNELS ? MAX_CHANNELS : aChannels;
154	0
155	0	// Reject non-audio sample rates.
156	0	NS_ENSURE_TRUE(aSamplingRate >= 8000, NS_ERROR_INVALID_ARG);
157	0	NS_ENSURE_TRUE(aSamplingRate <= 192000, NS_ERROR_INVALID_ARG);
158	0
159	0	// According to www.opus-codec.org, creating an opus encoder requires the
160	0	// sampling rate of source signal be one of 8000, 12000, 16000, 24000, or
161	0	// 48000. If this constraint is not satisfied, we resample the input to 48kHz.
162	0	nsTArray<int> supportedSamplingRates;
163	0	supportedSamplingRates.AppendElements(kOpusSupportedInputSamplingRates,
164	0	ArrayLength(kOpusSupportedInputSamplingRates));
165	0	if (!supportedSamplingRates.Contains(aSamplingRate)) {
166	0	int error;
167	0	mResampler = speex_resampler_init(mChannels,
168	0	aSamplingRate,
169	0	kOpusSamplingRate,
170	0	SPEEX_RESAMPLER_QUALITY_DEFAULT,
171	0	&error);
172	0
173	0	if (error != RESAMPLER_ERR_SUCCESS) {
174	0	return NS_ERROR_FAILURE;
175	0	}
176	0	}
177	0	mSamplingRate = aSamplingRate;
178	0	NS_ENSURE_TRUE(mSamplingRate > 0, NS_ERROR_FAILURE);
179	0
180	0	int error = 0;
181	0	mEncoder = opus_encoder_create(GetOutputSampleRate(), mChannels,
182	0	OPUS_APPLICATION_AUDIO, &error);
183	0
184	0
185	0	if (error == OPUS_OK) {
186	0	SetInitialized();
187	0	}
188	0
189	0	if (mAudioBitrate) {
190	0	opus_encoder_ctl(mEncoder, OPUS_SET_BITRATE(static_cast<int>(mAudioBitrate)));
191	0	}
192	0
193	0	return error == OPUS_OK ? NS_OK : NS_ERROR_FAILURE;
194	0	}
195
196		int
197		OpusTrackEncoder::GetOutputSampleRate()
198	0	{
199	0	return mResampler ? kOpusSamplingRate : mSamplingRate;
200	0	}
201
202		int
203		OpusTrackEncoder::GetPacketDuration()
204	0	{
205	0	return GetOutputSampleRate() * kFrameDurationMs / 1000;
206	0	}
207
208		already_AddRefed<TrackMetadataBase>
209		OpusTrackEncoder::GetMetadata()
210	0	{
211	0	AUTO_PROFILER_LABEL("OpusTrackEncoder::GetMetadata", OTHER);
212	0
213	0	MOZ_ASSERT(mInitialized \|\| mCanceled);
214	0
215	0	if (mCanceled \|\| mEncodingComplete) {
216	0	return nullptr;
217	0	}
218	0
219	0	if (!mInitialized) {
220	0	return nullptr;
221	0	}
222	0
223	0	RefPtr<OpusMetadata> meta = new OpusMetadata();
224	0	meta->mChannels = mChannels;
225	0	meta->mSamplingFrequency = mSamplingRate;
226	0
227	0	mLookahead = 0;
228	0	int error = opus_encoder_ctl(mEncoder, OPUS_GET_LOOKAHEAD(&mLookahead));
229	0	if (error != OPUS_OK) {
230	0	mLookahead = 0;
231	0	}
232	0
233	0	// The ogg time stamping and pre-skip is always timed at 48000.
234	0	SerializeOpusIdHeader(mChannels, mLookahead * (kOpusSamplingRate /
235	0	GetOutputSampleRate()), mSamplingRate,
236	0	&meta->mIdHeader);
237	0
238	0	nsCString vendor;
239	0	vendor.AppendASCII(opus_get_version_string());
240	0
241	0	nsTArray<nsCString> comments;
242	0	comments.AppendElement(NS_LITERAL_CSTRING("ENCODER=Mozilla" MOZ_APP_UA_VERSION));
243	0
244	0	SerializeOpusCommentHeader(vendor, comments,
245	0	&meta->mCommentHeader);
246	0
247	0	return meta.forget();
248	0	}
249
250		nsresult
251		OpusTrackEncoder::GetEncodedTrack(EncodedFrameContainer& aData)
252	0	{
253	0	AUTO_PROFILER_LABEL("OpusTrackEncoder::GetEncodedTrack", OTHER);
254	0
255	0	MOZ_ASSERT(mInitialized \|\| mCanceled);
256	0
257	0	if (mCanceled \|\| mEncodingComplete) {
258	0	return NS_ERROR_FAILURE;
259	0	}
260	0
261	0	if (!mInitialized) {
262	0	// calculation below depends on the truth that mInitialized is true.
263	0	return NS_ERROR_FAILURE;
264	0	}
265	0
266	0	TakeTrackData(mSourceSegment);
267	0
268	0	int result = 0;
269	0	// Loop until we run out of packets of input data
270	0	while (result >= 0 && !mEncodingComplete) {
271	0	// re-sampled frames left last time which didn't fit into an Opus packet duration.
272	0	const int framesLeft = mResampledLeftover.Length() / mChannels;
273	0	// When framesLeft is 0, (GetPacketDuration() - framesLeft) is a multiple
274	0	// of kOpusSamplingRate. There is not precision loss in the integer division
275	0	// in computing framesToFetch. If frameLeft > 0, we need to add 1 to
276	0	// framesToFetch to ensure there will be at least n frames after re-sampling.
277	0	const int frameRoundUp = framesLeft ? 1 : 0;
278	0
279	0	MOZ_ASSERT(GetPacketDuration() >= framesLeft);
280	0	// Try to fetch m frames such that there will be n frames
281	0	// where (n + frameLeft) >= GetPacketDuration() after re-sampling.
282	0	const int framesToFetch = !mResampler ? GetPacketDuration()
283	0	: (GetPacketDuration() - framesLeft) * mSamplingRate / kOpusSamplingRate
284	0	+ frameRoundUp;
285	0
286	0	if (!mEndOfStream && mSourceSegment.GetDuration() < framesToFetch) {
287	0	// Not enough raw data
288	0	return NS_OK;
289	0	}
290	0
291	0	// Pad \|mLookahead\| samples to the end of source stream to prevent lost of
292	0	// original data, the pcm duration will be calculated at rate 48K later.
293	0	if (mEndOfStream && !mEosSetInEncoder) {
294	0	mEosSetInEncoder = true;
295	0	mSourceSegment.AppendNullData(mLookahead);
296	0	}
297	0
298	0	// Start encoding data.
299	0	AutoTArray<AudioDataValue, 9600> pcm;
300	0	pcm.SetLength(GetPacketDuration() * mChannels);
301	0
302	0	int frameCopied = 0;
303	0
304	0	for (AudioSegment::ChunkIterator iter(mSourceSegment);
305	0	!iter.IsEnded() && frameCopied < framesToFetch;
306	0	iter.Next()) {
307	0	AudioChunk chunk = *iter;
308	0
309	0	// Chunk to the required frame size.
310	0	StreamTime frameToCopy = chunk.GetDuration();
311	0	if (frameToCopy > framesToFetch - frameCopied) {
312	0	frameToCopy = framesToFetch - frameCopied;
313	0	}
314	0	// Possible greatest value of framesToFetch = 3844: see
315	0	// https://bugzilla.mozilla.org/show_bug.cgi?id=1349421#c8. frameToCopy
316	0	// should not be able to exceed this value.
317	0	MOZ_ASSERT(frameToCopy <= 3844, "frameToCopy exceeded expected range");
318	0
319	0	if (!chunk.IsNull()) {
320	0	// Append the interleaved data to the end of pcm buffer.
321	0	AudioTrackEncoder::InterleaveTrackData(chunk, frameToCopy, mChannels,
322	0	pcm.Elements() + frameCopied * mChannels);
323	0	} else {
324	0	CheckedInt<int> memsetLength = CheckedInt<int>(frameToCopy) *
325	0	mChannels *
326	0	sizeof(AudioDataValue);
327	0	if (!memsetLength.isValid()) {
328	0	// This should never happen, but we use a defensive check because
329	0	// we really don't want a bad memset
330	0	MOZ_ASSERT_UNREACHABLE("memsetLength invalid!");
331	0	return NS_ERROR_FAILURE;
332	0	}
333	0	memset(pcm.Elements() + frameCopied * mChannels, 0,
334	0	memsetLength.value());
335	0	}
336	0
337	0	frameCopied += frameToCopy;
338	0	}
339	0
340	0	// Possible greatest value of framesToFetch = 3844: see
341	0	// https://bugzilla.mozilla.org/show_bug.cgi?id=1349421#c8. frameCopied
342	0	// should not be able to exceed this value.
343	0	MOZ_ASSERT(frameCopied <= 3844, "frameCopied exceeded expected range");
344	0
345	0	RefPtr<EncodedFrame> audiodata = new EncodedFrame();
346	0	audiodata->SetFrameType(EncodedFrame::OPUS_AUDIO_FRAME);
347	0	int framesInPCM = frameCopied;
348	0	if (mResampler) {
349	0	AutoTArray<AudioDataValue, 9600> resamplingDest;
350	0	// We want to consume all the input data, so we slightly oversize the
351	0	// resampled data buffer so we can fit the output data in. We cannot really
352	0	// predict the output frame count at each call.
353	0	uint32_t outframes = frameCopied * kOpusSamplingRate / mSamplingRate + 1;
354	0	uint32_t inframes = frameCopied;
355	0
356	0	resamplingDest.SetLength(outframes * mChannels);
357	0
358		#if MOZ_SAMPLE_TYPE_S16
359		short* in = reinterpret_cast<short*>(pcm.Elements());
360		short* out = reinterpret_cast<short*>(resamplingDest.Elements());
361		speex_resampler_process_interleaved_int(mResampler, in, &inframes,
362		out, &outframes);
363		#else
364		float* in = reinterpret_cast<float*>(pcm.Elements());
365	0	float* out = reinterpret_cast<float*>(resamplingDest.Elements());
366	0	speex_resampler_process_interleaved_float(mResampler, in, &inframes,
367	0	out, &outframes);
368	0	#endif
369	0
370	0	MOZ_ASSERT(pcm.Length() >= mResampledLeftover.Length());
371	0	PodCopy(pcm.Elements(), mResampledLeftover.Elements(),
372	0	mResampledLeftover.Length());
373	0
374	0	uint32_t outframesToCopy = std::min(outframes,
375	0	static_cast<uint32_t>(GetPacketDuration() - framesLeft));
376	0
377	0	MOZ_ASSERT(pcm.Length() - mResampledLeftover.Length() >=
378	0	outframesToCopy * mChannels);
379	0	PodCopy(pcm.Elements() + mResampledLeftover.Length(),
380	0	resamplingDest.Elements(), outframesToCopy * mChannels);
381	0	int frameLeftover = outframes - outframesToCopy;
382	0	mResampledLeftover.SetLength(frameLeftover * mChannels);
383	0	PodCopy(mResampledLeftover.Elements(),
384	0	resamplingDest.Elements() + outframesToCopy * mChannels,
385	0	mResampledLeftover.Length());
386	0	// This is always at 48000Hz.
387	0	framesInPCM = framesLeft + outframesToCopy;
388	0	audiodata->SetDuration(framesInPCM);
389	0	} else {
390	0	// The ogg time stamping and pre-skip is always timed at 48000.
391	0	audiodata->SetDuration(frameCopied * (kOpusSamplingRate / mSamplingRate));
392	0	}
393	0
394	0	// Remove the raw data which has been pulled to pcm buffer.
395	0	// The value of frameCopied should equal to (or smaller than, if eos)
396	0	// GetPacketDuration().
397	0	mSourceSegment.RemoveLeading(frameCopied);
398	0
399	0	// Has reached the end of input stream and all queued data has pulled for
400	0	// encoding.
401	0	if (mSourceSegment.GetDuration() == 0 && mEosSetInEncoder) {
402	0	mEncodingComplete = true;
403	0	LOG("[Opus] Done encoding.");
404	0	}
405	0
406	0	MOZ_ASSERT(mEosSetInEncoder \|\| framesInPCM == GetPacketDuration());
407	0
408	0	// Append null data to pcm buffer if the leftover data is not enough for
409	0	// opus encoder.
410	0	if (framesInPCM < GetPacketDuration() && mEosSetInEncoder) {
411	0	PodZero(pcm.Elements() + framesInPCM * mChannels,
412	0	(GetPacketDuration() - framesInPCM) * mChannels);
413	0	}
414	0	nsTArray<uint8_t> frameData;
415	0	// Encode the data with Opus Encoder.
416	0	frameData.SetLength(MAX_DATA_BYTES);
417	0	// result is returned as opus error code if it is negative.
418	0	result = 0;
419		#ifdef MOZ_SAMPLE_TYPE_S16
420		const opus_int16* pcmBuf = static_cast<opus_int16*>(pcm.Elements());
421		result = opus_encode(mEncoder, pcmBuf, GetPacketDuration(),
422		frameData.Elements(), MAX_DATA_BYTES);
423		#else
424		const float* pcmBuf = static_cast<float*>(pcm.Elements());
425	0	result = opus_encode_float(mEncoder, pcmBuf, GetPacketDuration(),
426	0	frameData.Elements(), MAX_DATA_BYTES);
427	0	#endif
428	0	frameData.SetLength(result >= 0 ? result : 0);
429	0
430	0	if (result < 0) {
431	0	LOG("[Opus] Fail to encode data! Result: %s.", opus_strerror(result));
432	0	}
433	0	if (mEncodingComplete) {
434	0	if (mResampler) {
435	0	speex_resampler_destroy(mResampler);
436	0	mResampler = nullptr;
437	0	}
438	0	mResampledLeftover.SetLength(0);
439	0	}
440	0
441	0	audiodata->SwapInFrameData(frameData);
442	0	// timestamp should be the time of the first sample
443	0	audiodata->SetTimeStamp(mOutputTimeStamp);
444	0	mOutputTimeStamp += FramesToUsecs(GetPacketDuration(), kOpusSamplingRate).value();
445	0	LOG("[Opus] mOutputTimeStamp %lld.",mOutputTimeStamp);
446	0	aData.AppendEncodedFrame(audiodata);
447	0	}
448	0
449	0	return result >= 0 ? NS_OK : NS_ERROR_FAILURE;
450	0	}
451
452		} // namespace mozilla