/src/mozilla-central/dom/media/webspeech/synth/speechd/SpeechDispatcherService.cpp

Source (jump to first uncovered line)
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

#include "SpeechDispatcherService.h"

#include "mozilla/dom/nsSpeechTask.h"
#include "mozilla/dom/nsSynthVoiceRegistry.h"
#include "mozilla/ClearOnShutdown.h"
#include "mozilla/Preferences.h"
#include "nsEscape.h"
#include "nsISupports.h"
#include "nsPrintfCString.h"
#include "nsReadableUtils.h"
#include "nsServiceManagerUtils.h"
#include "nsThreadUtils.h"
#include "prlink.h"

#include <math.h>
#include <stdlib.h>

// Prefix placed at the start of every voice URI built by this service.
#define URI_PREFIX "urn:moz-tts:speechd:"

// Upper and lower bounds applied to the requested rate multiplier before it is
// mapped onto speech-dispatcher's rate scale in Speak().
#define MAX_RATE static_cast<float>(2.5)
#define MIN_RATE static_cast<float>(0.5)

// Some structures for libspeechd
// Kind of event handed to the notification callbacks registered on an
// SPDConnection. Values are written out explicitly so the numbering is
// visible at a glance.
enum SPDNotificationType {
  SPD_EVENT_BEGIN = 0,
  SPD_EVENT_END = 1,
  SPD_EVENT_INDEX_MARK = 2,
  SPD_EVENT_CANCEL = 3,
  SPD_EVENT_PAUSE = 4,
  SPD_EVENT_RESUME = 5
};

// Bit flags naming the notification classes that can be switched on with
// spd_set_notification_on(). Each flag occupies its own bit; SPD_ALL is the
// union of all six.
enum SPDNotification {
  SPD_BEGIN = 1 << 0,
  SPD_END = 1 << 1,
  SPD_INDEX_MARKS = 1 << 2,
  SPD_CANCEL = 1 << 3,
  SPD_PAUSE = 1 << 4,
  SPD_RESUME = 1 << 5,

  SPD_ALL = SPD_BEGIN | SPD_END | SPD_INDEX_MARKS | SPD_CANCEL | SPD_PAUSE |
            SPD_RESUME
};

// Connection mode passed as the last argument of spd_open().
enum SPDConnectionMode {
  SPD_MODE_SINGLE = 0,
  SPD_MODE_THREADED = 1
};

// Signature of the begin/end/cancel/pause/resume notification callbacks
// stored in SPDConnection.
using SPDCallback = void (*)(size_t msg_id, size_t client_id,
                             SPDNotificationType state);

// Signature of the index-mark notification callback; identical to SPDCallback
// with the name of the mark appended.
using SPDCallbackIM = void (*)(size_t msg_id, size_t client_id,
                               SPDNotificationType state, char* index_mark);

// Partial declaration of libspeechd's connection object. Only the leading
// notification callback slots are declared here; Setup() assigns them.
// NOTE(review): the member order presumably has to match the layout used by
// the loaded libspeechd -- confirm before touching this struct.
struct SPDConnection
{
  SPDCallback callback_begin;
  SPDCallback callback_end;
  SPDCallback callback_cancel;
  SPDCallback callback_pause;
  SPDCallback callback_resume;
  SPDCallbackIM callback_im;

  /* partial, more private fields in structure */
};

// One element of the null-terminated array returned by
// spd_list_synthesis_voices(). Setup() reads all three strings to build the
// voice URI and language tag.
struct SPDVoice
{
  char* name;
  char* language;
  char* variant;
};

// Message priority accepted by spd_say(). Speak() always uses SPD_MESSAGE.
enum SPDPriority {
  SPD_IMPORTANT = 1,
  SPD_MESSAGE,
  SPD_TEXT,
  SPD_NOTIFICATION,
  SPD_PROGRESS
};

// X-macro listing every libspeechd entry point this file uses, as
// FUNC(symbol name, return type, parameter list). It is expanded twice with
// different definitions of FUNC: once below to declare the function pointers,
// and once in Setup() to build the symbol lookup table.
#define SPEECHD_FUNCTIONS \
  FUNC(spd_open, SPDConnection*, (const char*, const char*, const char*, SPDConnectionMode)) \
  FUNC(spd_close, void, (SPDConnection*)) \
  FUNC(spd_list_synthesis_voices, SPDVoice**, (SPDConnection*)) \
  FUNC(spd_say, int, (SPDConnection*, SPDPriority, const char*)) \
  FUNC(spd_cancel, int, (SPDConnection*)) \
  FUNC(spd_set_volume, int, (SPDConnection*, int)) \
  FUNC(spd_set_voice_rate, int, (SPDConnection*, int)) \
  FUNC(spd_set_voice_pitch, int, (SPDConnection*, int)) \
  FUNC(spd_set_synthesis_voice, int, (SPDConnection*, const char*)) \
  FUNC(spd_set_notification_on, int, (SPDConnection*, SPDNotification))

// First expansion: for each entry, declare a function pointer type
// _<name>_fn and a file-local pointer _<name> that Setup() fills in.
#define FUNC(name, type, params) \
  typedef type (*_##name##_fn) params; \
  static _##name##_fn _##name;

SPEECHD_FUNCTIONS

#undef FUNC

// Redirect the plain spd_* names to the dynamically resolved pointers so the
// rest of the file can call them like ordinary functions.
#define spd_open _spd_open
#define spd_close _spd_close
#define spd_list_synthesis_voices _spd_list_synthesis_voices
#define spd_say _spd_say
#define spd_cancel _spd_cancel
#define spd_set_volume _spd_set_volume
#define spd_set_voice_rate _spd_set_voice_rate
#define spd_set_voice_pitch _spd_set_voice_pitch
#define spd_set_synthesis_voice _spd_set_synthesis_voice
#define spd_set_notification_on _spd_set_notification_on

// Handle of the loaded speech-dispatcher client library; assigned in Setup().
static PRLibrary* speechdLib = nullptr;

// Generic function pointer type used to store any resolved libspeechd symbol.
typedef void (*nsSpeechDispatcherFunc)();

// Row of the symbol lookup table built in Setup(): the symbol to look up and
// the address of the function pointer that receives it.
struct nsSpeechDispatcherDynamicFunction
{
  const char* functionName;
  nsSpeechDispatcherFunc* function;
};

namespace mozilla {
namespace dom {

// Storage for the single service instance handed out by GetInstance().
StaticRefPtr<SpeechDispatcherService> SpeechDispatcherService::sSingleton;

// Ref-counted record describing one voice reported by speech-dispatcher.
// Instances are stored in SpeechDispatcherService::mVoices, keyed by URI.
class SpeechDispatcherVoice
{
public:
  SpeechDispatcherVoice(const nsAString& aName, const nsAString& aLanguage)
    : mName(aName)
    , mLanguage(aLanguage)
  {
  }

  NS_INLINE_DECL_THREADSAFE_REFCOUNTING(SpeechDispatcherVoice)

  // Name of the voice as reported by speech-dispatcher.
  nsString mName;

  // Language of the voice, expressed as a BCP-47 tag.
  nsString mLanguage;

private:
  // Private so that lifetime is managed through the ref-counting methods
  // declared above.
  ~SpeechDispatcherVoice() = default;
};


// Per-utterance object connecting a speech task to the service: it receives
// control requests from the task (nsISpeechTaskCallback) and forwards
// speech-dispatcher events to the task through OnSpeechEvent().
class SpeechDispatcherCallback final : public nsISpeechTaskCallback
{
public:
  SpeechDispatcherCallback(nsISpeechTask* aTask,
                           SpeechDispatcherService* aService)
    : mTask(aTask)
    , mService(aService)
  {
  }

  NS_DECL_CYCLE_COLLECTING_ISUPPORTS
  NS_DECL_CYCLE_COLLECTION_CLASS_AMBIGUOUS(SpeechDispatcherCallback, nsISpeechTaskCallback)

  NS_DECL_NSISPEECHTASKCALLBACK

  // Translates a speech-dispatcher event into the matching task notification.
  // Returns true once the utterance is over (END or CANCEL).
  bool OnSpeechEvent(SPDNotificationType state);

private:
  ~SpeechDispatcherCallback() = default;

  // Task that receives the start/pause/resume/end notifications.
  nsCOMPtr<nsISpeechTask> mTask;

  // Strong reference that keeps the service alive for as long as this
  // callback exists.
  RefPtr<SpeechDispatcherService> mService;

  // Moment the BEGIN event was seen; basis for reported elapsed times.
  TimeStamp mStartTime;
};

// Cycle-collection glue for SpeechDispatcherCallback. mTask is the only member
// listed for the cycle collector; mService is not.
NS_IMPL_CYCLE_COLLECTION(SpeechDispatcherCallback, mTask);

// QueryInterface table: the object answers for nsISpeechTaskCallback and for
// nsISupports (reached through nsISpeechTaskCallback).
NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(SpeechDispatcherCallback)
  NS_INTERFACE_MAP_ENTRY(nsISpeechTaskCallback)
  NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsISpeechTaskCallback)
NS_INTERFACE_MAP_END

// Cycle-collecting AddRef/Release implementations.
NS_IMPL_CYCLE_COLLECTING_ADDREF(SpeechDispatcherCallback)
NS_IMPL_CYCLE_COLLECTING_RELEASE(SpeechDispatcherCallback)

// nsISpeechTaskCallback: pause request from the task. Deliberately a no-op
// that reports success; the comment in the body explains why.
NS_IMETHODIMP
SpeechDispatcherCallback::OnPause()
{
  // XXX: Speech dispatcher does not pause immediately, but waits for the speech
  // to reach an index mark so that it could resume from that offset.
  // There is no support for word or sentence boundaries, so index marks would
  // only occur in explicit SSML marks, and we don't support that yet.
  // What in actuality happens, is that if you call spd_pause(), it will speak
  // the utterance in its entirety, dispatch an end event, and then put speechd
  // in a 'paused' state. Since it is after the utterance ended, we don't get
  // that state change, and our speech api is in an unrecoverable state.
  // So, since it is useless anyway, I am not implementing pause.
  return NS_OK;
}

// nsISpeechTaskCallback: resume request from the task. A no-op that reports
// success, mirroring OnPause().
NS_IMETHODIMP
SpeechDispatcherCallback::OnResume()
{
  // XXX: Unsupported, see OnPause().
  return NS_OK;
}

// nsISpeechTaskCallback: cancel request from the task. Forwards the request to
// speech-dispatcher through the service's connection and maps a negative
// return value to NS_ERROR_FAILURE.
NS_IMETHODIMP
SpeechDispatcherCallback::OnCancel()
{
  const int status = spd_cancel(mService->mSpeechdClient);
  return status < 0 ? NS_ERROR_FAILURE : NS_OK;
}

// nsISpeechTaskCallback: volume change request from the task. aVolume is
// scaled by 100 and handed to speech-dispatcher; a negative return value is
// reported as NS_ERROR_FAILURE.
NS_IMETHODIMP
SpeechDispatcherCallback::OnVolumeChanged(float aVolume)
{
  // XXX: speechd does not currently apply the new volume to an utterance that
  // is already being spoken. The call is harmless, so it stays here in case
  // speechd gains mid-utterance support later.
  const int scaledVolume = static_cast<int>(aVolume * 100);
  const int status = spd_set_volume(mService->mSpeechdClient, scaledVolume);
  return status < 0 ? NS_ERROR_FAILURE : NS_OK;
}

// Translates one speech-dispatcher event into the matching notification on
// the speech task.
//
// @param state  The event reported for this utterance.
// @return true when the utterance is finished (END or CANCEL) and the caller
//         should drop its reference to this callback; false otherwise.
bool
SpeechDispatcherCallback::OnSpeechEvent(SPDNotificationType state)
{
  // Seconds elapsed since the BEGIN event. mStartTime is only set by BEGIN, so
  // if another event arrives first (e.g. a cancel without a preceding begin)
  // report 0 rather than doing arithmetic on a null TimeStamp.
  auto elapsedSeconds = [this]() -> float {
    if (mStartTime.IsNull()) {
      return 0.0f;
    }
    return static_cast<float>((TimeStamp::Now() - mStartTime).ToSeconds());
  };

  bool remove = false;

  switch (state) {
    case SPD_EVENT_BEGIN:
      mStartTime = TimeStamp::Now();
      mTask->DispatchStart();
      break;

    case SPD_EVENT_PAUSE:
      mTask->DispatchPause(elapsedSeconds(), 0);
      break;

    case SPD_EVENT_RESUME:
      mTask->DispatchResume(elapsedSeconds(), 0);
      break;

    case SPD_EVENT_CANCEL:
    case SPD_EVENT_END:
      // Both outcomes end the utterance from the task's point of view.
      mTask->DispatchEnd(elapsedSeconds(), 0);
      remove = true;
      break;

    case SPD_EVENT_INDEX_MARK:
      // Not yet supported
      break;

    default:
      // Unknown event values are ignored.
      break;
  }

  return remove;
}

// Notification callback installed on the speechd connection in Setup(). It
// does no work itself: it forwards the message id and event to
// SpeechDispatcherService::EventNotify on the main thread. Nothing happens if
// the service singleton does not exist.
static void
speechd_cb(size_t msg_id, size_t client_id, SPDNotificationType state)
{
  SpeechDispatcherService* service = SpeechDispatcherService::GetInstance(false);
  if (!service) {
    return;
  }

  // EventNotify takes a 32-bit message id.
  const uint32_t messageId = static_cast<uint32_t>(msg_id);

  NS_DispatchToMainThread(NewRunnableMethod<uint32_t, SPDNotificationType>(
    "dom::SpeechDispatcherService::EventNotify",
    service,
    &SpeechDispatcherService::EventNotify,
    messageId,
    state));
}


// QueryInterface table: the service answers for nsISpeechService, nsIObserver
// and nsISupports (reached through nsIObserver).
NS_INTERFACE_MAP_BEGIN(SpeechDispatcherService)
  NS_INTERFACE_MAP_ENTRY(nsISpeechService)
  NS_INTERFACE_MAP_ENTRY(nsIObserver)
  NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsIObserver)
NS_INTERFACE_MAP_END

// Reference counting implementation for the service.
NS_IMPL_ADDREF(SpeechDispatcherService)
NS_IMPL_RELEASE(SpeechDispatcherService)

// Creates an uninitialized service with no speechd connection. The real work
// starts in Init().
SpeechDispatcherService::SpeechDispatcherService()
  : mInitialized(false)
  , mSpeechdClient(nullptr)
{
}

void
SpeechDispatcherService::Init()
{
  if (!Preferences::GetBool("media.webspeech.synth.enabled") ||
      Preferences::GetBool("media.webspeech.synth.test")) {
    return;
  }

  // While speech dispatcher has a "threaded" mode, only spd_say() is async.
  // Since synchronous socket i/o could impact startup time, we do
  // initialization in a separate thread.
  DebugOnly<nsresult> rv = NS_NewNamedThread("speechd init",
                                             getter_AddRefs(mInitThread));
  MOZ_ASSERT(NS_SUCCEEDED(rv));
  rv = mInitThread->Dispatch(
    NewRunnableMethod("dom::SpeechDispatcherService::Setup",
                      this,
                      &SpeechDispatcherService::Setup),
    NS_DISPATCH_NORMAL);
  MOZ_ASSERT(NS_SUCCEEDED(rv));
}

// Tears the service down: stops the init thread if it is still referenced,
// then closes the speechd connection if one was opened.
SpeechDispatcherService::~SpeechDispatcherService()
{
  // RegisterVoices() clears mInitThread after a successful setup, so this is
  // only non-null when setup never reached that point.
  if (mInitThread) {
    mInitThread->Shutdown();
  }

  // mSpeechdClient is only set by a successful spd_open() in Setup().
  if (mSpeechdClient) {
    spd_close(mSpeechdClient);
  }
}

void
SpeechDispatcherService::Setup()
{
#define FUNC(name, type, params) { #name, (nsSpeechDispatcherFunc *)&_##name },
  static const nsSpeechDispatcherDynamicFunction kSpeechDispatcherSymbols[] = {
    SPEECHD_FUNCTIONS
  };
#undef FUNC

  MOZ_ASSERT(!mInitialized);

  speechdLib = PR_LoadLibrary("libspeechd.so.2");

  if (!speechdLib) {
    NS_WARNING("Failed to load speechd library");
    return;
  }

  if (!PR_FindFunctionSymbol(speechdLib, "spd_get_volume")) {
    // There is no version getter function, so we rely on a symbol that was
    // introduced in release 0.8.2 in order to check for ABI compatibility.
    NS_WARNING("Unsupported version of speechd detected");
    return;
  }

  for (uint32_t i = 0; i < ArrayLength(kSpeechDispatcherSymbols); i++) {
    *kSpeechDispatcherSymbols[i].function =
      PR_FindFunctionSymbol(speechdLib, kSpeechDispatcherSymbols[i].functionName);

    if (!*kSpeechDispatcherSymbols[i].function) {
      NS_WARNING(nsPrintfCString("Failed to find speechd symbol for'%s'",
                                 kSpeechDispatcherSymbols[i].functionName).get());
      return;
    }
  }

  mSpeechdClient = spd_open("firefox", "web speech api", "who", SPD_MODE_THREADED);
  if (!mSpeechdClient) {
    NS_WARNING("Failed to call spd_open");
    return;
  }

  // Get all the voices from sapi and register in the SynthVoiceRegistry
  SPDVoice** list = spd_list_synthesis_voices(mSpeechdClient);

  mSpeechdClient->callback_begin = speechd_cb;
  mSpeechdClient->callback_end = speechd_cb;
  mSpeechdClient->callback_cancel = speechd_cb;
  mSpeechdClient->callback_pause = speechd_cb;
  mSpeechdClient->callback_resume = speechd_cb;

  spd_set_notification_on(mSpeechdClient, SPD_BEGIN);
  spd_set_notification_on(mSpeechdClient, SPD_END);
  spd_set_notification_on(mSpeechdClient, SPD_CANCEL);

  if (list != NULL) {
    for (int i = 0; list[i]; i++) {
      nsAutoString uri;

      uri.AssignLiteral(URI_PREFIX);
      nsAutoCString name;
      NS_EscapeURL(list[i]->name, -1, esc_OnlyNonASCII | esc_Spaces | esc_AlwaysCopy, name);
      uri.Append(NS_ConvertUTF8toUTF16(name));;
      uri.AppendLiteral("?");

      nsAutoCString lang(list[i]->language);

      if (strcmp(list[i]->variant, "none") != 0) {
        // In speech dispatcher, the variant will usually be the locale subtag
        // with another, non-standard suptag after it. We keep the first one
        // and convert it to uppercase.
        const char* v = list[i]->variant;
        const char* hyphen = strchr(v, '-');
        nsDependentCSubstring variant(v, hyphen ? hyphen - v : strlen(v));
        ToUpperCase(variant);

        // eSpeak uses UK which is not a valid region subtag in BCP47.
        if (variant.EqualsLiteral("UK")) {
          variant.AssignLiteral("GB");
        }

        lang.AppendLiteral("-");
        lang.Append(variant);
      }

      uri.Append(NS_ConvertUTF8toUTF16(lang));

      mVoices.Put(uri, new SpeechDispatcherVoice(
                    NS_ConvertUTF8toUTF16(list[i]->name),
                    NS_ConvertUTF8toUTF16(lang)));
    }
  }

  NS_DispatchToMainThread(
    NewRunnableMethod("dom::SpeechDispatcherService::RegisterVoices",
                      this,
                      &SpeechDispatcherService::RegisterVoices));

  //mInitialized = true;
}

// private methods

// Dispatched to the main thread by Setup() once the voice list has been
// collected. Adds every entry of mVoices to the voice registry, shuts down and
// releases the init thread, marks the service as initialized and tells the
// registry that the set of voices changed.
void
SpeechDispatcherService::RegisterVoices()
{
  RefPtr<nsSynthVoiceRegistry> registry = nsSynthVoiceRegistry::GetInstance();

  for (auto iter = mVoices.Iter(); !iter.Done(); iter.Next()) {
    SpeechDispatcherVoice* voice = iter.Data();
    const bool isDefaultVoice = voice->mName.EqualsLiteral("default");

    // The last argument (aQueuesUtterances) is true because this service can
    // only speak one utterance at a time; the registry then tracks global
    // state and schedules access to the service.
    DebugOnly<nsresult> rv =
      registry->AddVoice(this, iter.Key(), voice->mName, voice->mLanguage,
                         isDefaultVoice, true);

    NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), "Failed to add voice");
  }

  // Setup() has finished, so the helper thread is no longer needed.
  mInitThread->Shutdown();
  mInitThread = nullptr;

  mInitialized = true;

  registry->NotifyVoicesChanged();
}

// nsIObserver

// nsIObserver entry point. All three arguments are ignored and NS_OK is
// returned unconditionally.
NS_IMETHODIMP
SpeechDispatcherService::Observe(nsISupports* aSubject, const char* aTopic,
                                 const char16_t* aData)
{
  return NS_OK;
}

// nsISpeechService

// TODO: Support SSML
// nsISpeechService: speaks aText with the voice identified by aUri.
//
// @param aText    Text to speak; an empty string is handled without speechd.
// @param aUri     URI of a voice previously stored in mVoices.
// @param aVolume  Volume, scaled by 100 before being sent to speechd.
// @param aRate    Rate multiplier (1 is normal), clamped to
//                 [MIN_RATE, MAX_RATE] and mapped onto -100..100.
// @param aPitch   Pitch (1 is default), mapped as (aPitch - 1) * 100.
// @param aTask    Task that is notified about the progress of the utterance.
// @return NS_ERROR_NOT_AVAILABLE if the service is not initialized or the
//         voice is unknown, the failure code of aTask->Setup() if that fails,
//         NS_ERROR_FAILURE if spd_say() reports an error, NS_OK otherwise.
NS_IMETHODIMP
SpeechDispatcherService::Speak(const nsAString& aText, const nsAString& aUri,
                               float aVolume, float aRate, float aPitch,
                               nsISpeechTask* aTask)
{
  if (NS_WARN_IF(!mInitialized)) {
    return NS_ERROR_NOT_AVAILABLE;
  }

  RefPtr<SpeechDispatcherCallback> taskCallback =
    new SpeechDispatcherCallback(aTask, this);

  bool found = false;
  SpeechDispatcherVoice* selectedVoice = mVoices.GetWeak(aUri, &found);
  if(NS_WARN_IF(!(found))) {
    return NS_ERROR_NOT_AVAILABLE;
  }

  // Select the voice by its speech-dispatcher name.
  const NS_ConvertUTF16toUTF8 voiceName(selectedVoice->mName);
  spd_set_synthesis_voice(mSpeechdClient, voiceName.get());

  // Volume arrives as 0.0 - 1.0 and is sent to speech-dispatcher as 0 - 100.
  // NOTE(review): libspeechd's documentation appears to describe the volume
  // range as -100..100 -- confirm that this mapping is the intended one.
  const int speechdVolume = static_cast<int>(aVolume * 100);
  spd_set_volume(mSpeechdClient, speechdVolume);

  // aRate runs from 0.1 (0.1x) to 10 (10x), 1 (1x) being the normal rate,
  // whereas speechd wants -100 to 100 with 0 as the normal rate. Both halves
  // of the scale are distributed logarithmically.
  float speechdRate = 0;
  if (aRate < 1) {
    // Slower than normal: 0.5x (and anything below) lands on -100.
    speechdRate = log10(std::max(aRate, MIN_RATE)) / log10(MIN_RATE) * -100;
  } else if (aRate > 1) {
    // Faster than normal: 2.5x (and anything above) lands on 100.
    speechdRate = log10(std::min(aRate, MAX_RATE)) / log10(MAX_RATE) * 100;
  }
  spd_set_voice_rate(mSpeechdClient, static_cast<int>(speechdRate));

  // Pitch arrives as 0 to 2 with 1 as the default; speech-dispatcher wants
  // -100 to 100 with 0 as the default.
  const int speechdPitch = static_cast<int>((aPitch - 1) * 100);
  spd_set_voice_pitch(mSpeechdClient, speechdPitch);

  const nsresult setupResult = aTask->Setup(taskCallback);
  if (NS_FAILED(setupResult)) {
    return setupResult;
  }

  if (!aText.Length()) {
    // Speech dispatcher does not cope well with empty strings, so nothing is
    // sent to it. A start event followed by an end event is emulated instead.
    auto emulateEvent = [&taskCallback](SPDNotificationType aEvent) {
      NS_DispatchToMainThread(NewRunnableMethod<SPDNotificationType>(
        "dom::SpeechDispatcherCallback::OnSpeechEvent",
        taskCallback,
        &SpeechDispatcherCallback::OnSpeechEvent,
        aEvent));
    };

    emulateEvent(SPD_EVENT_BEGIN);
    emulateEvent(SPD_EVENT_END);
    return NS_OK;
  }

  const NS_ConvertUTF16toUTF8 utf8Text(aText);
  const int messageId = spd_say(mSpeechdClient, SPD_MESSAGE, utf8Text.get());
  if (messageId < 0) {
    return NS_ERROR_FAILURE;
  }

  // Remember the callback so EventNotify() can find it by message id.
  mCallbacks.Put(messageId, taskCallback);

  return NS_OK;
}

// Returns the service singleton.
//
// @param create  When true and no instance exists yet, one is created,
//                initialized and registered with ClearOnShutdown.
// @return The singleton, or nullptr if it does not exist (and was not
//         created) or if this is not the main gecko process.
SpeechDispatcherService*
SpeechDispatcherService::GetInstance(bool create)
{
  if (XRE_GetProcessType() != GeckoProcessType_Default) {
    MOZ_ASSERT(false,
               "SpeechDispatcherService can only be started on main gecko process");
    return nullptr;
  }

  const bool needsCreation = create && !sSingleton;
  if (needsCreation) {
    sSingleton = new SpeechDispatcherService();
    sSingleton->Init();
    ClearOnShutdown(&sSingleton);
  }

  return sSingleton;
}

// Main-thread accessor that returns an owning reference to the singleton
// obtained from GetInstance().
already_AddRefed<SpeechDispatcherService>
SpeechDispatcherService::GetInstanceForService()
{
  MOZ_ASSERT(NS_IsMainThread());

  RefPtr<SpeechDispatcherService> service(GetInstance());
  return service.forget();
}

void
SpeechDispatcherService::EventNotify(uint32_t aMsgId, uint32_t aState)
{
  SpeechDispatcherCallback* callback = mCallbacks.GetWeak(aMsgId);

  if (callback) {
    if (callback->OnSpeechEvent((SPDNotificationType)aState)) {
      mCallbacks.Remove(aMsgId);
    }
  }
}

} // namespace dom
} // namespace mozilla

Coverage Report

Created: 2018-09-25 14:53

Line	Count	Source (jump to first uncovered line)
1		/* -- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -- */
2		/* vim: set ts=8 sts=2 et sw=2 tw=80: */
3		/* This Source Code Form is subject to the terms of the Mozilla Public
4		* License, v. 2.0. If a copy of the MPL was not distributed with this
5		* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7		#include "SpeechDispatcherService.h"
8
9		#include "mozilla/dom/nsSpeechTask.h"
10		#include "mozilla/dom/nsSynthVoiceRegistry.h"
11		#include "mozilla/ClearOnShutdown.h"
12		#include "mozilla/Preferences.h"
13		#include "nsEscape.h"
14		#include "nsISupports.h"
15		#include "nsPrintfCString.h"
16		#include "nsReadableUtils.h"
17		#include "nsServiceManagerUtils.h"
18		#include "nsThreadUtils.h"
19		#include "prlink.h"
20
21		#include <math.h>
22		#include <stdlib.h>
23
24	0	#define URI_PREFIX "urn:moz-tts:speechd:"
25
26	0	#define MAX_RATE static_cast<float>(2.5)
27	0	#define MIN_RATE static_cast<float>(0.5)
28
29		// Some structures for libspeechd
30		typedef enum {
31		SPD_EVENT_BEGIN,
32		SPD_EVENT_END,
33		SPD_EVENT_INDEX_MARK,
34		SPD_EVENT_CANCEL,
35		SPD_EVENT_PAUSE,
36		SPD_EVENT_RESUME
37		} SPDNotificationType;
38
39		typedef enum {
40		SPD_BEGIN = 1,
41		SPD_END = 2,
42		SPD_INDEX_MARKS = 4,
43		SPD_CANCEL = 8,
44		SPD_PAUSE = 16,
45		SPD_RESUME = 32,
46
47		SPD_ALL = 0x3f
48		} SPDNotification;
49
50		typedef enum {
51		SPD_MODE_SINGLE = 0,
52		SPD_MODE_THREADED = 1
53		} SPDConnectionMode;
54
55		typedef void (*SPDCallback) (size_t msg_id, size_t client_id,
56		SPDNotificationType state);
57
58		typedef void (*SPDCallbackIM) (size_t msg_id, size_t client_id,
59		SPDNotificationType state, char* index_mark);
60
61		struct SPDConnection
62		{
63		SPDCallback callback_begin;
64		SPDCallback callback_end;
65		SPDCallback callback_cancel;
66		SPDCallback callback_pause;
67		SPDCallback callback_resume;
68		SPDCallbackIM callback_im;
69
70		/* partial, more private fields in structure */
71		};
72
73		struct SPDVoice
74		{
75		char* name;
76		char* language;
77		char* variant;
78		};
79
80		typedef enum {
81		SPD_IMPORTANT = 1,
82		SPD_MESSAGE = 2,
83		SPD_TEXT = 3,
84		SPD_NOTIFICATION = 4,
85		SPD_PROGRESS = 5
86		} SPDPriority;
87
88		#define SPEECHD_FUNCTIONS \
89	0	FUNC(spd_open, SPDConnection, (const char, const char, const char, SPDConnectionMode)) \
90	0	FUNC(spd_close, void, (SPDConnection*)) \
91	0	FUNC(spd_list_synthesis_voices, SPDVoice*, (SPDConnection)) \
92	0	FUNC(spd_say, int, (SPDConnection, SPDPriority, const char)) \
93	0	FUNC(spd_cancel, int, (SPDConnection*)) \
94	0	FUNC(spd_set_volume, int, (SPDConnection*, int)) \
95	0	FUNC(spd_set_voice_rate, int, (SPDConnection*, int)) \
96	0	FUNC(spd_set_voice_pitch, int, (SPDConnection*, int)) \
97	0	FUNC(spd_set_synthesis_voice, int, (SPDConnection, const char)) \
98	0	FUNC(spd_set_notification_on, int, (SPDConnection*, SPDNotification))
99
100		#define FUNC(name, type, params) \
101		typedef type (*_##name##_fn) params; \
102		static _##name##_fn _##name;
103
104		SPEECHD_FUNCTIONS
105
106		#undef FUNC
107
108	0	#define spd_open _spd_open
109	0	#define spd_close _spd_close
110	0	#define spd_list_synthesis_voices _spd_list_synthesis_voices
111	0	#define spd_say _spd_say
112	0	#define spd_cancel _spd_cancel
113	0	#define spd_set_volume _spd_set_volume
114	0	#define spd_set_voice_rate _spd_set_voice_rate
115	0	#define spd_set_voice_pitch _spd_set_voice_pitch
116	0	#define spd_set_synthesis_voice _spd_set_synthesis_voice
117	0	#define spd_set_notification_on _spd_set_notification_on
118
119		static PRLibrary* speechdLib = nullptr;
120
121		typedef void (*nsSpeechDispatcherFunc)();
122		struct nsSpeechDispatcherDynamicFunction
123		{
124		const char* functionName;
125		nsSpeechDispatcherFunc* function;
126		};
127
128		namespace mozilla {
129		namespace dom {
130
131		StaticRefPtr<SpeechDispatcherService> SpeechDispatcherService::sSingleton;
132
133		class SpeechDispatcherVoice
134		{
135		public:
136
137		SpeechDispatcherVoice(const nsAString& aName, const nsAString& aLanguage)
138	0	: mName(aName), mLanguage(aLanguage) {}
139
140		NS_INLINE_DECL_THREADSAFE_REFCOUNTING(SpeechDispatcherVoice)
141
142		// Voice name
143		nsString mName;
144
145		// Voice language, in BCP-47 syntax
146		nsString mLanguage;
147
148		private:
149	0	~SpeechDispatcherVoice() {}
150		};
151
152
153		class SpeechDispatcherCallback final : public nsISpeechTaskCallback
154		{
155		public:
156		SpeechDispatcherCallback(nsISpeechTask* aTask, SpeechDispatcherService* aService)
157		: mTask(aTask)
158	0	, mService(aService) {}
159
160		NS_DECL_CYCLE_COLLECTING_ISUPPORTS
161		NS_DECL_CYCLE_COLLECTION_CLASS_AMBIGUOUS(SpeechDispatcherCallback, nsISpeechTaskCallback)
162
163		NS_DECL_NSISPEECHTASKCALLBACK
164
165		bool OnSpeechEvent(SPDNotificationType state);
166
167		private:
168	0	~SpeechDispatcherCallback() { }
169
170		// This pointer is used to dispatch events
171		nsCOMPtr<nsISpeechTask> mTask;
172
173		// By holding a strong reference to the service we guarantee that it won't be
174		// destroyed before this runnable.
175		RefPtr<SpeechDispatcherService> mService;
176
177		TimeStamp mStartTime;
178		};
179
180		NS_IMPL_CYCLE_COLLECTION(SpeechDispatcherCallback, mTask);
181
182	0	NS_INTERFACE_MAP_BEGIN_CYCLE_COLLECTION(SpeechDispatcherCallback)
183	0	NS_INTERFACE_MAP_ENTRY(nsISpeechTaskCallback)
184	0	NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsISpeechTaskCallback)
185	0	NS_INTERFACE_MAP_END
186
187		NS_IMPL_CYCLE_COLLECTING_ADDREF(SpeechDispatcherCallback)
188		NS_IMPL_CYCLE_COLLECTING_RELEASE(SpeechDispatcherCallback)
189
190		NS_IMETHODIMP
191		SpeechDispatcherCallback::OnPause()
192	0	{
193	0	// XXX: Speech dispatcher does not pause immediately, but waits for the speech
194	0	// to reach an index mark so that it could resume from that offset.
195	0	// There is no support for word or sentence boundaries, so index marks would
196	0	// only occur in explicit SSML marks, and we don't support that yet.
197	0	// What in actuality happens, is that if you call spd_pause(), it will speak
198	0	// the utterance in its entirety, dispatch an end event, and then put speechd
199	0	// in a 'paused' state. Since it is after the utterance ended, we don't get
200	0	// that state change, and our speech api is in an unrecoverable state.
201	0	// So, since it is useless anyway, I am not implementing pause.
202	0	return NS_OK;
203	0	}
204
205		NS_IMETHODIMP
206		SpeechDispatcherCallback::OnResume()
207	0	{
208	0	// XXX: Unsupported, see OnPause().
209	0	return NS_OK;
210	0	}
211
212		NS_IMETHODIMP
213		SpeechDispatcherCallback::OnCancel()
214	0	{
215	0	if (spd_cancel(mService->mSpeechdClient) < 0) {
216	0	return NS_ERROR_FAILURE;
217	0	}
218	0
219	0	return NS_OK;
220	0	}
221
222		NS_IMETHODIMP
223		SpeechDispatcherCallback::OnVolumeChanged(float aVolume)
224	0	{
225	0	// XXX: This currently does not change the volume mid-utterance, but it
226	0	// doesn't do anything bad either. So we could put this here with the hopes
227	0	// that speechd supports this in the future.
228	0	if (spd_set_volume(mService->mSpeechdClient, static_cast<int>(aVolume * 100)) < 0) {
229	0	return NS_ERROR_FAILURE;
230	0	}
231	0
232	0	return NS_OK;
233	0	}
234
235		bool
236		SpeechDispatcherCallback::OnSpeechEvent(SPDNotificationType state)
237	0	{
238	0	bool remove = false;
239	0
240	0	switch (state) {
241	0	case SPD_EVENT_BEGIN:
242	0	mStartTime = TimeStamp::Now();
243	0	mTask->DispatchStart();
244	0	break;
245	0
246	0	case SPD_EVENT_PAUSE:
247	0	mTask->DispatchPause((TimeStamp::Now() - mStartTime).ToSeconds(), 0);
248	0	break;
249	0
250	0	case SPD_EVENT_RESUME:
251	0	mTask->DispatchResume((TimeStamp::Now() - mStartTime).ToSeconds(), 0);
252	0	break;
253	0
254	0	case SPD_EVENT_CANCEL:
255	0	case SPD_EVENT_END:
256	0	mTask->DispatchEnd((TimeStamp::Now() - mStartTime).ToSeconds(), 0);
257	0	remove = true;
258	0	break;
259	0
260	0	case SPD_EVENT_INDEX_MARK:
261	0	// Not yet supported
262	0	break;
263	0
264	0	default:
265	0	break;
266	0	}
267	0
268	0	return remove;
269	0	}
270
271		static void
272		speechd_cb(size_t msg_id, size_t client_id, SPDNotificationType state)
273	0	{
274	0	SpeechDispatcherService* service = SpeechDispatcherService::GetInstance(false);
275	0
276	0	if (service) {
277	0	NS_DispatchToMainThread(NewRunnableMethod<uint32_t, SPDNotificationType>(
278	0	"dom::SpeechDispatcherService::EventNotify",
279	0	service,
280	0	&SpeechDispatcherService::EventNotify,
281	0	static_cast<uint32_t>(msg_id),
282	0	state));
283	0	}
284	0	}
285
286
287	0	NS_INTERFACE_MAP_BEGIN(SpeechDispatcherService)
288	0	NS_INTERFACE_MAP_ENTRY(nsISpeechService)
289	0	NS_INTERFACE_MAP_ENTRY(nsIObserver)
290	0	NS_INTERFACE_MAP_ENTRY_AMBIGUOUS(nsISupports, nsIObserver)
291	0	NS_INTERFACE_MAP_END
292
293		NS_IMPL_ADDREF(SpeechDispatcherService)
294		NS_IMPL_RELEASE(SpeechDispatcherService)
295
296		SpeechDispatcherService::SpeechDispatcherService()
297		: mInitialized(false)
298		, mSpeechdClient(nullptr)
299	0	{
300	0	}
301
302		void
303		SpeechDispatcherService::Init()
304	0	{
305	0	if (!Preferences::GetBool("media.webspeech.synth.enabled") \|\|
306	0	Preferences::GetBool("media.webspeech.synth.test")) {
307	0	return;
308	0	}
309	0
310	0	// While speech dispatcher has a "threaded" mode, only spd_say() is async.
311	0	// Since synchronous socket i/o could impact startup time, we do
312	0	// initialization in a separate thread.
313	0	DebugOnly<nsresult> rv = NS_NewNamedThread("speechd init",
314	0	getter_AddRefs(mInitThread));
315	0	MOZ_ASSERT(NS_SUCCEEDED(rv));
316	0	rv = mInitThread->Dispatch(
317	0	NewRunnableMethod("dom::SpeechDispatcherService::Setup",
318	0	this,
319	0	&SpeechDispatcherService::Setup),
320	0	NS_DISPATCH_NORMAL);
321	0	MOZ_ASSERT(NS_SUCCEEDED(rv));
322	0	}
323
324		SpeechDispatcherService::~SpeechDispatcherService()
325	0	{
326	0	if (mInitThread) {
327	0	mInitThread->Shutdown();
328	0	}
329	0
330	0	if (mSpeechdClient) {
331	0	spd_close(mSpeechdClient);
332	0	}
333	0	}
334
335		void
336		SpeechDispatcherService::Setup()
337	0	{
338	0	#define FUNC(name, type, params) { #name, (nsSpeechDispatcherFunc *)&_##name },
339	0	static const nsSpeechDispatcherDynamicFunction kSpeechDispatcherSymbols[] = {
340	0	SPEECHD_FUNCTIONS
341	0	};
342	0	#undef FUNC
343	0
344	0	MOZ_ASSERT(!mInitialized);
345	0
346	0	speechdLib = PR_LoadLibrary("libspeechd.so.2");
347	0
348	0	if (!speechdLib) {
349	0	NS_WARNING("Failed to load speechd library");
350	0	return;
351	0	}
352	0
353	0	if (!PR_FindFunctionSymbol(speechdLib, "spd_get_volume")) {
354	0	// There is no version getter function, so we rely on a symbol that was
355	0	// introduced in release 0.8.2 in order to check for ABI compatibility.
356	0	NS_WARNING("Unsupported version of speechd detected");
357	0	return;
358	0	}
359	0
360	0	for (uint32_t i = 0; i < ArrayLength(kSpeechDispatcherSymbols); i++) {
361	0	*kSpeechDispatcherSymbols[i].function =
362	0	PR_FindFunctionSymbol(speechdLib, kSpeechDispatcherSymbols[i].functionName);
363	0
364	0	if (!*kSpeechDispatcherSymbols[i].function) {
365	0	NS_WARNING(nsPrintfCString("Failed to find speechd symbol for'%s'",
366	0	kSpeechDispatcherSymbols[i].functionName).get());
367	0	return;
368	0	}
369	0	}
370	0
371	0	mSpeechdClient = spd_open("firefox", "web speech api", "who", SPD_MODE_THREADED);
372	0	if (!mSpeechdClient) {
373	0	NS_WARNING("Failed to call spd_open");
374	0	return;
375	0	}
376	0
377	0	// Get all the voices from sapi and register in the SynthVoiceRegistry
378	0	SPDVoice** list = spd_list_synthesis_voices(mSpeechdClient);
379	0
380	0	mSpeechdClient->callback_begin = speechd_cb;
381	0	mSpeechdClient->callback_end = speechd_cb;
382	0	mSpeechdClient->callback_cancel = speechd_cb;
383	0	mSpeechdClient->callback_pause = speechd_cb;
384	0	mSpeechdClient->callback_resume = speechd_cb;
385	0
386	0	spd_set_notification_on(mSpeechdClient, SPD_BEGIN);
387	0	spd_set_notification_on(mSpeechdClient, SPD_END);
388	0	spd_set_notification_on(mSpeechdClient, SPD_CANCEL);
389	0
390	0	if (list != NULL) {
391	0	for (int i = 0; list[i]; i++) {
392	0	nsAutoString uri;
393	0
394	0	uri.AssignLiteral(URI_PREFIX);
395	0	nsAutoCString name;
396	0	NS_EscapeURL(list[i]->name, -1, esc_OnlyNonASCII \| esc_Spaces \| esc_AlwaysCopy, name);
397	0	uri.Append(NS_ConvertUTF8toUTF16(name));;
398	0	uri.AppendLiteral("?");
399	0
400	0	nsAutoCString lang(list[i]->language);
401	0
402	0	if (strcmp(list[i]->variant, "none") != 0) {
403	0	// In speech dispatcher, the variant will usually be the locale subtag
404	0	// with another, non-standard suptag after it. We keep the first one
405	0	// and convert it to uppercase.
406	0	const char* v = list[i]->variant;
407	0	const char* hyphen = strchr(v, '-');
408	0	nsDependentCSubstring variant(v, hyphen ? hyphen - v : strlen(v));
409	0	ToUpperCase(variant);
410	0
411	0	// eSpeak uses UK which is not a valid region subtag in BCP47.
412	0	if (variant.EqualsLiteral("UK")) {
413	0	variant.AssignLiteral("GB");
414	0	}
415	0
416	0	lang.AppendLiteral("-");
417	0	lang.Append(variant);
418	0	}
419	0
420	0	uri.Append(NS_ConvertUTF8toUTF16(lang));
421	0
422	0	mVoices.Put(uri, new SpeechDispatcherVoice(
423	0	NS_ConvertUTF8toUTF16(list[i]->name),
424	0	NS_ConvertUTF8toUTF16(lang)));
425	0	}
426	0	}
427	0
428	0	NS_DispatchToMainThread(
429	0	NewRunnableMethod("dom::SpeechDispatcherService::RegisterVoices",
430	0	this,
431	0	&SpeechDispatcherService::RegisterVoices));
432	0
433	0	//mInitialized = true;
434	0	}
435
436		// private methods
437
438		void
439		SpeechDispatcherService::RegisterVoices()
440	0	{
441	0	RefPtr<nsSynthVoiceRegistry> registry = nsSynthVoiceRegistry::GetInstance();
442	0	for (auto iter = mVoices.Iter(); !iter.Done(); iter.Next()) {
443	0	RefPtr<SpeechDispatcherVoice>& voice = iter.Data();
444	0
445	0	// This service can only speak one utterance at a time, so we set
446	0	// aQueuesUtterances to true in order to track global state and schedule
447	0	// access to this service.
448	0	DebugOnly<nsresult> rv =
449	0	registry->AddVoice(this, iter.Key(), voice->mName, voice->mLanguage,
450	0	voice->mName.EqualsLiteral("default"), true);
451	0
452	0	NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), "Failed to add voice");
453	0	}
454	0
455	0	mInitThread->Shutdown();
456	0	mInitThread = nullptr;
457	0
458	0	mInitialized = true;
459	0
460	0	registry->NotifyVoicesChanged();
461	0	}
462
463		// nsIObserver
464
465		NS_IMETHODIMP
466		SpeechDispatcherService::Observe(nsISupports* aSubject, const char* aTopic,
467		const char16_t* aData)
468	0	{
469	0	return NS_OK;
470	0	}
471
472		// nsISpeechService
473
474		// TODO: Support SSML
475		NS_IMETHODIMP
476		SpeechDispatcherService::Speak(const nsAString& aText, const nsAString& aUri,
477		float aVolume, float aRate, float aPitch,
478		nsISpeechTask* aTask)
479	0	{
480	0	if (NS_WARN_IF(!mInitialized)) {
481	0	return NS_ERROR_NOT_AVAILABLE;
482	0	}
483	0
484	0	RefPtr<SpeechDispatcherCallback> callback =
485	0	new SpeechDispatcherCallback(aTask, this);
486	0
487	0	bool found = false;
488	0	SpeechDispatcherVoice* voice = mVoices.GetWeak(aUri, &found);
489	0
490	0	if(NS_WARN_IF(!(found))) {
491	0	return NS_ERROR_NOT_AVAILABLE;
492	0	}
493	0
494	0	spd_set_synthesis_voice(mSpeechdClient,
495	0	NS_ConvertUTF16toUTF8(voice->mName).get());
496	0
497	0	// We provide a volume of 0.0 to 1.0, speech-dispatcher expects 0 - 100.
498	0	spd_set_volume(mSpeechdClient, static_cast<int>(aVolume * 100));
499	0
500	0	// aRate is a value of 0.1 (0.1x) to 10 (10x) with 1 (1x) being normal rate.
501	0	// speechd expects -100 to 100 with 0 being normal rate.
502	0	float rate = 0;
503	0	if (aRate > 1) {
504	0	// Each step to 100 is logarithmically distributed up to 2.5x.
505	0	rate = log10(std::min(aRate, MAX_RATE)) / log10(MAX_RATE) * 100;
506	0	} else if (aRate < 1) {
507	0	// Each step to -100 is logarithmically distributed down to 0.5x.
508	0	rate = log10(std::max(aRate, MIN_RATE)) / log10(MIN_RATE) * -100;
509	0	}
510	0
511	0	spd_set_voice_rate(mSpeechdClient, static_cast<int>(rate));
512	0
513	0	// We provide a pitch of 0 to 2 with 1 being the default.
514	0	// speech-dispatcher expects -100 to 100 with 0 being default.
515	0	spd_set_voice_pitch(mSpeechdClient, static_cast<int>((aPitch - 1) * 100));
516	0
517	0	nsresult rv = aTask->Setup(callback);
518	0
519	0	if (NS_FAILED(rv)) {
520	0	return rv;
521	0	}
522	0
523	0	if (aText.Length()) {
524	0	int msg_id = spd_say(
525	0	mSpeechdClient, SPD_MESSAGE, NS_ConvertUTF16toUTF8(aText).get());
526	0
527	0	if (msg_id < 0) {
528	0	return NS_ERROR_FAILURE;
529	0	}
530	0
531	0	mCallbacks.Put(msg_id, callback);
532	0	} else {
533	0	// Speech dispatcher does not work well with empty strings.
534	0	// In that case, don't send empty string to speechd,
535	0	// and just emulate a speechd start and end event.
536	0	NS_DispatchToMainThread(NewRunnableMethod<SPDNotificationType>(
537	0	"dom::SpeechDispatcherCallback::OnSpeechEvent",
538	0	callback,
539	0	&SpeechDispatcherCallback::OnSpeechEvent,
540	0	SPD_EVENT_BEGIN));
541	0
542	0	NS_DispatchToMainThread(NewRunnableMethod<SPDNotificationType>(
543	0	"dom::SpeechDispatcherCallback::OnSpeechEvent",
544	0	callback,
545	0	&SpeechDispatcherCallback::OnSpeechEvent,
546	0	SPD_EVENT_END));
547	0	}
548	0
549	0	return NS_OK;
550	0	}
551
552		SpeechDispatcherService*
553		SpeechDispatcherService::GetInstance(bool create)
554	0	{
555	0	if (XRE_GetProcessType() != GeckoProcessType_Default) {
556	0	MOZ_ASSERT(false,
557	0	"SpeechDispatcherService can only be started on main gecko process");
558	0	return nullptr;
559	0	}
560	0
561	0	if (!sSingleton && create) {
562	0	sSingleton = new SpeechDispatcherService();
563	0	sSingleton->Init();
564	0	ClearOnShutdown(&sSingleton);
565	0	}
566	0
567	0	return sSingleton;
568	0	}
569
570		already_AddRefed<SpeechDispatcherService>
571		SpeechDispatcherService::GetInstanceForService()
572	0	{
573	0	MOZ_ASSERT(NS_IsMainThread());
574	0	RefPtr<SpeechDispatcherService> sapiService = GetInstance();
575	0	return sapiService.forget();
576	0	}
577
578		void
579		SpeechDispatcherService::EventNotify(uint32_t aMsgId, uint32_t aState)
580	0	{
581	0	SpeechDispatcherCallback* callback = mCallbacks.GetWeak(aMsgId);
582	0
583	0	if (callback) {
584	0	if (callback->OnSpeechEvent((SPDNotificationType)aState)) {
585	0	mCallbacks.Remove(aMsgId);
586	0	}
587	0	}
588	0	}
589
590		} // namespace dom
591		} // namespace mozilla