| 1 | // Copyright 2013 The Chromium Authors. All rights reserved. |
| 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
| 4 | |
| 5 | package org.chromium.content.browser; |
| 6 | |
| 7 | import android.content.ComponentName; |
| 8 | import android.content.Context; |
| 9 | import android.content.Intent; |
| 10 | import android.content.pm.PackageManager; |
| 11 | import android.content.pm.PackageManager.NameNotFoundException; |
| 12 | import android.content.pm.ResolveInfo; |
| 13 | import android.content.pm.ServiceInfo; |
| 14 | import android.os.Bundle; |
| 15 | import android.speech.RecognitionListener; |
| 16 | import android.speech.RecognitionService; |
| 17 | import android.speech.RecognizerIntent; |
| 18 | import android.speech.SpeechRecognizer; |
| 19 | |
| 20 | import org.chromium.base.CalledByNative; |
| 21 | import org.chromium.base.JNINamespace; |
| 22 | import org.chromium.content.browser.SpeechRecognitionError; |
| 23 | |
| 24 | import java.util.ArrayList; |
| 25 | import java.util.List; |
| 26 | |
| 27 | /** |
| 28 | * This class uses Android's SpeechRecognizer to perform speech recognition for the Web Speech API |
| 29 | * on Android. Using Android's platform recognizer offers several benefits, like good quality and |
| 30 | * good local fallback when no data connection is available. |
| 31 | */ |
| 32 | @JNINamespace("content") |
| 33 | public class SpeechRecognition { |
| 34 | |
| 35 | // Constants describing the speech recognition provider we depend on. |
| 36 | private static final String PROVIDER_PACKAGE_NAME = "com.google.android.googlequicksearchbox"; |
| 37 | private static final int PROVIDER_MIN_VERSION = 300207030; |
| 38 | |
| 39 | // We track the recognition state to remember what events we need to send when recognition is |
| 40 | // being aborted. Once Android's recognizer is cancelled, its listener won't yield any more |
| 41 | // events, but we still need to call OnSoundEnd and OnAudioEnd if corresponding On*Start were |
| 42 | // called before. |
| 43 | private static final int STATE_IDLE = 0; |
| 44 | private static final int STATE_AWAITING_SPEECH = 1; |
| 45 | private static final int STATE_CAPTURING_SPEECH = 2; |
| 46 | private int mState; |
| 47 | |
| 48 | // The speech recognition provider (if any) matching PROVIDER_PACKAGE_NAME and |
| 49 | // PROVIDER_MIN_VERSION as selected by initialize(). |
| 50 | private static ComponentName mRecognitionProvider; |
| 51 | |
| 52 | private final Context mContext; |
| 53 | private final Intent mIntent; |
| 54 | private final RecognitionListener mListener; |
| 55 | private SpeechRecognizer mRecognizer; |
| 56 | |
| 57 | // Native pointer to C++ SpeechRecognizerImplAndroid. |
| 58 | private int mNativeSpeechRecognizerImplAndroid; |
| 59 | |
| 60 | // Remember if we are using continuous recognition. |
| 61 | private boolean mContinuous; |
| 62 | |
| 63 | // Internal class to handle events from Android's SpeechRecognizer and route them to native. |
| 64 | class Listener implements RecognitionListener { |
| 65 | |
| 66 | @Override |
| 67 | public void onBeginningOfSpeech() { |
| 68 | mState = STATE_CAPTURING_SPEECH; |
| 69 | nativeOnSoundStart(mNativeSpeechRecognizerImplAndroid); |
| 70 | } |
| 71 | |
| 72 | @Override |
| 73 | public void onBufferReceived(byte[] buffer) { } |
| 74 | |
| 75 | @Override |
| 76 | public void onEndOfSpeech() { |
| 77 | // Ignore onEndOfSpeech in continuous mode to let terminate() take care of ending |
| 78 | // events. The Android API documentation is vague as to when onEndOfSpeech is called in |
| 79 | // continuous mode, whereas the Web Speech API defines a stronger semantic on the |
| 80 | // equivalent (onsoundend) event. Thus, the only way to provide a valid onsoundend |
| 81 | // event is to trigger it when the last result is received or the session is aborted. |
| 82 | if (!mContinuous) { |
| 83 | nativeOnSoundEnd(mNativeSpeechRecognizerImplAndroid); |
| 84 | // Since Android doesn't have a dedicated event for when audio capture is finished, |
| 85 | // we fire it after speech has ended. |
| 86 | nativeOnAudioEnd(mNativeSpeechRecognizerImplAndroid); |
| 87 | mState = STATE_IDLE; |
| 88 | } |
| 89 | } |
| 90 | |
| 91 | @Override |
| 92 | public void onError(int error) { |
| 93 | int code = SpeechRecognitionError.NONE; |
| 94 | |
| 95 | // Translate Android SpeechRecognizer errors to Web Speech API errors. |
| 96 | switch(error) { |
| 97 | case SpeechRecognizer.ERROR_AUDIO: |
| 98 | code = SpeechRecognitionError.AUDIO; |
| 99 | break; |
| 100 | case SpeechRecognizer.ERROR_CLIENT: |
| 101 | code = SpeechRecognitionError.ABORTED; |
| 102 | break; |
| 103 | case SpeechRecognizer.ERROR_RECOGNIZER_BUSY: |
| 104 | case SpeechRecognizer.ERROR_INSUFFICIENT_PERMISSIONS: |
| 105 | code = SpeechRecognitionError.NOT_ALLOWED; |
| 106 | break; |
| 107 | case SpeechRecognizer.ERROR_NETWORK_TIMEOUT: |
| 108 | case SpeechRecognizer.ERROR_NETWORK: |
| 109 | case SpeechRecognizer.ERROR_SERVER: |
| 110 | code = SpeechRecognitionError.NETWORK; |
| 111 | break; |
| 112 | case SpeechRecognizer.ERROR_NO_MATCH: |
| 113 | code = SpeechRecognitionError.NO_MATCH; |
| 114 | break; |
| 115 | case SpeechRecognizer.ERROR_SPEECH_TIMEOUT: |
| 116 | code = SpeechRecognitionError.NO_SPEECH; |
| 117 | break; |
| 118 | default: |
| 119 | assert false; |
| 120 | return; |
| 121 | } |
| 122 | |
| 123 | terminate(code); |
| 124 | } |
| 125 | |
| 126 | @Override |
| 127 | public void onEvent(int event, Bundle bundle) { } |
| 128 | |
| 129 | @Override |
| 130 | public void onPartialResults(Bundle bundle) { |
| 131 | handleResults(bundle, true); |
| 132 | } |
| 133 | |
| 134 | @Override |
| 135 | public void onReadyForSpeech(Bundle bundle) { |
| 136 | mState = STATE_AWAITING_SPEECH; |
| 137 | nativeOnAudioStart(mNativeSpeechRecognizerImplAndroid); |
| 138 | } |
| 139 | |
| 140 | @Override |
| 141 | public void onResults(Bundle bundle) { |
| 142 | handleResults(bundle, false); |
| 143 | // We assume that onResults is called only once, at the end of a session, thus we |
| 144 | // terminate. If one day the recognition provider changes dictation mode behavior to |
| 145 | // call onResults several times, we should terminate only if (!mContinuous). |
| 146 | terminate(SpeechRecognitionError.NONE); |
| 147 | } |
| 148 | |
| 149 | @Override |
| 150 | public void onRmsChanged(float rms) { } |
| 151 | |
| 152 | private void handleResults(Bundle bundle, boolean provisional) { |
| 153 | if (mContinuous && provisional) { |
| 154 | // In continuous mode, Android's recognizer sends final results as provisional. |
| 155 | provisional = false; |
| 156 | } |
| 157 | |
| 158 | ArrayList<String> list = bundle.getStringArrayList( |
| 159 | SpeechRecognizer.RESULTS_RECOGNITION); |
| 160 | String[] results = list.toArray(new String[list.size()]); |
| 161 | |
| 162 | float[] scores = bundle.getFloatArray(SpeechRecognizer.CONFIDENCE_SCORES); |
| 163 | |
| 164 | nativeOnRecognitionResults(mNativeSpeechRecognizerImplAndroid, |
| 165 | results, |
| 166 | scores, |
| 167 | provisional); |
| 168 | } |
| 169 | } |
| 170 | |
| 171 | // This method must be called before any instance of SpeechRecognition can be created. It will |
| 172 | // query Android's package manager to find a suitable speech recognition provider that supports |
| 173 | // continuous recognition. |
| 174 | public static boolean initialize(Context context) { |
| 175 | if (!SpeechRecognizer.isRecognitionAvailable(context)) |
| 176 | return false; |
| 177 | |
| 178 | PackageManager pm = context.getPackageManager(); |
| 179 | Intent intent = new Intent(RecognitionService.SERVICE_INTERFACE); |
| 180 | final List<ResolveInfo> list = pm.queryIntentServices(intent, PackageManager.GET_SERVICES); |
| 181 | |
| 182 | for (ResolveInfo resolve : list) { |
| 183 | ServiceInfo service = resolve.serviceInfo; |
| 184 | |
| 185 | if (!service.packageName.equals(PROVIDER_PACKAGE_NAME)) |
| 186 | continue; |
| 187 | |
| 188 | int versionCode; |
| 189 | try { |
| 190 | versionCode = pm.getPackageInfo(service.packageName, 0).versionCode; |
| 191 | } catch (NameNotFoundException e) { |
| 192 | continue; |
| 193 | } |
| 194 | |
| 195 | if (versionCode < PROVIDER_MIN_VERSION) |
| 196 | continue; |
| 197 | |
| 198 | mRecognitionProvider = new ComponentName(service.packageName, service.name); |
| 199 | |
| 200 | return true; |
| 201 | } |
| 202 | |
| 203 | // If we reach this point, we failed to find a suitable recognition provider. |
| 204 | return false; |
| 205 | } |
| 206 | |
| 207 | private SpeechRecognition(final Context context, int nativeSpeechRecognizerImplAndroid) { |
| 208 | mContext = context; |
| 209 | mContinuous = false; |
| 210 | mNativeSpeechRecognizerImplAndroid = nativeSpeechRecognizerImplAndroid; |
| 211 | mListener = new Listener(); |
| 212 | mIntent = new Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH); |
| 213 | |
| 214 | if (mRecognitionProvider != null) { |
| 215 | mRecognizer = SpeechRecognizer.createSpeechRecognizer(mContext, mRecognitionProvider); |
| 216 | } else { |
| 217 | // It is possible to force-enable the speech recognition web platform feature (using a |
| 218 | // command-line flag) even if initialize() failed to find the PROVIDER_PACKAGE_NAME |
| 219 | // provider, in which case the first available speech recognition provider is used. |
| 220 | // Caveat: Continuous mode may not work as expected with a different provider. |
| 221 | mRecognizer = SpeechRecognizer.createSpeechRecognizer(mContext); |
| 222 | } |
| 223 | |
| 224 | mRecognizer.setRecognitionListener(mListener); |
| 225 | } |
| 226 | |
| 227 | // This function destroys everything when recognition is done, taking care to properly tear |
| 228 | // down by calling On{Sound,Audio}End if corresponding On{Audio,Sound}Start were called. |
| 229 | private void terminate(int error) { |
| 230 | |
| 231 | if (mState != STATE_IDLE) { |
| 232 | if (mState == STATE_CAPTURING_SPEECH) { |
| 233 | nativeOnSoundEnd(mNativeSpeechRecognizerImplAndroid); |
| 234 | } |
| 235 | nativeOnAudioEnd(mNativeSpeechRecognizerImplAndroid); |
| 236 | mState = STATE_IDLE; |
| 237 | } |
| 238 | |
| 239 | if (error != SpeechRecognitionError.NONE) |
| 240 | nativeOnRecognitionError(mNativeSpeechRecognizerImplAndroid, error); |
| 241 | |
| 242 | mRecognizer.destroy(); |
| 243 | mRecognizer = null; |
| 244 | nativeOnRecognitionEnd(mNativeSpeechRecognizerImplAndroid); |
| 245 | mNativeSpeechRecognizerImplAndroid = 0; |
| 246 | } |
| 247 | |
| 248 | @CalledByNative |
| 249 | private static SpeechRecognition createSpeechRecognition( |
| 250 | Context context, int nativeSpeechRecognizerImplAndroid) { |
| 251 | return new SpeechRecognition(context, nativeSpeechRecognizerImplAndroid); |
| 252 | } |
| 253 | |
| 254 | @CalledByNative |
| 255 | private void startRecognition(String language, boolean continuous, boolean interim_results) { |
| 256 | if (mRecognizer == null) |
| 257 | return; |
| 258 | |
| 259 | mContinuous = continuous; |
| 260 | mIntent.putExtra("android.speech.extra.DICTATION_MODE", continuous); |
| 261 | mIntent.putExtra(RecognizerIntent.EXTRA_LANGUAGE, language); |
| 262 | mIntent.putExtra(RecognizerIntent.EXTRA_PARTIAL_RESULTS, interim_results); |
| 263 | mRecognizer.startListening(mIntent); |
| 264 | } |
| 265 | |
| 266 | @CalledByNative |
| 267 | private void abortRecognition() { |
| 268 | if (mRecognizer == null) |
| 269 | return; |
| 270 | |
| 271 | mRecognizer.cancel(); |
| 272 | terminate(SpeechRecognitionError.ABORTED); |
| 273 | } |
| 274 | |
| 275 | @CalledByNative |
| 276 | private void stopRecognition() { |
| 277 | if (mRecognizer == null) |
| 278 | return; |
| 279 | |
| 280 | mContinuous = false; |
| 281 | mRecognizer.stopListening(); |
| 282 | } |
| 283 | |
| 284 | // Native JNI calls to content/browser/speech/speech_recognizer_impl_android.cc |
| 285 | private native void nativeOnAudioStart(int nativeSpeechRecognizerImplAndroid); |
| 286 | private native void nativeOnSoundStart(int nativeSpeechRecognizerImplAndroid); |
| 287 | private native void nativeOnSoundEnd(int nativeSpeechRecognizerImplAndroid); |
| 288 | private native void nativeOnAudioEnd(int nativeSpeechRecognizerImplAndroid); |
| 289 | private native void nativeOnRecognitionResults(int nativeSpeechRecognizerImplAndroid, |
| 290 | String[] results, |
| 291 | float[] scores, |
| 292 | boolean provisional); |
| 293 | private native void nativeOnRecognitionError(int nativeSpeechRecognizerImplAndroid, int error); |
| 294 | private native void nativeOnRecognitionEnd(int nativeSpeechRecognizerImplAndroid); |
| 295 | } |