1 | // Copyright 2013 The Chromium Authors. All rights reserved. |
2 | // Use of this source code is governed by a BSD-style license that can be |
3 | // found in the LICENSE file. |
4 | |
5 | package org.chromium.content.browser; |
6 | |
7 | import android.content.ComponentName; |
8 | import android.content.Context; |
9 | import android.content.Intent; |
10 | import android.content.pm.PackageManager; |
11 | import android.content.pm.PackageManager.NameNotFoundException; |
12 | import android.content.pm.ResolveInfo; |
13 | import android.content.pm.ServiceInfo; |
14 | import android.os.Bundle; |
15 | import android.speech.RecognitionListener; |
16 | import android.speech.RecognitionService; |
17 | import android.speech.RecognizerIntent; |
18 | import android.speech.SpeechRecognizer; |
19 | |
20 | import org.chromium.base.CalledByNative; |
21 | import org.chromium.base.JNINamespace; |
22 | import org.chromium.content.browser.SpeechRecognitionError; |
23 | |
24 | import java.util.ArrayList; |
25 | import java.util.List; |
26 | |
27 | /** |
28 | * This class uses Android's SpeechRecognizer to perform speech recognition for the Web Speech API |
29 | * on Android. Using Android's platform recognizer offers several benefits, like good quality and |
30 | * good local fallback when no data connection is available. |
31 | */ |
32 | @JNINamespace("content") |
33 | public class SpeechRecognition { |
34 | |
35 | // Constants describing the speech recognition provider we depend on. |
36 | private static final String PROVIDER_PACKAGE_NAME = "com.google.android.googlequicksearchbox"; |
37 | private static final int PROVIDER_MIN_VERSION = 300207030; |
38 | |
39 | // We track the recognition state to remember what events we need to send when recognition is |
40 | // being aborted. Once Android's recognizer is cancelled, its listener won't yield any more |
41 | // events, but we still need to call OnSoundEnd and OnAudioEnd if corresponding On*Start were |
42 | // called before. |
43 | private static final int STATE_IDLE = 0; |
44 | private static final int STATE_AWAITING_SPEECH = 1; |
45 | private static final int STATE_CAPTURING_SPEECH = 2; |
46 | private int mState; |
47 | |
48 | // The speech recognition provider (if any) matching PROVIDER_PACKAGE_NAME and |
49 | // PROVIDER_MIN_VERSION as selected by initialize(). |
50 | private static ComponentName mRecognitionProvider; |
51 | |
52 | private final Context mContext; |
53 | private final Intent mIntent; |
54 | private final RecognitionListener mListener; |
55 | private SpeechRecognizer mRecognizer; |
56 | |
57 | // Native pointer to C++ SpeechRecognizerImplAndroid. |
58 | private int mNativeSpeechRecognizerImplAndroid; |
59 | |
60 | // Remember if we are using continuous recognition. |
61 | private boolean mContinuous; |
62 | |
63 | // Internal class to handle events from Android's SpeechRecognizer and route them to native. |
64 | class Listener implements RecognitionListener { |
65 | |
66 | @Override |
67 | public void onBeginningOfSpeech() { |
68 | mState = STATE_CAPTURING_SPEECH; |
69 | nativeOnSoundStart(mNativeSpeechRecognizerImplAndroid); |
70 | } |
71 | |
72 | @Override |
73 | public void onBufferReceived(byte[] buffer) { } |
74 | |
75 | @Override |
76 | public void onEndOfSpeech() { |
77 | // Ignore onEndOfSpeech in continuous mode to let terminate() take care of ending |
78 | // events. The Android API documentation is vague as to when onEndOfSpeech is called in |
79 | // continuous mode, whereas the Web Speech API defines a stronger semantic on the |
80 | // equivalent (onsoundend) event. Thus, the only way to provide a valid onsoundend |
81 | // event is to trigger it when the last result is received or the session is aborted. |
82 | if (!mContinuous) { |
83 | nativeOnSoundEnd(mNativeSpeechRecognizerImplAndroid); |
84 | // Since Android doesn't have a dedicated event for when audio capture is finished, |
85 | // we fire it after speech has ended. |
86 | nativeOnAudioEnd(mNativeSpeechRecognizerImplAndroid); |
87 | mState = STATE_IDLE; |
88 | } |
89 | } |
90 | |
91 | @Override |
92 | public void onError(int error) { |
93 | int code = SpeechRecognitionError.NONE; |
94 | |
95 | // Translate Android SpeechRecognizer errors to Web Speech API errors. |
96 | switch(error) { |
97 | case SpeechRecognizer.ERROR_AUDIO: |
98 | code = SpeechRecognitionError.AUDIO; |
99 | break; |
100 | case SpeechRecognizer.ERROR_CLIENT: |
101 | code = SpeechRecognitionError.ABORTED; |
102 | break; |
103 | case SpeechRecognizer.ERROR_RECOGNIZER_BUSY: |
104 | case SpeechRecognizer.ERROR_INSUFFICIENT_PERMISSIONS: |
105 | code = SpeechRecognitionError.NOT_ALLOWED; |
106 | break; |
107 | case SpeechRecognizer.ERROR_NETWORK_TIMEOUT: |
108 | case SpeechRecognizer.ERROR_NETWORK: |
109 | case SpeechRecognizer.ERROR_SERVER: |
110 | code = SpeechRecognitionError.NETWORK; |
111 | break; |
112 | case SpeechRecognizer.ERROR_NO_MATCH: |
113 | code = SpeechRecognitionError.NO_MATCH; |
114 | break; |
115 | case SpeechRecognizer.ERROR_SPEECH_TIMEOUT: |
116 | code = SpeechRecognitionError.NO_SPEECH; |
117 | break; |
118 | default: |
119 | assert false; |
120 | return; |
121 | } |
122 | |
123 | terminate(code); |
124 | } |
125 | |
126 | @Override |
127 | public void onEvent(int event, Bundle bundle) { } |
128 | |
129 | @Override |
130 | public void onPartialResults(Bundle bundle) { |
131 | handleResults(bundle, true); |
132 | } |
133 | |
134 | @Override |
135 | public void onReadyForSpeech(Bundle bundle) { |
136 | mState = STATE_AWAITING_SPEECH; |
137 | nativeOnAudioStart(mNativeSpeechRecognizerImplAndroid); |
138 | } |
139 | |
140 | @Override |
141 | public void onResults(Bundle bundle) { |
142 | handleResults(bundle, false); |
143 | // We assume that onResults is called only once, at the end of a session, thus we |
144 | // terminate. If one day the recognition provider changes dictation mode behavior to |
145 | // call onResults several times, we should terminate only if (!mContinuous). |
146 | terminate(SpeechRecognitionError.NONE); |
147 | } |
148 | |
149 | @Override |
150 | public void onRmsChanged(float rms) { } |
151 | |
152 | private void handleResults(Bundle bundle, boolean provisional) { |
153 | if (mContinuous && provisional) { |
154 | // In continuous mode, Android's recognizer sends final results as provisional. |
155 | provisional = false; |
156 | } |
157 | |
158 | ArrayList<String> list = bundle.getStringArrayList( |
159 | SpeechRecognizer.RESULTS_RECOGNITION); |
160 | String[] results = list.toArray(new String[list.size()]); |
161 | |
162 | float[] scores = bundle.getFloatArray(SpeechRecognizer.CONFIDENCE_SCORES); |
163 | |
164 | nativeOnRecognitionResults(mNativeSpeechRecognizerImplAndroid, |
165 | results, |
166 | scores, |
167 | provisional); |
168 | } |
169 | } |
170 | |
171 | // This method must be called before any instance of SpeechRecognition can be created. It will |
172 | // query Android's package manager to find a suitable speech recognition provider that supports |
173 | // continuous recognition. |
174 | public static boolean initialize(Context context) { |
175 | if (!SpeechRecognizer.isRecognitionAvailable(context)) |
176 | return false; |
177 | |
178 | PackageManager pm = context.getPackageManager(); |
179 | Intent intent = new Intent(RecognitionService.SERVICE_INTERFACE); |
180 | final List<ResolveInfo> list = pm.queryIntentServices(intent, PackageManager.GET_SERVICES); |
181 | |
182 | for (ResolveInfo resolve : list) { |
183 | ServiceInfo service = resolve.serviceInfo; |
184 | |
185 | if (!service.packageName.equals(PROVIDER_PACKAGE_NAME)) |
186 | continue; |
187 | |
188 | int versionCode; |
189 | try { |
190 | versionCode = pm.getPackageInfo(service.packageName, 0).versionCode; |
191 | } catch (NameNotFoundException e) { |
192 | continue; |
193 | } |
194 | |
195 | if (versionCode < PROVIDER_MIN_VERSION) |
196 | continue; |
197 | |
198 | mRecognitionProvider = new ComponentName(service.packageName, service.name); |
199 | |
200 | return true; |
201 | } |
202 | |
203 | // If we reach this point, we failed to find a suitable recognition provider. |
204 | return false; |
205 | } |
206 | |
207 | private SpeechRecognition(final Context context, int nativeSpeechRecognizerImplAndroid) { |
208 | mContext = context; |
209 | mContinuous = false; |
210 | mNativeSpeechRecognizerImplAndroid = nativeSpeechRecognizerImplAndroid; |
211 | mListener = new Listener(); |
212 | mIntent = new Intent(RecognizerIntent.ACTION_RECOGNIZE_SPEECH); |
213 | |
214 | if (mRecognitionProvider != null) { |
215 | mRecognizer = SpeechRecognizer.createSpeechRecognizer(mContext, mRecognitionProvider); |
216 | } else { |
217 | // It is possible to force-enable the speech recognition web platform feature (using a |
218 | // command-line flag) even if initialize() failed to find the PROVIDER_PACKAGE_NAME |
219 | // provider, in which case the first available speech recognition provider is used. |
220 | // Caveat: Continuous mode may not work as expected with a different provider. |
221 | mRecognizer = SpeechRecognizer.createSpeechRecognizer(mContext); |
222 | } |
223 | |
224 | mRecognizer.setRecognitionListener(mListener); |
225 | } |
226 | |
227 | // This function destroys everything when recognition is done, taking care to properly tear |
228 | // down by calling On{Sound,Audio}End if corresponding On{Audio,Sound}Start were called. |
229 | private void terminate(int error) { |
230 | |
231 | if (mState != STATE_IDLE) { |
232 | if (mState == STATE_CAPTURING_SPEECH) { |
233 | nativeOnSoundEnd(mNativeSpeechRecognizerImplAndroid); |
234 | } |
235 | nativeOnAudioEnd(mNativeSpeechRecognizerImplAndroid); |
236 | mState = STATE_IDLE; |
237 | } |
238 | |
239 | if (error != SpeechRecognitionError.NONE) |
240 | nativeOnRecognitionError(mNativeSpeechRecognizerImplAndroid, error); |
241 | |
242 | mRecognizer.destroy(); |
243 | mRecognizer = null; |
244 | nativeOnRecognitionEnd(mNativeSpeechRecognizerImplAndroid); |
245 | mNativeSpeechRecognizerImplAndroid = 0; |
246 | } |
247 | |
248 | @CalledByNative |
249 | private static SpeechRecognition createSpeechRecognition( |
250 | Context context, int nativeSpeechRecognizerImplAndroid) { |
251 | return new SpeechRecognition(context, nativeSpeechRecognizerImplAndroid); |
252 | } |
253 | |
254 | @CalledByNative |
255 | private void startRecognition(String language, boolean continuous, boolean interim_results) { |
256 | if (mRecognizer == null) |
257 | return; |
258 | |
259 | mContinuous = continuous; |
260 | mIntent.putExtra("android.speech.extra.DICTATION_MODE", continuous); |
261 | mIntent.putExtra(RecognizerIntent.EXTRA_LANGUAGE, language); |
262 | mIntent.putExtra(RecognizerIntent.EXTRA_PARTIAL_RESULTS, interim_results); |
263 | mRecognizer.startListening(mIntent); |
264 | } |
265 | |
266 | @CalledByNative |
267 | private void abortRecognition() { |
268 | if (mRecognizer == null) |
269 | return; |
270 | |
271 | mRecognizer.cancel(); |
272 | terminate(SpeechRecognitionError.ABORTED); |
273 | } |
274 | |
275 | @CalledByNative |
276 | private void stopRecognition() { |
277 | if (mRecognizer == null) |
278 | return; |
279 | |
280 | mContinuous = false; |
281 | mRecognizer.stopListening(); |
282 | } |
283 | |
284 | // Native JNI calls to content/browser/speech/speech_recognizer_impl_android.cc |
285 | private native void nativeOnAudioStart(int nativeSpeechRecognizerImplAndroid); |
286 | private native void nativeOnSoundStart(int nativeSpeechRecognizerImplAndroid); |
287 | private native void nativeOnSoundEnd(int nativeSpeechRecognizerImplAndroid); |
288 | private native void nativeOnAudioEnd(int nativeSpeechRecognizerImplAndroid); |
289 | private native void nativeOnRecognitionResults(int nativeSpeechRecognizerImplAndroid, |
290 | String[] results, |
291 | float[] scores, |
292 | boolean provisional); |
293 | private native void nativeOnRecognitionError(int nativeSpeechRecognizerImplAndroid, int error); |
294 | private native void nativeOnRecognitionEnd(int nativeSpeechRecognizerImplAndroid); |
295 | } |