1

認識したキーワードに応じてアクティビティを開くことができる音声認識 (Speech to Text) アプリの作成に成功しました。現時点では、Speech to Text 機能を開始するにはボタンをクリックする必要があります。ボタンを手動で押すのではなく、常時待ち受けるリスナーを使ってボタンの処理をトリガーしたいと考えています。

PocketSphinx の使用を検討しており、「Listen to command」というキーフレーズを追加しました。このキーフレーズが聞こえたらボタンを自動的に押し、その後、コードに追加できる一連のコマンドの処理に進むようにしたいと考えています。オフラインでの音声テキスト変換などは特に必要ないため、音声テキスト変換には Google の Speech to Text を使用していますが、その Google の Speech to Text 機能をトリガーするために PocketSphinx を使う予定です。

以下は、PocketSphinx のコードを部分的に変更したもので、コードの大部分を載せています。

public class PocketSphinxActivity extends Activity implements RecognitionListener {

 /* Named searches allow the decoder to be reconfigured quickly. */
 // Each constant below is the name under which a search is registered in
 // setupRecognizer and later selected through switchSearch.
 private static final String KWS_SEARCH = "wakeup";
 private static final String FORECAST_SEARCH = "forecast";
 private static final String DIGITS_SEARCH = "digits";
 private static final String PHONE_SEARCH = "phones";
 private static final String MENU_SEARCH = "menu";

 /* Keyphrase we are listening for in order to activate the menu search */
 private static final String KEYPHRASE = "listen to command"; //adjust this keyphrase!

 // Assigned in setupRecognizer, which onCreate runs from a background task.
 private SpeechRecognizer recognizer;
 // Maps a search name to the string resource id of the caption shown for it.
 private HashMap < String, Integer > captions;

 /**
  * Populates the caption table, shows the layout with a "preparing" message,
  * and starts recognizer initialization in an AsyncTask.
  *
  * @param state saved instance state, forwarded to the superclass
  */
 @Override
 public void onCreate(Bundle state) {
  super.onCreate(state);

  // Caption resource for every named search, looked up later by switchSearch.
  captions = new HashMap<String, Integer>();
  captions.put(KWS_SEARCH, R.string.kws_caption);
  captions.put(MENU_SEARCH, R.string.menu_caption);
  captions.put(DIGITS_SEARCH, R.string.digits_caption);
  captions.put(PHONE_SEARCH, R.string.phone_caption);
  captions.put(FORECAST_SEARCH, R.string.forecast_caption);

  setContentView(R.layout.main);
  TextView captionView = (TextView) findViewById(R.id.caption_text);
  captionView.setText("Preparing the recognizer");

  // Initializing the recognizer is slow and does IO, so it runs in an async task.
  AsyncTask<Void, Void, Exception> initTask = new AsyncTask<Void, Void, Exception>() {
   @Override
   protected Exception doInBackground(Void... params) {
    try {
     File syncedDir = new Assets(PocketSphinxActivity.this).syncAssets();
     setupRecognizer(syncedDir);
     return null;
    } catch (IOException initFailure) {
     // Handed to onPostExecute so the failure can be shown in the UI.
     return initFailure;
    }
   }

   @Override
   protected void onPostExecute(Exception result) {
    if (result == null) {
     // Setup succeeded: begin with the keyphrase search.
     switchSearch(KWS_SEARCH);
     return;
    }
    TextView failureView = (TextView) findViewById(R.id.caption_text);
    failureView.setText("Failed to init recognizer " + result);
   }
  };
  initTask.execute();
 }

 /**
  * Cancels and shuts down the recognizer when the activity is destroyed.
  *
  * The recognizer is created by the background task started in onCreate, so it
  * may still be null here if the activity is destroyed before setup finishes or
  * if setup failed; in that case there is nothing to release.
  */
 @Override
 public void onDestroy() {
  super.onDestroy();
  if (recognizer != null) {
   recognizer.cancel();
   recognizer.shutdown();
  }
 }

 /**
  * Receives quick updates about the current hypothesis. The hypothesis text is
  * compared with the keyphrase and with the search names themselves; a match
  * switches to the corresponding search, anything else is shown in the result view.
  *
  * @param hypothesis current partial hypothesis; ignored when null
  */
 @Override
 public void onPartialResult(Hypothesis hypothesis) {
  if (hypothesis == null) {
   return;
  }

  String heard = hypothesis.getHypstr();
  if (heard.equals(KEYPHRASE)) {
   // The wake-up phrase opens the menu search.
   switchSearch(MENU_SEARCH);
   return;
  }
  if (heard.equals(DIGITS_SEARCH)) {
   switchSearch(DIGITS_SEARCH);
   return;
  }
  if (heard.equals(PHONE_SEARCH)) {
   switchSearch(PHONE_SEARCH);
   return;
  }
  if (heard.equals(FORECAST_SEARCH)) {
   switchSearch(FORECAST_SEARCH);
   return;
  }

  // No command matched: just display the text.
  TextView resultView = (TextView) findViewById(R.id.result_text);
  resultView.setText(heard);
 }

 /**
  * Called when the recognizer is stopped. Clears the result view and, when a
  * hypothesis is available, shows its text in a short toast.
  *
  * @param hypothesis final hypothesis, or null when there is none
  */
 @Override
 public void onResult(Hypothesis hypothesis) {
  TextView resultView = (TextView) findViewById(R.id.result_text);
  resultView.setText("");

  if (hypothesis == null) {
   return;
  }
  String recognized = hypothesis.getHypstr();
  makeText(getApplicationContext(), recognized, Toast.LENGTH_SHORT).show();
 }

 // Listener callback with an empty body: nothing is done for this event.
 @Override
 public void onBeginningOfSpeech() {}

 /**
  * Returns to the keyphrase search once speech ends, unless the keyphrase
  * search is already the active one. Switching stops the recognizer, which is
  * how a final result is obtained.
  */
 @Override
 public void onEndOfSpeech() {
  String activeSearch = recognizer.getSearchName();
  if (activeSearch.equals(KWS_SEARCH)) {
   return;
  }
  switchSearch(KWS_SEARCH);
 }

 /**
  * Stops the recognizer, restarts it on the named search, and updates the
  * caption view to the caption registered for that search.
  *
  * @param searchName name of the search to activate; must be a key of captions
  */
 private void switchSearch(String searchName) {
  recognizer.stop();

  // Keyphrase spotting listens without a timeout; every other search
  // listens with a timeout of 10000 ms (10 seconds).
  boolean spotting = searchName.equals(KWS_SEARCH);
  if (!spotting) {
   recognizer.startListening(searchName, 10000);
  } else {
   recognizer.startListening(searchName);
  }

  int captionId = captions.get(searchName);
  String captionText = getResources().getString(captionId);
  TextView captionView = (TextView) findViewById(R.id.caption_text);
  captionView.setText(captionText);
 }

 private void setupRecognizer(File assetsDir) throws IOException {
  // The recognizer can be configured to perform multiple searches
  // of different kind and switch between them

  recognizer = defaultSetup()
   .setAcousticModel(new File(assetsDir, "en-us-ptm"))
   .setDictionary(new File(assetsDir, "cmudict-en-us.dict"))

  // To disable logging of raw audio comment out this call (takes a lot of space on the device)
  .setRawLogDir(assetsDir)

  // Threshold to tune for keyphrase to balance between false alarms and misses
  .setKeywordThreshold(1e-45 f)

  // Use context-independent phonetic search, context-dependent is too slow for mobile
  .setBoolean("-allphone_ci", true)

  .getRecognizer();
  recognizer.addListener(this);

  /** In your application you might not need to add all those searches.
   * They are added here for demonstration. You can leave just one.
   */

  // Create keyword-activation search.
  recognizer.addKeyphraseSearch(KWS_SEARCH, KEYPHRASE);

  // Create grammar-based search for selection between demos
  File menuGrammar = new File(assetsDir, "menu.gram");
  recognizer.addGrammarSearch(MENU_SEARCH, menuGrammar);

  // Create grammar-based search for digit recognition
  File digitsGrammar = new File(assetsDir, "digits.gram");
  recognizer.addGrammarSearch(DIGITS_SEARCH, digitsGrammar);

  // Create language model search
  File languageModel = new File(assetsDir, "weather.dmp");
  recognizer.addNgramSearch(FORECAST_SEARCH, languageModel);

  // Phonetic search
  File phoneticModel = new File(assetsDir, "en-phone.dmp");
  recognizer.addAllphoneSearch(PHONE_SEARCH, phoneticModel);
 }

 /**
  * Shows the message of a reported error in the caption view.
  *
  * @param error the exception that was reported
  */
 @Override
 public void onError(Exception error) {
  TextView captionView = (TextView) findViewById(R.id.caption_text);
  String message = error.getMessage();
  captionView.setText(message);
 }

 /**
  * Returns to the keyphrase search by calling switchSearch(KWS_SEARCH).
  * Presumably invoked when a search started with a timeout (see the 10000 ms
  * value in switchSearch) expires; confirm against the recognizer library.
  */
 @Override
 public void onTimeout() {
  switchSearch(KWS_SEARCH);
 }

私はほとんどのコマンドに興味がありません。アプリが開かれるとすぐに (現在もそうなっていますが)「Listen to command」の待ち受けを開始し、それが聞こえたら bVoice (ボタン) をトリガーするようにしたいだけです。

上記のコードをどのように変更すればよいか、つまり何を削除し何を追加すればよいかを教えていただけると大変助かります。また、これを実現するもっと簡単な方法があれば、ぜひ教えてください (カスタムのキーフレーズでなくても構いません)。

4

1 に答える 1

0

このような場合、「ボタンを押す」必要はありません。代わりにボタンのコールバックを直接呼び出して、実際に行いたい処理を実行できます。

public class PocketSphinxActivity extends Activity implements RecognitionListener {

 // Name under which the keyphrase search is registered and started.
 private static final String KWS_SEARCH = "wakeup";
 // Phrase whose detection triggers the action in onPartialResult.
 private static final String KEYPHRASE = "listen to command"; //adjust this keyphrase!

 // Assigned in setupRecognizer, which onCreate runs from a background task.
 private SpeechRecognizer recognizer;

 /**
  * Shows the layout with a "preparing" message and starts recognizer
  * initialization in an AsyncTask; on success, keyphrase listening begins.
  *
  * @param state saved instance state, forwarded to the superclass
  */
 @Override
 public void onCreate(Bundle state) {
  super.onCreate(state);

  setContentView(R.layout.main);
  TextView captionView = (TextView) findViewById(R.id.caption_text);
  captionView.setText("Preparing the recognizer");

  AsyncTask<Void, Void, Exception> initTask = new AsyncTask<Void, Void, Exception>() {
   @Override
   protected Exception doInBackground(Void... params) {
    try {
     File syncedDir = new Assets(PocketSphinxActivity.this).syncAssets();
     setupRecognizer(syncedDir);
     return null;
    } catch (IOException initFailure) {
     // Handed to onPostExecute so the failure can be shown in the UI.
     return initFailure;
    }
   }

   @Override
   protected void onPostExecute(Exception result) {
    if (result == null) {
     // Setup succeeded: start listening for the keyphrase.
     recognizer.startListening(KWS_SEARCH);
     return;
    }
    TextView failureView = (TextView) findViewById(R.id.caption_text);
    failureView.setText("Failed to init recognizer " + result);
   }
  };
  initTask.execute();
 }

 /**
  * Cancels and shuts down the recognizer when the activity is destroyed.
  *
  * The recognizer is created by the background task started in onCreate, so it
  * may still be null here if the activity is destroyed before setup finishes or
  * if setup failed; in that case there is nothing to release.
  */
 @Override
 public void onDestroy() {
  super.onDestroy();
  if (recognizer != null) {
   recognizer.cancel();
   recognizer.shutdown();
  }
 }

 /**
  * Receives quick updates about the current hypothesis. When the hypothesis
  * text equals the keyphrase, recognition is cancelled, the action is run, and
  * keyphrase listening is started again.
  *
  * @param hypothesis current partial hypothesis; ignored when null
  */
 @Override
 public void onPartialResult(Hypothesis hypothesis) {
  if (hypothesis == null) {
   return;
  }

  String heard = hypothesis.getHypstr();
  if (!heard.equals(KEYPHRASE)) {
   return;
  }

  recognizer.cancel();
  performAction();     // <- You have to implement this
  recognizer.startListening(KWS_SEARCH);
 }

 // Listener callback with an empty body: the hypothesis passed here is not used.
 @Override
 public void onResult(Hypothesis hypothesis) {}

 // Listener callback with an empty body: nothing is done for this event.
 @Override
 public void onBeginningOfSpeech() {}

 // Listener callback with an empty body: nothing is done for this event.
 @Override
 public void onEndOfSpeech() {}

 // Listener callback with an empty body: nothing is done for this event.
 @Override
 public void onTimeout() {}

 /**
  * Builds the recognizer from the acoustic model and dictionary in the given
  * asset directory, registers this activity as its listener, and adds the
  * keyphrase search.
  *
  * @param assetsDir directory holding "en-us-ptm" and "cmudict-en-us.dict"
  * @throws IOException declared by the signature; presumably raised by the
  *                     recognizer library during setup (confirm against the library)
  */
 private void setupRecognizer(File assetsDir) throws IOException {
  File acousticModel = new File(assetsDir, "en-us-ptm");
  File dictionary = new File(assetsDir, "cmudict-en-us.dict");

  recognizer = defaultSetup()
   .setAcousticModel(acousticModel)
   .setDictionary(dictionary)
   .getRecognizer();
  recognizer.addListener(this);

  // Only the keyword-activation search is registered.
  recognizer.addKeyphraseSearch(KWS_SEARCH, KEYPHRASE);
 }

 /**
  * Shows the message of a reported error in the caption view.
  *
  * @param error the exception that was reported
  */
 @Override
 public void onError(Exception error) {
  TextView captionView = (TextView) findViewById(R.id.caption_text);
  String message = error.getMessage();
  captionView.setText(message);
 }

 /**
  * Hook run by onPartialResult once the keyphrase has been recognized.
  * The name is spelled performAction so that it matches that call; the
  * previous spelling (peformAction) left the call unresolved.
  */
 public void performAction() {
    // do here whatever you want
 }
2016-05-05T06:46:41.823 に回答