LCOV - code coverage report
Current view: top level - dom/media/webspeech/recognition - endpointer.cc (source / functions) Hit Total Coverage
Test: output.info Lines: 0 102 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 8 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : // Copyright (c) 2013 The Chromium Authors. All rights reserved.
       2             : //
       3             : // Redistribution and use in source and binary forms, with or without
       4             : // modification, are permitted provided that the following conditions are
       5             : // met:
       6             : //
       7             : //    * Redistributions of source code must retain the above copyright
       8             : // notice, this list of conditions and the following disclaimer.
       9             : //    * Redistributions in binary form must reproduce the above
      10             : // copyright notice, this list of conditions and the following disclaimer
      11             : // in the documentation and/or other materials provided with the
      12             : // distribution.
      13             : //    * Neither the name of Google Inc. nor the names of its
      14             : // contributors may be used to endorse or promote products derived from
      15             : // this software without specific prior written permission.
      16             : //
      17             : // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
      18             : // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
      19             : // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
      20             : // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
      21             : // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
      22             : // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
      23             : // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
      24             : // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
      25             : // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
      26             : // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
      27             : // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
      28             : 
      29             : #include "endpointer.h"
      30             : 
      31             : #include "AudioSegment.h"
      32             : 
      33             : namespace {
      34             : const int kFrameRate = 200;  // 1 frame = 5ms of audio.
      35             : }
      36             : 
      37             : namespace mozilla {
      38             : 
      39           0 : Endpointer::Endpointer(int sample_rate)
      40             :     : speech_input_possibly_complete_silence_length_us_(-1),
      41             :       speech_input_complete_silence_length_us_(-1),
      42             :       audio_frame_time_us_(0),
      43             :       sample_rate_(sample_rate),
      44           0 :       frame_size_(0) {
      45           0 :   Reset();
      46             : 
      47           0 :   frame_size_ = static_cast<int>(sample_rate / static_cast<float>(kFrameRate));
      48             : 
      49           0 :   speech_input_minimum_length_us_ =
      50             :       static_cast<int64_t>(1.7 * 1000000);
      51           0 :   speech_input_complete_silence_length_us_ =
      52             :       static_cast<int64_t>(0.5 * 1000000);
      53           0 :   long_speech_input_complete_silence_length_us_ = -1;
      54           0 :   long_speech_length_us_ = -1;
      55           0 :   speech_input_possibly_complete_silence_length_us_ =
      56             :       1 * 1000000;
      57             : 
      58             :   // Set the default configuration for Push To Talk mode.
      59           0 :   EnergyEndpointerParams ep_config;
      60           0 :   ep_config.set_frame_period(1.0f / static_cast<float>(kFrameRate));
      61           0 :   ep_config.set_frame_duration(1.0f / static_cast<float>(kFrameRate));
      62           0 :   ep_config.set_endpoint_margin(0.2f);
      63           0 :   ep_config.set_onset_window(0.15f);
      64           0 :   ep_config.set_speech_on_window(0.4f);
      65           0 :   ep_config.set_offset_window(0.15f);
      66           0 :   ep_config.set_onset_detect_dur(0.09f);
      67           0 :   ep_config.set_onset_confirm_dur(0.075f);
      68           0 :   ep_config.set_on_maintain_dur(0.10f);
      69           0 :   ep_config.set_offset_confirm_dur(0.12f);
      70           0 :   ep_config.set_decision_threshold(1000.0f);
      71           0 :   ep_config.set_min_decision_threshold(50.0f);
      72           0 :   ep_config.set_fast_update_dur(0.2f);
      73           0 :   ep_config.set_sample_rate(static_cast<float>(sample_rate));
      74           0 :   ep_config.set_min_fundamental_frequency(57.143f);
      75           0 :   ep_config.set_max_fundamental_frequency(400.0f);
      76           0 :   ep_config.set_contamination_rejection_period(0.25f);
      77           0 :   energy_endpointer_.Init(ep_config);
      78           0 : }
      79             : 
      80           0 : void Endpointer::Reset() {
      81           0 :   old_ep_status_ = EP_PRE_SPEECH;
      82           0 :   waiting_for_speech_possibly_complete_timeout_ = false;
      83           0 :   waiting_for_speech_complete_timeout_ = false;
      84           0 :   speech_previously_detected_ = false;
      85           0 :   speech_input_complete_ = false;
      86           0 :   audio_frame_time_us_ = 0; // Reset time for packets sent to endpointer.
      87           0 :   speech_end_time_us_ = -1;
      88           0 :   speech_start_time_us_ = -1;
      89           0 : }
      90             : 
      91           0 : void Endpointer::StartSession() {
      92           0 :   Reset();
      93           0 :   energy_endpointer_.StartSession();
      94           0 : }
      95             : 
      96           0 : void Endpointer::EndSession() {
      97           0 :   energy_endpointer_.EndSession();
      98           0 : }
      99             : 
     100           0 : void Endpointer::SetEnvironmentEstimationMode() {
     101           0 :   Reset();
     102           0 :   energy_endpointer_.SetEnvironmentEstimationMode();
     103           0 : }
     104             : 
     105           0 : void Endpointer::SetUserInputMode() {
     106           0 :   energy_endpointer_.SetUserInputMode();
     107           0 : }
     108             : 
     109           0 : EpStatus Endpointer::Status(int64_t *time) {
     110           0 :   return energy_endpointer_.Status(time);
     111             : }
     112             : 
     113           0 : EpStatus Endpointer::ProcessAudio(const AudioChunk& raw_audio, float* rms_out) {
     114           0 :   MOZ_ASSERT(raw_audio.mBufferFormat == AUDIO_FORMAT_S16, "Audio is not in 16 bit format");
     115           0 :   const int16_t* audio_data = static_cast<const int16_t*>(raw_audio.mChannelData[0]);
     116           0 :   const int num_samples = raw_audio.mDuration;
     117           0 :   EpStatus ep_status = EP_PRE_SPEECH;
     118             : 
     119             :   // Process the input data in blocks of frame_size_, dropping any incomplete
     120             :   // frames at the end (which is ok since typically the caller will be recording
     121             :   // audio in multiples of our frame size).
     122           0 :   int sample_index = 0;
     123           0 :   while (sample_index + frame_size_ <= num_samples) {
     124             :     // Have the endpointer process the frame.
     125           0 :     energy_endpointer_.ProcessAudioFrame(audio_frame_time_us_,
     126           0 :                                          audio_data + sample_index,
     127             :                                          frame_size_,
     128           0 :                                          rms_out);
     129           0 :     sample_index += frame_size_;
     130           0 :     audio_frame_time_us_ += (frame_size_ * 1000000) /
     131           0 :                          sample_rate_;
     132             : 
     133             :     // Get the status of the endpointer.
     134             :     int64_t ep_time;
     135           0 :     ep_status = energy_endpointer_.Status(&ep_time);
     136           0 :     if (old_ep_status_ != ep_status)
     137           0 :         fprintf(stderr, "Status changed old= %d, new= %d\n", old_ep_status_, ep_status);
     138             : 
     139             :     // Handle state changes.
     140           0 :     if ((EP_SPEECH_PRESENT == ep_status) &&
     141           0 :         (EP_POSSIBLE_ONSET == old_ep_status_)) {
     142           0 :       speech_end_time_us_ = -1;
     143           0 :       waiting_for_speech_possibly_complete_timeout_ = false;
     144           0 :       waiting_for_speech_complete_timeout_ = false;
     145             :       // Trigger SpeechInputDidStart event on first detection.
     146           0 :       if (false == speech_previously_detected_) {
     147           0 :         speech_previously_detected_ = true;
     148           0 :         speech_start_time_us_ = ep_time;
     149             :       }
     150             :     }
     151           0 :     if ((EP_PRE_SPEECH == ep_status) &&
     152           0 :         (EP_POSSIBLE_OFFSET == old_ep_status_)) {
     153           0 :       speech_end_time_us_ = ep_time;
     154           0 :       waiting_for_speech_possibly_complete_timeout_ = true;
     155           0 :       waiting_for_speech_complete_timeout_ = true;
     156             :     }
     157           0 :     if (ep_time > speech_input_minimum_length_us_) {
     158             :       // Speech possibly complete timeout.
     159           0 :       if ((waiting_for_speech_possibly_complete_timeout_) &&
     160           0 :           (ep_time - speech_end_time_us_ >
     161           0 :               speech_input_possibly_complete_silence_length_us_)) {
     162           0 :         waiting_for_speech_possibly_complete_timeout_ = false;
     163             :       }
     164           0 :       if (waiting_for_speech_complete_timeout_) {
     165             :         // The length of the silence timeout period can be held constant, or it
     166             :         // can be changed after a fixed amount of time from the beginning of
     167             :         // speech.
     168             :         bool has_stepped_silence =
     169           0 :             (long_speech_length_us_ > 0) &&
     170           0 :             (long_speech_input_complete_silence_length_us_ > 0);
     171             :         int64_t requested_silence_length;
     172           0 :         if (has_stepped_silence &&
     173           0 :             (ep_time - speech_start_time_us_) > long_speech_length_us_) {
     174           0 :           requested_silence_length =
     175           0 :               long_speech_input_complete_silence_length_us_;
     176             :         } else {
     177           0 :           requested_silence_length =
     178             :               speech_input_complete_silence_length_us_;
     179             :         }
     180             : 
     181             :         // Speech complete timeout.
     182           0 :         if ((ep_time - speech_end_time_us_) > requested_silence_length) {
     183           0 :           waiting_for_speech_complete_timeout_ = false;
     184           0 :           speech_input_complete_ = true;
     185             :         }
     186             :       }
     187             :     }
     188           0 :     old_ep_status_ = ep_status;
     189             :   }
     190           0 :   return ep_status;
     191             : }
     192             : 
     193             : }  // namespace mozilla

Generated by: LCOV version 1.13