LCOV - code coverage report
Current view: top level - dom/media/webspeech/recognition - endpointer.h (source / functions) Hit Total Coverage
Test: output.info Lines: 0 16 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 7 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : // Copyright (c) 2013 The Chromium Authors. All rights reserved.
       2             : //
       3             : // Redistribution and use in source and binary forms, with or without
       4             : // modification, are permitted provided that the following conditions are
       5             : // met:
       6             : //
       7             : //    * Redistributions of source code must retain the above copyright
       8             : // notice, this list of conditions and the following disclaimer.
       9             : //    * Redistributions in binary form must reproduce the above
      10             : // copyright notice, this list of conditions and the following disclaimer
      11             : // in the documentation and/or other materials provided with the
      12             : // distribution.
      13             : //    * Neither the name of Google Inc. nor the names of its
      14             : // contributors may be used to endorse or promote products derived from
      15             : // this software without specific prior written permission.
      16             : //
      17             : // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
      18             : // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
      19             : // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
      20             : // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
      21             : // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
      22             : // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
      23             : // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
      24             : // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
      25             : // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
      26             : // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
      27             : // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
      28             : 
      29             : #ifndef CONTENT_BROWSER_SPEECH_ENDPOINTER_ENDPOINTER_H_
      30             : #define CONTENT_BROWSER_SPEECH_ENDPOINTER_ENDPOINTER_H_
      31             : 
      32             : #include "energy_endpointer.h"
      33             : 
      34             : namespace mozilla {
      35             : 
      36             : struct AudioChunk;
      37             : 
      38             : // A simple interface to the underlying energy-endpointer implementation. This
      39             : // class lets callers provide audio as it is being recorded and lets them poll
      40             : // to find when the user has stopped speaking.
      41             : //
      42             : // There are two events that may trigger the end of speech:
      43             : //
      44             : // speechInputPossiblyComplete event:
      45             : //
      46             : // Signals that silence/noise has been detected for a *short* amount of
      47             : // time after some speech has been detected. It can be used for low latency
      48             : // UI feedback. To disable it, set it to a large amount.
      49             : //
      50             : // speechInputComplete event:
      51             : //
      52             : // This event is intended to signal end of input and to stop recording.
      53             : // The amount of time to wait after speech is set by
      54             : // speech_input_complete_silence_length_us_ and optionally two other
      55             : // parameters (see below).
      56             : // This time can be held constant, or can change as more speech is detected.
      57             : // In the latter case, the time changes after a set amount of time from the
      58             : // *beginning* of speech.  This is motivated by the expectation that there
      59             : // will be two distinct types of inputs: short search queries and longer
      60             : // dictation style input.
      61             : //
      62             : // Three parameters are used to define the piecewise constant timeout function.
      63             : // The timeout length is speech_input_complete_silence_length until
      64             : // long_speech_length, when it changes to
      65             : // long_speech_input_complete_silence_length.
      66           0 : class Endpointer {
      67             :  public:
      68             :   explicit Endpointer(int sample_rate);
      69             : 
      70             :   // Start the endpointer. This should be called at the beginning of a session.
      71             :   void StartSession();
      72             : 
      73             :   // Stop the endpointer.
      74             :   void EndSession();
      75             : 
      76             :   // Start environment estimation. Audio will be used for environment estimation
      77             :   // i.e. noise level estimation.
      78             :   void SetEnvironmentEstimationMode();
      79             : 
      80             :   // Start user input. This should be called when the user indicates start of
      81             :   // input, e.g. by pressing a button.
      82             :   void SetUserInputMode();
      83             : 
      84             :   // Process a segment of audio, which may be more than one frame.
      85             :   // The status of the last frame will be returned.
      86             :   EpStatus ProcessAudio(const AudioChunk& raw_audio, float* rms_out);
      87             : 
      88             :   // Get the status of the endpointer.
      89             :   EpStatus Status(int64_t *time_us);
      90             : 
      91             :   // Get the expected frame size for audio chunks. Audio chunks are expected
      92             :   // to contain a number of samples that is a multiple of this number, and extra
      93             :   // samples will be dropped.
      94           0 :   int32_t FrameSize() const {
      95           0 :     return frame_size_;
      96             :   }
      97             : 
      98             :   // Returns true if the endpointer detected reasonable audio levels above
      99             :   // background noise which could be user speech, false if not.
     100           0 :   bool DidStartReceivingSpeech() const {
     101           0 :     return speech_previously_detected_;
     102             :   }
     103             : 
     104             :   bool IsEstimatingEnvironment() const {
     105             :     return energy_endpointer_.estimating_environment();
     106             :   }
     107             : 
     108           0 :   void set_speech_input_complete_silence_length(int64_t time_us) {
     109           0 :     speech_input_complete_silence_length_us_ = time_us;
     110           0 :   }
     111             : 
     112           0 :   void set_long_speech_input_complete_silence_length(int64_t time_us) {
     113           0 :     long_speech_input_complete_silence_length_us_ = time_us;
     114           0 :   }
     115             : 
     116             :   void set_speech_input_possibly_complete_silence_length(int64_t time_us) {
     117             :     speech_input_possibly_complete_silence_length_us_ = time_us;
     118             :   }
     119             : 
     120           0 :   void set_long_speech_length(int64_t time_us) {
     121           0 :     long_speech_length_us_ = time_us;
     122           0 :   }
     123             : 
     124           0 :   bool speech_input_complete() const {
     125           0 :     return speech_input_complete_;
     126             :   }
     127             : 
     128             :   // RMS background noise level in dB.
     129             :   float NoiseLevelDb() const { return energy_endpointer_.GetNoiseLevelDb(); }
     130             : 
     131             :  private:
     132             :   // Reset internal states. Helper method common to initial input utterance
     133             :   // and following input utterances.
     134             :   void Reset();
     135             : 
     136             :   // Minimum allowable length of speech input.
     137             :   int64_t speech_input_minimum_length_us_;
     138             : 
     139             :   // The speechInputPossiblyComplete event signals that silence/noise has been
     140             :   // detected for a *short* amount of time after some speech has been detected.
     141             :   // This property specifies the time period.
     142             :   int64_t speech_input_possibly_complete_silence_length_us_;
     143             : 
     144             :   // The speechInputComplete event signals that silence/noise has been
     145             :   // detected for a *long* amount of time after some speech has been detected.
     146             :   // This property specifies the time period.
     147             :   int64_t speech_input_complete_silence_length_us_;
     148             : 
     149             :   // Same as above, this specifies the required silence period after speech
     150             :   // detection. This period is used instead of
     151             :   // speech_input_complete_silence_length_us_ when the utterance is longer than
     152             :   // long_speech_length_us_. This parameter is optional.
     153             :   int64_t long_speech_input_complete_silence_length_us_;
     154             : 
     155             :   // The period of time after which the endpointer should consider
     156             :   // long_speech_input_complete_silence_length_us_ as a valid silence period
     157             :   // instead of speech_input_complete_silence_length_us_. This parameter is
     158             :   // optional.
     159             :   int64_t long_speech_length_us_;
     160             : 
     161             :   // First speech onset time, used in determination of speech complete timeout.
     162             :   int64_t speech_start_time_us_;
     163             : 
     164             :   // Most recent end time, used in determination of speech complete timeout.
     165             :   int64_t speech_end_time_us_;
     166             : 
     167             :   int64_t audio_frame_time_us_;
     168             :   EpStatus old_ep_status_;
     169             :   bool waiting_for_speech_possibly_complete_timeout_;
     170             :   bool waiting_for_speech_complete_timeout_;
     171             :   bool speech_previously_detected_;
     172             :   bool speech_input_complete_;
     173             :   EnergyEndpointer energy_endpointer_;
     174             :   int sample_rate_;
     175             :   int32_t frame_size_;
     176             : };
     177             : 
     178             : }  // namespace mozilla
     179             : 
     180             : #endif  // CONTENT_BROWSER_SPEECH_ENDPOINTER_ENDPOINTER_H_

Generated by: LCOV version 1.13