LCOV - code coverage report
Current view: top level - dom/media/webspeech/recognition - SpeechRecognition.h (source / functions) Hit Total Coverage
Test: output.info Lines: 0 28 0.0 %
Date: 2017-07-14 16:53:18 Functions: 0 37 0.0 %
Legend: Lines: hit | not hit

          Line data    Source code
       1             : /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
       2             : /* vim:set ts=2 sw=2 sts=2 et cindent: */
       3             : /* This Source Code Form is subject to the terms of the Mozilla Public
       4             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       5             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
       6             : 
       7             : #ifndef mozilla_dom_SpeechRecognition_h
       8             : #define mozilla_dom_SpeechRecognition_h
       9             : 
      10             : #include "mozilla/Attributes.h"
      11             : #include "mozilla/DOMEventTargetHelper.h"
      12             : #include "nsCOMPtr.h"
      13             : #include "nsString.h"
      14             : #include "nsWrapperCache.h"
      15             : #include "nsTArray.h"
      16             : #include "js/TypeDecls.h"
      17             : 
      18             : #include "DOMMediaStream.h"
      19             : #include "nsIDOMNavigatorUserMedia.h"
      20             : #include "nsITimer.h"
      21             : #include "MediaStreamGraph.h"
      22             : #include "AudioSegment.h"
      23             : #include "mozilla/WeakPtr.h"
      24             : 
      25             : #include "SpeechGrammarList.h"
      26             : #include "SpeechRecognitionResultList.h"
      27             : #include "SpeechStreamListener.h"
      28             : #include "nsISpeechRecognitionService.h"
      29             : #include "endpointer.h"
      30             : 
      31             : #include "mozilla/dom/BindingDeclarations.h"
      32             : #include "mozilla/dom/SpeechRecognitionError.h"
      33             : 
      34             : namespace mozilla {
      35             : 
      36             : class DOMMediaStream;
      37             : 
      38             : namespace dom {
      39             : 
      40             : #define SPEECH_RECOGNITION_TEST_EVENT_REQUEST_TOPIC "SpeechRecognitionTest:RequestEvent"
      41             : #define SPEECH_RECOGNITION_TEST_END_TOPIC "SpeechRecognitionTest:End"
      42             : 
      43             : class GlobalObject;
      44             : class SpeechEvent;
      45             : 
      46             : LogModule* GetSpeechRecognitionLog();
      47             : #define SR_LOG(...) MOZ_LOG(GetSpeechRecognitionLog(), mozilla::LogLevel::Debug, (__VA_ARGS__))
      48             : 
      49             : class SpeechRecognition final : public DOMEventTargetHelper,
      50             :                                 public nsIObserver,
      51             :                                 public SupportsWeakPtr<SpeechRecognition>
      52             : {
      53             : public:
      54           0 :   MOZ_DECLARE_WEAKREFERENCE_TYPENAME(SpeechRecognition)
      55             :   explicit SpeechRecognition(nsPIDOMWindowInner* aOwnerWindow);
      56             : 
      57             :   NS_DECL_ISUPPORTS_INHERITED
      58           0 :   NS_DECL_CYCLE_COLLECTION_CLASS_INHERITED(SpeechRecognition, DOMEventTargetHelper)
      59             : 
      60             :   NS_DECL_NSIOBSERVER
      61             : 
      62             :   nsISupports* GetParentObject() const;
      63             : 
      64             :   JSObject* WrapObject(JSContext* aCx, JS::Handle<JSObject*> aGivenProto) override;
      65             : 
      66             :   static bool IsAuthorized(JSContext* aCx, JSObject* aGlobal);
      67             : 
      68             :   static already_AddRefed<SpeechRecognition>
      69             :   Constructor(const GlobalObject& aGlobal, ErrorResult& aRv);
      70             : 
      71             :   already_AddRefed<SpeechGrammarList> Grammars() const;
      72             : 
      73             :   void SetGrammars(mozilla::dom::SpeechGrammarList& aArg);
      74             : 
      75             :   void GetLang(nsString& aRetVal) const;
      76             : 
      77             :   void SetLang(const nsAString& aArg);
      78             : 
      79             :   bool GetContinuous(ErrorResult& aRv) const;
      80             : 
      81             :   void SetContinuous(bool aArg, ErrorResult& aRv);
      82             : 
      83             :   bool InterimResults() const;
      84             : 
      85             :   void SetInterimResults(bool aArg);
      86             : 
      87             :   uint32_t MaxAlternatives() const;
      88             : 
      89             :   void SetMaxAlternatives(uint32_t aArg);
      90             : 
      91             :   void GetServiceURI(nsString& aRetVal, ErrorResult& aRv) const;
      92             : 
      93             :   void SetServiceURI(const nsAString& aArg, ErrorResult& aRv);
      94             : 
      95             :   void Start(const Optional<NonNull<DOMMediaStream>>& aStream,
      96             :              CallerType aCallerType, ErrorResult& aRv);
      97             : 
      98             :   void Stop();
      99             : 
     100             :   void Abort();
     101             : 
     102           0 :   IMPL_EVENT_HANDLER(audiostart)
     103           0 :   IMPL_EVENT_HANDLER(soundstart)
     104           0 :   IMPL_EVENT_HANDLER(speechstart)
     105           0 :   IMPL_EVENT_HANDLER(speechend)
     106           0 :   IMPL_EVENT_HANDLER(soundend)
     107           0 :   IMPL_EVENT_HANDLER(audioend)
     108           0 :   IMPL_EVENT_HANDLER(result)
     109           0 :   IMPL_EVENT_HANDLER(nomatch)
     110           0 :   IMPL_EVENT_HANDLER(error)
     111           0 :   IMPL_EVENT_HANDLER(start)
     112           0 :   IMPL_EVENT_HANDLER(end)
     113             : 
     114             :   enum EventType {
     115             :     EVENT_START,
     116             :     EVENT_STOP,
     117             :     EVENT_ABORT,
     118             :     EVENT_AUDIO_DATA,
     119             :     EVENT_AUDIO_ERROR,
     120             :     EVENT_RECOGNITIONSERVICE_INTERMEDIATE_RESULT,
     121             :     EVENT_RECOGNITIONSERVICE_FINAL_RESULT,
     122             :     EVENT_RECOGNITIONSERVICE_ERROR,
     123             :     EVENT_COUNT
     124             :   };
     125             : 
     126             :   void DispatchError(EventType aErrorType, SpeechRecognitionErrorCode aErrorCode, const nsAString& aMessage);
     127             :   uint32_t FillSamplesBuffer(const int16_t* aSamples, uint32_t aSampleCount);
     128             :   uint32_t SplitSamplesBuffer(const int16_t* aSamplesBuffer, uint32_t aSampleCount, nsTArray<RefPtr<SharedBuffer>>& aResult);
     129             :   AudioSegment* CreateAudioSegment(nsTArray<RefPtr<SharedBuffer>>& aChunks);
     130             :   void FeedAudioData(already_AddRefed<SharedBuffer> aSamples, uint32_t aDuration, MediaStreamListener* aProvider, TrackRate aTrackRate);
     131             : 
     132             :   friend class SpeechEvent;
     133             : private:
     134           0 :   virtual ~SpeechRecognition() {};
     135             : 
     136             :   enum FSMState {
     137             :     STATE_IDLE,
     138             :     STATE_STARTING,
     139             :     STATE_ESTIMATING,
     140             :     STATE_WAITING_FOR_SPEECH,
     141             :     STATE_RECOGNIZING,
     142             :     STATE_WAITING_FOR_RESULT,
     143             :     STATE_COUNT
     144             :   };
     145             : 
     146             :   void SetState(FSMState state);
     147             :   bool StateBetween(FSMState begin, FSMState end);
     148             : 
     149             :   bool SetRecognitionService(ErrorResult& aRv);
     150             :   bool ValidateAndSetGrammarList(ErrorResult& aRv);
     151             : 
     152             :   class GetUserMediaSuccessCallback : public nsIDOMGetUserMediaSuccessCallback
     153             :   {
     154             :   public:
     155             :     NS_DECL_ISUPPORTS
     156             :     NS_DECL_NSIDOMGETUSERMEDIASUCCESSCALLBACK
     157             : 
     158           0 :     explicit GetUserMediaSuccessCallback(SpeechRecognition* aRecognition)
     159           0 :       : mRecognition(aRecognition)
     160           0 :     {}
     161             : 
     162             :   private:
     163           0 :     virtual ~GetUserMediaSuccessCallback() {}
     164             : 
     165             :     RefPtr<SpeechRecognition> mRecognition;
     166             :   };
     167             : 
     168             :   class GetUserMediaErrorCallback : public nsIDOMGetUserMediaErrorCallback
     169             :   {
     170             :   public:
     171             :     NS_DECL_ISUPPORTS
     172             :     NS_DECL_NSIDOMGETUSERMEDIAERRORCALLBACK
     173             : 
     174           0 :     explicit GetUserMediaErrorCallback(SpeechRecognition* aRecognition)
     175           0 :       : mRecognition(aRecognition)
     176           0 :     {}
     177             : 
     178             :   private:
     179           0 :     virtual ~GetUserMediaErrorCallback() {}
     180             : 
     181             :     RefPtr<SpeechRecognition> mRecognition;
     182             :   };
     183             : 
     184             :   NS_IMETHOD StartRecording(DOMMediaStream* aDOMStream);
     185             :   NS_IMETHOD StopRecording();
     186             : 
     187             :   uint32_t ProcessAudioSegment(AudioSegment* aSegment, TrackRate aTrackRate);
     188             :   void NotifyError(SpeechEvent* aEvent);
     189             : 
     190             :   void ProcessEvent(SpeechEvent* aEvent);
     191             :   void Transition(SpeechEvent* aEvent);
     192             : 
     193             :   void Reset();
     194             :   void ResetAndEnd();
     195             :   void WaitForAudioData(SpeechEvent* aEvent);
     196             :   void StartedAudioCapture(SpeechEvent* aEvent);
     197             :   void StopRecordingAndRecognize(SpeechEvent* aEvent);
     198             :   void WaitForEstimation(SpeechEvent* aEvent);
     199             :   void DetectSpeech(SpeechEvent* aEvent);
     200             :   void WaitForSpeechEnd(SpeechEvent* aEvent);
     201             :   void NotifyFinalResult(SpeechEvent* aEvent);
     202             :   void DoNothing(SpeechEvent* aEvent);
     203             :   void AbortSilently(SpeechEvent* aEvent);
     204             :   void AbortError(SpeechEvent* aEvent);
     205             : 
     206             :   RefPtr<DOMMediaStream> mDOMStream;
     207             :   RefPtr<SpeechStreamListener> mSpeechListener;
     208             :   nsCOMPtr<nsISpeechRecognitionService> mRecognitionService;
     209             : 
     210             :   FSMState mCurrentState;
     211             : 
     212             :   Endpointer mEndpointer;
     213             :   uint32_t mEstimationSamples;
     214             : 
     215             :   uint32_t mAudioSamplesPerChunk;
     216             : 
     217             :   // buffer holds one chunk of mAudioSamplesPerChunk
     218             :   // samples before feeding it to mEndpointer
     219             :   RefPtr<SharedBuffer> mAudioSamplesBuffer;
     220             :   uint32_t mBufferedSamples;
     221             : 
     222             :   nsCOMPtr<nsITimer> mSpeechDetectionTimer;
     223             :   bool mAborted;
     224             : 
     225             :   nsString mLang;
     226             : 
     227             :   RefPtr<SpeechGrammarList> mSpeechGrammarList;
     228             : 
     229             :   // WebSpeechAPI (http://bit.ly/1gIl7DC) states:
     230             :   //
     231             :   // 1. Default value MUST be false
     232             :   // 2. If true, interim results SHOULD be returned
     233             :   // 3. If false, interim results MUST NOT be returned
     234             :   //
     235             :   // Pocketsphinx does not return interm results; so, defaulting
     236             :   // mInterimResults to false, then ignoring its subsequent value
     237             :   // is a conforming implementation.
     238             :   bool mInterimResults;
     239             : 
     240             :   // WebSpeechAPI (http://bit.ly/1JAiqeo) states:
     241             :   //
     242             :   // 1. Default value is 1
     243             :   // 2. Subsequent value is the "maximum number of SpeechRecognitionAlternatives per result"
     244             :   //
     245             :   // Pocketsphinx can only return at maximum a single SpeechRecognitionAlternative
     246             :   // per SpeechRecognitionResult. So defaulting mMaxAlternatives to 1, for all non
     247             :   // zero values ignoring mMaxAlternatives while for a 0 value returning no
     248             :   // SpeechRecognitionAlternative per result is a conforming implementation.
     249             :   uint32_t mMaxAlternatives;
     250             : 
     251             :   void ProcessTestEventRequest(nsISupports* aSubject, const nsAString& aEventName);
     252             : 
     253             :   const char* GetName(FSMState aId);
     254             :   const char* GetName(SpeechEvent* aId);
     255             : };
     256             : 
     257             : class SpeechEvent : public Runnable
     258             : {
     259             : public:
     260           0 :   SpeechEvent(SpeechRecognition* aRecognition,
     261             :               SpeechRecognition::EventType aType)
     262           0 :     : Runnable("dom::SpeechEvent")
     263             :     , mAudioSegment(0)
     264             :     , mRecognitionResultList(nullptr)
     265             :     , mError(nullptr)
     266             :     , mRecognition(aRecognition)
     267             :     , mType(aType)
     268           0 :     , mTrackRate(0)
     269             :   {
     270           0 :   }
     271             : 
     272             :   ~SpeechEvent();
     273             : 
     274             :   NS_IMETHOD Run() override;
     275             :   AudioSegment* mAudioSegment;
     276             :   RefPtr<SpeechRecognitionResultList> mRecognitionResultList; // TODO: make this a session being passed which also has index and stuff
     277             :   RefPtr<SpeechRecognitionError> mError;
     278             : 
     279             :   friend class SpeechRecognition;
     280             : private:
     281             :   SpeechRecognition* mRecognition;
     282             : 
     283             :   // for AUDIO_DATA events, keep a reference to the provider
     284             :   // of the data (i.e., the SpeechStreamListener) to ensure it
     285             :   // is kept alive (and keeps SpeechRecognition alive) until this
     286             :   // event gets processed.
     287             :   RefPtr<MediaStreamListener> mProvider;
     288             :   SpeechRecognition::EventType mType;
     289             :   TrackRate mTrackRate;
     290             : };
     291             : 
     292             : } // namespace dom
     293             : 
     294             : inline nsISupports*
     295           0 : ToSupports(dom::SpeechRecognition* aRec)
     296             : {
     297           0 :   return ToSupports(static_cast<DOMEventTargetHelper*>(aRec));
     298             : }
     299             : 
     300             : } // namespace mozilla
     301             : 
     302             : #endif

Generated by: LCOV version 1.13