Line data Source code
1 : /*
2 : * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
3 : *
4 : * Use of this source code is governed by a BSD-style license
5 : * that can be found in the LICENSE file in the root of the source
6 : * tree. An additional intellectual property rights grant can be found
7 : * in the file PATENTS. All contributing project authors may
8 : * be found in the AUTHORS file in the root of the source tree.
9 : */
10 :
11 : #ifndef WEBRTC_MODULES_AUDIO_CODING_CODECS_AUDIO_DECODER_H_
12 : #define WEBRTC_MODULES_AUDIO_CODING_CODECS_AUDIO_DECODER_H_
13 :
14 : #include <memory>
15 : #include <vector>
16 :
17 : #include "webrtc/base/array_view.h"
18 : #include "webrtc/base/buffer.h"
19 : #include "webrtc/base/constructormagic.h"
20 : #include "webrtc/base/optional.h"
21 : #include "webrtc/typedefs.h"
22 :
23 : namespace webrtc {
24 :
25 : // This is the interface class for decoders in NetEQ. Each codec type will have
26 : // an implementation of this class.
27 : class AudioDecoder {
28 : public:
29 : enum SpeechType {  // Kind of audio reported by a decode call.
30 : kSpeech = 1,
31 : kComfortNoise = 2
32 : };
33 :
34 : // Used by PacketDuration*() below; the value -1 is reserved for errors.
35 : enum { kNotImplemented = -2 };
36 :
37 0 : AudioDecoder() = default;
38 0 : virtual ~AudioDecoder() {} //= default;
39 :
40 0 : class EncodedAudioFrame {  // Interface for a single encoded frame; see ParseResult::frame.
41 : public:
42 : struct DecodeResult {
43 : size_t num_decoded_samples;  // Total number of samples across all channels.
44 : SpeechType speech_type;  // Whether the output was speech or comfort noise.
45 : };
46 :
47 0 : virtual ~EncodedAudioFrame() = default;
48 :
49 : // Returns the duration in samples-per-channel of this audio frame.
50 : // If no duration can be ascertained, returns zero.
51 : virtual size_t Duration() const = 0;
52 :
53 : // Decodes this frame of audio and writes the result in |decoded|.
54 : // |decoded| must be large enough to store as many samples as indicated by a
55 : // call to Duration(). On success, returns an rtc::Optional containing the
56 : // total number of samples across all channels, as well as whether the
57 : // decoder produced comfort noise or speech. On failure, returns an empty
58 : // rtc::Optional. Decode may be called at most once per frame object.
59 : virtual rtc::Optional<DecodeResult> Decode(
60 : rtc::ArrayView<int16_t> decoded) const = 0;
61 : };
62 :
63 0 : struct ParseResult {  // One decodable frame, as returned by ParsePayload().
64 : ParseResult();
65 : ParseResult(uint32_t timestamp,
66 : int priority,
67 : std::unique_ptr<EncodedAudioFrame> frame);
68 : ParseResult(ParseResult&& b);  // Movable; no copy operations are declared.
69 : ~ParseResult();
70 :
71 : ParseResult& operator=(ParseResult&& b);
72 :
73 : // The timestamp of the frame is in samples per channel.
74 : uint32_t timestamp;
75 : // The relative priority of the frame compared to other frames of the same
76 : // payload and the same timeframe. A higher value means a lower priority.
77 : // The highest priority is zero - negative values are not allowed.
78 : int priority;
79 : std::unique_ptr<EncodedAudioFrame> frame;  // The encoded frame, owned by this result.
80 : };
81 :
82 : // Let the decoder parse this payload and prepare zero or more decodable
83 : // frames. Each frame must be between 10 ms and 120 ms long. The caller must
84 : // ensure that the AudioDecoder object outlives any frame objects returned by
85 : // this call. The decoder is free to swap or move the data from the |payload|
86 : // buffer. |timestamp| is the input timestamp, in samples, corresponding to
87 : // the start of the payload.
88 : virtual std::vector<ParseResult> ParsePayload(rtc::Buffer&& payload,
89 : uint32_t timestamp);
90 :
91 : // Decodes |encoded_len| bytes from |encoded| and writes the result in
92 : // |decoded|. The maximum bytes allowed to be written into |decoded| is
93 : // |max_decoded_bytes|. Returns the total number of samples across all
94 : // channels. If the decoder produced comfort noise, |speech_type|
95 : // is set to kComfortNoise, otherwise it is kSpeech. The desired output
96 : // sample rate is provided in |sample_rate_hz|, which must be valid for the
97 : // codec at hand.
98 : int Decode(const uint8_t* encoded,
99 : size_t encoded_len,
100 : int sample_rate_hz,
101 : size_t max_decoded_bytes,
102 : int16_t* decoded,
103 : SpeechType* speech_type);
104 :
105 : // Same as Decode(), but interfaces to the decoder's redundant decode function.
106 : // The default implementation simply calls the regular Decode() method.
107 : int DecodeRedundant(const uint8_t* encoded,
108 : size_t encoded_len,
109 : int sample_rate_hz,
110 : size_t max_decoded_bytes,
111 : int16_t* decoded,
112 : SpeechType* speech_type);
113 :
114 : // Indicates if the decoder implements the DecodePlc method.
115 : virtual bool HasDecodePlc() const;
116 :
117 : // Calls the packet-loss concealment of the decoder to update the state after
118 : // one or several lost packets. The caller must make sure that the
119 : // memory allocated in |decoded| can accommodate |num_frames| frames.
120 : virtual size_t DecodePlc(size_t num_frames, int16_t* decoded);
121 :
122 : // Resets the decoder state (empty buffers etc.).
123 : virtual void Reset() = 0;
124 :
125 : // Notifies the decoder of an incoming packet to NetEQ.
126 : virtual int IncomingPacket(const uint8_t* payload,
127 : size_t payload_len,
128 : uint16_t rtp_sequence_number,
129 : uint32_t rtp_timestamp,
130 : uint32_t arrival_timestamp);
131 :
132 : // Returns the last error code from the decoder.
133 : virtual int ErrorCode();
134 :
135 : // Returns the duration in samples-per-channel of the payload in |encoded|
136 : // which is |encoded_len| bytes long. Returns kNotImplemented if no duration
137 : // estimate is available, or -1 in case of an error.
138 : virtual int PacketDuration(const uint8_t* encoded, size_t encoded_len) const;
139 :
140 : // Returns the duration in samples-per-channel of the redundant payload in
141 : // |encoded| which is |encoded_len| bytes long. Returns kNotImplemented if no
142 : // duration estimate is available, or -1 in case of an error.
143 : virtual int PacketDurationRedundant(const uint8_t* encoded,
144 : size_t encoded_len) const;
145 :
146 : // Detects whether a packet has forward error correction. The packet is
147 : // comprised of the samples in |encoded| which is |encoded_len| bytes long.
148 : // Returns true if the packet has FEC and false otherwise.
149 : virtual bool PacketHasFec(const uint8_t* encoded, size_t encoded_len) const;
150 :
151 : // Returns the actual sample rate of the decoder's output. This value may not
152 : // change during the lifetime of the decoder.
153 : virtual int SampleRateHz() const = 0;
154 :
155 : // The number of channels in the decoder's output. This value may not change
156 : // during the lifetime of the decoder.
157 : virtual size_t Channels() const = 0;
158 :
159 : protected:
160 : static SpeechType ConvertSpeechType(int16_t type);  // Presumably maps a raw codec value to SpeechType; defined elsewhere.
161 :
162 : virtual int DecodeInternal(const uint8_t* encoded,
163 : size_t encoded_len,
164 : int sample_rate_hz,
165 : int16_t* decoded,
166 : SpeechType* speech_type) = 0;  // Pure virtual: each concrete decoder must implement it.
167 :
168 : virtual int DecodeRedundantInternal(const uint8_t* encoded,
169 : size_t encoded_len,
170 : int sample_rate_hz,
171 : int16_t* decoded,
172 : SpeechType* speech_type);  // Has a base-class definition (not pure); body not in this header.
173 :
174 : private:
175 : RTC_DISALLOW_COPY_AND_ASSIGN(AudioDecoder);  // Macro from constructormagic.h; by its name, forbids copying.
176 : };
177 :
178 : } // namespace webrtc
179 : #endif // WEBRTC_MODULES_AUDIO_CODING_CODECS_AUDIO_DECODER_H_
|