Line data Source code
1 : /*
2 : * Copyright (C) 2009 The Android Open Source Project
3 : *
4 : * Licensed under the Apache License, Version 2.0 (the "License");
5 : * you may not use this file except in compliance with the License.
6 : * You may obtain a copy of the License at
7 : *
8 : * http://www.apache.org/licenses/LICENSE-2.0
9 : *
10 : * Unless required by applicable law or agreed to in writing, software
11 : * distributed under the License is distributed on an "AS IS" BASIS,
12 : * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 : * See the License for the specific language governing permissions and
14 : * limitations under the License.
15 : */
16 :
17 : //#define LOG_NDEBUG 0
18 : #undef LOG_TAG
19 : #define LOG_TAG "MPEG4Extractor"
20 : #include <utils/Log.h>
21 :
22 : #include "include/MPEG4Extractor.h"
23 : #include "include/SampleTable.h"
24 : #include "include/ESDS.h"
25 :
26 : #include <algorithm>
27 : #include <ctype.h>
28 : #include <limits>
29 : #include <stdint.h>
30 : #include <stdlib.h>
31 : #include <string.h>
32 : #include <type_traits>
33 :
34 : #include <media/stagefright/foundation/ABitReader.h>
35 : #include <media/stagefright/foundation/ABuffer.h>
36 : #include <media/stagefright/foundation/ADebug.h>
37 : #include <media/stagefright/foundation/AMessage.h>
38 : #include <media/stagefright/MediaBuffer.h>
39 : #include <media/stagefright/MediaDefs.h>
40 : #include <media/stagefright/MediaSource.h>
41 : #include <media/stagefright/MetaData.h>
42 :
// Upper limit applied to sizes read from the file before they are used for
// allocations or offset arithmetic: the smaller of SIZE_MAX and INT32_MAX,
// minus 128.
static const uint32_t kMAX_ALLOCATION = static_cast<uint32_t>(
    (SIZE_MAX < INT32_MAX ? SIZE_MAX : INT32_MAX) - 128);
45 :
46 : namespace stagefright {
47 :
// Sentinel returned by unitsToUs() when the conversion cannot be performed.
static const int64_t OVERFLOW_ERROR = -INT64_MAX;

// Converts `units` ticks at a rate of `hz` ticks per second into microseconds,
// i.e. computes units * 1,000,000 / hz (truncating toward zero) while avoiding
// intermediate int64_t overflow wherever the final result is representable.
//
// Returns OVERFLOW_ERROR when hz == 0 or when the result cannot be computed
// without overflow. Because OVERFLOW_ERROR is -INT64_MAX, a genuine result of
// exactly that value cannot be told apart from the sentinel.
int64_t unitsToUs(int64_t units, int64_t hz) {
    if (hz == 0) {
        return OVERFLOW_ERROR;
    }
    const int64_t MAX_S = INT64_MAX / 1000000;
    // Range comparison rather than std::abs(): std::abs(INT64_MIN) is
    // undefined because the result is not representable.
    if (units >= -MAX_S && units <= MAX_S) {
        return units * 1000000 / hz;
    }
    // INT64_MIN / -1 (and the matching %) is undefined; its true value,
    // 2^63, would not fit anyway.
    if (units == INT64_MIN && hz == -1) {
        return OVERFLOW_ERROR;
    }
    // Hard case, avoid overflow-inducing 'units*1M' by calculating:
    // (units / hz) * 1M + ((units % hz) * 1M) / hz.
    // Each multiplication and the final addition can still overflow, so
    // every step is range-checked first.
    const int64_t units_div_hz = units / hz;
    const int64_t units_rem_hz = units % hz;
    if (units_div_hz < -MAX_S || units_div_hz > MAX_S ||
        units_rem_hz < -MAX_S || units_rem_hz > MAX_S) {
        return OVERFLOW_ERROR;
    }
    const int64_t quot_us = units_div_hz * 1000000;
    const int64_t rem_us = (units_rem_hz * 1000000) / hz;
    // Both terms are bounded by MAX_S * 1M in magnitude here, so std::abs()
    // cannot be handed INT64_MIN.
    if (std::abs(quot_us) > INT64_MAX - std::abs(rem_us)) {
        return OVERFLOW_ERROR;
    }
    return quot_us + rem_us;
}
75 :
76 : class MPEG4Source : public MediaSource {
77 : public:
78 : MPEG4Source(const sp<MetaData> &format,
79 : uint32_t timeScale,
80 : const sp<SampleTable> &sampleTable);
81 :
82 : sp<MetaData> getFormat() override;
83 :
84 : nsTArray<Indice> exportIndex() override;
85 :
86 : protected:
87 : virtual ~MPEG4Source();
88 :
89 : private:
90 : sp<MetaData> mFormat;
91 : uint32_t mTimescale;
92 : sp<SampleTable> mSampleTable;
93 :
94 : MPEG4Source(const MPEG4Source &) = delete;
95 : MPEG4Source &operator=(const MPEG4Source &) = delete;
96 : };
97 :
98 : // This custom data source wraps an existing one and satisfies requests
99 : // falling entirely within a cached range from the cache while forwarding
100 : // all remaining requests to the wrapped datasource.
101 : // This is used to cache the full sampletable metadata for a single track,
102 : // possibly wrapping multiple times to cover all tracks, i.e.
103 : // Each MPEG4DataSource caches the sampletable metadata for a single track.
104 :
105 : struct MPEG4DataSource : public DataSource {
106 : MPEG4DataSource(const sp<DataSource> &source);
107 :
108 : status_t initCheck() const override;
109 : ssize_t readAt(off64_t offset, void *data, size_t size) override;
110 : status_t getSize(off64_t *size) override;
111 : uint32_t flags() override;
112 :
113 : status_t setCachedRange(off64_t offset, size_t size);
114 :
115 : protected:
116 : virtual ~MPEG4DataSource();
117 :
118 : private:
119 : Mutex mLock;
120 :
121 : sp<DataSource> mSource;
122 : off64_t mCachedOffset;
123 : size_t mCachedSize;
124 : uint8_t *mCache;
125 :
126 : void clearCache();
127 :
128 : MPEG4DataSource(const MPEG4DataSource &) = delete;
129 : MPEG4DataSource &operator=(const MPEG4DataSource &) = delete;
130 : };
131 :
132 0 : MPEG4DataSource::MPEG4DataSource(const sp<DataSource> &source)
133 : : mSource(source),
134 : mCachedOffset(0),
135 : mCachedSize(0),
136 0 : mCache(NULL) {
137 0 : }
138 :
139 0 : MPEG4DataSource::~MPEG4DataSource() {
140 0 : clearCache();
141 0 : }
142 :
143 0 : void MPEG4DataSource::clearCache() {
144 0 : if (mCache) {
145 0 : free(mCache);
146 0 : mCache = NULL;
147 : }
148 :
149 0 : mCachedOffset = 0;
150 0 : mCachedSize = 0;
151 0 : }
152 :
153 0 : status_t MPEG4DataSource::initCheck() const {
154 0 : return mSource->initCheck();
155 : }
156 :
157 0 : ssize_t MPEG4DataSource::readAt(off64_t offset, void *data, size_t size) {
158 0 : Mutex::Autolock autoLock(mLock);
159 :
160 0 : if (offset >= mCachedOffset
161 0 : && offset + size <= mCachedOffset + mCachedSize) {
162 0 : memcpy(data, &mCache[offset - mCachedOffset], size);
163 0 : return size;
164 : }
165 :
166 0 : return mSource->readAt(offset, data, size);
167 : }
168 :
169 0 : status_t MPEG4DataSource::getSize(off64_t *size) {
170 0 : return mSource->getSize(size);
171 : }
172 :
173 0 : uint32_t MPEG4DataSource::flags() {
174 0 : return mSource->flags();
175 : }
176 :
177 0 : status_t MPEG4DataSource::setCachedRange(off64_t offset, size_t size) {
178 0 : Mutex::Autolock autoLock(mLock);
179 :
180 0 : clearCache();
181 :
182 0 : mCache = (uint8_t *)malloc(size);
183 :
184 0 : if (mCache == NULL) {
185 0 : return -ENOMEM;
186 : }
187 :
188 0 : mCachedOffset = offset;
189 0 : mCachedSize = size;
190 :
191 0 : ssize_t err = mSource->readAt(mCachedOffset, mCache, mCachedSize);
192 :
193 0 : if (err < (ssize_t)size) {
194 0 : clearCache();
195 :
196 0 : return ERROR_IO;
197 : }
198 :
199 0 : return OK;
200 : }
201 :
202 : ////////////////////////////////////////////////////////////////////////////////
203 :
// Debug helper: prints `size` bytes starting at `_data` to stdout, 16 bytes
// per row. Each row shows the row offset in hex, the bytes in hex (with an
// extra gap after the eighth column and padding for missing columns in the
// last row), and then the same bytes as characters, with '.' standing in for
// non-printable values.
static void hexdump(const void *_data, size_t size) {
    const uint8_t *bytes = static_cast<const uint8_t *>(_data);

    for (size_t rowStart = 0; rowStart < size; rowStart += 16) {
        printf("0x%04x ", static_cast<unsigned int>(rowStart));

        // Number of real bytes in this row (fewer than 16 only in the last).
        const size_t remaining = size - rowStart;
        const size_t rowLen = remaining > 16 ? 16 : remaining;

        // Hex columns.
        for (size_t col = 0; col < 16; ++col) {
            if (col == 8) {
                printf(" ");
            }

            if (col < rowLen) {
                printf("%02x ", bytes[rowStart + col]);
            } else {
                printf(" ");
            }
        }

        printf(" ");

        // Character columns.
        for (size_t col = 0; col < rowLen; ++col) {
            const uint8_t value = bytes[rowStart + col];
            if (isprint(value)) {
                printf("%c", value);
            } else {
                printf(".");
            }
        }

        printf("\n");
    }
}
242 :
243 0 : static const char *FourCC2MIME(uint32_t fourcc) {
244 0 : switch (fourcc) {
245 : case FOURCC('m', 'p', '4', 'a'):
246 0 : return MEDIA_MIMETYPE_AUDIO_AAC;
247 :
248 : case FOURCC('s', 'a', 'm', 'r'):
249 0 : return MEDIA_MIMETYPE_AUDIO_AMR_NB;
250 :
251 : case FOURCC('s', 'a', 'w', 'b'):
252 0 : return MEDIA_MIMETYPE_AUDIO_AMR_WB;
253 :
254 : case FOURCC('.', 'm', 'p', '3'):
255 0 : return MEDIA_MIMETYPE_AUDIO_MPEG;
256 :
257 : case FOURCC('m', 'p', '4', 'v'):
258 0 : return MEDIA_MIMETYPE_VIDEO_MPEG4;
259 :
260 : case FOURCC('s', '2', '6', '3'):
261 : case FOURCC('h', '2', '6', '3'):
262 : case FOURCC('H', '2', '6', '3'):
263 0 : return MEDIA_MIMETYPE_VIDEO_H263;
264 :
265 : case FOURCC('a', 'v', 'c', '1'):
266 : case FOURCC('a', 'v', 'c', '3'):
267 0 : return MEDIA_MIMETYPE_VIDEO_AVC;
268 :
269 : case FOURCC('V', 'P', '6', 'F'):
270 0 : return MEDIA_MIMETYPE_VIDEO_VP6;
271 :
272 : case FOURCC('v', 'p', '0', '9'):
273 0 : return MEDIA_MIMETYPE_VIDEO_VP9;
274 :
275 : default:
276 0 : ALOGE("Unknown MIME type %08x", fourcc);
277 0 : return NULL;
278 : }
279 : }
280 :
281 0 : static bool AdjustChannelsAndRate(uint32_t fourcc, uint32_t *channels, uint32_t *rate) {
282 0 : const char* mime = FourCC2MIME(fourcc);
283 0 : if (!mime) {
284 0 : return false;
285 : }
286 0 : if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_NB, mime)) {
287 : // AMR NB audio is always mono, 8kHz
288 0 : *channels = 1;
289 0 : *rate = 8000;
290 0 : return true;
291 0 : } else if (!strcasecmp(MEDIA_MIMETYPE_AUDIO_AMR_WB, mime)) {
292 : // AMR WB audio is always mono, 16kHz
293 0 : *channels = 1;
294 0 : *rate = 16000;
295 0 : return true;
296 : }
297 0 : return false;
298 : }
299 :
300 0 : MPEG4Extractor::MPEG4Extractor(const sp<DataSource> &source)
301 : : mSidxDuration(0),
302 : mDataSource(source),
303 : mInitCheck(NO_INIT),
304 : mHasVideo(false),
305 : mHeaderTimescale(0),
306 : mFirstTrack(NULL),
307 : mLastTrack(NULL),
308 0 : mFileMetaData(new MetaData),
309 : mFirstSINF(NULL),
310 : mIsDrm(false),
311 0 : mDrmScheme(0)
312 : {
313 0 : }
314 :
315 0 : MPEG4Extractor::~MPEG4Extractor() {
316 0 : Track *track = mFirstTrack;
317 0 : while (track) {
318 0 : Track *next = track->next;
319 :
320 0 : delete track;
321 0 : track = next;
322 : }
323 0 : mFirstTrack = mLastTrack = NULL;
324 :
325 0 : SINF *sinf = mFirstSINF;
326 0 : while (sinf) {
327 0 : SINF *next = sinf->next;
328 0 : delete[] sinf->IPMPData;
329 : delete sinf;
330 0 : sinf = next;
331 : }
332 0 : mFirstSINF = NULL;
333 :
334 0 : for (size_t i = 0; i < mPssh.Length(); i++) {
335 0 : delete [] mPssh[i].data;
336 : }
337 0 : }
338 :
339 0 : uint32_t MPEG4Extractor::flags() const {
340 0 : return CAN_PAUSE | CAN_SEEK_BACKWARD | CAN_SEEK_FORWARD | CAN_SEEK;
341 : }
342 :
343 0 : sp<MetaData> MPEG4Extractor::getMetaData() {
344 : status_t err;
345 0 : if ((err = readMetaData()) != OK) {
346 0 : return NULL;
347 : }
348 :
349 0 : return mFileMetaData;
350 : }
351 :
352 0 : size_t MPEG4Extractor::countTracks() {
353 : status_t err;
354 0 : if ((err = readMetaData()) != OK) {
355 : ALOGV("MPEG4Extractor::countTracks: no tracks");
356 0 : return 0;
357 : }
358 :
359 0 : size_t n = 0;
360 0 : Track *track = mFirstTrack;
361 0 : while (track) {
362 0 : ++n;
363 0 : track = track->next;
364 : }
365 :
366 : ALOGV("MPEG4Extractor::countTracks: %d tracks", n);
367 0 : return n;
368 : }
369 :
370 0 : sp<MetaData> MPEG4Extractor::getTrackMetaData(
371 : size_t index, uint32_t flags) {
372 : status_t err;
373 0 : if ((err = readMetaData()) != OK) {
374 0 : return NULL;
375 : }
376 :
377 0 : Track *track = mFirstTrack;
378 0 : while (index > 0) {
379 0 : if (track == NULL) {
380 0 : return NULL;
381 : }
382 :
383 0 : track = track->next;
384 0 : --index;
385 : }
386 :
387 0 : if (track == NULL) {
388 0 : return NULL;
389 : }
390 :
391 0 : return track->meta;
392 : }
393 :
// Writes the four bytes of `x`, most significant first, into `s` followed by
// a terminating NUL. `s` must have room for at least 5 chars.
static void MakeFourCCString(uint32_t x, char *s) {
    for (int i = 0; i < 4; ++i) {
        const int shift = 8 * (3 - i);
        s[i] = (x >> shift) & 0xff;
    }
    s[4] = '\0';
}
401 :
402 0 : status_t MPEG4Extractor::readMetaData() {
403 0 : if (mInitCheck != NO_INIT) {
404 0 : return mInitCheck;
405 : }
406 :
407 0 : off64_t offset = 0;
408 0 : status_t err = NO_INIT;
409 0 : while (!mFirstTrack) {
410 0 : err = parseChunk(&offset, 0);
411 : // The parseChunk function returns UNKNOWN_ERROR to skip
412 : // some boxes we don't want to handle. Filter that error
413 : // code but return others so e.g. I/O errors propagate.
414 0 : if (err != OK && err != static_cast<status_t>(UNKNOWN_ERROR)) {
415 0 : ALOGW("Error %d parsing chunck at offset %lld looking for first track",
416 0 : err, (long long)offset);
417 0 : break;
418 : }
419 : }
420 :
421 0 : if (mInitCheck == OK) {
422 0 : if (mHasVideo) {
423 0 : mFileMetaData->setCString(
424 0 : kKeyMIMEType, MEDIA_MIMETYPE_CONTAINER_MPEG4);
425 : } else {
426 0 : mFileMetaData->setCString(kKeyMIMEType, "audio/mp4");
427 : }
428 :
429 0 : mInitCheck = OK;
430 : } else {
431 0 : mInitCheck = err;
432 : }
433 :
434 0 : CHECK_NE(err, (status_t)NO_INIT);
435 :
436 : // copy pssh data into file metadata
437 0 : uint64_t psshsize = 0;
438 0 : for (size_t i = 0; i < mPssh.Length(); i++) {
439 0 : psshsize += 20 + mPssh[i].datalen;
440 0 : if (mPssh[i].datalen > kMAX_ALLOCATION - 20 ||
441 : psshsize > kMAX_ALLOCATION) {
442 0 : return ERROR_MALFORMED;
443 : }
444 : }
445 0 : if (psshsize) {
446 0 : char *buf = (char*)malloc(psshsize);
447 0 : if (!buf) {
448 0 : return -ENOMEM;
449 : }
450 0 : char *ptr = buf;
451 0 : for (size_t i = 0; i < mPssh.Length(); i++) {
452 0 : memcpy(ptr, mPssh[i].uuid, 20); // uuid + length
453 0 : memcpy(ptr + 20, mPssh[i].data, mPssh[i].datalen);
454 0 : ptr += (20 + mPssh[i].datalen);
455 : }
456 0 : mFileMetaData->setData(kKeyPssh, 'pssh', buf, psshsize);
457 0 : free(buf);
458 : }
459 0 : return mInitCheck;
460 : }
461 :
462 0 : char* MPEG4Extractor::getDrmTrackInfo(size_t trackID, int *len) {
463 0 : if (mFirstSINF == NULL) {
464 0 : return NULL;
465 : }
466 :
467 0 : SINF *sinf = mFirstSINF;
468 0 : while (sinf && (trackID != sinf->trackID)) {
469 0 : sinf = sinf->next;
470 : }
471 :
472 0 : if (sinf == NULL) {
473 0 : return NULL;
474 : }
475 :
476 0 : *len = sinf->len;
477 0 : return sinf->IPMPData;
478 : }
479 :
480 : // Reads an encoded integer 7 bits at a time until it encounters the high bit clear.
481 0 : static int32_t readSize(off64_t offset,
482 : const sp<DataSource> DataSource, uint8_t *numOfBytes) {
483 0 : uint32_t size = 0;
484 : uint8_t data;
485 0 : bool moreData = true;
486 0 : *numOfBytes = 0;
487 :
488 0 : while (moreData) {
489 0 : if (DataSource->readAt(offset, &data, 1) < 1) {
490 0 : return -1;
491 : }
492 0 : offset ++;
493 0 : moreData = (data >= 128) ? true : false;
494 0 : size = (size << 7) | (data & 0x7f); // Take last 7 bits
495 0 : (*numOfBytes) ++;
496 : }
497 :
498 0 : return size;
499 : }
500 :
501 0 : status_t MPEG4Extractor::parseDrmSINF(off64_t *offset, off64_t data_offset) {
502 : uint8_t updateIdTag;
503 0 : if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) {
504 0 : return ERROR_IO;
505 : }
506 0 : data_offset ++;
507 :
508 0 : if (0x01/*OBJECT_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) {
509 0 : return ERROR_MALFORMED;
510 : }
511 :
512 : uint8_t numOfBytes;
513 0 : int32_t size = readSize(data_offset, mDataSource, &numOfBytes);
514 0 : if (size < 0) {
515 0 : return ERROR_IO;
516 : }
517 0 : int32_t classSize = size;
518 0 : data_offset += numOfBytes;
519 :
520 0 : while(size >= 11 ) {
521 : uint8_t descriptorTag;
522 0 : if (mDataSource->readAt(data_offset, &descriptorTag, 1) < 1) {
523 0 : return ERROR_IO;
524 : }
525 0 : data_offset ++;
526 :
527 0 : if (0x11/*OBJECT_DESCRIPTOR_ID_TAG*/ != descriptorTag) {
528 0 : return ERROR_MALFORMED;
529 : }
530 :
531 : uint8_t buffer[8];
532 : //ObjectDescriptorID and ObjectDescriptor url flag
533 0 : if (mDataSource->readAt(data_offset, buffer, 2) < 2) {
534 0 : return ERROR_IO;
535 : }
536 0 : data_offset += 2;
537 :
538 0 : if ((buffer[1] >> 5) & 0x0001) { //url flag is set
539 0 : return ERROR_MALFORMED;
540 : }
541 :
542 0 : if (mDataSource->readAt(data_offset, buffer, 8) < 8) {
543 0 : return ERROR_IO;
544 : }
545 0 : data_offset += 8;
546 :
547 0 : if ((0x0F/*ES_ID_REF_TAG*/ != buffer[1])
548 0 : || ( 0x0A/*IPMP_DESCRIPTOR_POINTER_ID_TAG*/ != buffer[5])) {
549 0 : return ERROR_MALFORMED;
550 : }
551 :
552 0 : SINF *sinf = new SINF;
553 0 : sinf->trackID = U16_AT(&buffer[3]);
554 0 : sinf->IPMPDescriptorID = buffer[7];
555 0 : sinf->next = mFirstSINF;
556 0 : mFirstSINF = sinf;
557 :
558 0 : size -= (8 + 2 + 1);
559 : }
560 :
561 0 : if (size != 0) {
562 0 : return ERROR_MALFORMED;
563 : }
564 :
565 0 : if (mDataSource->readAt(data_offset, &updateIdTag, 1) < 1) {
566 0 : return ERROR_IO;
567 : }
568 0 : data_offset ++;
569 :
570 0 : if(0x05/*IPMP_DESCRIPTOR_UPDATE_ID_TAG*/ != updateIdTag) {
571 0 : return ERROR_MALFORMED;
572 : }
573 :
574 0 : size = readSize(data_offset, mDataSource, &numOfBytes);
575 0 : if (size < 0) {
576 0 : return ERROR_IO;
577 : }
578 0 : classSize = size;
579 0 : data_offset += numOfBytes;
580 :
581 0 : while (size > 0) {
582 : uint8_t tag;
583 : int32_t dataLen;
584 0 : if (mDataSource->readAt(data_offset, &tag, 1) < 1) {
585 0 : return ERROR_IO;
586 : }
587 0 : data_offset ++;
588 :
589 0 : if (0x0B/*IPMP_DESCRIPTOR_ID_TAG*/ == tag) {
590 : uint8_t id;
591 0 : dataLen = readSize(data_offset, mDataSource, &numOfBytes);
592 0 : if (dataLen < 0) {
593 0 : return ERROR_IO;
594 0 : } else if (dataLen < 4) {
595 0 : return ERROR_MALFORMED;
596 : }
597 0 : data_offset += numOfBytes;
598 :
599 0 : if (mDataSource->readAt(data_offset, &id, 1) < 1) {
600 0 : return ERROR_IO;
601 : }
602 0 : data_offset ++;
603 :
604 0 : SINF *sinf = mFirstSINF;
605 0 : while (sinf && (sinf->IPMPDescriptorID != id)) {
606 0 : sinf = sinf->next;
607 : }
608 0 : if (sinf == NULL) {
609 0 : return ERROR_MALFORMED;
610 : }
611 0 : sinf->len = dataLen - 3;
612 0 : sinf->IPMPData = new (fallible) char[sinf->len];
613 0 : if (!sinf->IPMPData) {
614 0 : return -ENOMEM;
615 : }
616 :
617 0 : if (mDataSource->readAt(data_offset + 2, sinf->IPMPData, sinf->len) < sinf->len) {
618 0 : return ERROR_IO;
619 : }
620 0 : data_offset += sinf->len;
621 :
622 0 : size -= (dataLen + numOfBytes + 1);
623 : }
624 : }
625 :
626 0 : if (size != 0) {
627 0 : return ERROR_MALFORMED;
628 : }
629 :
630 0 : return static_cast<status_t>(UNKNOWN_ERROR); // Return a dummy error.
631 : }
632 :
633 : struct PathAdder {
634 0 : PathAdder(nsTArray<uint32_t> *path, uint32_t chunkType)
635 0 : : mPath(path) {
636 0 : mPath->AppendElement(chunkType);
637 0 : }
638 :
639 0 : ~PathAdder() {
640 0 : mPath->RemoveElementAt(mPath->Length() - 1);
641 0 : }
642 :
643 : private:
644 : nsTArray<uint32_t> *mPath;
645 :
646 : PathAdder(const PathAdder &);
647 : PathAdder &operator=(const PathAdder &);
648 : };
649 :
650 0 : static bool underMetaDataPath(const nsTArray<uint32_t> &path) {
651 0 : return path.Length() >= 5
652 0 : && path[0] == FOURCC('m', 'o', 'o', 'v')
653 0 : && path[1] == FOURCC('u', 'd', 't', 'a')
654 0 : && path[2] == FOURCC('m', 'e', 't', 'a')
655 0 : && path[3] == FOURCC('i', 'l', 's', 't');
656 : }
657 :
// Returns true when `size` is positive and no larger than the accepted
// maximum for a compressed sample.
static bool ValidInputSize(int32_t size) {
    // Reject compressed samples larger than an uncompressed UHD
    // frame. This is a reasonable cut-off for a lossy codec,
    // combined with the current Firefox limit to 5k video.
    const int32_t kMaxSampleBytes = 4 * (1920 * 1080) * 3 / 2;
    if (size <= 0) {
        return false;
    }
    return size <= kMaxSampleBytes;
}
664 :
665 0 : status_t MPEG4Extractor::parseChunk(off64_t *offset, int depth) {
666 : ALOGV("entering parseChunk %lld/%d", *offset, depth);
667 : uint32_t hdr[2];
668 0 : if (mDataSource->readAt(*offset, hdr, 4) < 4) {
669 0 : return ERROR_IO;
670 : }
671 0 : if (!hdr[0]) {
672 0 : *offset += 4;
673 0 : return OK;
674 : }
675 0 : if (mDataSource->readAt(*offset + 4, hdr + 1, 4) < 4) {
676 0 : return ERROR_IO;
677 : }
678 0 : uint64_t chunk_size = ntohl(hdr[0]);
679 0 : uint32_t chunk_type = ntohl(hdr[1]);
680 0 : off64_t data_offset = *offset + 8;
681 :
682 0 : if (chunk_size == 1) {
683 0 : if (mDataSource->readAt(*offset + 8, &chunk_size, 8) < 8) {
684 0 : return ERROR_IO;
685 : }
686 0 : chunk_size = ntoh64(chunk_size);
687 0 : data_offset += 8;
688 :
689 0 : if (chunk_size < 16) {
690 : // The smallest valid chunk is 16 bytes long in this case.
691 0 : return ERROR_MALFORMED;
692 : }
693 0 : } else if (chunk_size < 8) {
694 : // The smallest valid chunk is 8 bytes long.
695 0 : return ERROR_MALFORMED;
696 : }
697 :
698 0 : if (chunk_size >= kMAX_ALLOCATION) {
699 : // Could cause an overflow later. Abort.
700 0 : return ERROR_MALFORMED;
701 : }
702 :
703 : char chunk[5];
704 0 : MakeFourCCString(chunk_type, chunk);
705 : ALOGV("chunk: %s @ %lld, %d", chunk, *offset, depth);
706 :
707 : #if 0
708 : static const char kWhitespace[] = " ";
709 : const char *indent = &kWhitespace[sizeof(kWhitespace) - 1 - 2 * depth];
710 : printf("%sfound chunk '%s' of size %lld\n", indent, chunk, chunk_size);
711 :
712 : char buffer[256];
713 : size_t n = chunk_size;
714 : if (n > sizeof(buffer)) {
715 : n = sizeof(buffer);
716 : }
717 : if (mDataSource->readAt(*offset, buffer, n)
718 : < (ssize_t)n) {
719 : return ERROR_IO;
720 : }
721 :
722 : hexdump(buffer, n);
723 : #endif
724 :
725 0 : PathAdder autoAdder(&mPath, chunk_type);
726 :
727 0 : off64_t chunk_data_size = *offset + chunk_size - data_offset;
728 :
729 0 : if (chunk_type != FOURCC('c', 'p', 'r', 't')
730 0 : && chunk_type != FOURCC('c', 'o', 'v', 'r')
731 0 : && mPath.Length() == 5 && underMetaDataPath(mPath)) {
732 0 : off64_t stop_offset = *offset + chunk_size;
733 0 : *offset = data_offset;
734 0 : while (*offset < stop_offset) {
735 0 : status_t err = parseChunk(offset, depth + 1);
736 0 : if (err != OK) {
737 0 : return err;
738 : }
739 : }
740 :
741 0 : if (*offset != stop_offset) {
742 0 : return ERROR_MALFORMED;
743 : }
744 :
745 0 : return OK;
746 : }
747 :
748 0 : switch(chunk_type) {
749 : case FOURCC('m', 'o', 'o', 'v'):
750 : case FOURCC('t', 'r', 'a', 'k'):
751 : case FOURCC('m', 'd', 'i', 'a'):
752 : case FOURCC('m', 'i', 'n', 'f'):
753 : case FOURCC('d', 'i', 'n', 'f'):
754 : case FOURCC('s', 't', 'b', 'l'):
755 : case FOURCC('m', 'v', 'e', 'x'):
756 : case FOURCC('m', 'o', 'o', 'f'):
757 : case FOURCC('t', 'r', 'a', 'f'):
758 : case FOURCC('m', 'f', 'r', 'a'):
759 : case FOURCC('i', 'l', 's', 't'):
760 : case FOURCC('s', 'i', 'n', 'f'):
761 : case FOURCC('s', 'c', 'h', 'i'):
762 : case FOURCC('e', 'd', 't', 's'):
763 : {
764 0 : if (chunk_type == FOURCC('s', 't', 'b', 'l')) {
765 : ALOGV("sampleTable chunk is %d bytes long.", (size_t)chunk_size);
766 :
767 0 : if (mDataSource->flags()
768 0 : & (DataSource::kWantsPrefetching
769 : | DataSource::kIsCachingDataSource)) {
770 : sp<MPEG4DataSource> cachedSource =
771 0 : new MPEG4DataSource(mDataSource);
772 :
773 0 : if (cachedSource->setCachedRange(*offset, chunk_size) == OK) {
774 0 : mDataSource = cachedSource;
775 : }
776 : }
777 :
778 0 : if (!mLastTrack) {
779 0 : return ERROR_MALFORMED;
780 : }
781 0 : mLastTrack->sampleTable = new SampleTable(mDataSource);
782 : }
783 :
784 0 : bool isTrack = false;
785 0 : if (chunk_type == FOURCC('t', 'r', 'a', 'k')) {
786 0 : isTrack = true;
787 :
788 0 : Track *track = new Track;
789 0 : track->next = NULL;
790 0 : if (mLastTrack) {
791 0 : mLastTrack->next = track;
792 : } else {
793 0 : mFirstTrack = track;
794 : }
795 0 : mLastTrack = track;
796 :
797 0 : track->meta = new MetaData;
798 0 : track->includes_expensive_metadata = false;
799 0 : track->skipTrack = false;
800 0 : track->timescale = 0;
801 0 : track->empty_duration = 0;
802 0 : track->segment_duration = 0;
803 0 : track->media_time = 0;
804 0 : track->meta->setCString(kKeyMIMEType, "application/octet-stream");
805 : }
806 :
807 0 : off64_t stop_offset = *offset + chunk_size;
808 0 : *offset = data_offset;
809 0 : while (*offset < stop_offset) {
810 0 : status_t err = parseChunk(offset, depth + 1);
811 0 : if (err != OK) {
812 0 : return err;
813 : }
814 : }
815 :
816 0 : if (*offset != stop_offset) {
817 0 : return ERROR_MALFORMED;
818 : }
819 :
820 0 : if (isTrack) {
821 0 : if (mLastTrack->skipTrack) {
822 0 : Track *cur = mFirstTrack;
823 :
824 0 : if (cur == mLastTrack) {
825 0 : delete cur;
826 0 : mFirstTrack = mLastTrack = NULL;
827 : } else {
828 0 : while (cur && cur->next != mLastTrack) {
829 0 : cur = cur->next;
830 : }
831 0 : cur->next = NULL;
832 0 : delete mLastTrack;
833 0 : mLastTrack = cur;
834 : }
835 :
836 0 : return OK;
837 : }
838 :
839 0 : status_t err = verifyTrack(mLastTrack);
840 :
841 0 : if (err != OK) {
842 0 : return err;
843 : }
844 0 : } else if (chunk_type == FOURCC('m', 'o', 'o', 'v')) {
845 0 : mInitCheck = OK;
846 :
847 0 : if (!mIsDrm) {
848 0 : return static_cast<status_t>(UNKNOWN_ERROR); // Return a dummy error.
849 : } else {
850 0 : return OK;
851 : }
852 : }
853 0 : break;
854 : }
855 :
856 : case FOURCC('e', 'l', 's', 't'):
857 : {
858 : // See 14496-12 8.6.6
859 : uint8_t version;
860 0 : if (mDataSource->readAt(data_offset, &version, 1) < 1) {
861 0 : return ERROR_IO;
862 : }
863 :
864 : uint32_t entry_count;
865 0 : if (!mDataSource->getUInt32(data_offset + 4, &entry_count)) {
866 0 : return ERROR_IO;
867 : }
868 :
869 0 : off64_t entriesoffset = data_offset + 8;
870 0 : bool nonEmptyCount = false;
871 0 : for (uint32_t i = 0; i < entry_count; i++) {
872 0 : if (mHeaderTimescale == 0) {
873 0 : ALOGW("ignoring edit list because timescale is 0");
874 0 : break;
875 : }
876 0 : if (entriesoffset - data_offset > chunk_size) {
877 0 : ALOGW("invalid edit list size");
878 0 : break;
879 : }
880 : uint64_t segment_duration;
881 : int64_t media_time;
882 0 : if (version == 1) {
883 0 : if (!mDataSource->getUInt64(entriesoffset, &segment_duration) ||
884 0 : !mDataSource->getUInt64(entriesoffset + 8, (uint64_t*)&media_time)) {
885 0 : return ERROR_IO;
886 : }
887 0 : entriesoffset += 16;
888 0 : } else if (version == 0) {
889 : uint32_t sd;
890 : int32_t mt;
891 0 : if (!mDataSource->getUInt32(entriesoffset, &sd) ||
892 0 : !mDataSource->getUInt32(entriesoffset + 4, (uint32_t*)&mt)) {
893 0 : return ERROR_IO;
894 : }
895 0 : entriesoffset += 8;
896 0 : segment_duration = sd;
897 0 : media_time = mt;
898 : } else {
899 0 : return ERROR_IO;
900 : }
901 0 : entriesoffset += 4; // ignore media_rate_integer and media_rate_fraction.
902 0 : if (media_time == -1 && i) {
903 0 : ALOGW("ignoring invalid empty edit");
904 0 : break;
905 0 : } else if (media_time == -1) {
906 : // Starting offsets for tracks (streams) are represented by an initial empty edit.
907 0 : if (!mLastTrack) {
908 0 : return ERROR_MALFORMED;
909 : }
910 0 : mLastTrack->empty_duration = segment_duration;
911 0 : continue;
912 0 : } else if (nonEmptyCount) {
913 : // we only support a single non-empty entry at the moment, for gapless playback
914 0 : ALOGW("multiple edit list entries, A/V sync will be wrong");
915 0 : break;
916 : } else {
917 0 : nonEmptyCount = true;
918 : }
919 :
920 0 : if (!mLastTrack) {
921 0 : return ERROR_MALFORMED;
922 : }
923 0 : mLastTrack->segment_duration = segment_duration;
924 0 : mLastTrack->media_time = media_time;
925 : }
926 0 : storeEditList();
927 0 : *offset += chunk_size;
928 0 : break;
929 : }
930 :
931 : case FOURCC('f', 'r', 'm', 'a'):
932 : {
933 : uint32_t original_fourcc;
934 0 : if (mDataSource->readAt(data_offset, &original_fourcc, 4) < 4) {
935 0 : return ERROR_IO;
936 : }
937 0 : original_fourcc = ntohl(original_fourcc);
938 : ALOGV("read original format: %d", original_fourcc);
939 0 : if (!mLastTrack) {
940 0 : return ERROR_MALFORMED;
941 : }
942 0 : const char* mime = FourCC2MIME(original_fourcc);
943 0 : if (!mime) {
944 0 : return ERROR_UNSUPPORTED;
945 : }
946 0 : mLastTrack->meta->setCString(kKeyMIMEType, mime);
947 0 : uint32_t num_channels = 0;
948 0 : uint32_t sample_rate = 0;
949 0 : if (AdjustChannelsAndRate(original_fourcc, &num_channels, &sample_rate)) {
950 0 : mLastTrack->meta->setInt32(kKeyChannelCount, num_channels);
951 0 : mLastTrack->meta->setInt32(kKeySampleRate, sample_rate);
952 : }
953 0 : *offset += chunk_size;
954 0 : break;
955 : }
956 :
957 : case FOURCC('s', 'c', 'h', 'm'):
958 : {
959 0 : if (!mDataSource->getUInt32(data_offset, &mDrmScheme)) {
960 0 : return ERROR_IO;
961 : }
962 :
963 0 : *offset += chunk_size;
964 0 : break;
965 : }
966 :
967 : case FOURCC('t', 'e', 'n', 'c'):
968 : {
969 0 : if (chunk_size < 32) {
970 0 : return ERROR_MALFORMED;
971 : }
972 :
973 : // tenc box contains 1 byte version, 3 byte flags, 3 byte default algorithm id, one byte
974 : // default IV size, 16 bytes default KeyID
975 : // (ISO 23001-7)
976 : char buf[4];
977 0 : memset(buf, 0, 4);
978 0 : if (mDataSource->readAt(data_offset + 4, buf + 1, 3) < 3) {
979 0 : return ERROR_IO;
980 : }
981 0 : uint32_t defaultAlgorithmId = ntohl(*((int32_t*)buf));
982 0 : if (defaultAlgorithmId > 1) {
983 : // only 0 (clear) and 1 (AES-128) are valid
984 0 : return ERROR_MALFORMED;
985 : }
986 :
987 0 : memset(buf, 0, 4);
988 0 : if (mDataSource->readAt(data_offset + 7, buf + 3, 1) < 1) {
989 0 : return ERROR_IO;
990 : }
991 0 : uint32_t defaultIVSize = ntohl(*((int32_t*)buf));
992 :
993 0 : if ((defaultAlgorithmId == 0 && defaultIVSize != 0) ||
994 0 : (defaultAlgorithmId != 0 && defaultIVSize == 0)) {
995 : // only unencrypted data must have 0 IV size
996 0 : return ERROR_MALFORMED;
997 0 : } else if (defaultIVSize != 0 &&
998 0 : defaultIVSize != 8 &&
999 : defaultIVSize != 16) {
1000 : // only supported sizes are 0, 8 and 16
1001 0 : return ERROR_MALFORMED;
1002 : }
1003 :
1004 : uint8_t defaultKeyId[16];
1005 :
1006 0 : if (mDataSource->readAt(data_offset + 8, &defaultKeyId, 16) < 16) {
1007 0 : return ERROR_IO;
1008 : }
1009 :
1010 0 : if (!mLastTrack) {
1011 0 : return ERROR_MALFORMED;
1012 : }
1013 0 : mLastTrack->meta->setInt32(kKeyCryptoMode, defaultAlgorithmId);
1014 0 : mLastTrack->meta->setInt32(kKeyCryptoDefaultIVSize, defaultIVSize);
1015 0 : mLastTrack->meta->setData(kKeyCryptoKey, 'tenc', defaultKeyId, 16);
1016 0 : *offset += chunk_size;
1017 0 : break;
1018 : }
1019 :
1020 : case FOURCC('t', 'k', 'h', 'd'):
1021 : {
1022 : status_t err;
1023 0 : if ((err = parseTrackHeader(data_offset, chunk_data_size)) != OK) {
1024 0 : return err;
1025 : }
1026 :
1027 0 : *offset += chunk_size;
1028 0 : break;
1029 : }
1030 :
1031 : case FOURCC('p', 's', 's', 'h'):
1032 : {
1033 : PsshInfo pssh;
1034 :
1035 : // We need the contents of the box header before data_offset. Make
1036 : // sure we don't underflow somehow.
1037 0 : CHECK(data_offset >= 8);
1038 :
1039 0 : uint32_t version = 0;
1040 0 : if (mDataSource->readAt(data_offset, &version, 4) < 4) {
1041 0 : return ERROR_IO;
1042 : }
1043 :
1044 0 : if (mDataSource->readAt(data_offset + 4, &pssh.uuid, 16) < 16) {
1045 0 : return ERROR_IO;
1046 : }
1047 :
1048 : // Copy the contents of the box (including header) verbatim.
1049 0 : pssh.datalen = chunk_data_size + 8;
1050 0 : pssh.data = new (fallible) uint8_t[pssh.datalen];
1051 0 : if (!pssh.data) {
1052 0 : return -ENOMEM;
1053 : }
1054 0 : if (mDataSource->readAt(data_offset - 8, pssh.data, pssh.datalen) < pssh.datalen) {
1055 0 : return ERROR_IO;
1056 : }
1057 :
1058 0 : mPssh.AppendElement(pssh);
1059 :
1060 0 : *offset += chunk_size;
1061 0 : break;
1062 : }
1063 :
1064 : case FOURCC('m', 'd', 'h', 'd'):
1065 : {
1066 0 : if (chunk_data_size < 4) {
1067 0 : return ERROR_MALFORMED;
1068 : }
1069 :
1070 : uint8_t version;
1071 0 : if (mDataSource->readAt(
1072 0 : data_offset, &version, sizeof(version))
1073 : < (ssize_t)sizeof(version)) {
1074 0 : return ERROR_IO;
1075 : }
1076 :
1077 : off64_t timescale_offset;
1078 :
1079 0 : if (version == 1) {
1080 0 : timescale_offset = data_offset + 4 + 16;
1081 0 : } else if (version == 0) {
1082 0 : timescale_offset = data_offset + 4 + 8;
1083 : } else {
1084 0 : return ERROR_IO;
1085 : }
1086 :
1087 : uint32_t timescale;
1088 0 : if (mDataSource->readAt(
1089 0 : timescale_offset, ×cale, sizeof(timescale))
1090 : < (ssize_t)sizeof(timescale)) {
1091 0 : return ERROR_IO;
1092 : }
1093 :
1094 0 : if (!mLastTrack) {
1095 0 : return ERROR_MALFORMED;
1096 : }
1097 0 : mLastTrack->timescale = ntohl(timescale);
1098 0 : if (!mLastTrack->timescale) {
1099 0 : return ERROR_MALFORMED;
1100 : }
1101 :
1102 : // Now that we've parsed the media timescale, we can interpret
1103 : // the edit list data.
1104 0 : storeEditList();
1105 :
1106 0 : int64_t duration = 0;
1107 0 : if (version == 1) {
1108 0 : if (mDataSource->readAt(
1109 0 : timescale_offset + 4, &duration, sizeof(duration))
1110 : < (ssize_t)sizeof(duration)) {
1111 0 : return ERROR_IO;
1112 : }
1113 : // Avoid duration sets to -1, which is incorrect.
1114 0 : if (duration != -1) {
1115 0 : duration = ntoh64(duration);
1116 : } else {
1117 0 : duration = 0;
1118 : }
1119 : } else {
1120 : uint32_t duration32;
1121 0 : if (mDataSource->readAt(
1122 0 : timescale_offset + 4, &duration32, sizeof(duration32))
1123 : < (ssize_t)sizeof(duration32)) {
1124 0 : return ERROR_IO;
1125 : }
1126 : // ffmpeg sets duration to -1, which is incorrect.
1127 0 : if (duration32 != 0xffffffff) {
1128 0 : duration = ntohl(duration32);
1129 : } else {
1130 0 : duration = 0;
1131 : }
1132 : }
1133 0 : if (duration < 0) {
1134 0 : return ERROR_MALFORMED;
1135 : }
1136 0 : int64_t duration_us = unitsToUs(duration, mLastTrack->timescale);
1137 0 : if (duration_us == OVERFLOW_ERROR) {
1138 0 : return ERROR_MALFORMED;
1139 : }
1140 0 : mLastTrack->meta->setInt64(kKeyDuration, duration_us);
1141 :
1142 : uint8_t lang[2];
1143 : off64_t lang_offset;
1144 0 : if (version == 1) {
1145 0 : lang_offset = timescale_offset + 4 + 8;
1146 0 : } else if (version == 0) {
1147 0 : lang_offset = timescale_offset + 4 + 4;
1148 : } else {
1149 0 : return ERROR_IO;
1150 : }
1151 :
1152 0 : if (mDataSource->readAt(lang_offset, &lang, sizeof(lang))
1153 : < (ssize_t)sizeof(lang)) {
1154 0 : return ERROR_IO;
1155 : }
1156 :
1157 : // To get the ISO-639-2/T three character language code
1158 : // 1 bit pad followed by 3 5-bits characters. Each character
1159 : // is packed as the difference between its ASCII value and 0x60.
1160 : char lang_code[4];
1161 0 : lang_code[0] = ((lang[0] >> 2) & 0x1f) + 0x60;
1162 0 : lang_code[1] = ((lang[0] & 0x3) << 3 | (lang[1] >> 5)) + 0x60;
1163 0 : lang_code[2] = (lang[1] & 0x1f) + 0x60;
1164 0 : lang_code[3] = '\0';
1165 :
1166 0 : mLastTrack->meta->setCString(
1167 0 : kKeyMediaLanguage, lang_code);
1168 :
1169 0 : *offset += chunk_size;
1170 0 : break;
1171 : }
1172 :
1173 : case FOURCC('s', 't', 's', 'd'):
1174 : {
1175 0 : if (chunk_data_size < 8) {
1176 0 : return ERROR_MALFORMED;
1177 : }
1178 :
1179 : uint8_t buffer[8];
1180 0 : if (chunk_data_size < (off64_t)sizeof(buffer)) {
1181 0 : return ERROR_MALFORMED;
1182 : }
1183 :
1184 0 : if (mDataSource->readAt(
1185 0 : data_offset, buffer, 8) < 8) {
1186 0 : return ERROR_IO;
1187 : }
1188 :
1189 0 : if (U32_AT(buffer) != 0) {
1190 : // Should be version 0, flags 0.
1191 0 : return ERROR_MALFORMED;
1192 : }
1193 :
1194 0 : uint32_t entry_count = U32_AT(&buffer[4]);
1195 :
1196 0 : if (entry_count > 1) {
1197 : // For 3GPP timed text, there could be multiple tx3g boxes contain
1198 : // multiple text display formats. These formats will be used to
1199 : // display the timed text.
1200 : // For encrypted files, there may also be more than one entry.
1201 : const char *mime;
1202 0 : if (!mLastTrack) {
1203 0 : return ERROR_MALFORMED;
1204 : }
1205 0 : CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime));
1206 0 : if (strcasecmp(mime, MEDIA_MIMETYPE_TEXT_3GPP) &&
1207 0 : strcasecmp(mime, "application/octet-stream")) {
1208 : // For now we only support a single type of media per track.
1209 0 : mLastTrack->skipTrack = true;
1210 0 : *offset += chunk_size;
1211 0 : break;
1212 : }
1213 : }
1214 0 : off64_t stop_offset = *offset + chunk_size;
1215 0 : *offset = data_offset + 8;
1216 0 : for (uint32_t i = 0; i < entry_count; ++i) {
1217 0 : status_t err = parseChunk(offset, depth + 1);
1218 0 : if (err != OK) {
1219 0 : return err;
1220 : }
1221 : }
1222 :
1223 : // Some muxers add some padding after the stsd content. Skip it.
1224 0 : *offset = stop_offset;
1225 0 : break;
1226 : }
1227 :
1228 : case FOURCC('m', 'p', '4', 'a'):
1229 : case FOURCC('.', 'm', 'p', '3'):
1230 : case FOURCC('e', 'n', 'c', 'a'):
1231 : case FOURCC('s', 'a', 'm', 'r'):
1232 : case FOURCC('s', 'a', 'w', 'b'):
1233 : {
1234 : // QT's MP4 may have an empty MP4A atom within a MP4A atom.
1235 : // Ignore it.
1236 0 : if (chunk_data_size == 4) {
1237 0 : *offset += chunk_size;
1238 0 : break;
1239 : }
1240 : uint8_t buffer[8 + 20];
1241 0 : if (chunk_data_size < (ssize_t)sizeof(buffer)) {
1242 : // Basic AudioSampleEntry size.
1243 0 : return ERROR_MALFORMED;
1244 : }
1245 :
1246 0 : if (mDataSource->readAt(
1247 0 : data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
1248 0 : return ERROR_IO;
1249 : }
1250 :
1251 0 : uint16_t data_ref_index = U16_AT(&buffer[6]);
1252 0 : uint16_t qt_version = U16_AT(&buffer[8]);
1253 0 : uint32_t num_channels = U16_AT(&buffer[16]);
1254 :
1255 0 : uint16_t sample_size = U16_AT(&buffer[18]);
1256 0 : uint32_t sample_rate = U32_AT(&buffer[24]) >> 16;
1257 :
1258 0 : if (!mLastTrack) {
1259 0 : return ERROR_MALFORMED;
1260 : }
1261 0 : if (chunk_type != FOURCC('e', 'n', 'c', 'a')) {
1262 : // if the chunk type is enca, we'll get the type from the sinf/frma box later
1263 0 : const char* mime = FourCC2MIME(chunk_type);
1264 0 : if (!mime) {
1265 0 : return ERROR_UNSUPPORTED;
1266 : }
1267 0 : mLastTrack->meta->setCString(kKeyMIMEType, mime);
1268 0 : AdjustChannelsAndRate(chunk_type, &num_channels, &sample_rate);
1269 : }
1270 :
1271 0 : uint64_t skip = 0;
1272 0 : if (qt_version == 1) {
1273 : // Skip QTv1 extension
1274 : // uint32_t SamplesPerPacket
1275 : // uint32_t BytesPerPacket
1276 : // uint32_t BytesPerFrame
1277 : // uint32_t BytesPerSample
1278 0 : skip = 16;
1279 0 : } else if (qt_version == 2) {
1280 : // Skip QTv2 extension
1281 : // uint32_t Qt V2 StructSize
1282 : // double SampleRate
1283 : // uint32_t ChannelCount
1284 : // uint32_t Reserved
1285 : // uint32_t BitsPerChannel
1286 : // uint32_t LPCMFormatSpecificFlags
1287 : // uint32_t BytesPerAudioPacket
1288 : // uint32_t LPCMFramesPerAudioPacket
1289 : // if (Qt V2 StructSize > 72) {
1290 : // StructSize-72: Qt V2 extension
1291 : // }
1292 : uint32_t structSize32;
1293 0 : if (mDataSource->readAt(
1294 0 : data_offset + 28, &structSize32, sizeof(structSize32))
1295 : < (ssize_t)sizeof(structSize32)) {
1296 0 : return ERROR_IO;
1297 : }
1298 0 : uint32_t structSize = ntohl(structSize32);
1299 : // Read SampleRate.
1300 : uint64_t sample_rate64;
1301 0 : if (mDataSource->readAt(
1302 0 : data_offset + 32, &sample_rate64, sizeof(sample_rate64))
1303 : < (ssize_t)sizeof(sample_rate64)) {
1304 0 : return ERROR_IO;
1305 : }
1306 0 : uint64_t i_value = ntoh64(sample_rate64);
1307 0 : void* v_value = reinterpret_cast<void*>(&i_value);
1308 0 : sample_rate = uint32_t(*reinterpret_cast<double*>(v_value));
1309 : // Read ChannelCount.
1310 : uint32_t channel_count32;
1311 0 : if (mDataSource->readAt(
1312 0 : data_offset + 40, &channel_count32, sizeof(channel_count32))
1313 : < (ssize_t)sizeof(channel_count32)) {
1314 0 : return ERROR_IO;
1315 : }
1316 0 : num_channels = ntohl(channel_count32);
1317 :
1318 0 : skip += 36;
1319 0 : if (structSize > 72) {
1320 0 : skip += structSize - 72;
1321 : }
1322 : }
1323 : ALOGV("*** coding='%s' %d channels, size %d, rate %d\n",
1324 : chunk, num_channels, sample_size, sample_rate);
1325 0 : mLastTrack->meta->setInt32(kKeyChannelCount, num_channels);
1326 0 : mLastTrack->meta->setInt32(kKeySampleSize, sample_size);
1327 0 : mLastTrack->meta->setInt32(kKeySampleRate, sample_rate);
1328 :
1329 0 : off64_t stop_offset = *offset + chunk_size;
1330 0 : *offset = data_offset + sizeof(buffer) + skip;
1331 0 : while (*offset < stop_offset) {
1332 0 : status_t err = parseChunk(offset, depth + 1);
1333 0 : if (err != OK) {
1334 0 : return err;
1335 : }
1336 : }
1337 :
1338 0 : if (*offset != stop_offset) {
1339 0 : return ERROR_MALFORMED;
1340 : }
1341 0 : break;
1342 : }
1343 :
1344 : case FOURCC('m', 'p', '4', 'v'):
1345 : case FOURCC('e', 'n', 'c', 'v'):
1346 : case FOURCC('s', '2', '6', '3'):
1347 : case FOURCC('H', '2', '6', '3'):
1348 : case FOURCC('h', '2', '6', '3'):
1349 : case FOURCC('a', 'v', 'c', '1'):
1350 : case FOURCC('a', 'v', 'c', '3'):
1351 : case FOURCC('V', 'P', '6', 'F'):
1352 : {
1353 0 : mHasVideo = true;
1354 :
1355 : uint8_t buffer[78];
1356 0 : if (chunk_data_size < (ssize_t)sizeof(buffer)) {
1357 : // Basic VideoSampleEntry size.
1358 0 : return ERROR_MALFORMED;
1359 : }
1360 :
1361 0 : if (mDataSource->readAt(
1362 0 : data_offset, buffer, sizeof(buffer)) < (ssize_t)sizeof(buffer)) {
1363 0 : return ERROR_IO;
1364 : }
1365 :
1366 0 : uint16_t data_ref_index = U16_AT(&buffer[6]);
1367 0 : uint16_t width = U16_AT(&buffer[6 + 18]);
1368 0 : uint16_t height = U16_AT(&buffer[6 + 20]);
1369 :
1370 : // The video sample is not standard-compliant if it has invalid dimension.
1371 : // Use some default width and height value, and
1372 : // let the decoder figure out the actual width and height (and thus
1373 : // be prepared for INFO_FOMRAT_CHANGED event).
1374 0 : if (width == 0) width = 352;
1375 0 : if (height == 0) height = 288;
1376 :
1377 : // printf("*** coding='%s' width=%d height=%d\n",
1378 : // chunk, width, height);
1379 :
1380 0 : if (!mLastTrack) {
1381 0 : return ERROR_MALFORMED;
1382 : }
1383 0 : if (chunk_type != FOURCC('e', 'n', 'c', 'v')) {
1384 : // if the chunk type is encv, we'll get the type from the sinf/frma box later
1385 0 : const char* mime = FourCC2MIME(chunk_type);
1386 0 : if (!mime) {
1387 0 : return ERROR_UNSUPPORTED;
1388 : }
1389 0 : mLastTrack->meta->setCString(kKeyMIMEType, mime);
1390 : }
1391 0 : mLastTrack->meta->setInt32(kKeyWidth, width);
1392 0 : mLastTrack->meta->setInt32(kKeyHeight, height);
1393 :
1394 0 : off64_t stop_offset = *offset + chunk_size;
1395 0 : *offset = data_offset + sizeof(buffer);
1396 0 : while (*offset < stop_offset) {
1397 0 : status_t err = parseChunk(offset, depth + 1);
1398 0 : if (err != OK) {
1399 0 : return err;
1400 : }
1401 : // Some Apple QuickTime muxed videos appear to have some padding.
1402 : // Ignore it and assume we've reached the end.
1403 0 : if (stop_offset - *offset < 8) {
1404 0 : *offset = stop_offset;
1405 : }
1406 : }
1407 :
1408 0 : if (*offset != stop_offset) {
1409 0 : return ERROR_MALFORMED;
1410 : }
1411 0 : break;
1412 : }
1413 :
1414 : case FOURCC('s', 't', 'c', 'o'):
1415 : case FOURCC('c', 'o', '6', '4'):
1416 : {
1417 0 : if (!mLastTrack || !mLastTrack->sampleTable.get()) {
1418 0 : return ERROR_MALFORMED;
1419 : }
1420 : status_t err =
1421 0 : mLastTrack->sampleTable->setChunkOffsetParams(
1422 0 : chunk_type, data_offset, chunk_data_size);
1423 :
1424 0 : if (err != OK) {
1425 0 : return err;
1426 : }
1427 :
1428 0 : *offset += chunk_size;
1429 0 : break;
1430 : }
1431 :
1432 : case FOURCC('s', 't', 's', 'c'):
1433 : {
1434 0 : if (!mLastTrack || !mLastTrack->sampleTable.get()) {
1435 0 : return ERROR_MALFORMED;
1436 : }
1437 : status_t err =
1438 0 : mLastTrack->sampleTable->setSampleToChunkParams(
1439 0 : data_offset, chunk_data_size);
1440 :
1441 0 : if (err != OK) {
1442 0 : return err;
1443 : }
1444 :
1445 0 : *offset += chunk_size;
1446 0 : break;
1447 : }
1448 :
1449 : case FOURCC('s', 't', 's', 'z'):
1450 : case FOURCC('s', 't', 'z', '2'):
1451 : {
1452 0 : if (!mLastTrack || !mLastTrack->sampleTable.get()) {
1453 0 : return ERROR_MALFORMED;
1454 : }
1455 : status_t err =
1456 0 : mLastTrack->sampleTable->setSampleSizeParams(
1457 0 : chunk_type, data_offset, chunk_data_size);
1458 :
1459 0 : if (err != OK) {
1460 0 : return err;
1461 : }
1462 :
1463 : size_t max_size;
1464 0 : err = mLastTrack->sampleTable->getMaxSampleSize(&max_size);
1465 :
1466 0 : if (err != OK) {
1467 0 : return err;
1468 : }
1469 :
1470 0 : if (max_size != 0) {
1471 : // Assume that a given buffer only contains at most 10 chunks,
1472 : // each chunk originally prefixed with a 2 byte length will
1473 : // have a 4 byte header (0x00 0x00 0x00 0x01) after conversion,
1474 : // and thus will grow by 2 bytes per chunk.
1475 0 : mLastTrack->meta->setInt32(kKeyMaxInputSize, max_size + 10 * 2);
1476 : } else {
1477 : // No size was specified. Pick a conservatively large size.
1478 : int32_t width, height;
1479 0 : if (mLastTrack->meta->findInt32(kKeyWidth, &width) &&
1480 0 : mLastTrack->meta->findInt32(kKeyHeight, &height)) {
1481 0 : mLastTrack->meta->setInt32(kKeyMaxInputSize, width * height * 3 / 2);
1482 : } else {
1483 : ALOGV("No width or height, assuming worst case 1080p");
1484 0 : mLastTrack->meta->setInt32(kKeyMaxInputSize, 3110400);
1485 : }
1486 : }
1487 0 : *offset += chunk_size;
1488 :
1489 : // Calculate average frame rate.
1490 : const char *mime;
1491 0 : CHECK(mLastTrack->meta->findCString(kKeyMIMEType, &mime));
1492 0 : if (!strncasecmp("video/", mime, 6)) {
1493 0 : size_t nSamples = mLastTrack->sampleTable->countSamples();
1494 : int64_t durationUs;
1495 0 : if (mLastTrack->meta->findInt64(kKeyDuration, &durationUs)) {
1496 0 : if (durationUs > 0) {
1497 0 : int32_t frameRate = (nSamples * 1000000LL +
1498 0 : (durationUs >> 1)) / durationUs;
1499 0 : mLastTrack->meta->setInt32(kKeyFrameRate, frameRate);
1500 : }
1501 : }
1502 : }
1503 :
1504 0 : break;
1505 : }
1506 :
1507 : case FOURCC('s', 't', 't', 's'):
1508 : {
1509 0 : if (!mLastTrack || !mLastTrack->sampleTable.get()) {
1510 0 : return ERROR_MALFORMED;
1511 : }
1512 : status_t err =
1513 0 : mLastTrack->sampleTable->setTimeToSampleParams(
1514 0 : data_offset, chunk_data_size);
1515 :
1516 0 : if (err != OK) {
1517 0 : return err;
1518 : }
1519 :
1520 0 : *offset += chunk_size;
1521 0 : break;
1522 : }
1523 :
1524 : case FOURCC('c', 't', 't', 's'):
1525 : {
1526 0 : if (!mLastTrack || !mLastTrack->sampleTable.get()) {
1527 0 : return ERROR_MALFORMED;
1528 : }
1529 : status_t err =
1530 0 : mLastTrack->sampleTable->setCompositionTimeToSampleParams(
1531 0 : data_offset, chunk_data_size);
1532 :
1533 0 : if (err != OK) {
1534 0 : return err;
1535 : }
1536 :
1537 0 : *offset += chunk_size;
1538 0 : break;
1539 : }
1540 :
1541 : case FOURCC('s', 't', 's', 's'):
1542 : {
1543 0 : if (!mLastTrack || !mLastTrack->sampleTable.get()) {
1544 0 : return ERROR_MALFORMED;
1545 : }
1546 : status_t err =
1547 0 : mLastTrack->sampleTable->setSyncSampleParams(
1548 0 : data_offset, chunk_data_size);
1549 :
1550 0 : if (err != OK) {
1551 0 : return err;
1552 : }
1553 :
1554 0 : *offset += chunk_size;
1555 0 : break;
1556 : }
1557 :
1558 : case FOURCC('s', 'a', 'i', 'z'):
1559 : {
1560 0 : if (!mLastTrack || !mLastTrack->sampleTable.get()) {
1561 0 : return ERROR_MALFORMED;
1562 : }
1563 : status_t err =
1564 0 : mLastTrack->sampleTable->setSampleAuxiliaryInformationSizeParams(
1565 0 : data_offset, chunk_data_size, mDrmScheme);
1566 :
1567 0 : if (err != OK) {
1568 0 : return err;
1569 : }
1570 :
1571 0 : *offset += chunk_size;
1572 0 : break;
1573 : }
1574 :
1575 : case FOURCC('s', 'a', 'i', 'o'):
1576 : {
1577 0 : if (!mLastTrack || !mLastTrack->sampleTable.get()) {
1578 0 : return ERROR_MALFORMED;
1579 : }
1580 : status_t err =
1581 0 : mLastTrack->sampleTable->setSampleAuxiliaryInformationOffsetParams(
1582 0 : data_offset, chunk_data_size, mDrmScheme);
1583 :
1584 0 : if (err != OK) {
1585 0 : return err;
1586 : }
1587 :
1588 0 : *offset += chunk_size;
1589 0 : break;
1590 : }
1591 :
1592 : // @xyz
1593 : case FOURCC('\xA9', 'x', 'y', 'z'):
1594 : {
1595 : // Best case the total data length inside "@xyz" box
1596 : // would be 8, for instance "@xyz" + "\x00\x04\x15\xc7" + "0+0/",
1597 : // where "\x00\x04" is the text string length with value = 4,
1598 : // "\0x15\xc7" is the language code = en, and "0+0" is a
1599 : // location (string) value with longitude = 0 and latitude = 0.
1600 0 : if (chunk_data_size < 8) {
1601 0 : return ERROR_MALFORMED;
1602 : }
1603 :
1604 : // Worst case the location string length would be 18,
1605 : // for instance +90.0000-180.0000, without the trailing "/" and
1606 : // the string length + language code.
1607 : char buffer[18];
1608 :
1609 : // Substracting 5 from the data size is because the text string length +
1610 : // language code takes 4 bytes, and the trailing slash "/" takes 1 byte.
1611 0 : off64_t location_length = chunk_data_size - 5;
1612 0 : if (location_length >= (off64_t) sizeof(buffer)) {
1613 0 : return ERROR_MALFORMED;
1614 : }
1615 :
1616 0 : if (mDataSource->readAt(
1617 0 : data_offset + 4, buffer, location_length) < location_length) {
1618 0 : return ERROR_IO;
1619 : }
1620 :
1621 0 : buffer[location_length] = '\0';
1622 0 : mFileMetaData->setCString(kKeyLocation, buffer);
1623 0 : *offset += chunk_size;
1624 0 : break;
1625 : }
1626 :
1627 : case FOURCC('e', 's', 'd', 's'):
1628 : {
1629 0 : if (chunk_data_size < 4) {
1630 0 : return ERROR_MALFORMED;
1631 : }
1632 :
1633 : uint8_t buffer[256];
1634 0 : if (chunk_data_size > (off64_t)sizeof(buffer)) {
1635 0 : return ERROR_BUFFER_TOO_SMALL;
1636 : }
1637 :
1638 0 : if (mDataSource->readAt(
1639 0 : data_offset, buffer, chunk_data_size) < chunk_data_size) {
1640 0 : return ERROR_IO;
1641 : }
1642 :
1643 0 : if (U32_AT(buffer) != 0) {
1644 : // Should be version 0, flags 0.
1645 0 : return ERROR_MALFORMED;
1646 : }
1647 :
1648 0 : if (!mLastTrack) {
1649 0 : return ERROR_MALFORMED;
1650 : }
1651 0 : mLastTrack->meta->setData(
1652 0 : kKeyESDS, kTypeESDS, &buffer[4], chunk_data_size - 4);
1653 :
1654 0 : if (mPath.Length() >= 2
1655 0 : && (mPath[mPath.Length() - 2] == FOURCC('m', 'p', '4', 'a') ||
1656 0 : (mPath[mPath.Length() - 2] == FOURCC('e', 'n', 'c', 'a')) ||
1657 0 : (mPath[mPath.Length() - 2] == FOURCC('w', 'a', 'v', 'e')))) {
1658 : // Information from the ESDS must be relied on for proper
1659 : // setup of sample rate and channel count for MPEG4 Audio.
1660 : // The generic header appears to only contain generic
1661 : // information...
1662 :
1663 0 : status_t err = updateAudioTrackInfoFromESDS_MPEG4Audio(
1664 0 : &buffer[4], chunk_data_size - 4);
1665 :
1666 0 : if (err != OK) {
1667 0 : return err;
1668 : }
1669 : }
1670 :
1671 0 : *offset += chunk_size;
1672 0 : break;
1673 : }
1674 :
1675 : case FOURCC('a', 'v', 'c', 'C'):
1676 : {
1677 0 : if (chunk_data_size < 7) {
1678 0 : ALOGE("short avcC chunk (%" PRId64 " bytes)", int64_t(chunk_data_size));
1679 0 : return ERROR_MALFORMED;
1680 : }
1681 :
1682 0 : sp<ABuffer> buffer = new (fallible) ABuffer(chunk_data_size);
1683 0 : if (!buffer.get() || !buffer->data()) {
1684 0 : return -ENOMEM;
1685 : }
1686 :
1687 0 : if (mDataSource->readAt(
1688 0 : data_offset, buffer->data(), chunk_data_size) < chunk_data_size) {
1689 0 : return ERROR_IO;
1690 : }
1691 :
1692 0 : if (!mLastTrack) {
1693 0 : return ERROR_MALFORMED;
1694 : }
1695 0 : mLastTrack->meta->setData(
1696 0 : kKeyAVCC, kTypeAVCC, buffer->data(), chunk_data_size);
1697 :
1698 0 : *offset += chunk_size;
1699 0 : break;
1700 : }
1701 :
1702 : case FOURCC('d', '2', '6', '3'):
1703 : {
1704 : /*
1705 : * d263 contains a fixed 7 bytes part:
1706 : * vendor - 4 bytes
1707 : * version - 1 byte
1708 : * level - 1 byte
1709 : * profile - 1 byte
1710 : * optionally, "d263" box itself may contain a 16-byte
1711 : * bit rate box (bitr)
1712 : * average bit rate - 4 bytes
1713 : * max bit rate - 4 bytes
1714 : */
1715 : char buffer[23];
1716 0 : if (chunk_data_size != 7 &&
1717 : chunk_data_size != 23) {
1718 0 : ALOGE("Incorrect D263 box size %" PRId64, chunk_data_size);
1719 0 : return ERROR_MALFORMED;
1720 : }
1721 :
1722 0 : if (mDataSource->readAt(
1723 0 : data_offset, buffer, chunk_data_size) < chunk_data_size) {
1724 0 : return ERROR_IO;
1725 : }
1726 :
1727 0 : if (!mLastTrack) {
1728 0 : return ERROR_MALFORMED;
1729 : }
1730 0 : mLastTrack->meta->setData(kKeyD263, kTypeD263, buffer, chunk_data_size);
1731 :
1732 0 : *offset += chunk_size;
1733 0 : break;
1734 : }
1735 :
1736 : case FOURCC('m', 'e', 't', 'a'):
1737 : {
1738 : uint8_t buffer[4];
1739 0 : if (chunk_data_size < (off64_t)sizeof(buffer)) {
1740 0 : return ERROR_MALFORMED;
1741 : }
1742 :
1743 0 : if (mDataSource->readAt(
1744 0 : data_offset, buffer, 4) < 4) {
1745 0 : return ERROR_IO;
1746 : }
1747 :
1748 0 : if (U32_AT(buffer) != 0) {
1749 : // Should be version 0, flags 0.
1750 :
1751 : // If it's not, let's assume this is one of those
1752 : // apparently malformed chunks that don't have flags
1753 : // and completely different semantics than what's
1754 : // in the MPEG4 specs and skip it.
1755 0 : *offset += chunk_size;
1756 0 : return OK;
1757 : }
1758 :
1759 0 : off64_t stop_offset = *offset + chunk_size;
1760 0 : *offset = data_offset + sizeof(buffer);
1761 0 : while (*offset < stop_offset) {
1762 0 : status_t err = parseChunk(offset, depth + 1);
1763 0 : if (err != OK) {
1764 0 : return err;
1765 : }
1766 : }
1767 :
1768 0 : if (*offset != stop_offset) {
1769 0 : return ERROR_MALFORMED;
1770 : }
1771 0 : break;
1772 : }
1773 :
1774 : case FOURCC('m', 'e', 'a', 'n'):
1775 : case FOURCC('n', 'a', 'm', 'e'):
1776 : case FOURCC('d', 'a', 't', 'a'):
1777 : {
1778 0 : if (mPath.Length() == 6 && underMetaDataPath(mPath)) {
1779 0 : status_t err = parseMetaData(data_offset, chunk_data_size);
1780 :
1781 0 : if (err != OK) {
1782 0 : return err;
1783 : }
1784 : }
1785 :
1786 0 : *offset += chunk_size;
1787 0 : break;
1788 : }
1789 :
1790 : case FOURCC('m', 'v', 'h', 'd'):
1791 : {
1792 0 : if (chunk_data_size < 24) {
1793 0 : return ERROR_MALFORMED;
1794 : }
1795 :
1796 : uint8_t header[24];
1797 0 : if (mDataSource->readAt(
1798 0 : data_offset, header, sizeof(header))
1799 : < (ssize_t)sizeof(header)) {
1800 0 : return ERROR_IO;
1801 : }
1802 :
1803 0 : if (header[0] == 1) {
1804 0 : mHeaderTimescale = U32_AT(&header[20]);
1805 0 : } else if (header[0] != 0) {
1806 0 : return ERROR_MALFORMED;
1807 : } else {
1808 0 : mHeaderTimescale = U32_AT(&header[12]);
1809 : }
1810 :
1811 0 : *offset += chunk_size;
1812 0 : break;
1813 : }
1814 :
1815 : case FOURCC('m', 'e', 'h', 'd'):
1816 : {
1817 0 : if (chunk_data_size < 8) {
1818 0 : return ERROR_MALFORMED;
1819 : }
1820 :
1821 : uint8_t version;
1822 0 : if (mDataSource->readAt(
1823 0 : data_offset, &version, sizeof(version))
1824 : < (ssize_t)sizeof(version)) {
1825 0 : return ERROR_IO;
1826 : }
1827 0 : if (version > 1) {
1828 0 : break;
1829 : }
1830 0 : int64_t duration = 0;
1831 0 : if (version == 1) {
1832 0 : if (mDataSource->readAt(
1833 0 : data_offset + 4, &duration, sizeof(duration))
1834 : < (ssize_t)sizeof(duration)) {
1835 0 : return ERROR_IO;
1836 : }
1837 0 : duration = ntoh64(duration);
1838 : } else {
1839 : uint32_t duration32;
1840 0 : if (mDataSource->readAt(
1841 0 : data_offset + 4, &duration32, sizeof(duration32))
1842 : < (ssize_t)sizeof(duration32)) {
1843 0 : return ERROR_IO;
1844 : }
1845 0 : duration = ntohl(duration32);
1846 : }
1847 0 : if (duration < 0) {
1848 0 : return ERROR_MALFORMED;
1849 : }
1850 0 : int64_t duration_us = unitsToUs(duration, mHeaderTimescale);
1851 0 : if (duration_us == OVERFLOW_ERROR) {
1852 0 : return ERROR_MALFORMED;
1853 : }
1854 0 : if (duration && mHeaderTimescale) {
1855 0 : mFileMetaData->setInt64(kKeyMovieDuration, duration_us);
1856 : }
1857 :
1858 0 : *offset += chunk_size;
1859 0 : break;
1860 : }
1861 :
1862 : case FOURCC('m', 'd', 'a', 't'):
1863 : {
1864 : ALOGV("mdat chunk, drm: %d", mIsDrm);
1865 0 : if (!mIsDrm) {
1866 0 : *offset += chunk_size;
1867 0 : break;
1868 : }
1869 :
1870 0 : if (chunk_size < 8) {
1871 0 : return ERROR_MALFORMED;
1872 : }
1873 :
1874 0 : return parseDrmSINF(offset, data_offset);
1875 : }
1876 :
1877 : case FOURCC('h', 'd', 'l', 'r'):
1878 : {
1879 : uint32_t buffer;
1880 0 : if (mDataSource->readAt(
1881 0 : data_offset + 8, &buffer, 4) < 4) {
1882 0 : return ERROR_IO;
1883 : }
1884 :
1885 0 : uint32_t type = ntohl(buffer);
1886 : // For the 3GPP file format, the handler-type within the 'hdlr' box
1887 : // shall be 'text'. We also want to support 'sbtl' handler type
1888 : // for a practical reason as various MPEG4 containers use it.
1889 0 : if (type == FOURCC('t', 'e', 'x', 't') || type == FOURCC('s', 'b', 't', 'l')) {
1890 0 : if (!mLastTrack) {
1891 0 : return ERROR_MALFORMED;
1892 : }
1893 0 : mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_TEXT_3GPP);
1894 : }
1895 :
1896 0 : *offset += chunk_size;
1897 0 : break;
1898 : }
1899 :
1900 : case FOURCC('t', 'x', '3', 'g'):
1901 : {
1902 0 : if (!mLastTrack) {
1903 0 : return ERROR_MALFORMED;
1904 : }
1905 : uint32_t type;
1906 : const void *data;
1907 0 : size_t size = 0;
1908 0 : if (!mLastTrack->meta->findData(
1909 : kKeyTextFormatData, &type, &data, &size)) {
1910 0 : size = 0;
1911 : }
1912 :
1913 : // Make sure (size + chunk_size) isn't going to overflow.
1914 0 : if (size >= kMAX_ALLOCATION - chunk_size) {
1915 0 : return ERROR_MALFORMED;
1916 : }
1917 0 : uint8_t *buffer = new (fallible) uint8_t[size + chunk_size];
1918 0 : if (!buffer) {
1919 0 : return -ENOMEM;
1920 : }
1921 :
1922 0 : if (size > 0) {
1923 0 : memcpy(buffer, data, size);
1924 : }
1925 :
1926 0 : if ((size_t)(mDataSource->readAt(*offset, buffer + size, chunk_size))
1927 0 : < chunk_size) {
1928 0 : delete[] buffer;
1929 0 : buffer = NULL;
1930 :
1931 0 : return ERROR_IO;
1932 : }
1933 :
1934 0 : mLastTrack->meta->setData(
1935 0 : kKeyTextFormatData, 0, buffer, size + chunk_size);
1936 :
1937 0 : delete[] buffer;
1938 :
1939 0 : *offset += chunk_size;
1940 0 : break;
1941 : }
1942 :
1943 : case FOURCC('c', 'o', 'v', 'r'):
1944 : {
1945 0 : if (mFileMetaData != NULL) {
1946 : ALOGV("chunk_data_size = %lld and data_offset = %lld",
1947 : chunk_data_size, data_offset);
1948 0 : const int kSkipBytesOfDataBox = 16;
1949 0 : if (chunk_data_size <= kSkipBytesOfDataBox) {
1950 0 : return ERROR_MALFORMED;
1951 : }
1952 0 : sp<ABuffer> buffer = new (fallible) ABuffer(chunk_data_size + 1);
1953 0 : if (!buffer.get() || !buffer->data()) {
1954 0 : return -ENOMEM;
1955 : }
1956 0 : if (mDataSource->readAt(
1957 0 : data_offset, buffer->data(), chunk_data_size) != (ssize_t)chunk_data_size) {
1958 0 : return ERROR_IO;
1959 : }
1960 0 : mFileMetaData->setData(
1961 : kKeyAlbumArt, MetaData::TYPE_NONE,
1962 0 : buffer->data() + kSkipBytesOfDataBox, chunk_data_size - kSkipBytesOfDataBox);
1963 : }
1964 :
1965 0 : *offset += chunk_size;
1966 0 : break;
1967 : }
1968 :
1969 : case FOURCC('-', '-', '-', '-'):
1970 : {
1971 0 : mLastCommentMean.clear();
1972 0 : mLastCommentName.clear();
1973 0 : mLastCommentData.clear();
1974 0 : *offset += chunk_size;
1975 0 : break;
1976 : }
1977 :
1978 : case FOURCC('s', 'i', 'd', 'x'):
1979 : {
1980 0 : parseSegmentIndex(data_offset, chunk_data_size);
1981 0 : *offset += chunk_size;
1982 0 : return static_cast<status_t>(UNKNOWN_ERROR); // stop parsing after sidx
1983 : }
1984 :
1985 : case FOURCC('w', 'a', 'v', 'e'):
1986 : {
1987 0 : off64_t stop_offset = *offset + chunk_size;
1988 0 : *offset = data_offset;
1989 0 : while (*offset < stop_offset) {
1990 0 : status_t err = parseChunk(offset, depth + 1);
1991 0 : if (err != OK) {
1992 0 : return err;
1993 : }
1994 : }
1995 :
1996 0 : if (*offset != stop_offset) {
1997 0 : return ERROR_MALFORMED;
1998 : }
1999 0 : break;
2000 : }
2001 :
2002 : default:
2003 : {
2004 0 : *offset += chunk_size;
2005 0 : break;
2006 : }
2007 : }
2008 :
2009 0 : return OK;
2010 : }
2011 :
2012 0 : void MPEG4Extractor::storeEditList()
2013 : {
2014 0 : if (mHeaderTimescale == 0 ||
2015 0 : !mLastTrack ||
2016 0 : mLastTrack->timescale == 0) {
2017 0 : return;
2018 : }
2019 :
2020 0 : if (mLastTrack->segment_duration > uint64_t(INT64_MAX) ||
2021 0 : mLastTrack->empty_duration > uint64_t(INT64_MAX)) {
2022 0 : return;
2023 : }
2024 : uint64_t segment_duration =
2025 0 : uint64_t(unitsToUs(mLastTrack->segment_duration, mHeaderTimescale));
2026 : // media_time is measured in media time scale units.
2027 0 : int64_t media_time = unitsToUs(mLastTrack->media_time, mLastTrack->timescale);
2028 : // empty_duration is in the Movie Header Box's timescale.
2029 0 : int64_t empty_duration = unitsToUs(mLastTrack->empty_duration, mHeaderTimescale);
2030 0 : if (segment_duration == OVERFLOW_ERROR ||
2031 0 : media_time == OVERFLOW_ERROR ||
2032 : empty_duration == OVERFLOW_ERROR) {
2033 0 : return;
2034 : }
2035 0 : media_time -= empty_duration;
2036 0 : mLastTrack->meta->setInt64(kKeyMediaTime, media_time);
2037 :
2038 : int64_t duration;
2039 : int32_t samplerate;
2040 0 : if (mLastTrack->meta->findInt64(kKeyDuration, &duration) &&
2041 0 : mLastTrack->meta->findInt32(kKeySampleRate, &samplerate)) {
2042 :
2043 0 : int64_t delay = (media_time * samplerate + 500000) / 1000000;
2044 0 : mLastTrack->meta->setInt32(kKeyEncoderDelay, delay);
2045 :
2046 0 : int64_t paddingus = duration - (segment_duration + media_time);
2047 0 : int64_t paddingsamples = (paddingus * samplerate + 500000) / 1000000;
2048 0 : mLastTrack->meta->setInt32(kKeyEncoderPadding, paddingsamples);
2049 : }
2050 : }
2051 :
2052 0 : status_t MPEG4Extractor::parseSegmentIndex(off64_t offset, size_t size) {
2053 : ALOGV("MPEG4Extractor::parseSegmentIndex");
2054 :
2055 0 : if (size < 12) {
2056 0 : return -EINVAL;
2057 : }
2058 :
2059 : uint32_t flags;
2060 0 : if (!mDataSource->getUInt32(offset, &flags)) {
2061 0 : return ERROR_MALFORMED;
2062 : }
2063 :
2064 0 : uint32_t version = flags >> 24;
2065 0 : flags &= 0xffffff;
2066 :
2067 : ALOGV("sidx version %d", version);
2068 :
2069 : uint32_t referenceId;
2070 0 : if (!mDataSource->getUInt32(offset + 4, &referenceId)) {
2071 0 : return ERROR_MALFORMED;
2072 : }
2073 :
2074 : uint32_t timeScale;
2075 0 : if (!mDataSource->getUInt32(offset + 8, &timeScale)) {
2076 0 : return ERROR_MALFORMED;
2077 : }
2078 0 : if (!timeScale) {
2079 0 : return ERROR_MALFORMED;
2080 : }
2081 : ALOGV("sidx refid/timescale: %d/%d", referenceId, timeScale);
2082 :
2083 : uint64_t earliestPresentationTime;
2084 : uint64_t firstOffset;
2085 :
2086 0 : offset += 12;
2087 0 : size -= 12;
2088 :
2089 0 : if (version == 0) {
2090 0 : if (size < 8) {
2091 0 : return -EINVAL;
2092 : }
2093 : uint32_t tmp;
2094 0 : if (!mDataSource->getUInt32(offset, &tmp)) {
2095 0 : return ERROR_MALFORMED;
2096 : }
2097 0 : earliestPresentationTime = tmp;
2098 0 : if (!mDataSource->getUInt32(offset + 4, &tmp)) {
2099 0 : return ERROR_MALFORMED;
2100 : }
2101 0 : firstOffset = tmp;
2102 0 : offset += 8;
2103 0 : size -= 8;
2104 : } else {
2105 0 : if (size < 16) {
2106 0 : return -EINVAL;
2107 : }
2108 0 : if (!mDataSource->getUInt64(offset, &earliestPresentationTime)) {
2109 0 : return ERROR_MALFORMED;
2110 : }
2111 0 : if (!mDataSource->getUInt64(offset + 8, &firstOffset)) {
2112 0 : return ERROR_MALFORMED;
2113 : }
2114 0 : offset += 16;
2115 0 : size -= 16;
2116 : }
2117 : ALOGV("sidx pres/off: %Ld/%Ld", earliestPresentationTime, firstOffset);
2118 :
2119 0 : if (size < 4) {
2120 0 : return -EINVAL;
2121 : }
2122 :
2123 : uint16_t referenceCount;
2124 0 : if (!mDataSource->getUInt16(offset + 2, &referenceCount)) {
2125 0 : return ERROR_MALFORMED;
2126 : }
2127 0 : offset += 4;
2128 0 : size -= 4;
2129 : ALOGV("refcount: %d", referenceCount);
2130 :
2131 0 : if (size < referenceCount * 12) {
2132 0 : return -EINVAL;
2133 : }
2134 :
2135 0 : int64_t total_duration = 0;
2136 0 : for (unsigned int i = 0; i < referenceCount; i++) {
2137 : uint32_t d1, d2, d3;
2138 :
2139 0 : if (!mDataSource->getUInt32(offset, &d1) || // size
2140 0 : !mDataSource->getUInt32(offset + 4, &d2) || // duration
2141 0 : !mDataSource->getUInt32(offset + 8, &d3)) { // flags
2142 0 : return ERROR_MALFORMED;
2143 : }
2144 :
2145 0 : if (d1 & 0x80000000) {
2146 0 : ALOGW("sub-sidx boxes not supported yet");
2147 : }
2148 0 : bool sap = d3 & 0x80000000;
2149 0 : uint32_t saptype = (d3 >> 28) & 0x3;
2150 0 : if (!sap || saptype > 2) {
2151 0 : ALOGW("not a stream access point, or unsupported type");
2152 : }
2153 0 : total_duration += d2;
2154 0 : offset += 12;
2155 : ALOGV(" item %d, %08x %08x %08x", i, d1, d2, d3);
2156 : SidxEntry se;
2157 0 : se.mSize = d1 & 0x7fffffff;
2158 0 : int64_t durationUs = unitsToUs(d2, timeScale);
2159 0 : if (durationUs == OVERFLOW_ERROR || durationUs > int64_t(UINT32_MAX)) {
2160 0 : return ERROR_MALFORMED;
2161 : }
2162 0 : se.mDurationUs = uint32_t(durationUs);
2163 0 : mSidxEntries.AppendElement(se);
2164 : }
2165 :
2166 0 : mSidxDuration = unitsToUs(total_duration, timeScale);
2167 0 : if (mSidxDuration == OVERFLOW_ERROR) {
2168 0 : return ERROR_MALFORMED;
2169 : }
2170 : ALOGV("duration: %lld", mSidxDuration);
2171 :
2172 0 : if (!mLastTrack) {
2173 0 : return ERROR_MALFORMED;
2174 : }
2175 : int64_t metaDuration;
2176 0 : if (!mLastTrack->meta->findInt64(kKeyDuration, &metaDuration) || metaDuration == 0) {
2177 0 : mLastTrack->meta->setInt64(kKeyDuration, mSidxDuration);
2178 : }
2179 0 : return OK;
2180 : }
2181 :
2182 0 : status_t MPEG4Extractor::parseTrackHeader(
2183 : off64_t data_offset, off64_t data_size) {
2184 0 : if (data_size < 4) {
2185 0 : return ERROR_MALFORMED;
2186 : }
2187 :
2188 : uint8_t version;
2189 0 : if (mDataSource->readAt(data_offset, &version, 1) < 1) {
2190 0 : return ERROR_IO;
2191 : }
2192 :
2193 0 : size_t dynSize = (version == 1) ? 36 : 24;
2194 :
2195 : uint8_t buffer[36 + 60];
2196 :
2197 0 : if (data_size != (off64_t)dynSize + 60) {
2198 0 : return ERROR_MALFORMED;
2199 : }
2200 :
2201 0 : if (mDataSource->readAt(
2202 0 : data_offset, buffer, data_size) < (ssize_t)data_size) {
2203 0 : return ERROR_IO;
2204 : }
2205 :
2206 : uint64_t ctime, mtime, duration;
2207 : int32_t id;
2208 :
2209 0 : if (version == 1) {
2210 0 : ctime = U64_AT(&buffer[4]);
2211 0 : mtime = U64_AT(&buffer[12]);
2212 0 : id = U32_AT(&buffer[20]);
2213 0 : duration = U64_AT(&buffer[28]);
2214 0 : } else if (version == 0) {
2215 0 : ctime = U32_AT(&buffer[4]);
2216 0 : mtime = U32_AT(&buffer[8]);
2217 0 : id = U32_AT(&buffer[12]);
2218 0 : duration = U32_AT(&buffer[20]);
2219 : } else {
2220 0 : return ERROR_UNSUPPORTED;
2221 : }
2222 :
2223 0 : if (!mLastTrack) {
2224 0 : return ERROR_MALFORMED;
2225 : }
2226 0 : mLastTrack->meta->setInt32(kKeyTrackID, id);
2227 :
2228 0 : size_t matrixOffset = dynSize + 16;
2229 0 : int32_t a00 = U32_AT(&buffer[matrixOffset]);
2230 0 : int32_t a01 = U32_AT(&buffer[matrixOffset + 4]);
2231 0 : int32_t dx = U32_AT(&buffer[matrixOffset + 8]);
2232 0 : int32_t a10 = U32_AT(&buffer[matrixOffset + 12]);
2233 0 : int32_t a11 = U32_AT(&buffer[matrixOffset + 16]);
2234 0 : int32_t dy = U32_AT(&buffer[matrixOffset + 20]);
2235 :
2236 : #if 0
2237 : ALOGI("x' = %.2f * x + %.2f * y + %.2f",
2238 : a00 / 65536.0f, a01 / 65536.0f, dx / 65536.0f);
2239 : ALOGI("y' = %.2f * x + %.2f * y + %.2f",
2240 : a10 / 65536.0f, a11 / 65536.0f, dy / 65536.0f);
2241 : #endif
2242 :
2243 : uint32_t rotationDegrees;
2244 :
2245 : static const int32_t kFixedOne = 0x10000;
2246 0 : if (a00 == kFixedOne && a01 == 0 && a10 == 0 && a11 == kFixedOne) {
2247 : // Identity, no rotation
2248 0 : rotationDegrees = 0;
2249 0 : } else if (a00 == 0 && a01 == kFixedOne && a10 == -kFixedOne && a11 == 0) {
2250 0 : rotationDegrees = 90;
2251 0 : } else if (a00 == 0 && a01 == -kFixedOne && a10 == kFixedOne && a11 == 0) {
2252 0 : rotationDegrees = 270;
2253 0 : } else if (a00 == -kFixedOne && a01 == 0 && a10 == 0 && a11 == -kFixedOne) {
2254 0 : rotationDegrees = 180;
2255 : } else {
2256 0 : ALOGW("We only support 0,90,180,270 degree rotation matrices");
2257 0 : rotationDegrees = 0;
2258 : }
2259 :
2260 0 : if (rotationDegrees != 0) {
2261 0 : mLastTrack->meta->setInt32(kKeyRotation, rotationDegrees);
2262 : }
2263 :
2264 : // Handle presentation display size, which could be different
2265 : // from the image size indicated by kKeyWidth and kKeyHeight.
2266 0 : uint32_t width = U32_AT(&buffer[dynSize + 52]);
2267 0 : uint32_t height = U32_AT(&buffer[dynSize + 56]);
2268 0 : mLastTrack->meta->setInt32(kKeyDisplayWidth, width >> 16);
2269 0 : mLastTrack->meta->setInt32(kKeyDisplayHeight, height >> 16);
2270 :
2271 0 : return OK;
2272 : }
2273 :
2274 0 : status_t MPEG4Extractor::parseMetaData(off64_t offset, size_t size) {
2275 0 : if (size < 4) {
2276 0 : return ERROR_MALFORMED;
2277 : }
2278 :
2279 0 : FallibleTArray<uint8_t> bufferBackend;
2280 0 : if (!bufferBackend.SetLength(size + 1, mozilla::fallible)) {
2281 : // OOM ignore metadata.
2282 0 : return OK;
2283 : }
2284 :
2285 0 : uint8_t *buffer = bufferBackend.Elements();
2286 0 : if (mDataSource->readAt(
2287 0 : offset, buffer, size) != (ssize_t)size) {
2288 0 : return ERROR_IO;
2289 : }
2290 :
2291 0 : uint32_t flags = U32_AT(buffer);
2292 :
2293 0 : uint32_t metadataKey = 0;
2294 : char chunk[5];
2295 0 : MakeFourCCString(mPath[4], chunk);
2296 : ALOGV("meta: %s @ %lld", chunk, offset);
2297 0 : switch (mPath[4]) {
2298 : case FOURCC(0xa9, 'a', 'l', 'b'):
2299 : {
2300 0 : metadataKey = kKeyAlbum;
2301 0 : break;
2302 : }
2303 : case FOURCC(0xa9, 'A', 'R', 'T'):
2304 : {
2305 0 : metadataKey = kKeyArtist;
2306 0 : break;
2307 : }
2308 : case FOURCC('a', 'A', 'R', 'T'):
2309 : {
2310 0 : metadataKey = kKeyAlbumArtist;
2311 0 : break;
2312 : }
2313 : case FOURCC(0xa9, 'd', 'a', 'y'):
2314 : {
2315 0 : metadataKey = kKeyYear;
2316 0 : break;
2317 : }
2318 : case FOURCC(0xa9, 'n', 'a', 'm'):
2319 : {
2320 0 : metadataKey = kKeyTitle;
2321 0 : break;
2322 : }
2323 : case FOURCC(0xa9, 'w', 'r', 't'):
2324 : {
2325 0 : metadataKey = kKeyWriter;
2326 0 : break;
2327 : }
2328 : case FOURCC('c', 'o', 'v', 'r'):
2329 : {
2330 0 : metadataKey = kKeyAlbumArt;
2331 0 : break;
2332 : }
2333 : case FOURCC('g', 'n', 'r', 'e'):
2334 : {
2335 0 : metadataKey = kKeyGenre;
2336 0 : break;
2337 : }
2338 : case FOURCC(0xa9, 'g', 'e', 'n'):
2339 : {
2340 0 : metadataKey = kKeyGenre;
2341 0 : break;
2342 : }
2343 : case FOURCC('c', 'p', 'i', 'l'):
2344 : {
2345 0 : if (size == 9 && flags == 21) {
2346 : char tmp[16];
2347 0 : sprintf(tmp, "%d",
2348 0 : (int)buffer[size - 1]);
2349 :
2350 0 : mFileMetaData->setCString(kKeyCompilation, tmp);
2351 : }
2352 0 : break;
2353 : }
2354 : case FOURCC('t', 'r', 'k', 'n'):
2355 : {
2356 0 : if (size == 16 && flags == 0) {
2357 : char tmp[16];
2358 0 : uint16_t* pTrack = (uint16_t*)&buffer[10];
2359 0 : uint16_t* pTotalTracks = (uint16_t*)&buffer[12];
2360 0 : sprintf(tmp, "%d/%d", ntohs(*pTrack), ntohs(*pTotalTracks));
2361 :
2362 0 : mFileMetaData->setCString(kKeyCDTrackNumber, tmp);
2363 : }
2364 0 : break;
2365 : }
2366 : case FOURCC('d', 'i', 's', 'k'):
2367 : {
2368 0 : if ((size == 14 || size == 16) && flags == 0) {
2369 : char tmp[16];
2370 0 : uint16_t* pDisc = (uint16_t*)&buffer[10];
2371 0 : uint16_t* pTotalDiscs = (uint16_t*)&buffer[12];
2372 0 : sprintf(tmp, "%d/%d", ntohs(*pDisc), ntohs(*pTotalDiscs));
2373 :
2374 0 : mFileMetaData->setCString(kKeyDiscNumber, tmp);
2375 : }
2376 0 : break;
2377 : }
2378 : case FOURCC('-', '-', '-', '-'):
2379 : {
2380 0 : buffer[size] = '\0';
2381 0 : switch (mPath[5]) {
2382 : case FOURCC('m', 'e', 'a', 'n'):
2383 0 : mLastCommentMean.setTo((const char *)buffer + 4);
2384 0 : break;
2385 : case FOURCC('n', 'a', 'm', 'e'):
2386 0 : mLastCommentName.setTo((const char *)buffer + 4);
2387 0 : break;
2388 : case FOURCC('d', 'a', 't', 'a'):
2389 0 : mLastCommentData.setTo((const char *)buffer + 8);
2390 0 : break;
2391 : }
2392 :
2393 : // Once we have a set of mean/name/data info, go ahead and process
2394 : // it to see if its something we are interested in. Whether or not
2395 : // were are interested in the specific tag, make sure to clear out
2396 : // the set so we can be ready to process another tuple should one
2397 : // show up later in the file.
2398 0 : if ((mLastCommentMean.length() != 0) &&
2399 0 : (mLastCommentName.length() != 0) &&
2400 0 : (mLastCommentData.length() != 0)) {
2401 :
2402 0 : if (mLastCommentMean == "com.apple.iTunes"
2403 0 : && mLastCommentName == "iTunSMPB") {
2404 : int32_t delay, padding;
2405 0 : if (sscanf(mLastCommentData,
2406 : " %*x %x %x %*x", &delay, &padding) == 2) {
2407 0 : if (!mLastTrack) {
2408 0 : return ERROR_MALFORMED;
2409 : }
2410 0 : mLastTrack->meta->setInt32(kKeyEncoderDelay, delay);
2411 0 : mLastTrack->meta->setInt32(kKeyEncoderPadding, padding);
2412 : }
2413 : }
2414 :
2415 0 : mLastCommentMean.clear();
2416 0 : mLastCommentName.clear();
2417 0 : mLastCommentData.clear();
2418 : }
2419 0 : break;
2420 : }
2421 :
2422 : default:
2423 0 : break;
2424 : }
2425 :
2426 0 : if (size >= 8 && metadataKey) {
2427 0 : if (metadataKey == kKeyAlbumArt) {
2428 0 : mFileMetaData->setData(
2429 : kKeyAlbumArt, MetaData::TYPE_NONE,
2430 0 : buffer + 8, size - 8);
2431 0 : } else if (metadataKey == kKeyGenre) {
2432 0 : if (flags == 0) {
2433 : // uint8_t genre code, iTunes genre codes are
2434 : // the standard id3 codes, except they start
2435 : // at 1 instead of 0 (e.g. Pop is 14, not 13)
2436 : // We use standard id3 numbering, so subtract 1.
2437 0 : int genrecode = (int)buffer[size - 1];
2438 0 : genrecode--;
2439 0 : if (genrecode < 0) {
2440 0 : genrecode = 255; // reserved for 'unknown genre'
2441 : }
2442 : char genre[10];
2443 0 : sprintf(genre, "%d", genrecode);
2444 :
2445 0 : mFileMetaData->setCString(metadataKey, genre);
2446 0 : } else if (flags == 1) {
2447 : // custom genre string
2448 0 : buffer[size] = '\0';
2449 :
2450 0 : mFileMetaData->setCString(
2451 0 : metadataKey, (const char *)buffer + 8);
2452 : }
2453 : } else {
2454 0 : buffer[size] = '\0';
2455 :
2456 0 : mFileMetaData->setCString(
2457 0 : metadataKey, (const char *)buffer + 8);
2458 : }
2459 : }
2460 :
2461 0 : return OK;
2462 : }
2463 :
2464 0 : sp<MediaSource> MPEG4Extractor::getTrack(size_t index) {
2465 : status_t err;
2466 0 : if ((err = readMetaData()) != OK) {
2467 0 : return NULL;
2468 : }
2469 :
2470 0 : Track *track = mFirstTrack;
2471 0 : while (index > 0) {
2472 0 : if (track == NULL) {
2473 0 : return NULL;
2474 : }
2475 :
2476 0 : track = track->next;
2477 0 : --index;
2478 : }
2479 :
2480 0 : if (track == NULL) {
2481 0 : return NULL;
2482 : }
2483 :
2484 : ALOGV("getTrack called, pssh: %d", mPssh.Length());
2485 :
2486 0 : return new MPEG4Source(track->meta, track->timescale, track->sampleTable);
2487 : }
2488 :
2489 : // static
2490 0 : status_t MPEG4Extractor::verifyTrack(Track *track) {
2491 : int32_t trackId;
2492 0 : if (!track->meta->findInt32(kKeyTrackID, &trackId)) {
2493 0 : return ERROR_MALFORMED;
2494 : }
2495 :
2496 : const char *mime;
2497 0 : if (!track->meta->findCString(kKeyMIMEType, &mime)) {
2498 0 : return ERROR_MALFORMED;
2499 : }
2500 :
2501 : uint32_t type;
2502 : const void *data;
2503 : size_t size;
2504 0 : if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_AVC)) {
2505 0 : if (!track->meta->findData(kKeyAVCC, &type, &data, &size)
2506 0 : || type != kTypeAVCC
2507 0 : || size < 7
2508 : // configurationVersion == 1?
2509 0 : || reinterpret_cast<const uint8_t*>(data)[0] != 1) {
2510 0 : return ERROR_MALFORMED;
2511 : }
2512 0 : } else if (!strcasecmp(mime, MEDIA_MIMETYPE_VIDEO_MPEG4)
2513 0 : || !strcasecmp(mime, MEDIA_MIMETYPE_AUDIO_AAC)) {
2514 0 : if (!track->meta->findData(kKeyESDS, &type, &data, &size)
2515 0 : || type != kTypeESDS) {
2516 0 : return ERROR_MALFORMED;
2517 : }
2518 : }
2519 :
2520 0 : if (!track->sampleTable.get() || !track->sampleTable->isValid()) {
2521 : // Make sure we have all the metadata we need.
2522 0 : return ERROR_MALFORMED;
2523 : }
2524 :
2525 : uint32_t keytype;
2526 : const void *key;
2527 : size_t keysize;
2528 0 : if (track->meta->findData(kKeyCryptoKey, &keytype, &key, &keysize)) {
2529 0 : if (keysize > 16) {
2530 0 : return ERROR_MALFORMED;
2531 : }
2532 : }
2533 :
2534 0 : return OK;
2535 : }
2536 :
// Audio object type (AOT) codes referenced by the AAC configuration parser in
// this file. Only the values that code actually names are defined; the
// remaining codes are listed in comments for reference.
enum AUDIO_OBJECT_TYPE {
    // -1  AOT_NONE
    //  0  AOT_NULL_OBJECT
    //  1  AOT_AAC_MAIN          Main profile
    AOT_AAC_LC      = 2,    // Low Complexity object
    //  3  AOT_AAC_SSR
    //  4  AOT_AAC_LTP
    AOT_SBR         = 5,
    //  6  AOT_AAC_SCAL
    //  7  AOT_TWIN_VQ
    //  8  AOT_CELP
    //  9  AOT_HVXC
    // 10  AOT_RSVD_10           (reserved)
    // 11  AOT_RSVD_11           (reserved)
    // 12  AOT_TTSI              TTSI Object
    // 13  AOT_MAIN_SYNTH        Main Synthetic object
    // 14  AOT_WAV_TAB_SYNTH     Wavetable Synthesis object
    // 15  AOT_GEN_MIDI          General MIDI object
    // 16  AOT_ALG_SYNTH_AUD_FX  Algorithmic Synthesis and Audio FX object
    AOT_ER_AAC_LC   = 17,   // Error Resilient(ER) AAC Low Complexity
    // 18  AOT_RSVD_18           (reserved)
    // 19  AOT_ER_AAC_LTP        Error Resilient(ER) AAC LTP object
    AOT_ER_AAC_SCAL = 20,   // Error Resilient(ER) AAC Scalable object
    // 21  AOT_ER_TWIN_VQ        Error Resilient(ER) TwinVQ object
    AOT_ER_BSAC     = 22,   // Error Resilient(ER) BSAC object
    AOT_ER_AAC_LD   = 23,   // Error Resilient(ER) AAC LowDelay object
    // 24  AOT_ER_CELP           Error Resilient(ER) CELP object
    // 25  AOT_ER_HVXC           Error Resilient(ER) HVXC object
    // 26  AOT_ER_HILN           Error Resilient(ER) HILN object
    // 27  AOT_ER_PARA           Error Resilient(ER) Parametric object
    // 28  AOT_RSVD_28           might become SSC
    AOT_PS          = 29,   // PS, Parametric Stereo (includes SBR)
    // 30  AOT_MPEGS             MPEG Surround

    AOT_ESCAPE      = 31,   // Signal AOT uses more than 5 bits

    // 32  AOT_MP3ONMP4_L1       MPEG-Layer1 in mp4
    // 33  AOT_MP3ONMP4_L2       MPEG-Layer2 in mp4
    // 34  AOT_MP3ONMP4_L3       MPEG-Layer3 in mp4
    // 35  AOT_RSVD_35           might become DST
    // 36  AOT_RSVD_36           might become ALS
    // 37  AOT_AAC_SLS           AAC + SLS
    // 38  AOT_SLS               SLS
    // 39  AOT_ER_AAC_ELD        AAC Enhanced Low Delay

    // 42  AOT_USAC              USAC
    // 43  AOT_SAOC              SAOC
    // 44  AOT_LD_MPEGS          Low Delay MPEG Surround

    // 50  AOT_RSVD50            Interim AOT for Rsvd50
};
2588 :
2589 0 : status_t MPEG4Extractor::updateAudioTrackInfoFromESDS_MPEG4Audio(
2590 : const void *esds_data, size_t esds_size) {
2591 0 : ESDS esds(esds_data, esds_size);
2592 :
2593 : uint8_t objectTypeIndication;
2594 0 : if (esds.getObjectTypeIndication(&objectTypeIndication) != OK) {
2595 0 : return ERROR_MALFORMED;
2596 : }
2597 :
2598 0 : if (objectTypeIndication == 0xe1) {
2599 : // This isn't MPEG4 audio at all, it's QCELP 14k...
2600 0 : if (mLastTrack == NULL)
2601 0 : return ERROR_MALFORMED;
2602 :
2603 0 : mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_QCELP);
2604 0 : return OK;
2605 : }
2606 :
2607 0 : if (objectTypeIndication == 0x6b || objectTypeIndication == 0x69) {
2608 : // The media subtype is MP3 audio
2609 0 : if (!mLastTrack) {
2610 0 : return ERROR_MALFORMED;
2611 : }
2612 0 : mLastTrack->meta->setCString(kKeyMIMEType, MEDIA_MIMETYPE_AUDIO_MPEG);
2613 : }
2614 :
2615 : const uint8_t *csd;
2616 : size_t csd_size;
2617 0 : if (esds.getCodecSpecificInfo(
2618 : (const void **)&csd, &csd_size) != OK) {
2619 0 : return ERROR_MALFORMED;
2620 : }
2621 :
2622 : #if 0
2623 : if (kUseHexDump) {
2624 : printf("ESD of size %zu\n", csd_size);
2625 : hexdump(csd, csd_size);
2626 : }
2627 : #endif
2628 :
2629 0 : if (csd_size == 0) {
2630 : // There's no further information, i.e. no codec specific data
2631 : // Let's assume that the information provided in the mpeg4 headers
2632 : // is accurate and hope for the best.
2633 :
2634 0 : return OK;
2635 : }
2636 :
2637 0 : if (csd_size < 2) {
2638 0 : return ERROR_MALFORMED;
2639 : }
2640 :
2641 : static uint32_t kSamplingRate[] = {
2642 : 96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050,
2643 : 16000, 12000, 11025, 8000, 7350
2644 : };
2645 :
2646 0 : ABitReader br(csd, csd_size);
2647 0 : if (br.numBitsLeft() < 5) {
2648 0 : return ERROR_MALFORMED;
2649 : }
2650 0 : uint32_t objectType = br.getBits(5);
2651 :
2652 0 : if (objectType == 31) { // AAC-ELD => additional 6 bits
2653 0 : if (br.numBitsLeft() < 6) {
2654 0 : return ERROR_MALFORMED;
2655 : }
2656 0 : objectType = 32 + br.getBits(6);
2657 : }
2658 :
2659 0 : if (mLastTrack == NULL)
2660 0 : return ERROR_MALFORMED;
2661 :
2662 0 : if (objectType >= 1 && objectType <= 4) {
2663 0 : mLastTrack->meta->setInt32(kKeyAACProfile, objectType);
2664 : }
2665 :
2666 : //keep AOT type
2667 0 : mLastTrack->meta->setInt32(kKeyAACAOT, objectType);
2668 :
2669 0 : if (br.numBitsLeft() < 4) {
2670 0 : return ERROR_MALFORMED;
2671 : }
2672 0 : uint32_t freqIndex = br.getBits(4);
2673 :
2674 0 : int32_t sampleRate = 0;
2675 0 : int32_t numChannels = 0;
2676 0 : if (freqIndex == 15) {
2677 0 : if (br.numBitsLeft() < 28) return ERROR_MALFORMED;
2678 0 : sampleRate = br.getBits(24);
2679 0 : numChannels = br.getBits(4);
2680 : } else {
2681 0 : if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
2682 0 : numChannels = br.getBits(4);
2683 :
2684 0 : if (freqIndex == 13 || freqIndex == 14) {
2685 0 : return ERROR_MALFORMED;
2686 : }
2687 :
2688 0 : sampleRate = kSamplingRate[freqIndex];
2689 : }
2690 :
2691 0 : if (objectType == AOT_SBR || objectType == AOT_PS) {//SBR specific config per 14496-3 table 1.13
2692 0 : if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
2693 0 : uint32_t extFreqIndex = br.getBits(4);
2694 : int32_t extSampleRate;
2695 0 : if (extFreqIndex == 15) {
2696 0 : if (csd_size < 8) {
2697 0 : return ERROR_MALFORMED;
2698 : }
2699 0 : if (br.numBitsLeft() < 24) return ERROR_MALFORMED;
2700 0 : extSampleRate = br.getBits(24);
2701 : } else {
2702 0 : if (extFreqIndex == 13 || extFreqIndex == 14) {
2703 0 : return ERROR_MALFORMED;
2704 : }
2705 0 : extSampleRate = kSamplingRate[extFreqIndex];
2706 : }
2707 : //TODO: save the extension sampling rate value in meta data =>
2708 : // mLastTrack->meta->setInt32(kKeyExtSampleRate, extSampleRate);
2709 : }
2710 :
2711 0 : switch (numChannels) {
2712 : // values defined in 14496-3_2009 amendment-4 Table 1.19 - Channel Configuration
2713 : case 0:
2714 : case 1:// FC
2715 : case 2:// FL FR
2716 : case 3:// FC, FL FR
2717 : case 4:// FC, FL FR, RC
2718 : case 5:// FC, FL FR, SL SR
2719 : case 6:// FC, FL FR, SL SR, LFE
2720 : //numChannels already contains the right value
2721 0 : break;
2722 : case 11:// FC, FL FR, SL SR, RC, LFE
2723 0 : numChannels = 7;
2724 0 : break;
2725 : case 7: // FC, FCL FCR, FL FR, SL SR, LFE
2726 : case 12:// FC, FL FR, SL SR, RL RR, LFE
2727 : case 14:// FC, FL FR, SL SR, LFE, FHL FHR
2728 0 : numChannels = 8;
2729 0 : break;
2730 : default:
2731 0 : return ERROR_UNSUPPORTED;
2732 : }
2733 :
2734 : {
2735 0 : if (objectType == AOT_SBR || objectType == AOT_PS) {
2736 0 : if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
2737 0 : objectType = br.getBits(5);
2738 :
2739 0 : if (objectType == AOT_ESCAPE) {
2740 0 : if (br.numBitsLeft() < 6) return ERROR_MALFORMED;
2741 0 : objectType = 32 + br.getBits(6);
2742 : }
2743 : }
2744 0 : if (objectType == AOT_AAC_LC || objectType == AOT_ER_AAC_LC ||
2745 0 : objectType == AOT_ER_AAC_LD || objectType == AOT_ER_AAC_SCAL ||
2746 : objectType == AOT_ER_BSAC) {
2747 0 : if (br.numBitsLeft() < 2) return ERROR_MALFORMED;
2748 0 : const int32_t frameLengthFlag = br.getBits(1);
2749 :
2750 0 : const int32_t dependsOnCoreCoder = br.getBits(1);
2751 :
2752 0 : if (dependsOnCoreCoder ) {
2753 0 : if (br.numBitsLeft() < 14) return ERROR_MALFORMED;
2754 0 : const int32_t coreCoderDelay = br.getBits(14);
2755 : }
2756 :
2757 0 : int32_t extensionFlag = -1;
2758 0 : if (br.numBitsLeft() > 0) {
2759 0 : extensionFlag = br.getBits(1);
2760 : } else {
2761 0 : switch (objectType) {
2762 : // 14496-3 4.5.1.1 extensionFlag
2763 : case AOT_AAC_LC:
2764 0 : extensionFlag = 0;
2765 0 : break;
2766 : case AOT_ER_AAC_LC:
2767 : case AOT_ER_AAC_SCAL:
2768 : case AOT_ER_BSAC:
2769 : case AOT_ER_AAC_LD:
2770 0 : extensionFlag = 1;
2771 0 : break;
2772 : default:
2773 0 : return ERROR_MALFORMED;
2774 : break;
2775 : }
2776 : ALOGW("csd missing extension flag; assuming %d for object type %u.",
2777 0 : extensionFlag, objectType);
2778 : }
2779 :
2780 0 : if (numChannels == 0) {
2781 0 : int32_t channelsEffectiveNum = 0;
2782 0 : int32_t channelsNum = 0;
2783 0 : if (br.numBitsLeft() < 32) {
2784 0 : return ERROR_MALFORMED;
2785 : }
2786 0 : const int32_t ElementInstanceTag = br.getBits(4);
2787 0 : const int32_t Profile = br.getBits(2);
2788 0 : const int32_t SamplingFrequencyIndex = br.getBits(4);
2789 0 : const int32_t NumFrontChannelElements = br.getBits(4);
2790 0 : const int32_t NumSideChannelElements = br.getBits(4);
2791 0 : const int32_t NumBackChannelElements = br.getBits(4);
2792 0 : const int32_t NumLfeChannelElements = br.getBits(2);
2793 0 : const int32_t NumAssocDataElements = br.getBits(3);
2794 0 : const int32_t NumValidCcElements = br.getBits(4);
2795 :
2796 0 : const int32_t MonoMixdownPresent = br.getBits(1);
2797 :
2798 0 : if (MonoMixdownPresent != 0) {
2799 0 : if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
2800 0 : const int32_t MonoMixdownElementNumber = br.getBits(4);
2801 : }
2802 :
2803 0 : if (br.numBitsLeft() < 1) return ERROR_MALFORMED;
2804 0 : const int32_t StereoMixdownPresent = br.getBits(1);
2805 0 : if (StereoMixdownPresent != 0) {
2806 0 : if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
2807 0 : const int32_t StereoMixdownElementNumber = br.getBits(4);
2808 : }
2809 :
2810 0 : if (br.numBitsLeft() < 1) return ERROR_MALFORMED;
2811 0 : const int32_t MatrixMixdownIndexPresent = br.getBits(1);
2812 0 : if (MatrixMixdownIndexPresent != 0) {
2813 0 : if (br.numBitsLeft() < 3) return ERROR_MALFORMED;
2814 0 : const int32_t MatrixMixdownIndex = br.getBits(2);
2815 0 : const int32_t PseudoSurroundEnable = br.getBits(1);
2816 : }
2817 :
2818 : int i;
2819 0 : for (i=0; i < NumFrontChannelElements; i++) {
2820 0 : if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
2821 0 : const int32_t FrontElementIsCpe = br.getBits(1);
2822 0 : const int32_t FrontElementTagSelect = br.getBits(4);
2823 0 : channelsNum += FrontElementIsCpe ? 2 : 1;
2824 : }
2825 :
2826 0 : for (i=0; i < NumSideChannelElements; i++) {
2827 0 : if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
2828 0 : const int32_t SideElementIsCpe = br.getBits(1);
2829 0 : const int32_t SideElementTagSelect = br.getBits(4);
2830 0 : channelsNum += SideElementIsCpe ? 2 : 1;
2831 : }
2832 :
2833 0 : for (i=0; i < NumBackChannelElements; i++) {
2834 0 : if (br.numBitsLeft() < 5) return ERROR_MALFORMED;
2835 0 : const int32_t BackElementIsCpe = br.getBits(1);
2836 0 : const int32_t BackElementTagSelect = br.getBits(4);
2837 0 : channelsNum += BackElementIsCpe ? 2 : 1;
2838 : }
2839 0 : channelsEffectiveNum = channelsNum;
2840 :
2841 0 : for (i=0; i < NumLfeChannelElements; i++) {
2842 0 : if (br.numBitsLeft() < 4) return ERROR_MALFORMED;
2843 0 : const int32_t LfeElementTagSelect = br.getBits(4);
2844 0 : channelsNum += 1;
2845 : }
2846 : ALOGV("mpeg4 audio channelsNum = %d", channelsNum);
2847 : ALOGV("mpeg4 audio channelsEffectiveNum = %d", channelsEffectiveNum);
2848 0 : numChannels = channelsNum;
2849 : }
2850 : }
2851 : }
2852 :
2853 0 : if (numChannels == 0) {
2854 0 : return ERROR_UNSUPPORTED;
2855 : }
2856 :
2857 0 : if (mLastTrack == NULL)
2858 0 : return ERROR_MALFORMED;
2859 :
2860 : int32_t prevSampleRate;
2861 0 : CHECK(mLastTrack->meta->findInt32(kKeySampleRate, &prevSampleRate));
2862 :
2863 0 : if (prevSampleRate != sampleRate) {
2864 : ALOGV("mpeg4 audio sample rate different from previous setting. "
2865 : "was: %d, now: %d", prevSampleRate, sampleRate);
2866 : }
2867 :
2868 0 : mLastTrack->meta->setInt32(kKeySampleRate, sampleRate);
2869 :
2870 : int32_t prevChannelCount;
2871 0 : CHECK(mLastTrack->meta->findInt32(kKeyChannelCount, &prevChannelCount));
2872 :
2873 0 : if (prevChannelCount != numChannels) {
2874 : ALOGV("mpeg4 audio channel count different from previous setting. "
2875 : "was: %d, now: %d", prevChannelCount, numChannels);
2876 : }
2877 :
2878 0 : mLastTrack->meta->setInt32(kKeyChannelCount, numChannels);
2879 :
2880 0 : return OK;
2881 : }
2882 :
2883 : ////////////////////////////////////////////////////////////////////////////////
2884 :
2885 0 : MPEG4Source::MPEG4Source(
2886 : const sp<MetaData> &format,
2887 : uint32_t timeScale,
2888 0 : const sp<SampleTable> &sampleTable)
2889 : : mFormat(format),
2890 : mTimescale(timeScale),
2891 0 : mSampleTable(sampleTable) {
2892 0 : }
2893 :
2894 0 : MPEG4Source::~MPEG4Source() {
2895 0 : }
2896 :
2897 0 : sp<MetaData> MPEG4Source::getFormat() {
2898 0 : return mFormat;
2899 : }
2900 :
2901 : class CompositionSorter
2902 : {
2903 : public:
2904 0 : bool LessThan(MediaSource::Indice* aFirst, MediaSource::Indice* aSecond) const
2905 : {
2906 0 : return aFirst->start_composition < aSecond->start_composition;
2907 : }
2908 :
2909 0 : bool Equals(MediaSource::Indice* aFirst, MediaSource::Indice* aSecond) const
2910 : {
2911 0 : return aFirst->start_composition == aSecond->start_composition;
2912 : }
2913 : };
2914 :
2915 0 : nsTArray<MediaSource::Indice> MPEG4Source::exportIndex()
2916 : {
2917 0 : nsTArray<MediaSource::Indice> index;
2918 0 : if (!mTimescale || !mSampleTable.get()) {
2919 0 : return index;
2920 : }
2921 :
2922 0 : if (!index.SetCapacity(mSampleTable->countSamples(), mozilla::fallible)) {
2923 0 : return index;
2924 : }
2925 0 : for (uint32_t sampleIndex = 0; sampleIndex < mSampleTable->countSamples();
2926 : sampleIndex++) {
2927 : off64_t offset;
2928 : size_t size;
2929 : uint32_t compositionTime;
2930 : uint32_t duration;
2931 : bool isSyncSample;
2932 : uint32_t decodeTime;
2933 0 : if (mSampleTable->getMetaDataForSample(sampleIndex, &offset, &size,
2934 : &compositionTime, &duration,
2935 : &isSyncSample, &decodeTime) != OK) {
2936 0 : ALOGE("Unexpected sample table problem");
2937 0 : continue;
2938 : }
2939 :
2940 : Indice indice;
2941 0 : indice.start_offset = offset;
2942 0 : indice.end_offset = offset + size;
2943 0 : indice.start_composition = (compositionTime * 1000000ll) / mTimescale;
2944 : // end_composition is overwritten everywhere except the last frame, where
2945 : // the presentation duration is equal to the sample duration.
2946 0 : indice.end_composition =
2947 0 : (compositionTime * 1000000ll + duration * 1000000ll) / mTimescale;
2948 0 : indice.sync = isSyncSample;
2949 0 : indice.start_decode = (decodeTime * 1000000ll) / mTimescale;
2950 0 : index.AppendElement(indice);
2951 : }
2952 :
2953 : // Fix up composition durations so we don't end up with any unsightly gaps.
2954 0 : if (index.Length() != 0) {
2955 0 : nsTArray<Indice*> composition_order;
2956 0 : if (!composition_order.SetCapacity(index.Length(), mozilla::fallible)) {
2957 0 : return index;
2958 : }
2959 0 : for (uint32_t i = 0; i < index.Length(); i++) {
2960 0 : composition_order.AppendElement(&index[i]);
2961 : }
2962 :
2963 0 : composition_order.Sort(CompositionSorter());
2964 0 : for (uint32_t i = 0; i + 1 < composition_order.Length(); i++) {
2965 0 : composition_order[i]->end_composition =
2966 0 : composition_order[i + 1]->start_composition;
2967 : }
2968 : }
2969 :
2970 0 : return index;
2971 : }
2972 :
2973 : } // namespace stagefright
2974 :
2975 : #undef LOG_TAG
|