/* * Copyright (C) 2014 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.google.android.exoplayer.parser.mp4; import com.google.android.exoplayer.MediaFormat; import com.google.android.exoplayer.ParserException; import com.google.android.exoplayer.SampleHolder; import com.google.android.exoplayer.parser.Extractor; import com.google.android.exoplayer.parser.SegmentIndex; import com.google.android.exoplayer.parser.mp4.Atom.ContainerAtom; import com.google.android.exoplayer.parser.mp4.Atom.LeafAtom; import com.google.android.exoplayer.upstream.NonBlockingInputStream; import com.google.android.exoplayer.util.Assertions; import com.google.android.exoplayer.util.CodecSpecificDataUtil; import com.google.android.exoplayer.util.MimeTypes; import android.annotation.SuppressLint; import android.media.MediaCodec; import android.media.MediaExtractor; import android.util.Pair; import java.nio.ByteBuffer; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import java.util.Stack; import java.util.UUID; /** * Facilitates the extraction of data from the fragmented mp4 container format. * <p> * This implementation only supports de-muxed (i.e. single track) streams. */ public final class FragmentedMp4Extractor implements Extractor { /** * Flag to work around an issue in some video streams where every frame is marked as a sync frame. * The workaround overrides the sync frame flags in the stream, forcing them to false except for * the first sample in each segment. * <p> * This flag does nothing if the stream is not a video stream. */ public static final int WORKAROUND_EVERY_VIDEO_FRAME_IS_SYNC_FRAME = 1; private static final int READ_TERMINATING_RESULTS = RESULT_NEED_MORE_DATA | RESULT_END_OF_STREAM | RESULT_READ_SAMPLE | RESULT_NEED_SAMPLE_HOLDER; private static final byte[] NAL_START_CODE = new byte[] {0, 0, 0, 1}; private static final byte[] PIFF_SAMPLE_ENCRYPTION_BOX_EXTENDED_TYPE = new byte[] {-94, 57, 79, 82, 90, -101, 79, 20, -94, 68, 108, 66, 124, 100, -115, -12}; // Parser states private static final int STATE_READING_ATOM_HEADER = 0; private static final int STATE_READING_ATOM_PAYLOAD = 1; private static final int STATE_READING_ENCRYPTION_DATA = 2; private static final int STATE_READING_SAMPLE = 3; // Atom data offsets private static final int ATOM_HEADER_SIZE = 8; private static final int FULL_ATOM_HEADER_SIZE = 12; // Atoms that the parser cares about private static final Set<Integer> PARSED_ATOMS; static { HashSet<Integer> parsedAtoms = new HashSet<Integer>(); parsedAtoms.add(Atom.TYPE_avc1); parsedAtoms.add(Atom.TYPE_avc3); parsedAtoms.add(Atom.TYPE_esds); parsedAtoms.add(Atom.TYPE_hdlr); parsedAtoms.add(Atom.TYPE_mdat); parsedAtoms.add(Atom.TYPE_mdhd); parsedAtoms.add(Atom.TYPE_moof); parsedAtoms.add(Atom.TYPE_moov); parsedAtoms.add(Atom.TYPE_mp4a); parsedAtoms.add(Atom.TYPE_sidx); parsedAtoms.add(Atom.TYPE_stsd); parsedAtoms.add(Atom.TYPE_tfdt); parsedAtoms.add(Atom.TYPE_tfhd); parsedAtoms.add(Atom.TYPE_tkhd); parsedAtoms.add(Atom.TYPE_traf); parsedAtoms.add(Atom.TYPE_trak); parsedAtoms.add(Atom.TYPE_trex); parsedAtoms.add(Atom.TYPE_trun); parsedAtoms.add(Atom.TYPE_mvex); parsedAtoms.add(Atom.TYPE_mdia); parsedAtoms.add(Atom.TYPE_minf); parsedAtoms.add(Atom.TYPE_stbl); parsedAtoms.add(Atom.TYPE_pssh); parsedAtoms.add(Atom.TYPE_saiz); parsedAtoms.add(Atom.TYPE_uuid); parsedAtoms.add(Atom.TYPE_senc); PARSED_ATOMS = Collections.unmodifiableSet(parsedAtoms); } // Atoms that the parser considers to be containers private static final Set<Integer> CONTAINER_TYPES; static { HashSet<Integer> atomContainerTypes = new HashSet<Integer>(); atomContainerTypes.add(Atom.TYPE_moov); atomContainerTypes.add(Atom.TYPE_trak); atomContainerTypes.add(Atom.TYPE_mdia); atomContainerTypes.add(Atom.TYPE_minf); atomContainerTypes.add(Atom.TYPE_stbl); atomContainerTypes.add(Atom.TYPE_avcC); atomContainerTypes.add(Atom.TYPE_moof); atomContainerTypes.add(Atom.TYPE_traf); atomContainerTypes.add(Atom.TYPE_mvex); CONTAINER_TYPES = Collections.unmodifiableSet(atomContainerTypes); } private final int workaroundFlags; // Parser state private final ParsableByteArray atomHeader; private final byte[] extendedTypeScratch; private final Stack<ContainerAtom> containerAtoms; private final Stack<Integer> containerAtomEndPoints; private final TrackFragment fragmentRun; private int parserState; private int atomBytesRead; private int rootAtomBytesRead; private int atomType; private int atomSize; private ParsableByteArray atomData; private int pendingSeekTimeMs; private int sampleIndex; private int pendingSeekSyncSampleIndex; private int lastSyncSampleIndex; // Data parsed from moov and sidx atoms private final HashMap<UUID, byte[]> psshData; private SegmentIndex segmentIndex; private Track track; private DefaultSampleValues extendsDefaults; public FragmentedMp4Extractor() { this(0); } /** * @param workaroundFlags Flags to allow parsing of faulty streams. * {@link #WORKAROUND_EVERY_VIDEO_FRAME_IS_SYNC_FRAME} is currently the only flag defined. */ public FragmentedMp4Extractor(int workaroundFlags) { this.workaroundFlags = workaroundFlags; parserState = STATE_READING_ATOM_HEADER; atomHeader = new ParsableByteArray(ATOM_HEADER_SIZE); extendedTypeScratch = new byte[16]; containerAtoms = new Stack<ContainerAtom>(); containerAtomEndPoints = new Stack<Integer>(); fragmentRun = new TrackFragment(); psshData = new HashMap<UUID, byte[]>(); } /** * Sideloads track information into the extractor. * * @param track The track to sideload. */ public void setTrack(Track track) { this.extendsDefaults = new DefaultSampleValues(0, 0, 0, 0); this.track = track; } /** * Sideloads pssh information into the extractor, so that it can be read through * {@link #getPsshInfo()}. * * @param uuid The UUID of the scheme for which information is being sideloaded. * @param data The corresponding data. */ public void putPsshInfo(UUID uuid, byte[] data) { // TODO: This is for SmoothStreaming. Consider using something other than // FragmentedMp4Extractor.getPsshInfo to obtain the pssh data for that use case, so that we can // remove this method. psshData.put(uuid, data); } @Override public Map<UUID, byte[]> getPsshInfo() { return psshData.isEmpty() ? null : psshData; } @Override public SegmentIndex getIndex() { return segmentIndex; } @Override public boolean hasRelativeIndexOffsets() { return true; } @Override public MediaFormat getFormat() { return track == null ? null : track.mediaFormat; } @Override public int read(NonBlockingInputStream inputStream, SampleHolder out) throws ParserException { try { int results = 0; while ((results & READ_TERMINATING_RESULTS) == 0) { switch (parserState) { case STATE_READING_ATOM_HEADER: results |= readAtomHeader(inputStream); break; case STATE_READING_ATOM_PAYLOAD: results |= readAtomPayload(inputStream); break; case STATE_READING_ENCRYPTION_DATA: results |= readEncryptionData(inputStream); break; default: results |= readOrSkipSample(inputStream, out); break; } } return results; } catch (Exception e) { throw new ParserException(e); } } @Override public boolean seekTo(long seekTimeUs, boolean allowNoop) { pendingSeekTimeMs = (int) (seekTimeUs / 1000); if (allowNoop && fragmentRun != null && pendingSeekTimeMs >= fragmentRun.getSamplePresentationTime(0) && pendingSeekTimeMs <= fragmentRun.getSamplePresentationTime(fragmentRun.length - 1)) { int sampleIndexFound = 0; int syncSampleIndexFound = 0; for (int i = 0; i < fragmentRun.length; i++) { if (fragmentRun.getSamplePresentationTime(i) <= pendingSeekTimeMs) { if (fragmentRun.sampleIsSyncFrameTable[i]) { syncSampleIndexFound = i; } sampleIndexFound = i; } } if (syncSampleIndexFound == lastSyncSampleIndex && sampleIndexFound >= sampleIndex) { pendingSeekTimeMs = 0; return false; } } containerAtoms.clear(); containerAtomEndPoints.clear(); enterState(STATE_READING_ATOM_HEADER); return true; } private void enterState(int state) { switch (state) { case STATE_READING_ATOM_HEADER: atomBytesRead = 0; if (containerAtomEndPoints.isEmpty()) { rootAtomBytesRead = 0; } break; } parserState = state; } private int readAtomHeader(NonBlockingInputStream inputStream) { int remainingBytes = ATOM_HEADER_SIZE - atomBytesRead; int bytesRead = inputStream.read(atomHeader.data, atomBytesRead, remainingBytes); if (bytesRead == -1) { return RESULT_END_OF_STREAM; } rootAtomBytesRead += bytesRead; atomBytesRead += bytesRead; if (atomBytesRead != ATOM_HEADER_SIZE) { return RESULT_NEED_MORE_DATA; } atomHeader.setPosition(0); atomSize = atomHeader.readInt(); atomType = atomHeader.readInt(); if (atomType == Atom.TYPE_mdat) { if (fragmentRun.sampleEncryptionDataNeedsFill) { enterState(STATE_READING_ENCRYPTION_DATA); } else { enterState(STATE_READING_SAMPLE); } return 0; } if (PARSED_ATOMS.contains(atomType)) { if (CONTAINER_TYPES.contains(atomType)) { enterState(STATE_READING_ATOM_HEADER); containerAtoms.add(new ContainerAtom(atomType)); containerAtomEndPoints.add(rootAtomBytesRead + atomSize - ATOM_HEADER_SIZE); } else { atomData = new ParsableByteArray(atomSize); System.arraycopy(atomHeader.data, 0, atomData.data, 0, ATOM_HEADER_SIZE); enterState(STATE_READING_ATOM_PAYLOAD); } } else { atomData = null; enterState(STATE_READING_ATOM_PAYLOAD); } return 0; } private int readAtomPayload(NonBlockingInputStream inputStream) { int bytesRead; if (atomData != null) { bytesRead = inputStream.read(atomData.data, atomBytesRead, atomSize - atomBytesRead); } else { bytesRead = inputStream.skip(atomSize - atomBytesRead); } if (bytesRead == -1) { return RESULT_END_OF_STREAM; } rootAtomBytesRead += bytesRead; atomBytesRead += bytesRead; if (atomBytesRead != atomSize) { return RESULT_NEED_MORE_DATA; } int results = 0; if (atomData != null) { results |= onLeafAtomRead(new LeafAtom(atomType, atomData)); } while (!containerAtomEndPoints.isEmpty() && containerAtomEndPoints.peek() == rootAtomBytesRead) { containerAtomEndPoints.pop(); results |= onContainerAtomRead(containerAtoms.pop()); } enterState(STATE_READING_ATOM_HEADER); return results; } private int onLeafAtomRead(LeafAtom leaf) { if (!containerAtoms.isEmpty()) { containerAtoms.peek().add(leaf); } else if (leaf.type == Atom.TYPE_sidx) { segmentIndex = parseSidx(leaf.data); return RESULT_READ_INDEX; } return 0; } private int onContainerAtomRead(ContainerAtom container) { if (container.type == Atom.TYPE_moov) { onMoovContainerAtomRead(container); return RESULT_READ_INIT; } else if (container.type == Atom.TYPE_moof) { onMoofContainerAtomRead(container); } else if (!containerAtoms.isEmpty()) { containerAtoms.peek().add(container); } return 0; } private void onMoovContainerAtomRead(ContainerAtom moov) { List<Atom> moovChildren = moov.children; int moovChildrenSize = moovChildren.size(); for (int i = 0; i < moovChildrenSize; i++) { Atom child = moovChildren.get(i); if (child.type == Atom.TYPE_pssh) { ParsableByteArray psshAtom = ((LeafAtom) child).data; psshAtom.setPosition(FULL_ATOM_HEADER_SIZE); UUID uuid = new UUID(psshAtom.readLong(), psshAtom.readLong()); int dataSize = psshAtom.readInt(); byte[] data = new byte[dataSize]; psshAtom.readBytes(data, 0, dataSize); psshData.put(uuid, data); } } ContainerAtom mvex = moov.getContainerAtomOfType(Atom.TYPE_mvex); extendsDefaults = parseTrex(mvex.getLeafAtomOfType(Atom.TYPE_trex).data); track = parseTrak(moov.getContainerAtomOfType(Atom.TYPE_trak)); } private void onMoofContainerAtomRead(ContainerAtom moof) { fragmentRun.reset(); parseMoof(track, extendsDefaults, moof, fragmentRun, workaroundFlags, extendedTypeScratch); sampleIndex = 0; lastSyncSampleIndex = 0; pendingSeekSyncSampleIndex = 0; if (pendingSeekTimeMs != 0) { for (int i = 0; i < fragmentRun.length; i++) { if (fragmentRun.sampleIsSyncFrameTable[i]) { if (fragmentRun.getSamplePresentationTime(i) <= pendingSeekTimeMs) { pendingSeekSyncSampleIndex = i; } } } pendingSeekTimeMs = 0; } } /** * Parses a trex atom (defined in 14496-12). */ private static DefaultSampleValues parseTrex(ParsableByteArray trex) { trex.setPosition(FULL_ATOM_HEADER_SIZE + 4); int defaultSampleDescriptionIndex = trex.readUnsignedIntToInt() - 1; int defaultSampleDuration = trex.readUnsignedIntToInt(); int defaultSampleSize = trex.readUnsignedIntToInt(); int defaultSampleFlags = trex.readInt(); return new DefaultSampleValues(defaultSampleDescriptionIndex, defaultSampleDuration, defaultSampleSize, defaultSampleFlags); } /** * Parses a trak atom (defined in 14496-12). */ private static Track parseTrak(ContainerAtom trak) { ContainerAtom mdia = trak.getContainerAtomOfType(Atom.TYPE_mdia); int trackType = parseHdlr(mdia.getLeafAtomOfType(Atom.TYPE_hdlr).data); Assertions.checkState(trackType == Track.TYPE_AUDIO || trackType == Track.TYPE_VIDEO); Pair<Integer, Long> header = parseTkhd(trak.getLeafAtomOfType(Atom.TYPE_tkhd).data); int id = header.first; // TODO: This value should be used to set a duration field on the Track object // instantiated below, however we've found examples where the value is 0. Revisit whether we // should set it anyway (and just have it be wrong for bad media streams). // long duration = header.second; long timescale = parseMdhd(mdia.getLeafAtomOfType(Atom.TYPE_mdhd).data); ContainerAtom stbl = mdia.getContainerAtomOfType(Atom.TYPE_minf) .getContainerAtomOfType(Atom.TYPE_stbl); Pair<MediaFormat, TrackEncryptionBox[]> sampleDescriptions = parseStsd(stbl.getLeafAtomOfType(Atom.TYPE_stsd).data); return new Track(id, trackType, timescale, sampleDescriptions.first, sampleDescriptions.second); } /** * Parses a tkhd atom (defined in 14496-12). * * @return A {@link Pair} consisting of the track id and duration. */ private static Pair<Integer, Long> parseTkhd(ParsableByteArray tkhd) { tkhd.setPosition(ATOM_HEADER_SIZE); int fullAtom = tkhd.readInt(); int version = parseFullAtomVersion(fullAtom); tkhd.skip(version == 0 ? 8 : 16); int trackId = tkhd.readInt(); tkhd.skip(4); long duration = version == 0 ? tkhd.readUnsignedInt() : tkhd.readUnsignedLongToLong(); return Pair.create(trackId, duration); } /** * Parses an hdlr atom (defined in 14496-12). * * @param hdlr The hdlr atom to parse. * @return The track type. */ private static int parseHdlr(ParsableByteArray hdlr) { hdlr.setPosition(FULL_ATOM_HEADER_SIZE + 4); return hdlr.readInt(); } /** * Parses an mdhd atom (defined in 14496-12). * * @param mdhd The mdhd atom to parse. * @return The media timescale, defined as the number of time units that pass in one second. */ private static long parseMdhd(ParsableByteArray mdhd) { mdhd.setPosition(ATOM_HEADER_SIZE); int fullAtom = mdhd.readInt(); int version = parseFullAtomVersion(fullAtom); mdhd.skip(version == 0 ? 8 : 16); return mdhd.readUnsignedInt(); } private static Pair<MediaFormat, TrackEncryptionBox[]> parseStsd(ParsableByteArray stsd) { stsd.setPosition(FULL_ATOM_HEADER_SIZE); int numberOfEntries = stsd.readInt(); MediaFormat mediaFormat = null; TrackEncryptionBox[] trackEncryptionBoxes = new TrackEncryptionBox[numberOfEntries]; for (int i = 0; i < numberOfEntries; i++) { int childStartPosition = stsd.getPosition(); int childAtomSize = stsd.readInt(); int childAtomType = stsd.readInt(); if (childAtomType == Atom.TYPE_avc1 || childAtomType == Atom.TYPE_avc3 || childAtomType == Atom.TYPE_encv) { Pair<MediaFormat, TrackEncryptionBox> avc = parseAvcFromParent(stsd, childStartPosition, childAtomSize); mediaFormat = avc.first; trackEncryptionBoxes[i] = avc.second; } else if (childAtomType == Atom.TYPE_mp4a || childAtomType == Atom.TYPE_enca) { Pair<MediaFormat, TrackEncryptionBox> mp4a = parseMp4aFromParent(stsd, childStartPosition, childAtomSize); mediaFormat = mp4a.first; trackEncryptionBoxes[i] = mp4a.second; } stsd.setPosition(childStartPosition + childAtomSize); } return Pair.create(mediaFormat, trackEncryptionBoxes); } private static Pair<MediaFormat, TrackEncryptionBox> parseAvcFromParent(ParsableByteArray parent, int position, int size) { parent.setPosition(position + ATOM_HEADER_SIZE); parent.skip(24); int width = parent.readUnsignedShort(); int height = parent.readUnsignedShort(); parent.skip(50); List<byte[]> initializationData = null; TrackEncryptionBox trackEncryptionBox = null; int childPosition = parent.getPosition(); while (childPosition - position < size) { parent.setPosition(childPosition); int childStartPosition = parent.getPosition(); int childAtomSize = parent.readInt(); int childAtomType = parent.readInt(); if (childAtomType == Atom.TYPE_avcC) { initializationData = parseAvcCFromParent(parent, childStartPosition); } else if (childAtomType == Atom.TYPE_sinf) { trackEncryptionBox = parseSinfFromParent(parent, childStartPosition, childAtomSize); } childPosition += childAtomSize; } MediaFormat format = MediaFormat.createVideoFormat(MimeTypes.VIDEO_H264, MediaFormat.NO_VALUE, width, height, initializationData); return Pair.create(format, trackEncryptionBox); } private static Pair<MediaFormat, TrackEncryptionBox> parseMp4aFromParent(ParsableByteArray parent, int position, int size) { parent.setPosition(position + ATOM_HEADER_SIZE); // Start of the mp4a atom (defined in 14496-14) parent.skip(16); int channelCount = parent.readUnsignedShort(); int sampleSize = parent.readUnsignedShort(); parent.skip(4); int sampleRate = parent.readUnsignedFixedPoint1616(); byte[] initializationData = null; TrackEncryptionBox trackEncryptionBox = null; int childPosition = parent.getPosition(); while (childPosition - position < size) { parent.setPosition(childPosition); int childStartPosition = parent.getPosition(); int childAtomSize = parent.readInt(); int childAtomType = parent.readInt(); if (childAtomType == Atom.TYPE_esds) { initializationData = parseEsdsFromParent(parent, childStartPosition); // TODO: Do we really need to do this? See [redacted] // Update sampleRate and channelCount from the AudioSpecificConfig initialization data. Pair<Integer, Integer> audioSpecificConfig = CodecSpecificDataUtil.parseAudioSpecificConfig(initializationData); sampleRate = audioSpecificConfig.first; channelCount = audioSpecificConfig.second; } else if (childAtomType == Atom.TYPE_sinf) { trackEncryptionBox = parseSinfFromParent(parent, childStartPosition, childAtomSize); } childPosition += childAtomSize; } MediaFormat format = MediaFormat.createAudioFormat("audio/mp4a-latm", sampleSize, channelCount, sampleRate, Collections.singletonList(initializationData)); return Pair.create(format, trackEncryptionBox); } private static List<byte[]> parseAvcCFromParent(ParsableByteArray parent, int position) { parent.setPosition(position + ATOM_HEADER_SIZE + 4); // Start of the AVCDecoderConfigurationRecord (defined in 14496-15) int nalUnitLength = (parent.readUnsignedByte() & 0x3) + 1; if (nalUnitLength != 4) { // readSample currently relies on a nalUnitLength of 4. // TODO: Consider handling the case where it isn't. throw new IllegalStateException(); } List<byte[]> initializationData = new ArrayList<byte[]>(); // TODO: We should try and parse these using CodecSpecificDataUtil.parseSpsNalUnit, and // expose the AVC profile and level somewhere useful; Most likely in MediaFormat. int numSequenceParameterSets = parent.readUnsignedByte() & 0x1F; for (int j = 0; j < numSequenceParameterSets; j++) { initializationData.add(parseChildNalUnit(parent)); } int numPictureParamterSets = parent.readUnsignedByte(); for (int j = 0; j < numPictureParamterSets; j++) { initializationData.add(parseChildNalUnit(parent)); } return initializationData; } private static byte[] parseChildNalUnit(ParsableByteArray atom) { int length = atom.readUnsignedShort(); int offset = atom.getPosition(); atom.skip(length); return CodecSpecificDataUtil.buildNalUnit(atom.data, offset, length); } private static TrackEncryptionBox parseSinfFromParent(ParsableByteArray parent, int position, int size) { int childPosition = position + ATOM_HEADER_SIZE; TrackEncryptionBox trackEncryptionBox = null; while (childPosition - position < size) { parent.setPosition(childPosition); int childAtomSize = parent.readInt(); int childAtomType = parent.readInt(); if (childAtomType == Atom.TYPE_frma) { parent.readInt(); // dataFormat. } else if (childAtomType == Atom.TYPE_schm) { parent.skip(4); parent.readInt(); // schemeType. Expect cenc parent.readInt(); // schemeVersion. Expect 0x00010000 } else if (childAtomType == Atom.TYPE_schi) { trackEncryptionBox = parseSchiFromParent(parent, childPosition, childAtomSize); } childPosition += childAtomSize; } return trackEncryptionBox; } private static TrackEncryptionBox parseSchiFromParent(ParsableByteArray parent, int position, int size) { int childPosition = position + ATOM_HEADER_SIZE; while (childPosition - position < size) { parent.setPosition(childPosition); int childAtomSize = parent.readInt(); int childAtomType = parent.readInt(); if (childAtomType == Atom.TYPE_tenc) { parent.skip(4); int firstInt = parent.readInt(); boolean defaultIsEncrypted = (firstInt >> 8) == 1; int defaultInitVectorSize = firstInt & 0xFF; byte[] defaultKeyId = new byte[16]; parent.readBytes(defaultKeyId, 0, defaultKeyId.length); return new TrackEncryptionBox(defaultIsEncrypted, defaultInitVectorSize, defaultKeyId); } childPosition += childAtomSize; } return null; } private static byte[] parseEsdsFromParent(ParsableByteArray parent, int position) { parent.setPosition(position + ATOM_HEADER_SIZE + 4); // Start of the ES_Descriptor (defined in 14496-1) parent.skip(1); // ES_Descriptor tag int varIntByte = parent.readUnsignedByte(); while (varIntByte > 127) { varIntByte = parent.readUnsignedByte(); } parent.skip(2); // ES_ID int flags = parent.readUnsignedByte(); if ((flags & 0x80 /* streamDependenceFlag */) != 0) { parent.skip(2); } if ((flags & 0x40 /* URL_Flag */) != 0) { parent.skip(parent.readUnsignedShort()); } if ((flags & 0x20 /* OCRstreamFlag */) != 0) { parent.skip(2); } // Start of the DecoderConfigDescriptor (defined in 14496-1) parent.skip(1); // DecoderConfigDescriptor tag varIntByte = parent.readUnsignedByte(); while (varIntByte > 127) { varIntByte = parent.readUnsignedByte(); } parent.skip(13); // Start of AudioSpecificConfig (defined in 14496-3) parent.skip(1); // AudioSpecificConfig tag varIntByte = parent.readUnsignedByte(); int varInt = varIntByte & 0x7F; while (varIntByte > 127) { varIntByte = parent.readUnsignedByte(); varInt = varInt << 8; varInt |= varIntByte & 0x7F; } byte[] initializationData = new byte[varInt]; parent.readBytes(initializationData, 0, varInt); return initializationData; } private static void parseMoof(Track track, DefaultSampleValues extendsDefaults, ContainerAtom moof, TrackFragment out, int workaroundFlags, byte[] extendedTypeScratch) { parseTraf(track, extendsDefaults, moof.getContainerAtomOfType(Atom.TYPE_traf), out, workaroundFlags, extendedTypeScratch); } /** * Parses a traf atom (defined in 14496-12). */ private static void parseTraf(Track track, DefaultSampleValues extendsDefaults, ContainerAtom traf, TrackFragment out, int workaroundFlags, byte[] extendedTypeScratch) { LeafAtom tfdtAtom = traf.getLeafAtomOfType(Atom.TYPE_tfdt); long decodeTime = tfdtAtom == null ? 0 : parseTfdt(traf.getLeafAtomOfType(Atom.TYPE_tfdt).data); LeafAtom tfhd = traf.getLeafAtomOfType(Atom.TYPE_tfhd); DefaultSampleValues fragmentHeader = parseTfhd(extendsDefaults, tfhd.data); out.sampleDescriptionIndex = fragmentHeader.sampleDescriptionIndex; LeafAtom trun = traf.getLeafAtomOfType(Atom.TYPE_trun); parseTrun(track, fragmentHeader, decodeTime, workaroundFlags, trun.data, out); LeafAtom saiz = traf.getLeafAtomOfType(Atom.TYPE_saiz); if (saiz != null) { TrackEncryptionBox trackEncryptionBox = track.sampleDescriptionEncryptionBoxes[fragmentHeader.sampleDescriptionIndex]; parseSaiz(trackEncryptionBox, saiz.data, out); } LeafAtom senc = traf.getLeafAtomOfType(Atom.TYPE_senc); if (senc != null) { parseSenc(senc.data, out); } LeafAtom uuid = traf.getLeafAtomOfType(Atom.TYPE_uuid); if (uuid != null) { parseUuid(uuid.data, out, extendedTypeScratch); } } private static void parseSaiz(TrackEncryptionBox encryptionBox, ParsableByteArray saiz, TrackFragment out) { int vectorSize = encryptionBox.initializationVectorSize; saiz.setPosition(ATOM_HEADER_SIZE); int fullAtom = saiz.readInt(); int flags = parseFullAtomFlags(fullAtom); if ((flags & 0x01) == 1) { saiz.skip(8); } int defaultSampleInfoSize = saiz.readUnsignedByte(); int sampleCount = saiz.readUnsignedIntToInt(); if (sampleCount != out.length) { throw new IllegalStateException("Length mismatch: " + sampleCount + ", " + out.length); } int totalSize = 0; if (defaultSampleInfoSize == 0) { boolean[] sampleHasSubsampleEncryptionTable = out.sampleHasSubsampleEncryptionTable; for (int i = 0; i < sampleCount; i++) { int sampleInfoSize = saiz.readUnsignedByte(); totalSize += sampleInfoSize; sampleHasSubsampleEncryptionTable[i] = sampleInfoSize > vectorSize; } } else { boolean subsampleEncryption = defaultSampleInfoSize > vectorSize; totalSize += defaultSampleInfoSize * sampleCount; Arrays.fill(out.sampleHasSubsampleEncryptionTable, 0, sampleCount, subsampleEncryption); } out.initEncryptionData(totalSize); } /** * Parses a tfhd atom (defined in 14496-12). * * @param extendsDefaults Default sample values from the trex atom. * @return The parsed default sample values. */ private static DefaultSampleValues parseTfhd(DefaultSampleValues extendsDefaults, ParsableByteArray tfhd) { tfhd.setPosition(ATOM_HEADER_SIZE); int fullAtom = tfhd.readInt(); int flags = parseFullAtomFlags(fullAtom); tfhd.skip(4); // trackId if ((flags & 0x01 /* base_data_offset_present */) != 0) { tfhd.skip(8); } int defaultSampleDescriptionIndex = ((flags & 0x02 /* default_sample_description_index_present */) != 0) ? tfhd.readUnsignedIntToInt() - 1 : extendsDefaults.sampleDescriptionIndex; int defaultSampleDuration = ((flags & 0x08 /* default_sample_duration_present */) != 0) ? tfhd.readUnsignedIntToInt() : extendsDefaults.duration; int defaultSampleSize = ((flags & 0x10 /* default_sample_size_present */) != 0) ? tfhd.readUnsignedIntToInt() : extendsDefaults.size; int defaultSampleFlags = ((flags & 0x20 /* default_sample_flags_present */) != 0) ? tfhd.readUnsignedIntToInt() : extendsDefaults.flags; return new DefaultSampleValues(defaultSampleDescriptionIndex, defaultSampleDuration, defaultSampleSize, defaultSampleFlags); } /** * Parses a tfdt atom (defined in 14496-12). * * @return baseMediaDecodeTime. The sum of the decode durations of all earlier samples in the * media, expressed in the media's timescale. */ private static long parseTfdt(ParsableByteArray tfdt) { tfdt.setPosition(ATOM_HEADER_SIZE); int fullAtom = tfdt.readInt(); int version = parseFullAtomVersion(fullAtom); return version == 1 ? tfdt.readUnsignedLongToLong() : tfdt.readUnsignedInt(); } /** * Parses a trun atom (defined in 14496-12). * * @param track The corresponding track. * @param defaultSampleValues Default sample values. * @param decodeTime The decode time. * @param trun The trun atom to parse. * @param out The {@TrackFragment} into which parsed data should be placed. */ private static void parseTrun(Track track, DefaultSampleValues defaultSampleValues, long decodeTime, int workaroundFlags, ParsableByteArray trun, TrackFragment out) { trun.setPosition(ATOM_HEADER_SIZE); int fullAtom = trun.readInt(); int flags = parseFullAtomFlags(fullAtom); int sampleCount = trun.readUnsignedIntToInt(); if ((flags & 0x01 /* data_offset_present */) != 0) { trun.skip(4); } boolean firstSampleFlagsPresent = (flags & 0x04 /* first_sample_flags_present */) != 0; int firstSampleFlags = defaultSampleValues.flags; if (firstSampleFlagsPresent) { firstSampleFlags = trun.readUnsignedIntToInt(); } boolean sampleDurationsPresent = (flags & 0x100 /* sample_duration_present */) != 0; boolean sampleSizesPresent = (flags & 0x200 /* sample_size_present */) != 0; boolean sampleFlagsPresent = (flags & 0x400 /* sample_flags_present */) != 0; boolean sampleCompositionTimeOffsetsPresent = (flags & 0x800 /* sample_composition_time_offsets_present */) != 0; out.initTables(sampleCount); int[] sampleSizeTable = out.sampleSizeTable; int[] sampleDecodingTimeTable = out.sampleDecodingTimeTable; int[] sampleCompositionTimeOffsetTable = out.sampleCompositionTimeOffsetTable; boolean[] sampleIsSyncFrameTable = out.sampleIsSyncFrameTable; long timescale = track.timescale; long cumulativeTime = decodeTime; boolean workaroundEveryVideoFrameIsSyncFrame = track.type == Track.TYPE_VIDEO && ((workaroundFlags & WORKAROUND_EVERY_VIDEO_FRAME_IS_SYNC_FRAME) == WORKAROUND_EVERY_VIDEO_FRAME_IS_SYNC_FRAME); for (int i = 0; i < sampleCount; i++) { // Use trun values if present, otherwise tfhd, otherwise trex. int sampleDuration = sampleDurationsPresent ? trun.readUnsignedIntToInt() : defaultSampleValues.duration; int sampleSize = sampleSizesPresent ? trun.readUnsignedIntToInt() : defaultSampleValues.size; int sampleFlags = (i == 0 && firstSampleFlagsPresent) ? firstSampleFlags : sampleFlagsPresent ? trun.readInt() : defaultSampleValues.flags; if (sampleCompositionTimeOffsetsPresent) { // The BMFF spec (ISO 14496-12) states that sample offsets should be unsigned integers in // version 0 trun boxes, however a significant number of streams violate the spec and use // signed integers instead. It's safe to always parse sample offsets as signed integers // here, because unsigned integers will still be parsed correctly (unless their top bit is // set, which is never true in practice because sample offsets are always small). int sampleOffset = trun.readInt(); sampleCompositionTimeOffsetTable[i] = (int) ((sampleOffset * 1000) / timescale); } else { sampleCompositionTimeOffsetTable[i] = 0; } sampleDecodingTimeTable[i] = (int) ((cumulativeTime * 1000) / timescale); sampleSizeTable[i] = sampleSize; sampleIsSyncFrameTable[i] = ((sampleFlags >> 16) & 0x1) == 0 && (!workaroundEveryVideoFrameIsSyncFrame || i == 0); cumulativeTime += sampleDuration; } } private static void parseUuid(ParsableByteArray uuid, TrackFragment out, byte[] extendedTypeScratch) { uuid.setPosition(ATOM_HEADER_SIZE); uuid.readBytes(extendedTypeScratch, 0, 16); // Currently this parser only supports Microsoft's PIFF SampleEncryptionBox. if (!Arrays.equals(extendedTypeScratch, PIFF_SAMPLE_ENCRYPTION_BOX_EXTENDED_TYPE)) { return; } // Except for the extended type, this box is identical to a SENC box. See "Portable encoding of // audio-video objects: The Protected Interoperable File Format (PIFF), John A. Bocharov et al, // Section 5.3.2.1." parseSenc(uuid, 16, out); } private static void parseSenc(ParsableByteArray senc, TrackFragment out) { parseSenc(senc, 0, out); } private static void parseSenc(ParsableByteArray senc, int offset, TrackFragment out) { senc.setPosition(ATOM_HEADER_SIZE + offset); int fullAtom = senc.readInt(); int flags = parseFullAtomFlags(fullAtom); if ((flags & 0x01 /* override_track_encryption_box_parameters */) != 0) { // TODO: Implement this. throw new IllegalStateException("Overriding TrackEncryptionBox parameters is unsupported"); } boolean subsampleEncryption = (flags & 0x02 /* use_subsample_encryption */) != 0; int sampleCount = senc.readUnsignedIntToInt(); if (sampleCount != out.length) { throw new IllegalStateException("Length mismatch: " + sampleCount + ", " + out.length); } Arrays.fill(out.sampleHasSubsampleEncryptionTable, 0, sampleCount, subsampleEncryption); out.initEncryptionData(senc.length() - senc.getPosition()); out.fillEncryptionData(senc); } /** * Parses a sidx atom (defined in 14496-12). */ private static SegmentIndex parseSidx(ParsableByteArray atom) { atom.setPosition(ATOM_HEADER_SIZE); int fullAtom = atom.readInt(); int version = parseFullAtomVersion(fullAtom); atom.skip(4); long timescale = atom.readUnsignedInt(); long earliestPresentationTime; long firstOffset; if (version == 0) { earliestPresentationTime = atom.readUnsignedInt(); firstOffset = atom.readUnsignedInt(); } else { earliestPresentationTime = atom.readUnsignedLongToLong(); firstOffset = atom.readUnsignedLongToLong(); } atom.skip(2); int referenceCount = atom.readUnsignedShort(); int[] sizes = new int[referenceCount]; long[] offsets = new long[referenceCount]; long[] durationsUs = new long[referenceCount]; long[] timesUs = new long[referenceCount]; long offset = firstOffset; long time = earliestPresentationTime; for (int i = 0; i < referenceCount; i++) { int firstInt = atom.readInt(); int type = 0x80000000 & firstInt; if (type != 0) { throw new IllegalStateException("Unhandled indirect reference"); } long referenceDuration = atom.readUnsignedInt(); sizes[i] = 0x7fffffff & firstInt; offsets[i] = offset; // Calculate time and duration values such that any rounding errors are consistent. i.e. That // timesUs[i] + durationsUs[i] == timesUs[i + 1]. timesUs[i] = (time * 1000000L) / timescale; long nextTimeUs = ((time + referenceDuration) * 1000000L) / timescale; durationsUs[i] = nextTimeUs - timesUs[i]; time += referenceDuration; atom.skip(4); offset += sizes[i]; } return new SegmentIndex(atom.length(), sizes, offsets, durationsUs, timesUs); } private int readEncryptionData(NonBlockingInputStream inputStream) { boolean success = fragmentRun.fillEncryptionData(inputStream); if (!success) { return RESULT_NEED_MORE_DATA; } enterState(STATE_READING_SAMPLE); return 0; } /** * Attempts to read or skip the next sample in the current mdat atom. * <p> * If there are no more samples in the current mdat atom then the parser state is transitioned * to {@link #STATE_READING_ATOM_HEADER} and 0 is returned. * <p> * If there's a pending seek to a sync frame, and if the next sample is before that frame, then * the sample is skipped. Otherwise it is read. * <p> * It is possible for a sample to be read or skipped in part if there is insufficent data * available from the {@link NonBlockingInputStream}. In this case the remainder of the sample * can be read in a subsequent call passing the same {@link SampleHolder}. * * @param inputStream The stream from which to read the sample. * @param out The holder into which to write the sample. * @return A combination of RESULT_* flags indicating the result of the call. */ private int readOrSkipSample(NonBlockingInputStream inputStream, SampleHolder out) { if (sampleIndex >= fragmentRun.length) { // We've run out of samples in the current mdat atom. enterState(STATE_READING_ATOM_HEADER); return 0; } int sampleSize = fragmentRun.sampleSizeTable[sampleIndex]; if (inputStream.getAvailableByteCount() < sampleSize) { return RESULT_NEED_MORE_DATA; } if (sampleIndex < pendingSeekSyncSampleIndex) { return skipSample(inputStream, sampleSize); } return readSample(inputStream, sampleSize, out); } private int skipSample(NonBlockingInputStream inputStream, int sampleSize) { if (fragmentRun.definesEncryptionData) { ParsableByteArray sampleEncryptionData = fragmentRun.sampleEncryptionData; TrackEncryptionBox encryptionBox = track.sampleDescriptionEncryptionBoxes[fragmentRun.sampleDescriptionIndex]; int vectorSize = encryptionBox.initializationVectorSize; boolean subsampleEncryption = fragmentRun.sampleHasSubsampleEncryptionTable[sampleIndex]; sampleEncryptionData.skip(vectorSize); int subsampleCount = subsampleEncryption ? sampleEncryptionData.readUnsignedShort() : 1; if (subsampleEncryption) { sampleEncryptionData.skip((2 + 4) * subsampleCount); } } inputStream.skip(sampleSize); sampleIndex++; enterState(STATE_READING_SAMPLE); return 0; } @SuppressLint("InlinedApi") private int readSample(NonBlockingInputStream inputStream, int sampleSize, SampleHolder out) { if (out == null) { return RESULT_NEED_SAMPLE_HOLDER; } ByteBuffer outputData = out.data; out.timeUs = fragmentRun.getSamplePresentationTime(sampleIndex) * 1000L; out.flags = 0; if (fragmentRun.sampleIsSyncFrameTable[sampleIndex]) { out.flags |= MediaExtractor.SAMPLE_FLAG_SYNC; lastSyncSampleIndex = sampleIndex; } if (out.allowDataBufferReplacement && (out.data == null || out.data.capacity() < sampleSize)) { outputData = ByteBuffer.allocate(sampleSize); out.data = outputData; } if (fragmentRun.definesEncryptionData) { readSampleEncryptionData(fragmentRun.sampleEncryptionData, out); } if (outputData == null) { inputStream.skip(sampleSize); out.size = 0; } else { inputStream.read(outputData, sampleSize); if (track.type == Track.TYPE_VIDEO) { // The mp4 file contains length-prefixed NAL units, but the decoder wants start code // delimited content. Replace length prefixes with start codes. int sampleOffset = outputData.position() - sampleSize; int position = sampleOffset; while (position < sampleOffset + sampleSize) { outputData.position(position); int length = readUnsignedIntToInt(outputData); outputData.position(position); outputData.put(NAL_START_CODE); position += length + 4; } outputData.position(sampleOffset + sampleSize); } out.size = sampleSize; } sampleIndex++; enterState(STATE_READING_SAMPLE); return RESULT_READ_SAMPLE; } @SuppressLint("InlinedApi") private void readSampleEncryptionData(ParsableByteArray sampleEncryptionData, SampleHolder out) { TrackEncryptionBox encryptionBox = track.sampleDescriptionEncryptionBoxes[fragmentRun.sampleDescriptionIndex]; byte[] keyId = encryptionBox.keyId; boolean isEncrypted = encryptionBox.isEncrypted; int vectorSize = encryptionBox.initializationVectorSize; boolean subsampleEncryption = fragmentRun.sampleHasSubsampleEncryptionTable[sampleIndex]; byte[] vector = out.cryptoInfo.iv; if (vector == null || vector.length != 16) { vector = new byte[16]; } sampleEncryptionData.readBytes(vector, 0, vectorSize); int subsampleCount = subsampleEncryption ? sampleEncryptionData.readUnsignedShort() : 1; int[] clearDataSizes = out.cryptoInfo.numBytesOfClearData; if (clearDataSizes == null || clearDataSizes.length < subsampleCount) { clearDataSizes = new int[subsampleCount]; } int[] encryptedDataSizes = out.cryptoInfo.numBytesOfEncryptedData; if (encryptedDataSizes == null || encryptedDataSizes.length < subsampleCount) { encryptedDataSizes = new int[subsampleCount]; } if (subsampleEncryption) { for (int i = 0; i < subsampleCount; i++) { clearDataSizes[i] = sampleEncryptionData.readUnsignedShort(); encryptedDataSizes[i] = sampleEncryptionData.readUnsignedIntToInt(); } } else { clearDataSizes[0] = 0; encryptedDataSizes[0] = fragmentRun.sampleSizeTable[sampleIndex]; } out.cryptoInfo.set(subsampleCount, clearDataSizes, encryptedDataSizes, keyId, vector, isEncrypted ? MediaCodec.CRYPTO_MODE_AES_CTR : MediaCodec.CRYPTO_MODE_UNENCRYPTED); if (isEncrypted) { out.flags |= MediaExtractor.SAMPLE_FLAG_ENCRYPTED; } } /** * Parses the version number out of the additional integer component of a full atom. */ private static int parseFullAtomVersion(int fullAtomInt) { return 0x000000FF & (fullAtomInt >> 24); } /** * Parses the atom flags out of the additional integer component of a full atom. */ private static int parseFullAtomFlags(int fullAtomInt) { return 0x00FFFFFF & fullAtomInt; } /** * Reads an unsigned integer into an integer. This method is suitable for use when it can be * assumed that the top bit will always be set to zero. * * @throws IllegalArgumentException If the top bit of the input data is set. */ private static int readUnsignedIntToInt(ByteBuffer data) { int result = 0xFF & data.get(); for (int i = 1; i < 4; i++) { result <<= 8; result |= 0xFF & data.get(); } if (result < 0) { throw new IllegalArgumentException("Top bit not zero: " + result); } return result; } }