13c91b464f
preparation of the template; integration of the animated background; merge of the background style and pcp style
843 lines
27 KiB
TypeScript
843 lines
27 KiB
TypeScript
import { Box, free, ftyp, mdat, mfra, moof, moov } from './box';
|
|
import { deepClone, intoTimescale, last, TransformationMatrix } from './misc';
|
|
import { ArrayBufferTarget, FileSystemWritableFileStreamTarget, StreamTarget, Target } from './target';
|
|
import {
|
|
Writer,
|
|
ArrayBufferTargetWriter,
|
|
StreamTargetWriter,
|
|
ChunkedStreamTargetWriter,
|
|
FileSystemWritableFileStreamTargetWriter
|
|
} from './writer';
|
|
|
|
// Timescale (ticks per second) used for the overall movie header; tracks carry their own timescales.
export const GLOBAL_TIMESCALE = 1000;
// Video codecs this muxer can write into an MP4 container.
export const SUPPORTED_VIDEO_CODECS = ['avc', 'hevc', 'vp9', 'av1'] as const;
// Audio codecs this muxer can write into an MP4 container.
export const SUPPORTED_AUDIO_CODECS = ['aac', 'opus'] as const;
const TIMESTAMP_OFFSET = 2_082_844_800; // Seconds between Jan 1 1904 and Jan 1 1970
// Policies for handling the first sample's timestamp; see #validateTimestamp for their semantics.
const FIRST_TIMESTAMP_BEHAVIORS = ['strict', 'offset', 'cross-track-offset'] as const;
|
|
|
|
/** Options describing the single video track of the output file. */
interface VideoOptions {
	codec: typeof SUPPORTED_VIDEO_CODECS[number],
	// Coded frame dimensions in pixels.
	width: number,
	height: number,
	// Either a simple rotation in degrees or a full 3x3 transformation matrix (9 fixed-point entries).
	rotation?: 0 | 90 | 180 | 270 | TransformationMatrix
}
|
|
|
|
/** Options describing the single audio track of the output file. */
interface AudioOptions {
	codec: typeof SUPPORTED_AUDIO_CODECS[number],
	numberOfChannels: number,
	// Sample rate in hertz; also used as the audio track's timescale.
	sampleRate: number
}
|
|
|
|
/**
 * Configuration accepted by the Muxer constructor.
 *
 * fastStart controls how the moov box ends up near the start of the file:
 *  - false: moov is appended at the end (no fast start).
 *  - 'in-memory': everything is buffered and written out at finalization with moov first.
 *  - 'fragmented': a fragmented MP4 (moof/mdat pairs) is produced.
 *  - object: space for moov is reserved up front, sized from the expected chunk counts.
 */
type Mp4MuxerOptions<T extends Target> = {
	target: T,
	video?: VideoOptions,
	audio?: AudioOptions,
	fastStart: false | 'in-memory' | 'fragmented' | {
		// Upper bounds on the number of chunks that will be added; exceeding them throws.
		expectedVideoChunks?: number,
		expectedAudioChunks?: number
	},
	// How the first (possibly non-zero) timestamp is treated; defaults to 'strict'.
	firstTimestampBehavior?: typeof FIRST_TIMESTAMP_BEHAVIORS[number]
};
|
|
|
|
/**
 * Internal mutable state for one track (video or audio) while muxing.
 * Holds both the queued samples and the running sample-table bookkeeping
 * (stts / ctts / stsc run-length tables) used when writing the moov box.
 */
export interface Track {
	// Track ID as written into the file (1-based).
	id: number,
	info: {
		type: 'video',
		codec: VideoOptions['codec'],
		width: number,
		height: number,
		rotation: 0 | 90 | 180 | 270 | TransformationMatrix,
		decoderConfig: VideoDecoderConfig
	} | {
		type: 'audio',
		codec: AudioOptions['codec'],
		numberOfChannels: number,
		sampleRate: number,
		decoderConfig: AudioDecoderConfig
	},
	// Ticks per second for this track's timestamps.
	timescale: number,
	samples: Sample[],

	// Decode timestamps in seconds; lastDecodeTimestamp starts at -1 ("no sample yet").
	firstDecodeTimestamp: number,
	lastDecodeTimestamp: number,

	// Run-length-encoded stts (decoding deltas) and ctts (composition offsets) tables.
	timeToSampleTable: { sampleCount: number, sampleDelta: number }[];
	compositionTimeOffsetTable: { sampleCount: number, sampleCompositionTimeOffset: number }[];
	// Running DTS in timescale units of the most recent sample; null until the first sample.
	lastTimescaleUnits: number,
	lastSample: Sample,

	finalizedChunks: Chunk[],
	currentChunk: Chunk,
	// Run-length-encoded stsc table (samples-per-chunk runs); firstChunk is 1-indexed.
	compactlyCodedChunkTable: {
		firstChunk: number,
		samplesPerChunk: number
	}[]
}
|
|
|
|
// Narrowed Track variants, discriminated on info.type.
export type VideoTrack = Track & { info: { type: 'video' } };
export type AudioTrack = Track & { info: { type: 'audio' } };
|
|
|
|
/** One encoded media sample (frame or audio packet), with timestamps in seconds. */
export interface Sample {
	presentationTimestamp: number,
	decodeTimestamp: number,
	duration: number,
	// Raw encoded bytes; set to null once written so they can be GC'd.
	data: Uint8Array,
	size: number,
	type: 'key' | 'delta',
	// DTS delta to the following sample, in track timescale units. Initialized from the
	// sample's own duration and refined once the next sample arrives.
	timescaleUnitsToNextSample: number
}
|
|
|
|
/** A run of consecutive samples written out together (one stsc/stco chunk or one fragment's worth). */
interface Chunk {
	// Presentation timestamp (seconds) of the first sample in the chunk.
	startTimestamp: number,
	samples: Sample[],
	// Byte offset of the chunk's data in the file; filled in when the chunk is written.
	offset?: number,
	// In the case of a fragmented file, this indicates the position of the moof box pointing to the data in this chunk
	moofOffset?: number
}
|
|
|
|
/** Muxes encoded video/audio chunks into an ISO BMFF (MP4) file written to the given target. */
export class Muxer<T extends Target> {
	target: T;

	#options: Mp4MuxerOptions<T>;
	#writer: Writer;
	// Byte position right after the ftyp box; moov is written here for object-form fastStart.
	#ftypSize: number;
	#mdat: Box;

	#videoTrack: Track = null;
	#audioTrack: Track = null;
	// Creation time in the MP4 epoch (seconds since Jan 1 1904).
	#creationTime = Math.floor(Date.now() / 1000) + TIMESTAMP_OFFSET;
	// All finalized chunks across both tracks, in write order.
	#finalizedChunks: Chunk[] = [];

	// Fields for fragmented MP4:
	#nextFragmentNumber = 1;
	// Samples held back so fragments can interleave the two tracks in decode order.
	#videoSampleQueue: Sample[] = [];
	#audioSampleQueue: Sample[] = [];

	#finalized = false;
|
/**
 * Creates a muxer for the given options, validating them, choosing the writer
 * implementation that matches the target, and writing the file header.
 */
constructor(options: Mp4MuxerOptions<T>) {
	this.#validateOptions(options);

	// Clone mutable option sub-objects so outside mutation can't affect us mid-mux:
	options.video = deepClone(options.video);
	options.audio = deepClone(options.audio);
	options.fastStart = deepClone(options.fastStart);

	this.target = options.target;
	this.#options = {
		firstTimestampBehavior: 'strict',
		...options
	};

	// Pick the writer implementation matching the concrete target type.
	let writer: Writer;
	if (options.target instanceof ArrayBufferTarget) {
		writer = new ArrayBufferTargetWriter(options.target);
	} else if (options.target instanceof StreamTarget) {
		writer = options.target.options?.chunked
			? new ChunkedStreamTargetWriter(options.target)
			: new StreamTargetWriter(options.target);
	} else if (options.target instanceof FileSystemWritableFileStreamTarget) {
		writer = new FileSystemWritableFileStreamTargetWriter(options.target);
	} else {
		throw new Error(`Invalid target: ${options.target}`);
	}
	this.#writer = writer;

	this.#prepareTracks();
	this.#writeHeader();
}
|
|
|
/** Validates the user-supplied options; throws a descriptive error on the first problem found. */
#validateOptions(options: Mp4MuxerOptions<T>) {
	const { video, audio, fastStart, firstTimestampBehavior } = options;

	if (video) {
		if (!SUPPORTED_VIDEO_CODECS.includes(video.codec)) {
			throw new Error(`Unsupported video codec: ${video.codec}`);
		}

		// Rotation may be a plain angle or a full 3x3 matrix; validate whichever form was given.
		const rotation = video.rotation;
		if (typeof rotation === 'number' && ![0, 90, 180, 270].includes(rotation)) {
			throw new Error(`Invalid video rotation: ${rotation}. Has to be 0, 90, 180 or 270.`);
		} else if (
			Array.isArray(rotation)
			&& (rotation.length !== 9 || rotation.some(entry => typeof entry !== 'number'))
		) {
			throw new Error(`Invalid video transformation matrix: ${rotation.join()}`);
		}
	}

	if (audio && !SUPPORTED_AUDIO_CODECS.includes(audio.codec)) {
		throw new Error(`Unsupported audio codec: ${audio.codec}`);
	}

	if (firstTimestampBehavior && !FIRST_TIMESTAMP_BEHAVIORS.includes(firstTimestampBehavior)) {
		throw new Error(`Invalid first timestamp behavior: ${firstTimestampBehavior}`);
	}

	if (typeof fastStart === 'object') {
		// Object-form fastStart needs an expected chunk count per declared track to size the reserved moov.
		if (video && fastStart.expectedVideoChunks === undefined) {
			throw new Error(`'fastStart' is an object but is missing property 'expectedVideoChunks'.`);
		}
		if (audio && fastStart.expectedAudioChunks === undefined) {
			throw new Error(`'fastStart' is an object but is missing property 'expectedAudioChunks'.`);
		}
	} else if (![false, 'in-memory', 'fragmented'].includes(fastStart)) {
		throw new Error(`'fastStart' option must be false, 'in-memory', 'fragmented' or an object.`);
	}
}
|
|
|
|
/**
 * Writes the ftyp box and prepares the mdat box according to the fastStart mode.
 * For 'fragmented', nothing beyond ftyp is written yet (moov needs decoder configs first).
 */
#writeHeader() {
	const fastStart = this.#options.fastStart;

	this.#writer.writeBox(ftyp({
		holdsAvc: this.#options.video?.codec === 'avc',
		fragmented: fastStart === 'fragmented'
	}));

	this.#ftypSize = this.#writer.pos;

	if (fastStart === 'in-memory') {
		// Data is buffered; the mdat header gets written at finalization.
		this.#mdat = mdat(false);
	} else if (fastStart !== 'fragmented') {
		// We write the moov box once we write out the first fragment to make sure we get the decoder configs
		// (fragmented case handled above by doing nothing here).
		if (typeof fastStart === 'object') {
			// Reserve room for the moov box so it can be placed up front later.
			const reservedBytes = this.#computeMoovSizeUpperBound();
			this.#writer.seek(this.#writer.pos + reservedBytes);
		}

		this.#mdat = mdat(true); // Reserve large size by default, can refine this when finalizing.
		this.#writer.writeBox(this.#mdat);
	}

	this.#maybeFlushStreamingTargetWriter();
}
|
|
|
|
/**
 * Computes a worst-case byte size for the moov box, given the expected chunk counts
 * from object-form fastStart. Used to reserve space so moov can precede mdat.
 */
#computeMoovSizeUpperBound() {
	if (typeof this.#options.fastStart !== 'object') return;

	const expectedCounts = [
		this.#options.fastStart.expectedVideoChunks,
		this.#options.fastStart.expectedAudioChunks
	];

	let upperBound = 0;
	for (const count of expectedCounts) {
		if (!count) continue;

		// Given the max allowed sample count, compute the space they'll take up in the Sample Table Box,
		// assuming the worst case for each individual box. Compactly coded tables (stts, stsc) can have
		// at most ceil(2/3 * n) runs.
		const maxCompactRuns = Math.ceil(2/3 * count);

		upperBound += (4 + 4) * maxCompactRuns;     // stts box
		upperBound += 4 * count;                    // stss box - 1 entry per sample
		upperBound += (4 + 4 + 4) * maxCompactRuns; // stsc box
		upperBound += 4 * count;                    // stsz box - 1 entry per sample
		upperBound += 8 * count;                    // co64 box - 1 sample per chunk, 64-bit offsets
	}

	upperBound += 4096; // Assume a generous 4 kB for everything else: Track metadata, codec descriptors, etc.

	return upperBound;
}
|
|
|
|
/**
 * Instantiates the internal Track state for each declared track.
 * Video gets ID 1; audio gets ID 2 when video exists, otherwise ID 1.
 */
#prepareTracks() {
	if (this.#options.video) {
		this.#videoTrack = {
			id: 1,
			info: {
				type: 'video',
				codec: this.#options.video.codec,
				width: this.#options.video.width,
				height: this.#options.video.height,
				rotation: this.#options.video.rotation ?? 0,
				decoderConfig: null // Filled in from chunk metadata later
			},
			timescale: 11520, // Timescale used by FFmpeg, contains many common frame rates as factors
			samples: [],
			firstDecodeTimestamp: undefined,
			lastDecodeTimestamp: -1,
			lastTimescaleUnits: null,
			lastSample: null,
			timeToSampleTable: [],
			compositionTimeOffsetTable: [],
			finalizedChunks: [],
			currentChunk: null,
			compactlyCodedChunkTable: []
		};
	}

	if (this.#options.audio) {
		// For the case that we don't get any further decoder details, we can still make a pretty educated guess:
		const fallbackCodecPrivate = this.#generateMpeg4AudioSpecificConfig(
			2, // Object type for AAC-LC, since it's the most common
			this.#options.audio.sampleRate,
			this.#options.audio.numberOfChannels
		);

		this.#audioTrack = {
			id: this.#options.video ? 2 : 1,
			info: {
				type: 'audio',
				codec: this.#options.audio.codec,
				numberOfChannels: this.#options.audio.numberOfChannels,
				sampleRate: this.#options.audio.sampleRate,
				decoderConfig: {
					codec: this.#options.audio.codec,
					description: fallbackCodecPrivate,
					numberOfChannels: this.#options.audio.numberOfChannels,
					sampleRate: this.#options.audio.sampleRate
				}
			},
			timescale: this.#options.audio.sampleRate,
			samples: [],
			firstDecodeTimestamp: undefined,
			lastDecodeTimestamp: -1,
			lastTimescaleUnits: null,
			lastSample: null,
			timeToSampleTable: [],
			compositionTimeOffsetTable: [],
			finalizedChunks: [],
			currentChunk: null,
			compactlyCodedChunkTable: []
		};
	}
}
|
|
|
|
// https://wiki.multimedia.cx/index.php/MPEG-4_Audio
|
|
#generateMpeg4AudioSpecificConfig(objectType: number, sampleRate: number, numberOfChannels: number) {
|
|
let frequencyIndices =
|
|
[96000, 88200, 64000, 48000, 44100, 32000, 24000, 22050, 16000, 12000, 11025, 8000, 7350];
|
|
let frequencyIndex = frequencyIndices.indexOf(sampleRate);
|
|
let channelConfig = numberOfChannels;
|
|
|
|
let configBits = '';
|
|
configBits += objectType.toString(2).padStart(5, '0');
|
|
|
|
configBits += frequencyIndex.toString(2).padStart(4, '0');
|
|
if (frequencyIndex === 15) configBits += sampleRate.toString(2).padStart(24, '0');
|
|
|
|
configBits += channelConfig.toString(2).padStart(4, '0');
|
|
|
|
// Pad with 0 bits to fit into a multiple of bytes
|
|
let paddingLength = Math.ceil(configBits.length / 8) * 8;
|
|
configBits = configBits.padEnd(paddingLength, '0');
|
|
|
|
let configBytes = new Uint8Array(configBits.length / 8);
|
|
for (let i = 0; i < configBits.length; i += 8) {
|
|
configBytes[i / 8] = parseInt(configBits.slice(i, i + 8), 2);
|
|
}
|
|
|
|
return configBytes;
|
|
}
|
|
|
|
/**
 * Adds an EncodedVideoChunk (WebCodecs) to the file. Copies the chunk's bytes and
 * delegates to addVideoChunkRaw. An explicit timestamp overrides the chunk's own.
 */
addVideoChunk(
	sample: EncodedVideoChunk,
	meta?: EncodedVideoChunkMetadata,
	timestamp?: number,
	compositionTimeOffset?: number
) {
	const bytes = new Uint8Array(sample.byteLength);
	sample.copyTo(bytes);

	this.addVideoChunkRaw(
		bytes,
		sample.type,
		timestamp ?? sample.timestamp,
		sample.duration,
		meta,
		compositionTimeOffset
	);
}
|
|
|
|
/**
 * Adds a raw encoded video sample. Timestamps/durations are in microseconds.
 * In fragmented mode, samples of both tracks are interleaved in decode order,
 * so the sample may be queued until enough audio has arrived.
 */
addVideoChunkRaw(
	data: Uint8Array,
	type: 'key' | 'delta',
	timestamp: number,
	duration: number,
	meta?: EncodedVideoChunkMetadata,
	compositionTimeOffset?: number
) {
	this.#ensureNotFinalized();
	if (!this.#options.video) throw new Error('No video track declared.');

	// Enforce the chunk budget promised via object-form fastStart.
	if (
		typeof this.#options.fastStart === 'object'
		&& this.#videoTrack.samples.length === this.#options.fastStart.expectedVideoChunks
	) {
		throw new Error(
			`Cannot add more video chunks than specified in 'fastStart' (${
				this.#options.fastStart.expectedVideoChunks
			}).`
		);
	}

	const newSample = this.#createSampleForTrack(
		this.#videoTrack, data, type, timestamp, duration, meta, compositionTimeOffset
	);

	// Check if we need to interleave the samples in the case of a fragmented file
	if (this.#options.fastStart === 'fragmented' && this.#audioTrack) {
		// Flush all queued audio samples that decode no later than this video sample.
		while (
			this.#audioSampleQueue.length > 0
			&& this.#audioSampleQueue[0].decodeTimestamp <= newSample.decodeTimestamp
		) {
			this.#addSampleToTrack(this.#audioTrack, this.#audioSampleQueue.shift());
		}

		// Depending on the last audio sample, either add the video sample to the file or enqueue it
		if (newSample.decodeTimestamp <= this.#audioTrack.lastDecodeTimestamp) {
			this.#addSampleToTrack(this.#videoTrack, newSample);
		} else {
			this.#videoSampleQueue.push(newSample);
		}
	} else {
		this.#addSampleToTrack(this.#videoTrack, newSample);
	}
}
|
|
|
|
/**
 * Adds an EncodedAudioChunk (WebCodecs) to the file. Copies the chunk's bytes and
 * delegates to addAudioChunkRaw. An explicit timestamp overrides the chunk's own.
 */
addAudioChunk(sample: EncodedAudioChunk, meta?: EncodedAudioChunkMetadata, timestamp?: number) {
	const bytes = new Uint8Array(sample.byteLength);
	sample.copyTo(bytes);

	this.addAudioChunkRaw(bytes, sample.type, timestamp ?? sample.timestamp, sample.duration, meta);
}
|
|
|
|
/**
 * Adds a raw encoded audio sample. Timestamps/durations are in microseconds.
 * In fragmented mode, samples of both tracks are interleaved in decode order,
 * so the sample may be queued until enough video has arrived.
 */
addAudioChunkRaw(
	data: Uint8Array,
	type: 'key' | 'delta',
	timestamp: number,
	duration: number,
	meta?: EncodedAudioChunkMetadata
) {
	this.#ensureNotFinalized();
	if (!this.#options.audio) throw new Error('No audio track declared.');

	// Enforce the chunk budget promised via object-form fastStart.
	if (
		typeof this.#options.fastStart === 'object'
		&& this.#audioTrack.samples.length === this.#options.fastStart.expectedAudioChunks
	) {
		throw new Error(
			`Cannot add more audio chunks than specified in 'fastStart' (${
				this.#options.fastStart.expectedAudioChunks
			}).`
		);
	}

	const newSample = this.#createSampleForTrack(this.#audioTrack, data, type, timestamp, duration, meta);

	// Check if we need to interleave the samples in the case of a fragmented file
	if (this.#options.fastStart === 'fragmented' && this.#videoTrack) {
		// Flush all queued video samples that decode no later than this audio sample.
		while (
			this.#videoSampleQueue.length > 0
			&& this.#videoSampleQueue[0].decodeTimestamp <= newSample.decodeTimestamp
		) {
			this.#addSampleToTrack(this.#videoTrack, this.#videoSampleQueue.shift());
		}

		// Depending on the last video sample, either add the audio sample to the file or enqueue it
		if (newSample.decodeTimestamp <= this.#videoTrack.lastDecodeTimestamp) {
			this.#addSampleToTrack(this.#audioTrack, newSample);
		} else {
			this.#audioSampleQueue.push(newSample);
		}
	} else {
		this.#addSampleToTrack(this.#audioTrack, newSample);
	}
}
|
|
|
|
/**
 * Converts raw chunk data plus microsecond timestamps into an internal Sample
 * (timestamps in seconds), validating/adjusting timestamps and absorbing any
 * decoder config carried in the chunk metadata into the track.
 */
#createSampleForTrack(
	track: Track,
	data: Uint8Array,
	type: 'key' | 'delta',
	timestamp: number,
	duration: number,
	meta?: EncodedVideoChunkMetadata | EncodedAudioChunkMetadata,
	compositionTimeOffset?: number
) {
	// Convert microseconds -> seconds; DTS = PTS minus the composition offset.
	const rawPts = timestamp / 1e6;
	const rawDts = (timestamp - (compositionTimeOffset ?? 0)) / 1e6;
	const durationInSeconds = duration / 1e6;

	const { presentationTimestamp, decodeTimestamp } = this.#validateTimestamp(rawPts, rawDts, track);

	if (meta?.decoderConfig) {
		// First config is taken as-is; later ones are merged onto it.
		if (track.info.decoderConfig === null) {
			track.info.decoderConfig = meta.decoderConfig;
		} else {
			Object.assign(track.info.decoderConfig, meta.decoderConfig);
		}
	}

	const sample: Sample = {
		presentationTimestamp,
		decodeTimestamp,
		duration: durationInSeconds,
		data,
		size: data.byteLength,
		type,
		// Will be refined once the next sample comes in
		timescaleUnitsToNextSample: intoTimescale(durationInSeconds, track.timescale)
	};

	return sample;
}
|
|
|
|
/**
 * Commits a sample to its track: updates the run-length-encoded stts/ctts tables
 * (non-fragmented mode only), refines the previous sample's delta, and starts or
 * extends the track's current chunk. In fragmented mode, a keyframe on the primary
 * track after >= 1 s of chunk duration triggers finalization of the current fragment.
 */
#addSampleToTrack(
	track: Track,
	sample: Sample
) {
	if (this.#options.fastStart !== 'fragmented') {
		track.samples.push(sample);
	}

	const sampleCompositionTimeOffset =
		intoTimescale(sample.presentationTimestamp - sample.decodeTimestamp, track.timescale);

	if (track.lastTimescaleUnits !== null) {
		// Not the first sample: now that we know this sample's DTS, we can fix up the
		// previous sample's delta (its duration was only a guess until now).
		let timescaleUnits = intoTimescale(sample.decodeTimestamp, track.timescale, false);
		let delta = Math.round(timescaleUnits - track.lastTimescaleUnits);
		track.lastTimescaleUnits += delta;
		track.lastSample.timescaleUnitsToNextSample = delta;

		if (this.#options.fastStart !== 'fragmented') {
			let lastTableEntry = last(track.timeToSampleTable);
			if (lastTableEntry.sampleCount === 1) {
				// If we hit this case, we're the second sample
				lastTableEntry.sampleDelta = delta;
				lastTableEntry.sampleCount++;
			} else if (lastTableEntry.sampleDelta === delta) {
				// Simply increment the count
				lastTableEntry.sampleCount++;
			} else {
				// The delta has changed, subtract one from the previous run and create a new run with the new delta
				lastTableEntry.sampleCount--;
				track.timeToSampleTable.push({
					sampleCount: 2,
					sampleDelta: delta
				});
			}

			const lastCompositionTimeOffsetTableEntry = last(track.compositionTimeOffsetTable);
			if (lastCompositionTimeOffsetTableEntry.sampleCompositionTimeOffset === sampleCompositionTimeOffset) {
				// Simply increment the count
				lastCompositionTimeOffsetTableEntry.sampleCount++;
			} else {
				// The composition time offset has changed, so create a new entry with the new composition time
				// offset
				track.compositionTimeOffsetTable.push({
					sampleCount: 1,
					sampleCompositionTimeOffset: sampleCompositionTimeOffset
				});
			}
		}
	} else {
		// First sample of the track: start the running DTS counter and seed both tables.
		track.lastTimescaleUnits = 0;

		if (this.#options.fastStart !== 'fragmented') {
			track.timeToSampleTable.push({
				sampleCount: 1,
				sampleDelta: intoTimescale(sample.duration, track.timescale)
			});
			track.compositionTimeOffsetTable.push({
				sampleCount: 1,
				sampleCompositionTimeOffset: sampleCompositionTimeOffset
			});
		}
	}

	track.lastSample = sample;

	// Decide whether this sample starts a new chunk (or fragment).
	let beginNewChunk = false;
	if (!track.currentChunk) {
		beginNewChunk = true;
	} else {
		let currentChunkDuration = sample.presentationTimestamp - track.currentChunk.startTimestamp;

		if (this.#options.fastStart === 'fragmented') {
			let mostImportantTrack = this.#videoTrack ?? this.#audioTrack;
			// Only cut fragments at keyframes of the primary track, and only once the
			// current fragment spans at least one second.
			if (track === mostImportantTrack && sample.type === 'key' && currentChunkDuration >= 1.0) {
				beginNewChunk = true;
				this.#finalizeFragment();
			}
		} else {
			beginNewChunk = currentChunkDuration >= 0.5; // Chunk is long enough, we need a new one
		}
	}

	if (beginNewChunk) {
		if (track.currentChunk) {
			this.#finalizeCurrentChunk(track);
		}

		track.currentChunk = {
			startTimestamp: sample.presentationTimestamp,
			samples: []
		};
	}

	track.currentChunk.samples.push(sample);
}
|
|
|
|
/**
 * Validates and (depending on firstTimestampBehavior) adjusts a sample's timestamps.
 *
 * - 'strict': the very first sample of a track must have DTS 0, otherwise we throw.
 * - 'offset': all timestamps are shifted so the track's first DTS becomes 0.
 * - 'cross-track-offset': like 'offset', but both tracks are shifted by the same
 *   amount (the earliest first DTS across tracks) to preserve A/V sync.
 *
 * Also enforces that DTS is monotonically increasing within the track.
 * Timestamps are in seconds; returns the possibly-adjusted pair.
 */
#validateTimestamp(presentationTimestamp: number, decodeTimestamp: number, track: Track) {
	// Check first timestamp behavior
	const strictTimestampBehavior = this.#options.firstTimestampBehavior === 'strict';
	const noLastDecodeTimestamp = track.lastDecodeTimestamp === -1;
	const timestampNonZero = decodeTimestamp !== 0;
	if (strictTimestampBehavior && noLastDecodeTimestamp && timestampNonZero) {
		// Fixed: the concatenated message segments were missing separating spaces
		// ("…).Non-zero…", "…age of thedocument…") and dropped the word "not",
		// inverting the intended meaning ("probably not what you want").
		throw new Error(
			`The first chunk for your media track must have a timestamp of 0 (received DTS=${decodeTimestamp}). ` +
			`Non-zero first timestamps are often caused by directly piping frames or audio data from a ` +
			`MediaStreamTrack into the encoder. Their timestamps are typically relative to the age of the ` +
			`document, which is probably not what you want.\n\nIf you want to offset all timestamps of a track ` +
			`such that the first one is zero, set firstTimestampBehavior: 'offset' in the options.\n`
		);
	} else if (
		this.#options.firstTimestampBehavior === 'offset' ||
		this.#options.firstTimestampBehavior === 'cross-track-offset'
	) {
		if (track.firstDecodeTimestamp === undefined) {
			track.firstDecodeTimestamp = decodeTimestamp;
		}

		let baseDecodeTimestamp: number;
		if (this.#options.firstTimestampBehavior === 'offset') {
			baseDecodeTimestamp = track.firstDecodeTimestamp;
		} else {
			// Since each track may have its firstDecodeTimestamp set independently, but the tracks' timestamps come
			// from the same clock, we should subtract the earlier of the (up to) two tracks' first timestamps to
			// ensure A/V sync.
			baseDecodeTimestamp = Math.min(
				this.#videoTrack?.firstDecodeTimestamp ?? Infinity,
				this.#audioTrack?.firstDecodeTimestamp ?? Infinity
			);
		}

		decodeTimestamp -= baseDecodeTimestamp;
		presentationTimestamp -= baseDecodeTimestamp;
	}

	if (decodeTimestamp < track.lastDecodeTimestamp) {
		throw new Error(
			`Timestamps must be monotonically increasing ` +
			`(DTS went from ${track.lastDecodeTimestamp * 1e6} to ${decodeTimestamp * 1e6}).`
		);
	}

	track.lastDecodeTimestamp = decodeTimestamp;

	return { presentationTimestamp, decodeTimestamp };
}
|
|
|
|
/**
 * Finalizes the track's current chunk (non-fragmented modes only): records it in the
 * chunk tables, extends the run-length-encoded stsc table, and — unless running
 * 'in-memory' — writes the chunk's sample data out and releases the sample buffers.
 */
#finalizeCurrentChunk(track: Track) {
	if (this.#options.fastStart === 'fragmented') {
		// Fixed: error message was missing the word "if", making it ungrammatical.
		throw new Error("Can't finalize individual chunks if 'fastStart' is set to 'fragmented'.");
	}

	if (!track.currentChunk) return;

	track.finalizedChunks.push(track.currentChunk);
	this.#finalizedChunks.push(track.currentChunk);

	// Extend the stsc table: only add a run when the samples-per-chunk count changed.
	if (
		track.compactlyCodedChunkTable.length === 0
		|| last(track.compactlyCodedChunkTable).samplesPerChunk !== track.currentChunk.samples.length
	) {
		track.compactlyCodedChunkTable.push({
			firstChunk: track.finalizedChunks.length, // 1-indexed
			samplesPerChunk: track.currentChunk.samples.length
		});
	}

	if (this.#options.fastStart === 'in-memory') {
		track.currentChunk.offset = 0; // We'll compute the proper offset when finalizing
		return;
	}

	// Write out the data
	track.currentChunk.offset = this.#writer.pos;
	for (let sample of track.currentChunk.samples) {
		this.#writer.write(sample.data);
		sample.data = null; // Can be GC'd
	}

	this.#maybeFlushStreamingTargetWriter();
}
|
|
|
|
/**
 * Writes out one fragment (moof + mdat) containing the current chunk of every track
 * that has one. The moof is written twice: once up front (chunk offsets unknown), and
 * again in place after the sample data has been written and the real offsets are known.
 * The first fragment is preceded by the moov box, which by then has all decoder configs.
 */
#finalizeFragment(flushStreamingWriter = true) {
	if (this.#options.fastStart !== 'fragmented') {
		throw new Error("Can't finalize a fragment unless 'fastStart' is set to 'fragmented'.");
	}

	let tracks = [this.#videoTrack, this.#audioTrack].filter((track) => track && track.currentChunk);
	if (tracks.length === 0) return;

	let fragmentNumber = this.#nextFragmentNumber++;

	if (fragmentNumber === 1) {
		// Write the moov box now that we have all decoder configs
		let movieBox = moov(tracks, this.#creationTime, true);
		this.#writer.writeBox(movieBox);
	}

	// Write out an initial moof box; will be overwritten later once actual chunk offsets are known
	let moofOffset = this.#writer.pos;
	let moofBox = moof(fragmentNumber, tracks);
	this.#writer.writeBox(moofBox);

	// Create the mdat box
	{
		let mdatBox = mdat(false); // Initially assume no fragment is larger than 4 GiB
		let totalTrackSampleSize = 0;

		// Compute the size of the mdat box
		for (let track of tracks) {
			for (let sample of track.currentChunk.samples) {
				totalTrackSampleSize += sample.size;
			}
		}

		let mdatSize = this.#writer.measureBox(mdatBox) + totalTrackSampleSize;
		if (mdatSize >= 2**32) {
			// Fragment is larger than 4 GiB, we need to use the large size
			// (the header grows, so the total size must be re-measured).
			mdatBox.largeSize = true;
			mdatSize = this.#writer.measureBox(mdatBox) + totalTrackSampleSize;
		}

		mdatBox.size = mdatSize;
		this.#writer.writeBox(mdatBox);
	}

	// Write sample data
	for (let track of tracks) {
		track.currentChunk.offset = this.#writer.pos;
		track.currentChunk.moofOffset = moofOffset;

		for (let sample of track.currentChunk.samples) {
			this.#writer.write(sample.data);
			sample.data = null; // Can be GC'd
		}
	}

	// Now that we set the actual chunk offsets, fix the moof box
	let endPos = this.#writer.pos;
	this.#writer.seek(this.#writer.offsets.get(moofBox));
	let newMoofBox = moof(fragmentNumber, tracks);
	this.#writer.writeBox(newMoofBox);
	this.#writer.seek(endPos);

	// Retire the per-track current chunks into the finalized lists.
	for (let track of tracks) {
		track.finalizedChunks.push(track.currentChunk);
		this.#finalizedChunks.push(track.currentChunk);
		track.currentChunk = null;
	}

	if (flushStreamingWriter) {
		this.#maybeFlushStreamingTargetWriter();
	}
}
|
|
|
|
/** Flushes buffered output to the target, but only for stream-based writers. */
#maybeFlushStreamingTargetWriter() {
	const writer = this.#writer;
	if (writer instanceof StreamTargetWriter) writer.flush();
}
|
|
|
|
/** Guard used by the add* methods: throws if finalize() has already been called. */
#ensureNotFinalized() {
	if (!this.#finalized) return;
	throw new Error('Cannot add new video or audio chunks after the file has been finalized.');
}
|
|
|
|
/** Finalizes the file, making it ready for use. Must be called after all video and audio chunks have been added. */
finalize() {
	if (this.#finalized) {
		throw new Error('Cannot finalize a muxer more than once.');
	}

	if (this.#options.fastStart === 'fragmented') {
		// Drain any samples still held back by the interleaving queues, then emit the last fragment.
		for (let videoSample of this.#videoSampleQueue) this.#addSampleToTrack(this.#videoTrack, videoSample);
		for (let audioSample of this.#audioSampleQueue) this.#addSampleToTrack(this.#audioTrack, audioSample);

		this.#finalizeFragment(false); // Don't flush the last fragment as we will flush it with the mfra box soon
	} else {
		if (this.#videoTrack) this.#finalizeCurrentChunk(this.#videoTrack);
		if (this.#audioTrack) this.#finalizeCurrentChunk(this.#audioTrack);
	}

	let tracks = [this.#videoTrack, this.#audioTrack].filter(Boolean);

	if (this.#options.fastStart === 'in-memory') {
		let mdatSize: number;

		// We know how many chunks there are, but computing the chunk positions requires an iterative approach:
		// In order to know where the first chunk should go, we first need to know the size of the moov box. But we
		// cannot write a proper moov box without first knowing all chunk positions. So, we generate a tentative
		// moov box with placeholder values (0) for the chunk offsets to be able to compute its size. If it then
		// turns out that appending all chunks exceeds 4 GiB, we need to repeat this process, now with the co64 box
		// being used in the moov box instead, which will make it larger. After that, we definitely know the final
		// size of the moov box and can compute the proper chunk positions.

		for (let i = 0; i < 2; i++) {
			let movieBox = moov(tracks, this.#creationTime);
			let movieBoxSize = this.#writer.measureBox(movieBox);
			mdatSize = this.#writer.measureBox(this.#mdat);
			let currentChunkPos = this.#writer.pos + movieBoxSize + mdatSize;

			// Assign every chunk its offset and accumulate the total mdat payload size.
			for (let chunk of this.#finalizedChunks) {
				chunk.offset = currentChunkPos;
				for (let { data } of chunk.samples) {
					currentChunkPos += data.byteLength;
					mdatSize += data.byteLength;
				}
			}

			// If offsets fit in 32 bits, one pass suffices; otherwise switch mdat to the
			// large-size form and recompute with the bigger headers.
			if (currentChunkPos < 2**32) break;
			if (mdatSize >= 2**32) this.#mdat.largeSize = true;
		}

		let movieBox = moov(tracks, this.#creationTime);
		this.#writer.writeBox(movieBox);

		this.#mdat.size = mdatSize;
		this.#writer.writeBox(this.#mdat);

		// Now write out the actual sample data behind the mdat header.
		for (let chunk of this.#finalizedChunks) {
			for (let sample of chunk.samples) {
				this.#writer.write(sample.data);
				sample.data = null;
			}
		}
	} else if (this.#options.fastStart === 'fragmented') {
		// Append the mfra box to the end of the file for better random access
		let startPos = this.#writer.pos;
		let mfraBox = mfra(tracks);
		this.#writer.writeBox(mfraBox);

		// Patch the 'size' field of the mfro box at the end of the mfra box now that we know its actual size
		let mfraBoxSize = this.#writer.pos - startPos;
		this.#writer.seek(this.#writer.pos - 4);
		this.#writer.writeU32(mfraBoxSize);
	} else {
		// Regular (non-fragmented) file: patch the mdat header with its real size,
		// then write moov either into the reserved slot (object fastStart) or at the end.
		let mdatPos = this.#writer.offsets.get(this.#mdat);
		let mdatSize = this.#writer.pos - mdatPos;
		this.#mdat.size = mdatSize;
		this.#mdat.largeSize = mdatSize >= 2**32; // Only use the large size if we need it
		this.#writer.patchBox(this.#mdat);

		let movieBox = moov(tracks, this.#creationTime);

		if (typeof this.#options.fastStart === 'object') {
			this.#writer.seek(this.#ftypSize);
			this.#writer.writeBox(movieBox);

			// Fill the rest of the reserved region with a free box so the file stays well-formed.
			let remainingBytes = mdatPos - this.#writer.pos;
			this.#writer.writeBox(free(remainingBytes));
		} else {
			this.#writer.writeBox(movieBox);
		}
	}

	this.#maybeFlushStreamingTargetWriter();
	this.#writer.finalize();

	this.#finalized = true;
}
|
|
}
|