https://www.ffmpeg.org/ffmpeg.html
The transcoding process in ffmpeg
for each output can be described by the following diagram:
_______ ______________
| | | |
| input | demuxer | encoded data | decoder
| file | ---------> | packets | -----+
|_______| |______________| |
av_read_frame AVPacket v
---------> _________
| |
| decoded |AVFrame
| frames |
|_________|
________ ______________ |
| | | | |fliter
| output | <-------- | encoded data | <----+
| file | muxer | packets | encoder
|________| |______________|
AVPacket
ffmpeg
calls the libavformat library (containing demuxers) to read input files and get packets containing encoded data from them. When there are multiple input files, ffmpeg
tries to keep them synchronized by tracking lowest timestamp on any active input stream.
Encoded packets are then passed to the decoder (unless streamcopy is selected for the stream, see further for a description). The decoder produces uncompressed frames (raw video/PCM audio/…) which can be processed further by filtering (see next section). After filtering, the frames are passed to the encoder, which encodes them and outputs encoded packets. Finally those are passed to the muxer, which writes the encoded packets to the output file.
Before encoding, ffmpeg
can process raw audio and video frames using filters from the libavfilter library. Several chained filters form a filter graph. ffmpeg
distinguishes between two types of filtergraphs: simple and complex.
Simple filtergraphs are those that have exactly one input and output, both of the same type. In the above diagram they can be represented by simply inserting an additional step between decoding and encoding:
_________ ______________
| | | |
| decoded | | encoded data |
| frames |\ _ | packets |
|_________| \ /||______________|
\ __________ /
simple _\|| | / encoder
filtergraph | filtered |/
| frames |
|__________|
Complex filtergraphs are those which cannot be described as simply a linear processing chain applied to one stream. This is the case, for example, when the graph has more than one input and/or output, or when output stream type is different from input. They can be represented with the following diagram:
_________
| |
| input 0 |\ __________
|_________| \ | |
\ _________ /| output 0 |
\ | | / |__________|
_________ \| complex | /
| | | |/
| input 1 |---->| filter |\
|_________| | | \ __________
/| graph | \ | |
/ | | \| output 1 |
_________ / |_________| |__________|
| | /
| input 2 |/
|_________|
graph LR
AVFormatContext-->AVCodecContext-->AVFilterContext
static AVFormatContext *ifmt_ctx;//DEMUX,比如把输入的input.mp4拆分为audio和video的ES数据
static AVFormatContext *ofmt_ctx;//MUX,比如把编码好的audio/video ES数据打包成output.ts
typedef struct FilteringContext {
AVFilterContext *buffersink_ctx;
AVFilterContext *buffersrc_ctx;
AVFilterGraph *filter_graph;
AVPacket *enc_pkt;
AVFrame *filtered_frame;
} FilteringContext;
static FilteringContext *filter_ctx;
typedef struct StreamContext {
AVCodecContext *dec_ctx;
AVCodecContext *enc_ctx;
AVFrame *dec_frame;
} StreamContext;
static StreamContext *stream_ctx;
int main(int argc, char **argv)
{
int ret;
AVPacket *packet = NULL;
unsigned int stream_index;
unsigned int i;
if (argc != 3) {
av_log(NULL, AV_LOG_ERROR, "Usage: %s <input file> <output file>\n", argv[0]);
return 1;
}
if ((ret = open_input_file(argv[1])) < 0)
goto end;
if ((ret = open_output_file(argv[2])) < 0)
goto end;
if ((ret = init_filters()) < 0)
goto end;
if (!(packet = av_packet_alloc()))
goto end;
/* read all packets */
while (1) {
if ((ret = av_read_frame(ifmt_ctx, packet)) < 0)
break;
stream_index = packet->stream_index;
av_log(NULL, AV_LOG_DEBUG, "Demuxer gave frame of stream_index %u\n",
stream_index);
if (filter_ctx[stream_index].filter_graph) {
StreamContext *stream = &stream_ctx[stream_index];
av_log(NULL, AV_LOG_DEBUG, "Going to reencode&filter the frame\n");
av_packet_rescale_ts(packet,
ifmt_ctx->streams[stream_index]->time_base,
stream->dec_ctx->time_base);
ret = avcodec_send_packet(stream->dec_ctx, packet);
if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Decoding failed\n");
break;
}
while (ret >= 0) {
ret = avcodec_receive_frame(stream->dec_ctx, stream->dec_frame);
if (ret == AVERROR_EOF || ret == AVERROR(EAGAIN))
break;
else if (ret < 0)
goto end;
stream->dec_frame->pts = stream->dec_frame->best_effort_timestamp;
ret = filter_encode_write_frame(stream->dec_frame, stream_index);
if (ret < 0)
goto end;
}
} else {
/* remux this frame without reencoding */
av_packet_rescale_ts(packet,
ifmt_ctx->streams[stream_index]->time_base,
ofmt_ctx->streams[stream_index]->time_base);
ret = av_interleaved_write_frame(ofmt_ctx, packet);
if (ret < 0)
goto end;
}
av_packet_unref(packet);
}
/* flush filters and encoders */
for (i = 0; i < ifmt_ctx->nb_streams; i++) {
/* flush filter */
if (!filter_ctx[i].filter_graph)
continue;
ret = filter_encode_write_frame(NULL, i);
if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Flushing filter failed\n");
goto end;
}
/* flush encoder */
ret = flush_encoder(i);
if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Flushing encoder failed\n");
goto end;
}
}
av_write_trailer(ofmt_ctx);
end:
av_packet_free(&packet);
for (i = 0; i < ifmt_ctx->nb_streams; i++) {
avcodec_free_context(&stream_ctx[i].dec_ctx);
if (ofmt_ctx && ofmt_ctx->nb_streams > i && ofmt_ctx->streams[i] && stream_ctx[i].enc_ctx)
avcodec_free_context(&stream_ctx[i].enc_ctx);
if (filter_ctx && filter_ctx[i].filter_graph) {
avfilter_graph_free(&filter_ctx[i].filter_graph);
av_packet_free(&filter_ctx[i].enc_pkt);
av_frame_free(&filter_ctx[i].filtered_frame);
}
av_frame_free(&stream_ctx[i].dec_frame);
}
av_free(filter_ctx);
av_free(stream_ctx);
avformat_close_input(&ifmt_ctx);
if (ofmt_ctx && !(ofmt_ctx->oformat->flags & AVFMT_NOFILE))
avio_closep(&ofmt_ctx->pb);
avformat_free_context(ofmt_ctx);
if (ret < 0)
av_log(NULL, AV_LOG_ERROR, "Error occurred: %s\n", av_err2str(ret));
return ret ? 1 : 0;
}
http://lazybing.github.io/blog/2017/01/01/ffmpeg-sdk-learning/ –FFMpeg 解封装实现
//打开并解析要转码的源文件(如input.mp4),并分配相应的decoder
//比如,源文件是input.mp4(H264+AAC),那么就有2个stream,分配2个decoder,H264 decoder & AAC decoder
static int open_input_file(const char *filename)
{
int ret;
unsigned int i;
ifmt_ctx = NULL;
//open input file, and allocate format context
if ((ret = avformat_open_input(&ifmt_ctx, filename, NULL, NULL)) < 0) {
av_log(NULL, AV_LOG_ERROR, "Cannot open input file\n");
return ret;
}
//retrive stream information
if ((ret = avformat_find_stream_info(ifmt_ctx, NULL)) < 0) {
av_log(NULL, AV_LOG_ERROR, "Cannot find stream information\n");
return ret;
}
stream_ctx = av_mallocz_array(ifmt_ctx->nb_streams, sizeof(*stream_ctx));
if (!stream_ctx)
return AVERROR(ENOMEM);
for (i = 0; i < ifmt_ctx->nb_streams; i++) {
AVStream *stream = ifmt_ctx->streams[i];
//Find the AVCodec Depending on the CODEC_ID
const AVCodec *dec = avcodec_find_decoder(stream->codecpar->codec_id);
AVCodecContext *codec_ctx;
if (!dec) {
av_log(NULL, AV_LOG_ERROR, "Failed to find decoder for stream #%u\n", i);
return AVERROR_DECODER_NOT_FOUND;
}
//Allocate the AVCodecContext
codec_ctx = avcodec_alloc_context3(dec);
if (!codec_ctx) {
av_log(NULL, AV_LOG_ERROR, "Failed to allocate the decoder context for stream #%u\n", i);
return AVERROR(ENOMEM);
}
ret = avcodec_parameters_to_context(codec_ctx, stream->codecpar);
if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Failed to copy decoder parameters to input decoder context "
"for stream #%u\n", i);
return ret;
}
/* Reencode video & audio and remux subtitles etc. */
if (codec_ctx->codec_type == AVMEDIA_TYPE_VIDEO
|| codec_ctx->codec_type == AVMEDIA_TYPE_AUDIO) {
if (codec_ctx->codec_type == AVMEDIA_TYPE_VIDEO)
codec_ctx->framerate = av_guess_frame_rate(ifmt_ctx, stream, NULL);
/* Open decoder */
ret = avcodec_open2(codec_ctx, dec, NULL);
if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Failed to open decoder for stream #%u\n", i);
return ret;
}
}
stream_ctx[i].dec_ctx = codec_ctx;
//Allocate AVFrame to Store the Decode Data
stream_ctx[i].dec_frame = av_frame_alloc();
if (!stream_ctx[i].dec_frame)
return AVERROR(ENOMEM);
}
//输出format日志信息
av_dump_format(ifmt_ctx, 0, filename, 0);
return 0;
}
AVFormatContext
结构对于给定的需要 AV 分离的输入文件,使用avformat_open_input
打开输入文件,并分配AVFormatContext
结构
//ps:指向由用户提供的AVFormatContext结构体,该结构体通过avformat_alloc_context分配,如果它是一个 NULL,该结构在此函数内分配并负值给 ps。
//filename:指向需要打开的流的名称。
//fmt:如果是 non-NULL,该参数指定输入的文件格式,否则输入文件的格式自动根据文件本身自动获取。
//options:此处可以为 NULL。
//返回值:成功返回0,否则返回 AVERROR。
int avformat_open_input(AVFormatContext **ps, const char *filename,
const AVInputFormat *fmt, AVDictionary **options)
{
int avformat_find_stream_info(AVFormatContext *ic, AVDictionary **options)
{
//该函数参数为AVCodecID指定了请求的解码器,成功返回解码器,否则返回 NULL。
const AVCodec *avcodec_find_decoder(enum AVCodecID id)
{
return find_codec(id, av_codec_is_decoder);
}
int av_codec_is_decoder(const AVCodec *codec)
{
return codec && (codec->decode || codec->receive_frame);
}
static const AVCodec *find_codec(enum AVCodecID id, int (*x)(const AVCodec *))
{
const AVCodec *p, *experimental = NULL;
void *i = 0;
id = remap_deprecated_codec_id(id);
while ((p = av_codec_iterate(&i))) {
if (!x(p))
continue;
if (p->id == id) {
if (p->capabilities & AV_CODEC_CAP_EXPERIMENTAL && !experimental) {
experimental = p;
} else
return p;
}
}
return experimental;
}
const AVCodec *av_codec_iterate(void **opaque)
{
uintptr_t i = (uintptr_t)*opaque;
const AVCodec *c = codec_list[i];
ff_thread_once(&av_codec_static_init, av_codec_init_static);
if (c)
*opaque = (void*)(i + 1);
return c;
}
//Open the Decoder
//该函数的主要作用是根据给定的AVCodec初始化AVCodecContext,在使用该函数之前,待初始化的AVCodecContext结构需要先使用avcodec_alloc_context3分配好。其中的参数 AVCodec可以通过avcodec_find_decoder_by_nameavcodec_find_encoder_by_nameavcodec_find_decoder或avcodec_find_endcoder来获取。在进行真正的解码之前,必须调用该函数。
//avctx:即将初始化的AVCodecContext结构体。
//codec:打开的解码器,如果它是non-NULL codec,并在之前传递给了avcodec_alloc_context3或avcodec_get_context_defaults3,该参数必须为 NULL 或之前传递的 CODEC。
//Options:此处我们设置为 NULL。
//返回值:成功返回0,出错返回一个负值。
int attribute_align_arg avcodec_open2(AVCodecContext *avctx, const AVCodec *codec, AVDictionary **options)
{
–FFMpeg 封装实现
//打开输出文件,分配相应的encoder
static int open_output_file(const char *filename)
{
AVStream *out_stream;
AVStream *in_stream;
AVCodecContext *dec_ctx, *enc_ctx;
const AVCodec *encoder;
int ret;
unsigned int i;
ofmt_ctx = NULL;
avformat_alloc_output_context2(&ofmt_ctx, NULL, NULL, filename);
if (!ofmt_ctx) {
av_log(NULL, AV_LOG_ERROR, "Could not create output context\n");
return AVERROR_UNKNOWN;
}
for (i = 0; i < ifmt_ctx->nb_streams; i++) {
out_stream = avformat_new_stream(ofmt_ctx, NULL);
if (!out_stream) {
av_log(NULL, AV_LOG_ERROR, "Failed allocating output stream\n");
return AVERROR_UNKNOWN;
}
in_stream = ifmt_ctx->streams[i];
dec_ctx = stream_ctx[i].dec_ctx;
if (dec_ctx->codec_type == AVMEDIA_TYPE_VIDEO
|| dec_ctx->codec_type == AVMEDIA_TYPE_AUDIO) {
/* in this example, we choose transcoding to same codec */
encoder = avcodec_find_encoder(dec_ctx->codec_id);
if (!encoder) {
av_log(NULL, AV_LOG_FATAL, "Necessary encoder not found\n");
return AVERROR_INVALIDDATA;
}
enc_ctx = avcodec_alloc_context3(encoder);
if (!enc_ctx) {
av_log(NULL, AV_LOG_FATAL, "Failed to allocate the encoder context\n");
return AVERROR(ENOMEM);
}
/* In this example, we transcode to same properties (picture size,
* sample rate etc.). These properties can be changed for output
* streams easily using filters */
if (dec_ctx->codec_type == AVMEDIA_TYPE_VIDEO) {
enc_ctx->height = dec_ctx->height;
enc_ctx->width = dec_ctx->width;
enc_ctx->sample_aspect_ratio = dec_ctx->sample_aspect_ratio;
/* take first format from list of supported formats */
if (encoder->pix_fmts)
enc_ctx->pix_fmt = encoder->pix_fmts[0];
else
enc_ctx->pix_fmt = dec_ctx->pix_fmt;
/* video time_base can be set to whatever is handy and supported by encoder */
enc_ctx->time_base = av_inv_q(dec_ctx->framerate);
} else {
enc_ctx->sample_rate = dec_ctx->sample_rate;
enc_ctx->channel_layout = dec_ctx->channel_layout;
enc_ctx->channels = av_get_channel_layout_nb_channels(enc_ctx->channel_layout);
/* take first format from list of supported formats */
enc_ctx->sample_fmt = encoder->sample_fmts[0];
enc_ctx->time_base = (AVRational){1, enc_ctx->sample_rate};
}
if (ofmt_ctx->oformat->flags & AVFMT_GLOBALHEADER)
enc_ctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
/* Third parameter can be used to pass settings to encoder */
ret = avcodec_open2(enc_ctx, encoder, NULL);
if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Cannot open video encoder for stream #%u\n", i);
return ret;
}
ret = avcodec_parameters_from_context(out_stream->codecpar, enc_ctx);
if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Failed to copy encoder parameters to output stream #%u\n", i);
return ret;
}
out_stream->time_base = enc_ctx->time_base;
stream_ctx[i].enc_ctx = enc_ctx;
}
}
av_dump_format(ofmt_ctx, 0, filename, 1);
if (!(ofmt_ctx->oformat->flags & AVFMT_NOFILE)) {
ret = avio_open(&ofmt_ctx->pb, filename, AVIO_FLAG_WRITE);
if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Could not open output file '%s'", filename);
return ret;
}
}
/* init muxer, write output file header */
ret = avformat_write_header(ofmt_ctx, NULL);
if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Error occurred when opening output file\n");
return ret;
}
return 0;
该函数完成向 AVFormatContext 结构体中所代表的媒体文件中添加数据流
//s:AVFormatContext 结构,表示要封装生成的视频文件。
//c:视频或音频流的编码器的指针。
//返回值:指向生成的 stream 对象的指针;失败则返回 NULL
AVStream *avformat_new_stream(AVFormatContext *s, const AVCodec *c)
{
const AVCodec *avcodec_find_encoder(enum AVCodecID id)
{
return find_codec(id, av_codec_is_encoder);
}
int avio_open(AVIOContext **s, const char *filename, int flags)
{
return avio_open2(s, filename, flags, NULL, NULL);
}
int avformat_write_header(AVFormatContext *s, AVDictionary **options)
{
static int init_filters(void)
{
const char *filter_spec;
unsigned int i;
int ret;
filter_ctx = av_malloc_array(ifmt_ctx->nb_streams, sizeof(*filter_ctx));
if (!filter_ctx)
return AVERROR(ENOMEM);
for (i = 0; i < ifmt_ctx->nb_streams; i++) {
filter_ctx[i].buffersrc_ctx = NULL;
filter_ctx[i].buffersink_ctx = NULL;
filter_ctx[i].filter_graph = NULL;
if (!(ifmt_ctx->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_AUDIO
|| ifmt_ctx->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO))
continue;
if (ifmt_ctx->streams[i]->codecpar->codec_type == AVMEDIA_TYPE_VIDEO)
filter_spec = "null"; /* passthrough (dummy) filter for video */
else
filter_spec = "anull"; /* passthrough (dummy) filter for audio */
ret = init_filter(&filter_ctx[i], stream_ctx[i].dec_ctx,
stream_ctx[i].enc_ctx, filter_spec);
if (ret)
return ret;
filter_ctx[i].enc_pkt = av_packet_alloc();
if (!filter_ctx[i].enc_pkt)
return AVERROR(ENOMEM);
filter_ctx[i].filtered_frame = av_frame_alloc();
if (!filter_ctx[i].filtered_frame)
return AVERROR(ENOMEM);
}
return 0;
}
static int init_filter(FilteringContext* fctx, AVCodecContext *dec_ctx,
AVCodecContext *enc_ctx, const char *filter_spec)
{
char args[512];
int ret = 0;
const AVFilter *buffersrc = NULL;
const AVFilter *buffersink = NULL;
AVFilterContext *buffersrc_ctx = NULL;
AVFilterContext *buffersink_ctx = NULL;
AVFilterInOut *outputs = avfilter_inout_alloc();
AVFilterInOut *inputs = avfilter_inout_alloc();
AVFilterGraph *filter_graph = avfilter_graph_alloc();
if (!outputs || !inputs || !filter_graph) {
ret = AVERROR(ENOMEM);
goto end;
}
if (dec_ctx->codec_type == AVMEDIA_TYPE_VIDEO) {
buffersrc = avfilter_get_by_name("buffer");
buffersink = avfilter_get_by_name("buffersink");
if (!buffersrc || !buffersink) {
av_log(NULL, AV_LOG_ERROR, "filtering source or sink element not found\n");
ret = AVERROR_UNKNOWN;
goto end;
}
ret = avfilter_graph_create_filter(&buffersrc_ctx, buffersrc, "in",
args, NULL, filter_graph);
if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Cannot create buffer source\n");
goto end;
}
ret = avfilter_graph_create_filter(&buffersink_ctx, buffersink, "out",
NULL, NULL, filter_graph);
if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Cannot create buffer sink\n");
goto end;
}
ret = av_opt_set_bin(buffersink_ctx, "pix_fmts",
(uint8_t*)&enc_ctx->pix_fmt, sizeof(enc_ctx->pix_fmt),
AV_OPT_SEARCH_CHILDREN);
if (ret < 0) {
av_log(NULL, AV_LOG_ERROR, "Cannot set output pixel format\n");
goto end;
}
} else if (dec_ctx->codec_type == AVMEDIA_TYPE_AUDIO) {
......
}
......
if ((ret = avfilter_graph_parse_ptr(filter_graph, filter_spec,
&inputs, &outputs, NULL)) < 0)
goto end;
if ((ret = avfilter_graph_config(filter_graph, NULL)) < 0)
goto end;
/* Fill FilteringContext */
fctx->buffersrc_ctx = buffersrc_ctx;
fctx->buffersink_ctx = buffersink_ctx;
fctx->filter_graph = filter_graph;
end:
avfilter_inout_free(&inputs);
avfilter_inout_free(&outputs);
return ret;
/**
* Return the next frame of a stream.
* This function returns what is stored in the file, and does not validate
* that what is there are valid frames for the decoder. It will split what is
* stored in the file into frames and return one for each call. It will not
* omit invalid data between valid frames so as to give the decoder the maximum
* information possible for decoding.
*/
int av_read_frame(AVFormatContext *s, AVPacket *pkt)
{
//Supply raw packet data as input to a decoder.
int avcodec_send_packet(AVCodecContext *avctx, const AVPacket *avpkt);
//Return decoded output data from a decoder.
int avcodec_receive_frame(AVCodecContext *avctx, AVFrame *frame);
/* push the decoded frame into the filtergraph */
//Add a frame to the buffer source.
int av_buffersrc_add_frame_flags(AVFilterContext *buffer_src,
AVFrame *frame, int flags);
/* pull filtered frames from the filtergraph */
// Get a frame with filtered data from sink and put it in frame.
int av_buffersink_get_frame(AVFilterContext *ctx, AVFrame *frame);
/** Supply a raw video or audio frame to the encoder. Use avcodec_receive_packet()
* to retrieve buffered output packets. */
int avcodec_send_frame(AVCodecContext *avctx, const AVFrame *frame);
//Read encoded data from the encoder.
int avcodec_receive_packet(AVCodecContext *avctx, AVPacket *avpkt);
/* mux encoded frame */
//Write a packet to an output media file ensuring correct interleaving.
int av_interleaved_write_frame(AVFormatContext *s, AVPacket *pkt);
/**
* Write the stream trailer to an output media file and free the
* file private data.
*/
int av_write_trailer(AVFormatContext *s);