输出mp4文件基本信息

如何使用mp4v2将H264+AAC裸流录制成mp4文件

添加adts头结构

MP4GetAudioProfileLevel()    //获取mp4音频配置级别
MP4GetTrackAudioChannels()  //获取mp4音频声道数
Structure{
AAAAAAAA AAAABCCD EEFFFFGH HHIJKLMM MMMMMMMM MMMOOOOO OOOOOOPP (QQQQQQQQ QQQQQQQQ)
Header consists of 7 or 9 bytes (without or with CRC).
}

Letter Length (bits) Description
A     12     syncword 0xFFF, all bits must be 1
B     1       MPEG Version: 0 for MPEG-4, 1 for MPEG-2
C     2       Layer: always 0
D     1       protection absent, Warning, set to 1 if there is no CRC and 0 if there is CRC
E     2       profile, the MPEG-4 Audio Object Type minus 1
F     4       MPEG-4 Sampling Frequency Index (15 is forbidden)
G     1       private stream, set to 0 when encoding, ignore when decoding
H     3       MPEG-4 Channel Configuration (in the case of 0, the channel configuration is sent via an inband PCE)
I      1       originality, set to 0 when encoding, ignore when decoding
J      1       home, set to 0 when encoding, ignore when decoding
K     1       copyrighted stream, set to 0 when encoding, ignore when decoding
L     1        copyright start, set to 0 when encoding, ignore when decoding
M   13      frame length, this value must include 7 or 9 bytes of header length: FrameLength = (ProtectionAbsent == 1 ? 7 : 9) + size(AACFrame)
O    11       Buffer fullness
P     2        Number of AAC frames (RDBs) in ADTS frame minus 1, for maximum compatibility always use 1 AAC frame per ADTS frame
Q     16     CRC if protection absent is 0


Usage in MPEG-TS
An ADTS packet must be the content of a PES packet. Pack the AAC data inside an ADTS frame, then pack that inside a PES packet, then mux it with the TS packetizer.

Usage in Shoutcast
ADTS frames go one by one in the TCP stream. Look for the syncword, parse the header, and then look for the next syncword.
/**
     *  Add ADTS header at the beginning of each and every AAC packet.
     *  This is needed as MediaCodec encoder generates a packet of raw
     *  AAC data.
     *
     *  Note the packetLen must count in the ADTS header itself !!! .
     *注意,这里的packetLen参数为raw aac Packet Len + 7; 7 bytes adts header
     **/
    private void addADTStoPacket(byte[] packet, int packetLen) {
        int profile = 2;  //AAC LC,MediaCodecInfo.CodecProfileLevel.AACObjectLC;
        int freqIdx = 5;  //32K, 见后面注释avpriv_mpeg4audio_sample_rates中32000对应的数组下标,来自ffmpeg源码
        int chanCfg = 2;  //见后面注释channel_configuration,Stero双声道立体声

        /*int avpriv_mpeg4audio_sample_rates[] = {
            96000, 88200, 64000, 48000, 44100, 32000,
                    24000, 22050, 16000, 12000, 11025, 8000, 7350
        };
        channel_configuration: 表示声道数chanCfg
        0: Defined in AOT Specifc Config
        1: 1 channel: front-center
        2: 2 channels: front-left, front-right
        3: 3 channels: front-center, front-left, front-right
        4: 4 channels: front-center, front-left, front-right, back-center
        5: 5 channels: front-center, front-left, front-right, back-left, back-right
        6: 6 channels: front-center, front-left, front-right, back-left, back-right, LFE-channel
        7: 8 channels: front-center, front-left, front-right, side-left, side-right, back-left, back-right, LFE-channel
        8-15: Reserved
        */

        // fill in ADTS data
        packet[0] = (byte)0xFF;
        packet[1] = (byte)0xF9;
        packet[2] = (byte)(((profile-1)<<6) + (freqIdx<<2) +(chanCfg>>2));
        packet[3] = (byte)(((chanCfg&3)<<6) + (packetLen>>11));
        packet[4] = (byte)((packetLen&0x7FF) >> 3);
        packet[5] = (byte)(((packetLen&7)<<5) + 0x1F);
        packet[6] = (byte)0xFC;
    }

H264与AAC打包成MP4



Audio Specific Config

The Audio Specific Config is the global header for MPEG-4 Audio:
5 bits: object type
if (object type == 31) 6 bits + 32: object type
4 bits: frequency index
if (frequency index == 15)  24 bits: frequency //this case is forbidden in aac , don't care
4 bits: channel configuration
var bits: AOT Specific Config
object type != 31:profile(5bit)-sampleRate(4bit)-channelCount(4bit)-空(3bit)
object type == 31(转义):11111(5bit)-objectType减32(6bit)-sampleRate(4bit)-channelCount(4bit)-空(5bit),共3字节

/**
 * Packs a two-byte MPEG-4 AudioSpecificConfig into buf:
 *
 *   object type (5 bits) | frequency index (4 bits) | channel configuration (4 bits) | 3 zero bits
 *
 * Only the plain two-byte form is produced. Inputs that cannot be represented
 * in it are rejected instead of being silently truncated into neighbouring
 * bit fields:
 *   - profile >= 31: value 31 is the escape code, which needs a further 6-bit
 *     object-type field (19 bits in total, i.e. a third byte);
 *   - freq_id == 15: escape code that must be followed by an explicit 24-bit
 *     sampling frequency;
 *   - freq_id > 15 or channel_num > 15: does not fit the 4-bit field.
 *
 * @param buf          output buffer, at least 2 bytes; left untouched on failure
 * @param freq_id      sampling-frequency index, 0..14
 * @param profile      MPEG-4 audio object type, 0..30
 * @param channel_num  channel configuration, 0..15
 * @return 0 on success, -1 if buf is null or any field is out of range
 */
int mp4_get_audio_specific_config(uint8_t *buf,uint8_t &freq_id,uint8_t &profile,uint8_t &channel_num)
{
    if(!buf)
        return -1;
    if(profile >= 31 || freq_id >= 15 || channel_num > 15)
        return -1;

    // Byte 0: the 5 object-type bits followed by the top 3 bits of the frequency index.
    buf[0] = static_cast<uint8_t>((profile << 3) | (freq_id >> 1));
    // Byte 1: the last frequency-index bit, the 4 channel bits, then 3 zero bits.
    buf[1] = static_cast<uint8_t>(((freq_id & 0x1) << 7) | (channel_num << 3));
    return 0;
}



demuxer参照


mp4v2实现remuxer

#include <mp4v2/mp4v2.h>
#include <mp4v2/track.h>
#include <stdlib.h>
#include <string.h>
#include <iostream>
#include <cassert>
#define see(x) std::cerr << (#x) << " = "<< x <<std::endl
int Remuxer(const char* file_name,const char* dst_file_name)
{
    char *p=(char*)file_name;
    MP4FileHandle input_mp4=MP4Read(file_name);
    MP4FileHandle output_mp4=MP4Create(dst_file_name);
    int track_num=MP4GetNumberOfTracks(input_mp4);
    uint32_t mp4_time_scale=MP4GetTimeScale(input_mp4);
    MP4SetTimeScale(output_mp4,mp4_time_scale);
    // see(track_num),see(mp4_time_scale);
    for(int i=1;i<=track_num;i++)
    {
        uint32_t track_time_scale=MP4GetTrackTimeScale(input_mp4,i);
        MP4Duration track_dur=MP4GetTrackDuration(input_mp4,i);
        const char *track_type=MP4GetTrackType(input_mp4,i);
        int sample_num=MP4GetTrackNumberOfSamples(input_mp4,i);
        uint32_t track_bit_rate=MP4GetTrackBitRate(input_mp4,i);
        //see(i),see(track_time_scale),see(track_dur),see(sample_num),see(track_type),see(track_bit_rate);
        if(MP4_IS_VIDEO_TRACK_TYPE(track_type))
        {
            double video_track_frame_rate=MP4GetTrackVideoFrameRate(input_mp4,i);
            uint8_t video_profile_level=MP4GetVideoProfileLevel(input_mp4,i);
            uint16_t video_track_h=MP4GetTrackVideoHeight(input_mp4,i);
            uint16_t video_track_w=MP4GetTrackVideoWidth(input_mp4,i);
            uint32_t plen;
            uint8_t **pSeqHeaders=NULL,**pPictHeaders=NULL;
            uint32_t *pSeqHeaderSize=NULL,*pPictHeaderSize=NULL;
            MP4GetTrackH264LengthSize(input_mp4,i,&plen);
            MP4GetTrackH264SeqPictHeaders(input_mp4,i,&pSeqHeaders,&pSeqHeaderSize,&pPictHeaders,&pPictHeaderSize);
            // see(video_track_frame_rate),see(video_track_h),see(video_track_w);
            // for(int j=0;pSeqHeaders[j];j++)
            // {
            //     see(j);
            //     for(uint32_t k=0;k<pSeqHeaderSize[j];k++)
            //         printf("%02x ",pSeqHeaders[j][k]);
            //     see(pSeqHeaderSize[j]);
            // }
            // for(int j=0;pPictHeaders[j];j++)
            // {
            //     see(j);
            //     for(uint32_t k=0;k<pPictHeaderSize[j];k++)
            //         printf("%02x ",pPictHeaders[j][k]);
            //     see(pPictHeaderSize[j]);
            // }
            // see(plen);
            MP4TrackId video_track_id=MP4AddH264VideoTrack( output_mp4,
                                                            track_time_scale,
                                                            MP4GetSampleDuration(input_mp4,i,1),
                                                            video_track_w,
                                                            video_track_h,
                                                            pSeqHeaders[0][0],
                                                            pSeqHeaders[0][1],
                                                            pSeqHeaders[0][2],
                                                            plen-1
                                                            );
            MP4SetTrackTimeScale(output_mp4,video_track_id,track_time_scale);
            for(int j=0;pPictHeaders[j];j++)
                MP4AddH264PictureParameterSet(output_mp4,video_track_id,pPictHeaders[j],pPictHeaderSize[j]);
            
            for(int j=0;pSeqHeaders[j];j++)
                MP4AddH264SequenceParameterSet(output_mp4,video_track_id,pSeqHeaders[j],pSeqHeaderSize[j]);
            
            MP4SetVideoProfileLevel(output_mp4,video_profile_level);
            
            uint8_t *pBytes;
            uint32_t byte_num;
            MP4Duration sample_dur;
            MP4Duration sample_render_offset;
            int8_t IsSync;

            for(int j=1;j<=sample_num;j++)
            {
                pBytes=NULL;
                byte_num=0;
                sample_dur=MP4GetSampleDuration(input_mp4,i,j);
                sample_render_offset=MP4GetSampleRenderingOffset(input_mp4,i,j);
                IsSync=MP4GetSampleSync(input_mp4,i,j);
                
                MP4ReadSample(input_mp4,i,j,&pBytes,&byte_num);
                
                MP4WriteSample(output_mp4,video_track_id,pBytes,byte_num,sample_dur,sample_render_offset,IsSync);
            
                free(pBytes);
                pBytes=NULL;
            }

            pSeqHeaders=pPictHeaders=NULL;
            pSeqHeaderSize=pPictHeaderSize=NULL;
        }
        else if(MP4_IS_AUDIO_TRACK_TYPE(track_type))
        {
            
            int channel_num=MP4GetTrackAudioChannels(input_mp4,i);
            uint8_t audio_profile_level=MP4GetAudioProfileLevel(input_mp4);
            uint8_t *ppConfig=NULL;
            uint32_t pConfigsize=0;
            uint8_t audio_track_type=MP4GetTrackEsdsObjectTypeId(input_mp4,i);

            MP4GetTrackESConfiguration(input_mp4,i,&ppConfig,&pConfigsize);
            // for(int j=0;j<pConfigsize;j++)printf("%02x ",ppConfig[j]);
            // see(pConfigsize);
            // see(channel_num);
            MP4TrackId audio_track_id = MP4AddAudioTrack(output_mp4,track_time_scale,track_dur,audio_track_type);
            MP4SetTrackTimeScale(output_mp4,audio_track_id,track_time_scale);
            MP4SetTrackESConfiguration(output_mp4,audio_track_id,ppConfig,pConfigsize);
            MP4SetAudioProfileLevel(output_mp4,audio_profile_level);
            for(int j=1;j<=sample_num;j++)
            {
                uint8_t *pBytes=NULL;
                uint32_t byte_num=0;
                MP4Duration sample_dur=MP4GetSampleDuration(input_mp4,i,j);
                MP4Duration sample_render_offset=MP4GetSampleRenderingOffset(input_mp4,i,j);
                int8_t IsSync=MP4GetSampleSync(input_mp4,i,j);
                MP4ReadSample(input_mp4,i,j,&pBytes,&byte_num);

                MP4WriteSample(output_mp4,audio_track_id,pBytes,byte_num,sample_dur,sample_render_offset,IsSync);
            
                free(pBytes);
                pBytes=NULL;
            }
            ppConfig=NULL;
        }
    }
    MP4Close(input_mp4),MP4Close(output_mp4);
    return 0;
}

int main()
{
    Remuxer("2.mp4","3.mp4");

    return 0;
}

mp4v2和ffmpeg处理mp4时获取到的数据对比

adts格式相同,均为Audio Specific Config
sps,pps格式不同,ffmpeg有start code,mp4v2直接得到裸的数据
读到的音视频帧二进制相同