Merge branch 'master' of https://github.com/ZLMediaKit/ZLMediaKit into feature/transcode2

# Conflicts:
#	conf/config.ini
#	src/Codec/Transcode.cpp
#	src/Common/MediaSource.h
#	src/Common/MultiMediaSourceMuxer.cpp
#	src/Common/MultiMediaSourceMuxer.h
#	src/Common/macros.h
#	webrtc/WebRtcPusher.cpp
#	webrtc/WebRtcTransport.cpp
#	webrtc/WebRtcTransport.h
This commit is contained in:
cqm
2026-04-03 09:35:50 +08:00
283 changed files with 42056 additions and 13083 deletions

View File

@@ -413,6 +413,12 @@ Track::Ptr getTrackBySdp(const SdpTrack::Ptr &track) {
// If aac config information cannot be obtained from sdp, then it cannot be obtained from rtp either, so ignore this Track
return nullptr;
}
while (aac_cfg_str.size() < 4) {
aac_cfg_str = '0' + aac_cfg_str;
}
if (aac_cfg_str.size() > 4) {
aac_cfg_str = aac_cfg_str.substr(0, 4);
}
string aac_cfg;
for (size_t i = 0; i < aac_cfg_str.size() / 2; ++i) {
unsigned int cfg;

95
ext-codec/AV1.cpp Normal file
View File

@@ -0,0 +1,95 @@
/*
* Copyright (c) 2016-present The ZLMediaKit project authors. All Rights Reserved.
*
* This file is part of ZLMediaKit(https://github.com/ZLMediaKit/ZLMediaKit).
*
* Use of this source code is governed by MIT-like license that can be found in the
* LICENSE file in the root of the source tree. All contributing project authors
* may be found in the AUTHORS file in the root of the source tree.
*/
#include "AV1.h"
#include "AV1Rtp.h"
#include "VpxRtmp.h"
#include "Extension/Factory.h"
using namespace std;
using namespace toolkit;
namespace mediakit {
/**
 * Feeds one AV1 frame into the track.
 * Each frame's payload (the bytes after the frame prefix) is run through
 * aom_av1_codec_configuration_record_init(); when that call reports 0 the
 * cached width/height are refreshed from the parsed context.
 * The frame is then handed to VideoTrackImp::inputFrame() unconditionally.
 * @param frame frame to process
 * @return whatever VideoTrackImp::inputFrame() returns
 */
bool AV1Track::inputFrame(const Frame::Ptr &frame) {
    const auto prefix_len = frame->prefixSize();
    char *payload = frame->data() + prefix_len;
    const auto payload_len = frame->size() - prefix_len;

    const auto rc = aom_av1_codec_configuration_record_init(&_context, payload, payload_len);
    if (rc == 0) {
        _width = _context.width;
        _height = _context.height;
    }
    return VideoTrackImp::inputFrame(frame);
}
/**
 * Creates a copy of this track (copy-constructed, so the parsed
 * configuration context is duplicated as well).
 */
Track::Ptr AV1Track::clone() const {
    auto copy = std::make_shared<AV1Track>(*this);
    return copy;
}
/**
 * Serializes the cached AV1 codec configuration record.
 * @return buffer holding the record, or nullptr when no configuration has
 *         been parsed yet or serialization fails
 */
Buffer::Ptr AV1Track::getExtraData() const {
    if (_context.bytes <= 0) {
        return nullptr;
    }
    // 4 extra bytes are reserved on top of the configuration OBUs for the record header.
    auto ret = BufferRaw::create(4 + _context.bytes);
    const auto saved = aom_av1_codec_configuration_record_save(&_context, (uint8_t *)ret->data(), ret->getCapacity());
    if (saved <= 0) {
        // Do not forward a failure value to setSize(); report "no extra data" instead.
        return nullptr;
    }
    ret->setSize(saved);
    return ret;
}
/**
 * Loads an AV1 codec configuration record into the track.
 * Width and height are only updated when the record loads successfully
 * (loader returns a positive value).
 * @param data serialized configuration record
 * @param size number of bytes in data
 */
void AV1Track::setExtraData(const uint8_t *data, size_t size) {
    const auto loaded = aom_av1_codec_configuration_record_load(data, size, &_context);
    if (!(loaded > 0)) {
        return;
    }
    _width = _context.width;
    _height = _context.height;
}
namespace {

/// Codec id handled by the factory functions below.
CodecId getCodec() {
    return CodecAV1;
}

/// Builds an empty AV1 track; the audio-style arguments are not used.
Track::Ptr getTrackByCodecId(int sample_rate, int channels, int sample_bit) {
    auto track = std::make_shared<AV1Track>();
    return track;
}

/// Builds an empty AV1 track; nothing is read from the SDP description here.
Track::Ptr getTrackBySdp(const SdpTrack::Ptr &track) {
    auto created = std::make_shared<AV1Track>();
    return created;
}

/// Builds the AV1 RTP packetizer; the payload type argument is not used.
RtpCodec::Ptr getRtpEncoderByCodecId(uint8_t pt) {
    auto encoder = std::make_shared<AV1RtpEncoder>();
    return encoder;
}

/// Builds the AV1 RTP depacketizer.
RtpCodec::Ptr getRtpDecoderByCodecId() {
    auto decoder = std::make_shared<AV1RtpDecoder>();
    return decoder;
}

/// Builds the RTMP encoder for the given track (VpxRtmpEncoder is used for AV1).
RtmpCodec::Ptr getRtmpEncoderByTrack(const Track::Ptr &track) {
    auto encoder = std::make_shared<VpxRtmpEncoder>(track);
    return encoder;
}

/// Builds the RTMP decoder for the given track (VpxRtmpDecoder is used for AV1).
RtmpCodec::Ptr getRtmpDecoderByTrack(const Track::Ptr &track) {
    auto decoder = std::make_shared<VpxRtmpDecoder>(track);
    return decoder;
}

/// Wraps caller-owned bytes in a non-caching AV1 frame with a prefix size of 0.
Frame::Ptr getFrameFromPtr(const char *data, size_t bytes, uint64_t dts, uint64_t pts) {
    return std::make_shared<AV1FrameNoCacheAble>(const_cast<char *>(data), bytes, dts, pts, 0);
}

} // namespace
// Plugin descriptor for AV1: an aggregate of the factory functions defined in
// the anonymous namespace above. The initializer order must match the member
// order of CodecPlugin.
CodecPlugin av1_plugin = { getCodec,
getTrackByCodecId,
getTrackBySdp,
getRtpEncoderByCodecId,
getRtpDecoderByCodecId,
getRtmpEncoderByTrack,
getRtmpDecoderByTrack,
getFrameFromPtr };
} // namespace mediakit

65
ext-codec/AV1.h Normal file
View File

@@ -0,0 +1,65 @@
/*
* Copyright (c) 2016-present The ZLMediaKit project authors. All Rights Reserved.
*
* This file is part of ZLMediaKit(https://github.com/ZLMediaKit/ZLMediaKit).
*
* Use of this source code is governed by MIT-like license that can be found in the
* LICENSE file in the root of the source tree. All contributing project authors
* may be found in the AUTHORS file in the root of the source tree.
*/
#ifndef ZLMEDIAKIT_AV1_H
#define ZLMEDIAKIT_AV1_H
#include "Extension/Frame.h"
#include "Extension/Track.h"
#include "aom-av1.h"
namespace mediakit {
/**
 * Mixin that turns a generic frame type (Parent) into an AV1 frame by
 * setting the codec id and answering the frame-classification queries.
 */
template <typename Parent>
class AV1FrameHelper : public Parent {
public:
    friend class FrameImp;
    //friend class toolkit::ResourcePool_l<Av1FrameHelper>;
    using Ptr = std::shared_ptr<AV1FrameHelper>;

    /// Forwards all arguments to Parent and tags the frame as AV1.
    template <typename... ARGS>
    AV1FrameHelper(ARGS &&...args)
        : Parent(std::forward<ARGS>(args)...) {
        this->_codec_id = CodecAV1;
    }

    /**
     * A frame is reported as a key frame when the type field (bits 6..3) of
     * its first OBU header equals 1, i.e. the frame starts with a sequence header.
     * A frame without any payload is never a key frame.
     */
    bool keyFrame() const override {
        // Guard against reading past the buffer when there is no payload byte.
        if (this->size() <= this->prefixSize()) {
            return false;
        }
        auto ptr = (const uint8_t *) this->data() + this->prefixSize();
        return ((*ptr & 0x78) >> 3) == 1;
    }

    bool configFrame() const override { return false; }
    bool dropAble() const override { return false; }
    bool decodeAble() const override { return true; }
};
/// AV1 frame types: AV1Frame is built on FrameImp, AV1FrameNoCacheAble on FrameFromPtr.
using AV1Frame = AV1FrameHelper<FrameImp>;
using AV1FrameNoCacheAble = AV1FrameHelper<FrameFromPtr>;
/**
 * AV1 video track.
 * Keeps the parsed AV1 codec configuration in _context; the extra-data
 * accessors serialize and load that configuration.
 */
class AV1Track : public VideoTrackImp {
public:
    using Ptr = std::shared_ptr<AV1Track>;

    AV1Track() : VideoTrackImp(CodecAV1) {}

    /// Inspects the frame for configuration data, then forwards it to the base class.
    bool inputFrame(const Frame::Ptr &frame) override;

    /// Returns a copy of this track.
    Track::Ptr clone() const override;

    /// Loads a serialized codec configuration record.
    void setExtraData(const uint8_t *data, size_t size) override;

    /// Returns the serialized codec configuration record, or nullptr if none is known.
    toolkit::Buffer::Ptr getExtraData() const override;

protected:
    aom_av1_t _context {};
};
} // namespace mediakit
#endif

582
ext-codec/AV1Rtp.cpp Normal file
View File

@@ -0,0 +1,582 @@
/*
* Copyright (c) 2016-present The ZLMediaKit project authors. All Rights Reserved.
*
* This file is part of ZLMediaKit(https://github.com/ZLMediaKit/ZLMediaKit).
*
* Use of this source code is governed by MIT-like license that can be found in the
* LICENSE file in the root of the source tree. All contributing project authors
* may be found in the AUTHORS file in the root of the source tree.
*/
#include "AV1.h"
#include "AV1Rtp.h"
#include <algorithm>
#include <cstring>
#include <vector>
#include <sstream>
#include <iomanip>
using namespace std;
using namespace toolkit;
namespace mediakit {
// AV1 OBU type values (the 4-bit type field of the OBU header)
static constexpr int kObuTypeSequenceHeader = 1;
static constexpr int kObuTypeTemporalDelimiter = 2;
static constexpr int kObuTypeTileList = 8;
static constexpr int kObuTypePadding = 15;
// Bit definitions and sizes related to the OBU header / RTP aggregation header
static constexpr uint8_t kObuSizePresentBit = 0b00000010; // "has size field" flag in the OBU header
static constexpr int kAggregationHeaderSize = 1; // bytes occupied by the aggregation header
static constexpr int kMaxNumObusToOmitSize = 3; // largest element count that is written into the W field
// LEB128 encode/decode helpers

/**
 * Encodes value as unsigned LEB128 (7 payload bits per byte, least
 * significant group first, MSB set on every byte except the last).
 * @param value  number to encode
 * @param buffer destination; the caller must provide enough room
 *               (up to 10 bytes for a full 64-bit value)
 * @return number of bytes written (always at least 1)
 */
static size_t writeLeb128(uint64_t value, uint8_t* buffer) {
    size_t written = 0;
    for (;;) {
        const uint8_t group = static_cast<uint8_t>(value & 0x7F);
        value >>= 7;
        const bool more = (value != 0);
        buffer[written++] = more ? static_cast<uint8_t>(group | 0x80) : group;
        if (!more) {
            break;
        }
    }
    return written;
}
/**
 * Number of bytes the unsigned LEB128 encoding of value occupies.
 * @return at least 1 (the value 0 still needs one byte)
 */
static size_t leb128Size(uint64_t value) {
    size_t count = 1;
    for (value >>= 7; value != 0; value >>= 7) {
        ++count;
    }
    return count;
}
/**
 * Decodes an unsigned LEB128 value, advancing data and decrementing
 * remaining by the number of bytes consumed (at most 8 bytes are read).
 *
 * Compatibility behaviour: if the input ends while the last byte read still
 * has its continuation bit set, the value accumulated so far is accepted and
 * a warning is logged.
 *
 * @param data      in/out read cursor
 * @param remaining in/out number of readable bytes at data
 * @param value     out: decoded value (reset to 0 on entry)
 * @return true when a value was produced; false when there was no input at
 *         all or 8 bytes were consumed without a terminator and input remains
 */
static bool readLeb128(const uint8_t*& data, size_t& remaining, uint64_t& value) {
    value = 0;
    size_t shift = 0;
    for (; remaining > 0 && shift < 56; shift += 7) {
        const uint8_t byte = *data++;
        --remaining;
        value |= static_cast<uint64_t>(byte & 0x7F) << shift;
        if (!(byte & 0x80)) {
            return true;
        }
    }
    // Reached only without a terminating byte: tolerate it solely when the
    // input was exhausted after at least one byte had been read.
    const bool truncated = (remaining == 0) && (shift > 0);
    if (!truncated) {
        return false;
    }
    WarnL << "Tolerating non-standard LEB128 encoding (missing termination bit)";
    return true;
}
// OBU header helpers

/// True when the extension flag (bit 2) of the OBU header is set.
static bool obuHasExtension(uint8_t obu_header) {
    return (obu_header & 0x04) != 0;
}
/// True when the "has size field" flag (kObuSizePresentBit) of the OBU header is set.
static bool obuHasSize(uint8_t obu_header) {
    return (obu_header & kObuSizePresentBit) != 0;
}
/// Extracts the 4-bit OBU type (bits 6..3) from the OBU header byte.
static int obuType(uint8_t obu_header) {
    return (obu_header >> 3) & 0x0F;
}
/**
 * Largest payload that fits into remaining_bytes together with its own
 * LEB128 length prefix.
 *
 * For a prefix of i bytes the payload may be at most 2^(7*i) - 1 bytes, so
 * the smallest i with remaining_bytes < 2^(7*i) + i is the prefix length used.
 *
 * @param remaining_bytes space available for prefix + payload
 * @return payload size in bytes; 0 when remaining_bytes <= 1
 */
static int maxFragmentSize(int remaining_bytes) {
    if (remaining_bytes <= 1) {
        return 0;
    }
    for (int i = 1; ; ++i) {
        // Computed in 64 bits: 1 << (7 * i) overflows a 32-bit int from i == 5
        // on, which large inputs do reach.
        const int64_t limit = (int64_t{1} << (7 * i)) + i;
        if (remaining_bytes < limit) {
            return remaining_bytes - i;
        }
    }
}
//////////////////////////////////////////////////////////////////////////
// AV1RtpEncoder implementation
//////////////////////////////////////////////////////////////////////////

/// Nothing to set up: all members carry in-class initializers.
AV1RtpEncoder::AV1RtpEncoder() = default;
/**
 * Splits a buffer of concatenated OBUs into individual OBU descriptors.
 *
 * Temporal delimiter, tile list and padding OBUs are parsed but left out of
 * the result. The returned descriptors point into data; they do not own it.
 * An OBU without a size field is taken to extend to the end of the buffer.
 *
 * @param data start of the OBU stream
 * @param size number of bytes at data
 * @return parsed OBUs, or an empty vector when the input is malformed
 */
std::vector<AV1RtpEncoder::ObuInfo> AV1RtpEncoder::parseObus(const uint8_t* data, size_t size) {
    std::vector<ObuInfo> result;
    const uint8_t* ptr = data;
    size_t remaining = size;

    // The loop condition guarantees at least one byte for the OBU header.
    while (remaining > 0) {
        ObuInfo obu{};
        obu.header = *ptr++;
        --remaining;
        obu.has_extension = obuHasExtension(obu.header);
        obu.has_size_field = obuHasSize(obu.header);

        if (obu.has_extension) {
            if (remaining < 1) {
                WarnL << "Malformed AV1 input: expected extension header";
                return {};
            }
            obu.extension_header = *ptr++;
            --remaining;
        }

        uint64_t payload_size = 0;
        if (obu.has_size_field) {
            if (!readLeb128(ptr, remaining, payload_size)) {
                WarnL << "Malformed AV1 input: failed to read OBU size";
                return {};
            }
            if (payload_size > remaining) {
                WarnL << "Malformed AV1 input: OBU size exceeds remaining data";
                return {};
            }
        } else {
            // No explicit size: the OBU occupies the rest of the buffer.
            payload_size = remaining;
        }

        obu.payload_data = ptr;
        obu.payload_size = payload_size;
        ptr += payload_size;
        remaining -= payload_size;

        const int type = obuType(obu.header);
        const bool skipped = type == kObuTypeTemporalDelimiter ||
                             type == kObuTypeTileList ||
                             type == kObuTypePadding;
        if (!skipped) {
            result.push_back(obu);
        }
    }
    return result;
}
/**
 * Builds the one-byte aggregation header that precedes the OBU elements in
 * an RTP payload.
 *
 * Layout written here: Z = 0x80, Y = 0x40, W = bits 5..4, N = 0x08.
 *
 * @param first_obu_is_fragment  Z: first element continues an OBU from the previous packet
 * @param last_obu_is_fragment   Y: last element continues in the next packet
 * @param num_obu_elements       W: element count, written only when <= kMaxNumObusToOmitSize
 * @param starts_new_coded_video_sequence N: packet begins a new coded video sequence
 */
uint8_t AV1RtpEncoder::makeAggregationHeader(bool first_obu_is_fragment,
                                             bool last_obu_is_fragment,
                                             int num_obu_elements,
                                             bool starts_new_coded_video_sequence) {
    const uint8_t z_bit = first_obu_is_fragment ? 0x80 : 0x00;
    const uint8_t y_bit = last_obu_is_fragment ? 0x40 : 0x00;
    const uint8_t w_field = (num_obu_elements <= kMaxNumObusToOmitSize)
                                ? static_cast<uint8_t>(num_obu_elements << 4)
                                : static_cast<uint8_t>(0);
    const uint8_t n_bit = starts_new_coded_video_sequence ? 0x08 : 0x00;
    return static_cast<uint8_t>(z_bit | y_bit | w_field | n_bit);
}
/**
 * Emits one RTP packet consisting of the aggregation header followed by
 * len bytes of OBU data.
 * @param data               OBU bytes to place after the aggregation header
 * @param len                number of bytes at data (may be 0)
 * @param mark               RTP marker bit
 * @param stamp              timestamp passed to makeRtp()
 * @param aggregation_header first payload byte
 */
void AV1RtpEncoder::outputRtp(const uint8_t* data, size_t len, bool mark,
                              uint64_t stamp, uint8_t aggregation_header) {
    // The packet is allocated without payload (nullptr); it is filled in below.
    auto packet = getRtpInfo().makeRtp(TrackVideo, nullptr, len + kAggregationHeaderSize, mark, stamp);
    auto out = packet->data() + RtpPacket::kRtpTcpHeaderSize + RtpPacket::kRtpHeaderSize;

    // Aggregation header first, OBU bytes right after it.
    out[0] = aggregation_header;
    if (len != 0) {
        memcpy(out + kAggregationHeaderSize, data, len);
    }
    RtpCodec::inputRtp(std::move(packet), false);
}
/**
 * Packetizes one AV1 frame into RTP packets.
 *
 * The frame payload is split into OBUs and every OBU is sent via sendObu().
 * Frames are dropped until the first one that contains a sequence header
 * has been seen.
 *
 * @param frame frame to packetize; frame->pts() is used as the RTP timestamp source
 * @return true when all OBUs were handed to sendObu() successfully; false when
 *         the frame is empty, malformed, dropped, or the RTP size budget is invalid
 */
bool AV1RtpEncoder::inputFrame(const Frame::Ptr &frame) {
    // Compare before subtracting so a prefix >= size cannot wrap around.
    const size_t prefix = frame->prefixSize();
    if (frame->size() <= prefix) {
        return false;
    }
    auto ptr = frame->data() + prefix;
    const size_t size = frame->size() - prefix;

    // Parse the OBUs contained in the frame
    auto obus = parseObus((const uint8_t*)ptr, size);
    if (obus.empty()) {
        return false;
    }

    // A sequence header marks the frame as a key frame
    bool has_sequence_header = false;
    for (const auto& obu : obus) {
        if (obuType(obu.header) == kObuTypeSequenceHeader) {
            has_sequence_header = true;
            _got_key_frame = true;
            break;
        }
    }

    // Drop everything that arrives before the first key frame
    if (!_got_key_frame && !has_sequence_header) {
        DebugL << "Dropping AV1 frame before first keyframe";
        return false;
    }

    // Validate before subtracting: an unsigned "max - header" would wrap for
    // max == 0 and slip past a plain "== 0" check.
    const size_t max_rtp_size = getRtpInfo().getMaxSize();
    if (max_rtp_size <= static_cast<size_t>(kAggregationHeaderSize)) {
        WarnL << "Invalid RTP max payload size for AV1";
        return false;
    }
    const size_t max_payload_size = max_rtp_size - kAggregationHeaderSize;

    for (size_t i = 0; i < obus.size(); ++i) {
        const bool is_first_obu = (i == 0);
        const bool is_last_obu = (i == obus.size() - 1);
        if (!sendObu(obus[i], is_first_obu, is_last_obu,
                     has_sequence_header && is_first_obu, frame->pts(), max_payload_size)) {
            return false;
        }
    }
    return true;
}
/**
 * Sends a single OBU as one or more RTP packets, one OBU element per packet.
 *
 * The OBU is re-serialized without its size field (header with the size bit
 * cleared, optional extension byte, payload) and cut into pieces of at most
 * max_payload_size bytes.
 *
 * @param obu                 OBU to send
 * @param is_first_obu        not used by this implementation
 * @param is_last_obu         whether this is the last OBU of the frame (controls the marker bit)
 * @param starts_new_sequence sets the N bit on the first packet of this OBU
 * @param stamp               timestamp forwarded to outputRtp()
 * @param max_payload_size    maximum OBU bytes per packet; must be non-zero
 * @return always true
 */
bool AV1RtpEncoder::sendObu(const ObuInfo& obu,
                            bool is_first_obu,
                            bool is_last_obu,
                            bool starts_new_sequence,
                            uint64_t stamp,
                            size_t max_payload_size) {
    // Rebuild the OBU without the size field.
    std::vector<uint8_t> serialized;
    serialized.reserve(1 + (obu.has_extension ? 1 : 0) + obu.payload_size);
    serialized.push_back(obu.header & ~kObuSizePresentBit);
    if (obu.has_extension) {
        serialized.push_back(obu.extension_header);
    }
    if (obu.payload_size > 0) {
        serialized.insert(serialized.end(), obu.payload_data, obu.payload_data + obu.payload_size);
    }

    const size_t total = serialized.size();
    bool is_continuation = false;
    for (size_t pos = 0; pos < total;) {
        const size_t chunk = std::min<size_t>(max_payload_size, total - pos);
        const bool is_final_chunk = (pos + chunk == total);

        const uint8_t agg_header = makeAggregationHeader(
            is_continuation,
            !is_final_chunk,
            1,
            !is_continuation && starts_new_sequence);

        // The marker is set only on the very last packet of the frame.
        const bool mark = is_final_chunk && is_last_obu;
        outputRtp(serialized.data() + pos, chunk, mark, stamp, agg_header);

        pos += chunk;
        is_continuation = true;
    }
    return true;
}
//////////////////////////////////////////////////////////////////////////
// AV1RtpDecoder implementation
//////////////////////////////////////////////////////////////////////////
// Allocates the first output frame so that _frame is valid from the start.
AV1RtpDecoder::AV1RtpDecoder() {
obtainFrame();
}
// Replaces _frame with a newly created AV1Frame; whatever the previous frame
// object held is no longer referenced through _frame.
void AV1RtpDecoder::obtainFrame() {
_frame = FrameImp::create<AV1Frame>();
}
/**
 * Decodes the one-byte aggregation header of an AV1 RTP payload.
 * Bit layout: Z = bit 7, Y = bit 6, W = bits 5..4, N = bit 3.
 */
AV1RtpDecoder::AggregationHeader AV1RtpDecoder::parseAggregationHeader(uint8_t header) {
    AggregationHeader parsed{};
    parsed.first_obu_is_fragment = ((header >> 7) & 0x01) == 1;
    parsed.last_obu_is_fragment = ((header >> 6) & 0x01) == 1;
    parsed.num_obu_elements = (header >> 4) & 0x03;
    parsed.starts_new_coded_video_sequence = ((header >> 3) & 0x01) == 1;
    return parsed;
}
/**
 * Consumes one RTP packet and, when the marker bit closes a frame, outputs
 * the reassembled AV1 frame.
 *
 * Processing order:
 *  1. SSRC change (or first packet): reset state and start a fresh frame.
 *  2. N bit set: reset state and start a fresh frame.
 *  3. Sequence-number gap while a fragmented OBU is being assembled: the
 *     incomplete OBU is discarded.
 *  4. The payload's OBU elements are processed; a failure resets everything.
 *  5. On marker: flush the frame, unless no sequence header has been seen
 *     since the last reset, in which case the frame is discarded.
 *
 * @param rtp     packet to process
 * @param key_pos not used by this implementation
 * @return true only when a complete frame was flushed
 */
bool AV1RtpDecoder::inputRtp(const RtpPacket::Ptr &rtp, bool key_pos) {
    auto payload_size = rtp->getPayloadSize();
    if (payload_size < kAggregationHeaderSize) {
        return false;
    }

    uint32_t ssrc = rtp->getSSRC();
    if (!_has_last_ssrc || _last_ssrc != ssrc) {
        resetState();
        // Also drop OBUs already collected from the previous source, mirroring
        // the N-bit branch below; otherwise they would be glued onto the first
        // frame of the new stream.
        obtainFrame();
        _last_ssrc = ssrc;
        _has_last_ssrc = true;
    }

    auto stamp = rtp->getStampMS();
    auto payload = rtp->getPayload();
    auto seq = rtp->getSeq();

    // Parse the aggregation header
    auto agg_header = parseAggregationHeader(payload[0]);
    const uint8_t* data = payload + kAggregationHeaderSize;
    size_t remaining = payload_size - kAggregationHeaderSize;

    // InfoL << "RTP seq=" << seq << ", Z=" << agg_header.first_obu_is_fragment
    //       << ", Y=" << agg_header.last_obu_is_fragment
    //       << ", W=" << agg_header.num_obu_elements
    //       << ", N=" << agg_header.starts_new_coded_video_sequence
    //       << ", payload_size=" << remaining;
    // if (remaining > 0) {
    //     std::ostringstream hex_stream;
    //     for (size_t i = 0; i < std::min(remaining, size_t(16)); ++i) {
    //         hex_stream << std::hex << std::setw(2) << std::setfill('0') << (int)data[i] << " ";
    //     }
    //     InfoL << "RTP payload hex: " << hex_stream.str();
    // }

    // A new coded video sequence invalidates all previous state
    if (agg_header.starts_new_coded_video_sequence) {
        InfoL << "Starting new coded video sequence";
        resetState();
        obtainFrame();
    }

    if (_has_last_seq) {
        // uint16_t arithmetic makes the expected value wrap from 65535 to 0.
        uint16_t expected = _last_seq + 1;
        if (seq != expected && _assembling_fragment) {
            WarnL << "RTP seq gap while assembling fragment, expected=" << expected
                  << " got=" << seq << ", dropping incomplete OBU";
            _fragment_buffer.clear();
            _assembling_fragment = false;
        }
    }
    _last_seq = seq;
    _has_last_seq = true;

    if (!processPayload(agg_header, data, remaining)) {
        resetState();
        obtainFrame();
        return false;
    }

    bool marker = rtp->getHeader()->mark;
    if (marker) {
        if (_assembling_fragment) {
            // The frame ended although the last OBU announced a continuation.
            WarnL << "Marker bit set while awaiting fragment continuation";
            _fragment_buffer.clear();
            _assembling_fragment = false;
        }
        _last_dts = stamp;
        if (!_received_keyframe) {
            WarnL << "AV1 RTP packet before keyframe, dropping";
            _frame->_buffer.clear();
            obtainFrame();
            return false;
        }
        flushFrame(stamp);
        return true;
    }

    _last_dts = stamp;
    return false;
}
// Walks the OBU elements of one RTP payload (the bytes after the aggregation
// header), reassembles fragmented OBUs across packets via _fragment_buffer and
// passes every completed OBU to emitObu().
//
// agg_header: parsed aggregation header of this packet
// data:       first byte after the aggregation header
// remaining:  number of bytes at data
// Returns false when a continuation arrives without a pending fragment, when a
// pending fragment is never completed, or when emitObu() fails; true otherwise.
bool AV1RtpDecoder::processPayload(const AggregationHeader& agg_header,
const uint8_t* data,
size_t remaining) {
size_t element_index = 0;
int expected_elements = agg_header.num_obu_elements;
while (remaining > 0) {
uint64_t element_size = 0;
// With W == 0 every element carries a LEB128 length prefix; with W > 0 all
// elements except the W-th (last) one do.
bool has_size = (expected_elements == 0) || (static_cast<int>(element_index) < expected_elements - 1);
if (has_size) {
if (!readLeb128(data, remaining, element_size)) {
WarnL << "Failed to read OBU element size, trying fallback parsing";
// Compatibility fallback: if the LEB128 length cannot be parsed, use the
// number of bytes left (readLeb128 may already have advanced data/remaining).
element_size = remaining;
} else if (element_size > remaining) {
WarnL << "OBU element size (" << element_size << ") exceeds remaining payload ("
<< remaining << "), using remaining size";
element_size = remaining;
}
} else {
// Last element of a W > 0 packet: it extends to the end of the payload.
element_size = remaining;
}
// Copy the element out of the packet and advance the read cursor.
std::vector<uint8_t> element_bytes;
element_bytes.reserve(element_size);
if (element_size > 0) {
element_bytes.insert(element_bytes.end(), data, data + element_size);
data += element_size;
remaining -= element_size;
}
bool is_first = element_index == 0;
bool is_last = (remaining == 0);
if (is_first && agg_header.first_obu_is_fragment) {
// Z bit: this element continues the OBU started in an earlier packet.
if (_fragment_buffer.empty()) {
WarnL << "Unexpected fragment continuation in AV1 RTP packet";
return false;
}
_fragment_buffer.insert(_fragment_buffer.end(), element_bytes.begin(), element_bytes.end());
} else {
// A new OBU starts here; a still-pending fragment can no longer be completed.
if (_assembling_fragment && !_fragment_buffer.empty()) {
WarnL << "Previous fragment never completed, discarding";
return false;
}
_fragment_buffer = std::move(element_bytes);
}
// Y bit: only the last element of the packet can continue in the next packet.
bool will_continue = is_last && agg_header.last_obu_is_fragment;
if (will_continue) {
_assembling_fragment = true;
} else {
if (!emitObu(_fragment_buffer.data(), _fragment_buffer.size())) {
return false;
}
_fragment_buffer.clear();
_assembling_fragment = false;
}
++element_index;
}
if (expected_elements > 0 && static_cast<int>(element_index) != expected_elements) {
WarnL << "Mismatch between W field (" << expected_elements
<< ") and parsed OBU elements (" << element_index
<< "), tolerating for compatibility";
// Deliberately not returning false: keep going to improve compatibility
}
return true;
}
bool AV1RtpDecoder::emitObu(const uint8_t* data, size_t size) {
if (size == 0) {
return true;
}
if (size < 1) {
WarnL << "Empty OBU fragment";
return false;
}
uint8_t obu_header = data[0];
size_t header_size = 1;
// 检查OBU头部是否已经包含size bit
bool already_has_size = obuHasSize(obu_header);
// 如果RTP包中的OBU已经包含size字段需要特殊处理
if (already_has_size) {
//WarnL << "RTP OBU contains size field";
// 跳过extension header处理
if (obuHasExtension(obu_header)) {
if (size < 2) {
WarnL << "OBU with extension flag but insufficient data";
return false;
}
header_size = 2;
}
// 读取原始的size字段
const uint8_t* ptr = data + header_size;
size_t remaining = size - header_size;
uint64_t original_size = 0;
if (!readLeb128(ptr, remaining, original_size)) {
WarnL << "Failed to read original OBU size field";
return false;
}
if (original_size != remaining) {
WarnL << "OBU size mismatch in RTP packet, original_size=" << original_size
<< " remaining=" << remaining;
}
// 直接拷贝完整的OBU包括已有的size字段
_frame->_buffer.append((char*)data, size);
} else {
// 标准情况RTP包中的OBU没有size字段需要我们添加
// 写入带size bit的OBU头部
_frame->_buffer.push_back(obu_header | kObuSizePresentBit);
if (obuHasExtension(obu_header)) {
if (size < 2) {
WarnL << "OBU with extension flag but insufficient data";
return false;
}
_frame->_buffer.push_back(data[1]);
header_size = 2;
}
if (size < header_size) {
WarnL << "Invalid OBU size";
return false;
}
// 计算payload大小并写入leb128编码的size字段
uint64_t payload_size = size - header_size;
uint8_t size_bytes[8];
size_t size_len = writeLeb128(payload_size, size_bytes);
_frame->_buffer.append((char*)size_bytes, size_len);
// 拷贝payload数据
if (payload_size > 0) {
_frame->_buffer.append((char*)data + header_size, payload_size);
}
}
if (obuType(obu_header) == kObuTypeSequenceHeader) {
_received_keyframe = true;
}
return true;
}
/**
 * Outputs the assembled frame, if it contains any data, and starts a new one.
 * @param stamp value stored as both dts and pts of the emitted frame
 */
void AV1RtpDecoder::flushFrame(uint64_t stamp) {
    if (!_frame->_buffer.empty()) {
        _frame->_dts = stamp;
        _frame->_pts = stamp;
        RtpCodec::inputFrame(_frame);
        obtainFrame();
    }
}
/**
 * Clears the reassembly state: pending fragment, sequence-number tracking and
 * the "key frame received" flag. The frame object and the SSRC tracking are
 * not touched here.
 */
void AV1RtpDecoder::resetState() {
    _received_keyframe = false;
    _has_last_seq = false;
    _assembling_fragment = false;
    _fragment_buffer.clear();
}
} // namespace mediakit

95
ext-codec/AV1Rtp.h Normal file
View File

@@ -0,0 +1,95 @@
/*
* Copyright (c) 2016-present The ZLMediaKit project authors. All Rights Reserved.
*
* This file is part of ZLMediaKit(https://github.com/ZLMediaKit/ZLMediaKit).
*
* Use of this source code is governed by MIT-like license that can be found in the
* LICENSE file in the root of the source tree. All contributing project authors
* may be found in the AUTHORS file in the root of the source tree.
*/
#ifndef ZLMEDIAKIT_AV1RTP_H
#define ZLMEDIAKIT_AV1RTP_H
#include "Rtsp/RtpCodec.h"
#include "Extension/Frame.h"
#include "Extension/CommonRtp.h"
namespace mediakit {
/**
 * AV1 RTP encoder: turns AV1 frames into RTP packets.
 */
class AV1RtpEncoder : public RtpCodec {
public:
    using Ptr = std::shared_ptr<AV1RtpEncoder>;

    AV1RtpEncoder();
    ~AV1RtpEncoder() override = default;

    /// Packetizes one frame; returns false when the frame is dropped or invalid.
    bool inputFrame(const Frame::Ptr &frame) override;

private:
    /// Description of one OBU inside an input frame (does not own the payload).
    struct ObuInfo {
        uint8_t header;              ///< OBU header byte
        uint8_t extension_header;    ///< extension byte, meaningful only if has_extension
        const uint8_t* payload_data; ///< first payload byte, points into the frame
        size_t payload_size;         ///< payload length in bytes
        bool has_extension;          ///< extension flag of the header
        bool has_size_field;         ///< size flag of the header
    };

    /// Splits a frame payload into OBUs; empty result means malformed input.
    std::vector<ObuInfo> parseObus(const uint8_t* data, size_t size);

    /// Emits one RTP packet: aggregation header followed by len bytes of data.
    void outputRtp(const uint8_t* data, size_t len, bool mark, uint64_t stamp, uint8_t aggregation_header);

    /// Builds the aggregation header byte from the Z, Y, W and N values.
    uint8_t makeAggregationHeader(bool first_obu_is_fragment, bool last_obu_is_fragment,
                                  int num_obu_elements, bool starts_new_coded_video_sequence);

    /// Sends one OBU, fragmenting it over several packets when necessary.
    bool sendObu(const ObuInfo& obu, bool is_first_obu, bool is_last_obu,
                 bool starts_new_sequence, uint64_t stamp, size_t max_payload_size);

private:
    /// Set once a frame containing a sequence header has been seen.
    bool _got_key_frame = false;
};
/**
 * AV1 RTP decoder: reassembles AV1 frames from RTP packets.
 */
class AV1RtpDecoder : public RtpCodec {
public:
    using Ptr = std::shared_ptr<AV1RtpDecoder>;

    AV1RtpDecoder();
    ~AV1RtpDecoder() override = default;

    /// Processes one RTP packet; returns true when a complete frame was output.
    bool inputRtp(const RtpPacket::Ptr &rtp, bool key_pos = false) override;

private:
    /// Decoded form of the one-byte aggregation header.
    struct AggregationHeader {
        bool first_obu_is_fragment; // Z bit
        bool last_obu_is_fragment; // Y bit
        int num_obu_elements; // W field (0 = any number)
        bool starts_new_coded_video_sequence; // N bit
    };

    /// Decodes the aggregation header byte.
    AggregationHeader parseAggregationHeader(uint8_t header);

    /// Replaces _frame with a newly created frame.
    void obtainFrame();

    /// Appends one complete OBU to the current frame.
    bool emitObu(const uint8_t* data, size_t size);

    /// Processes the OBU elements of one RTP payload.
    bool processPayload(const AggregationHeader& agg_header, const uint8_t* data,
                        size_t remaining);

    /// Outputs the current frame (if non-empty) and starts a new one.
    void flushFrame(uint64_t stamp);

    /// Clears fragment, sequence-number and key-frame state.
    void resetState();

private:
    uint64_t _last_dts = 0;                ///< timestamp of the most recent packet
    FrameImp::Ptr _frame;                  ///< frame currently being assembled
    std::vector<uint8_t> _fragment_buffer; ///< bytes of the OBU being reassembled
    bool _assembling_fragment = false;     ///< an OBU continuation is expected
    bool _received_keyframe = false;       ///< a sequence header was seen since the last reset
    bool _has_last_seq = false;            ///< _last_seq is valid
    uint16_t _last_seq = 0;                ///< sequence number of the previous packet
    bool _has_last_ssrc = false;           ///< _last_ssrc is valid
    uint32_t _last_ssrc = 0;               ///< SSRC of the previous packet
};
}//namespace mediakit
#endif //ZLMEDIAKIT_AV1RTP_H

View File

@@ -1,6 +1,6 @@
# MIT License
#
# Copyright (c) 2016-2022 The ZLMediaKit project authors. All Rights Reserved.
# Copyright (c) 2016-present The ZLMediaKit project authors. All Rights Reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal

View File

@@ -13,18 +13,35 @@
#include "Extension/Factory.h"
#include "Extension/CommonRtp.h"
#include "Extension/CommonRtmp.h"
#include "riff-acm.h"
using namespace std;
using namespace toolkit;
namespace mediakit {
Track::Ptr G711Track::clone() const {
return std::make_shared<G711Track>(*this);
/**
 * Serializes the track parameters as a wave format structure.
 * The format tag is A-law for CodecG711A and mu-law otherwise.
 * @return buffer holding the serialized structure, or nullptr when
 *         serialization fails
 */
Buffer::Ptr G711Track::getExtraData() const {
    struct wave_format_t wav {};
    wav.wFormatTag = getCodecId() == CodecG711A ? WAVE_FORMAT_ALAW : WAVE_FORMAT_MULAW;
    wav.nChannels = getAudioChannel();
    wav.nSamplesPerSec = getAudioSampleRate();
    wav.nAvgBytesPerSec = 8000;
    wav.nBlockAlign = 1;
    wav.wBitsPerSample = 8;

    auto buff = BufferRaw::create(18 + wav.cbSize);
    // Hand the writer the buffer's capacity and record the produced length
    // afterwards, as AV1Track::getExtraData() does. NOTE(review): a freshly
    // created BufferRaw presumably reports size() == 0, so passing size()
    // gave the writer no room and left the result empty; confirm against
    // ZLToolKit.
    const auto written = wave_format_save(&wav, (uint8_t *)buff->data(), static_cast<int>(buff->getCapacity()));
    if (written <= 0) {
        WarnL << "Failed to generate G711 extra data";
        return nullptr;
    }
    buff->setSize(written);
    return buff;
}
Sdp::Ptr G711Track::getSdp(uint8_t payload_type) const {
return std::make_shared<DefaultSdp>(payload_type, *this);
// Loads track parameters from a serialized wave format structure.
// On success the sample rate, channel count and codec id (A-law vs. mu-law)
// are taken from it; on failure the track is left unchanged and a warning is logged.
void G711Track::setExtraData(const uint8_t *data, size_t size) {
struct wave_format_t wav;
if (wave_format_load(data, size, &wav) > 0) {
// Successfully parsed the G711 wave format header
_sample_rate = wav.nSamplesPerSec;
_channels = wav.nChannels;
_codecid = (wav.wFormatTag == WAVE_FORMAT_ALAW) ? CodecG711A : CodecG711U;
} else {
WarnL << "Failed to parse G711 extra data";
}
}
namespace {

View File

@@ -18,19 +18,16 @@ namespace mediakit{
/**
* G711音频通道
* G711 audio channel
* [AUTO-TRANSLATED:57f8bc08]
*/
class G711Track : public AudioTrackImp{
public:
using Ptr = std::shared_ptr<G711Track>;
G711Track(CodecId codecId, int sample_rate = 8000, int channels = 1, int sample_bit = 16) : AudioTrackImp(codecId, sample_rate, channels, sample_bit) {}
toolkit::Buffer::Ptr getExtraData() const override;
void setExtraData(const uint8_t *data, size_t size) override;
private:
Sdp::Ptr getSdp(uint8_t payload_type) const override;
Track::Ptr clone() const override;
Track::Ptr clone() const override { return std::make_shared<G711Track>(*this); }
};
}//namespace mediakit

View File

@@ -38,7 +38,8 @@ bool G711RtpEncoder::inputFrame(const Frame::Ptr &frame) {
_buffer.append(ptr, size);
while (_buffer.size() >= _pkt_bytes) {
RtpCodec::inputRtp(getRtpInfo().makeRtp(TrackAudio, _buffer.data(), _pkt_bytes, false, in_pts), false);
auto tmp = (in_pts+_pkt_dur_ms-1)/_pkt_dur_ms*_pkt_dur_ms;
RtpCodec::inputRtp(getRtpInfo().makeRtp(TrackAudio, _buffer.data(), _pkt_bytes, false, tmp), false);
in_pts += _pkt_dur_ms;
_buffer.erase(0, _pkt_bytes);
}

View File

@@ -153,7 +153,6 @@ bool H264Track::ready() const {
bool H264Track::inputFrame(const Frame::Ptr &frame) {
using H264FrameInternal = FrameInternal<H264FrameNoCacheAble>;
int type = H264_TYPE(frame->data()[frame->prefixSize()]);
if ((type == H264Frame::NAL_B_P || type == H264Frame::NAL_IDR) && ready()) {
return inputFrame_l(frame);
}
@@ -263,6 +262,10 @@ Track::Ptr H264Track::clone() const {
bool H264Track::inputFrame_l(const Frame::Ptr &frame) {
int type = H264_TYPE(frame->data()[frame->prefixSize()]);
if (type == H264Frame::NAL_AUD) {
// AUD帧丢弃
return false;
}
bool ret = true;
switch (type) {
case H264Frame::NAL_SPS: {
@@ -388,7 +391,7 @@ Track::Ptr getTrackBySdp(const SdpTrack::Ptr &track) {
// If there is no sps/pps in the sdp, then it may be possible to recover the sps/pps in the subsequent rtp
return std::make_shared<H264Track>();
}
return std::make_shared<H264Track>(sps, pps, 0, 0);
return std::make_shared<H264Track>(sps, pps, prefixSize(sps.data(), sps.size()), prefixSize(pps.data(), pps.size()));
}
RtpCodec::Ptr getRtpEncoderByCodecId(uint8_t pt) {

View File

@@ -160,6 +160,7 @@ toolkit::Buffer::Ptr H265Track::getExtraData() const {
WarnL << "生成H265 extra_data 失败";
return nullptr;
}
extra_data.resize(extra_data_size);
return std::make_shared<BufferString>(std::move(extra_data));
#else
WarnL << "请开启MP4相关功能并使能\"ENABLE_MP4\",否则对H265的支持不完善";
@@ -215,6 +216,108 @@ void H265Track::insertConfigFrame(const Frame::Ptr &frame) {
}
}
/**
 * Minimal MSB-first bit reader over a caller-owned byte buffer.
 * The buffer is not copied and must outlive the reader.
 */
class BitReader {
public:
    BitReader(const uint8_t* data, size_t size) : _data(data), _size(size), _bitPos(0) {}

    /**
     * Reads n bits, most significant bit first, and returns them right-aligned.
     * @throws std::runtime_error when the read would pass the end of the buffer
     *         (bits read before the failure stay consumed)
     */
    uint32_t readBits(int n) {
        uint32_t value = 0;
        const size_t total_bits = _size * 8;
        for (int left = n; left > 0; --left) {
            if (_bitPos >= total_bits) {
                throw std::runtime_error("Out of range");
            }
            const uint8_t byte = _data[_bitPos >> 3];
            const unsigned shift = 7u - static_cast<unsigned>(_bitPos & 7u);
            value = (value << 1) | ((byte >> shift) & 0x01u);
            ++_bitPos;
        }
        return value;
    }

    /**
     * Advances the read position by n bits.
     * @throws std::runtime_error when the new position lies beyond the end of
     *         the buffer (the position has already been advanced at that point)
     */
    void skipBits(int n) {
        _bitPos += n;
        const size_t total_bits = _size * 8;
        if (_bitPos > total_bits) {
            throw std::runtime_error("Skip out of range");
        }
    }

private:
    const uint8_t* _data; // start of the buffer
    size_t _size;         // buffer length in bytes
    size_t _bitPos;       // index of the next bit to read
};
// Profile/tier/level values extracted from an HEVC parameter set; they are
// emitted in the SDP as profile-id, level-id and tier-flag.
// Every field defaults to -1.
struct HevcProfileInfo {
int profile_id = -1; // profile-id (general_profile_idc)
int level_id = -1; // level-id (general_level_idc)
int tier_flag = -1; // tier-flag (general_tier_flag)
};
/**
 * Strips emulation-prevention bytes: every 00 00 03 sequence in the input is
 * replaced by 00 00; all other bytes are copied unchanged.
 * @param data input bytes
 * @param size number of bytes at data
 * @return the unescaped byte sequence
 */
std::vector<uint8_t> removeEmulationPrevention(const uint8_t *data, size_t size) {
    std::vector<uint8_t> rbsp;
    rbsp.reserve(size);

    size_t pos = 0;
    while (pos < size) {
        const bool is_escape = pos + 2 < size
                               && data[pos] == 0x00
                               && data[pos + 1] == 0x00
                               && data[pos + 2] == 0x03;
        if (is_escape) {
            // Keep the two zero bytes, drop the 0x03 that follows them.
            rbsp.insert(rbsp.end(), 2, static_cast<uint8_t>(0x00));
            pos += 3;
        } else {
            rbsp.push_back(data[pos]);
            ++pos;
        }
    }
    return rbsp;
}
/**
 * Extracts general profile / tier / level from an HEVC SPS NAL unit.
 *
 * NOTE: the bit layout read here (4-bit parameter set id, 3-bit
 * max_sub_layers_minus1, 1-bit temporal_id_nesting_flag, then
 * profile_tier_level) matches the SPS. A VPS has additional fields before
 * profile_tier_level and would be misparsed by this function.
 *
 * @param nalu NAL unit, with or without a 00 00 01 / 00 00 00 01 start code
 * @param size number of bytes at nalu
 * @return parsed values
 * @throws std::runtime_error (from BitReader) when the NAL unit is too short
 */
HevcProfileInfo parse_hevc_profile_tier_level(const uint8_t *nalu, size_t size) {
    // Skip the start code (00 00 01 or 00 00 00 01) if there is one
    size_t offset = 0;
    if (size > 4 && nalu[0] == 0x00 && nalu[1] == 0x00) {
        if (nalu[2] == 0x01) {
            offset = 3;
        } else if (nalu[2] == 0x00 && nalu[3] == 0x01) {
            offset = 4;
        }
    }

    auto rbsp = removeEmulationPrevention(nalu + offset, size - offset);
    BitReader br(rbsp.data(), rbsp.size());

    // ---- NAL unit header ----
    // forbidden_zero_bit + nal_unit_type + nuh_layer_id + nuh_temporal_id_plus1
    br.skipBits(1 + 6 + 6 + 3);

    // ---- fields preceding profile_tier_level (values not needed) ----
    br.readBits(4); // sps_video_parameter_set_id
    br.readBits(3); // sps_max_sub_layers_minus1
    br.readBits(1); // sps_temporal_id_nesting_flag

    // ---- profile_tier_level ----
    HevcProfileInfo info;
    br.readBits(2);                  // general_profile_space (not needed)
    info.tier_flag = br.readBits(1); // general_tier_flag
    info.profile_id = br.readBits(5); // general_profile_idc
    br.readBits(32); // general_profile_compatibility_flag[32]
    br.readBits(4);  // progressive_source, interlaced_source, non_packed_constraint, frame_only_constraint
    br.skipBits(44); // general_reserved_zero_44bits
    info.level_id = br.readBits(8); // general_level_idc
    return info;
}
/////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
/**
@@ -247,7 +350,9 @@ public:
_printer << "b=AS:" << bitrate << "\r\n";
}
_printer << "a=rtpmap:" << payload_type << " " << getCodecName(CodecH265) << "/" << 90000 << "\r\n";
_printer << "a=fmtp:" << payload_type << " ";
auto info = parse_hevc_profile_tier_level((uint8_t *)strSPS.data(), strSPS.size());
_printer << "a=fmtp:" << payload_type << " level-id=" << info.level_id << "; profile-id=" << info.profile_id << "; tier-flag=" << info.tier_flag << "; ";
_printer << "sprop-vps=";
_printer << encodeBase64(strVPS) << "; ";
_printer << "sprop-sps=";
@@ -287,7 +392,10 @@ Track::Ptr getTrackBySdp(const SdpTrack::Ptr &track) {
// If there is no sps/pps in the sdp, then it may be possible to recover sps/pps from the subsequent rtp
return std::make_shared<H265Track>();
}
return std::make_shared<H265Track>(vps, sps, pps, 0, 0, 0);
return std::make_shared<H265Track>(vps, sps, pps,
prefixSize(vps.data(), vps.size()),
prefixSize(sps.data(), sps.size()),
prefixSize(pps.data(), pps.size()));
}
RtpCodec::Ptr getRtpEncoderByCodecId(uint8_t pt) {

View File

@@ -268,12 +268,12 @@ void H265RtpEncoder::packRtpFu(const char *ptr, size_t len, uint64_t pts, bool i
auto nal_type = H265_TYPE(ptr[0]); //获取NALU的5bit 帧类型
unsigned char s_e_flags;
bool fu_start = true;
bool mark_bit = false;
bool fu_end = false;
size_t offset = 2;
while (!mark_bit) {
while (!fu_end) {
if (len <= offset + max_size) {
// FU end
mark_bit = true;
fu_end = true;
max_size = len - offset;
s_e_flags = (1 << 6) | nal_type;
} else if (fu_start) {
@@ -287,7 +287,9 @@ void H265RtpEncoder::packRtpFu(const char *ptr, size_t len, uint64_t pts, bool i
{
// 传入nullptr先不做payload的内存拷贝 [AUTO-TRANSLATED:7ed49f0a]
// Pass in nullptr first, do not copy the payload memory
auto rtp = getRtpInfo().makeRtp(TrackVideo, nullptr, max_size + 3, mark_bit, pts);
// 只有FU的最后一个分片且整个帧需要设置mark时才设置mark位
bool mark_bit = fu_end && is_mark;
auto rtp = getRtpInfo().makeRtp(TrackVideo, nullptr, max_size + 3, mark_bit && is_mark, pts); //yzw 帧(不是NALU多TILE时一帧有多个NALU)最后一个rtp才设置mark位
// rtp payload 负载部分 [AUTO-TRANSLATED:03a5ef9b]
// rtp payload load part
uint8_t *payload = rtp->getPayload();

View File

@@ -133,7 +133,7 @@ static inline void bytestream2_put_be16(PutByteContext *p, uint16_t value) {
}
}
static inline void bytestream2_put_be24(PutByteContext *p, uint16_t value) {
static inline void bytestream2_put_be24(PutByteContext *p, uint32_t value) {
if (!p->eof && (p->buffer_end - p->buffer >= 2)) {
p->buffer[0] = value >> 16;
p->buffer[1] = value >> 8;

218
ext-codec/MP2A.cpp Normal file
View File

@@ -0,0 +1,218 @@
/*
* Copyright (c) 2016-present The ZLMediaKit project authors. All Rights Reserved.
*
* This file is part of ZLMediaKit(https://github.com/ZLMediaKit/ZLMediaKit).
*
* Use of this source code is governed by MIT-like license that can be found in the
* LICENSE file in the root of the source tree. All contributing project authors
* may be found in the AUTHORS file in the root of the source tree.
*/
#include "MP2A.h"
#include "MP2ARtp.h"
#include "Extension/Factory.h"
#include "Extension/CommonRtmp.h"
#include "Rtsp/Rtsp.h"
using namespace std;
using namespace toolkit;
namespace mediakit {
// ======================== MpegAudioFrameInfo ========================
// MPEG Audio version table
// Index: version_bits (2 bits from header)
// 00 = MPEG 2.5, 01 = reserved, 10 = MPEG 2, 11 = MPEG 1
static const int s_mpeg_version[] = { 3, 0, 2, 1 }; // 3=MPEG2.5, 0=reserved, 2=MPEG2, 1=MPEG1
// Layer table: 00=reserved, 01=III, 10=II, 11=I
static const int s_mpeg_layer[] = { 0, 3, 2, 1 };
// MPEG-1 bitrate table (kbps)
// bitrate_index: 0-15, layer: 1-3
static const int s_bitrate_mpeg1[][16] = {
// Layer I
{ 0, 32, 64, 96, 128, 160, 192, 224, 256, 288, 320, 352, 384, 416, 448, 0 },
// Layer II
{ 0, 32, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 384, 0 },
// Layer III
{ 0, 32, 40, 48, 56, 64, 80, 96, 112, 128, 160, 192, 224, 256, 320, 0 },
};
// MPEG-2/2.5 bitrate table (kbps); only two rows because Layer II and III share one
static const int s_bitrate_mpeg2[][16] = {
// Layer I
{ 0, 32, 48, 56, 64, 80, 96, 112, 128, 144, 160, 176, 192, 224, 256, 0 },
// Layer II / III
{ 0, 8, 16, 24, 32, 40, 48, 56, 64, 80, 96, 112, 128, 144, 160, 0 },
};
// Sample rate table (Hz)
// Index: [version_index][samplerate_index]
static const int s_sample_rate[][4] = {
{ 44100, 48000, 32000, 0 }, // MPEG-1
{ 22050, 24000, 16000, 0 }, // MPEG-2
{ 11025, 12000, 8000, 0 }, // MPEG-2.5
};
/**
 * Decode the 4-byte MPEG audio frame header found at `data`.
 *
 * @param data  pointer to the candidate header
 * @param size  number of readable bytes at `data`; fewer than 4 is rejected
 * @param info  receives version, layer, bitrate (kbps), sample rate, channel
 *              count, samples per frame and frame size; written only on success
 * @return true when the bytes form a header with a non-reserved version, layer
 *         and sample rate and a bitrate index other than 0 or 15
 */
bool MpegAudioFrameInfo::parse(const uint8_t *data, size_t size, MpegAudioFrameInfo &info) {
    // A header is 4 bytes long and opens with an 11-bit sync word (all ones).
    if (size < 4 || data[0] != 0xFF || (data[1] & 0xE0) != 0xE0) {
        return false;
    }

    const uint8_t b1 = data[1];
    const uint8_t b2 = data[2];
    const uint8_t b3 = data[3];

    const int version = s_mpeg_version[(b1 >> 3) & 0x03];
    const int layer_no = s_mpeg_layer[(b1 >> 1) & 0x03];
    const int rate_idx = (b2 >> 4) & 0x0F;
    const int freq_idx = (b2 >> 2) & 0x03;
    const int pad = (b2 >> 1) & 0x01;
    const bool mono = ((b3 >> 6) & 0x03) == 3; // channel mode 3 = mono, anything else = 2 channels

    // Reserved version / layer / sample-rate codes and bitrate index 0 or 15 are not accepted.
    const bool reserved = (version == 0) || (layer_no == 0) || (freq_idx == 3);
    const bool bad_rate = (rate_idx == 0) || (rate_idx == 15);
    if (reserved || bad_rate) {
        return false;
    }

    // version: 1 = MPEG-1, 2 = MPEG-2, 3 = MPEG-2.5 -> table row version - 1
    const int hz = s_sample_rate[version - 1][freq_idx];
    if (hz == 0) {
        return false;
    }

    int kbps = 0;
    if (version == 1) {
        kbps = s_bitrate_mpeg1[layer_no - 1][rate_idx];
    } else {
        // MPEG-2 / MPEG-2.5: Layer I has its own row, Layer II and III share one.
        kbps = s_bitrate_mpeg2[layer_no == 1 ? 0 : 1][rate_idx];
    }

    info.version = version;
    info.layer = layer_no;
    info.bitrate = kbps;
    info.sample_rate = hz;
    info.channels = mono ? 1 : 2;

    // Samples per frame and frame size in bytes depend on layer (and on version for Layer III).
    switch (layer_no) {
    case 1:
        // Layer I: 384 samples, size counted in 4-byte slots
        info.samples_per_frame = 384;
        info.frame_size = (12 * kbps * 1000 / hz + pad) * 4;
        break;
    case 2:
        // Layer II: 1152 samples
        info.samples_per_frame = 1152;
        info.frame_size = 144 * kbps * 1000 / hz + pad;
        break;
    default:
        // Layer III: 1152 samples for MPEG-1, 576 for MPEG-2 / MPEG-2.5
        if (version == 1) {
            info.samples_per_frame = 1152;
            info.frame_size = 144 * kbps * 1000 / hz + pad;
        } else {
            info.samples_per_frame = 576;
            info.frame_size = 72 * kbps * 1000 / hz + pad;
        }
        break;
    }
    return true;
}
// ======================== MP2ATrack ========================
/**
 * Feed one frame into the track. Until a frame header has been decoded
 * successfully, each frame is probed to pick up the real sample rate and
 * channel count; afterwards frames are passed straight through.
 */
bool MP2ATrack::inputFrame(const Frame::Ptr &frame) {
    if (_info_parsed) {
        return AudioTrackImp::inputFrame(frame);
    }

    const auto prefix = frame->prefixSize();
    const auto payload = (const uint8_t *)frame->data() + prefix;
    MpegAudioFrameInfo header;
    _info_parsed = MpegAudioFrameInfo::parse(payload, frame->size() - prefix, header);
    if (_info_parsed) {
        _sample_rate = header.sample_rate;
        _channels = header.channels;
    }
    return AudioTrackImp::inputFrame(frame);
}
/**
 * Build the SDP media section for this track.
 * RFC 2250/3551: the MPA RTP clock rate is fixed at 90000, not the audio sample rate.
 */
Sdp::Ptr MP2ATrack::getSdp(uint8_t pt) const {
    class MpaSdp : public Sdp {
    public:
        // The Sdp base class must be constructed with 90000 as its sample rate.
        MpaSdp(uint8_t type, int channel_count, int kbps)
            : Sdp(90000, type) {
            const int pt_num = (int)type;
            _text << "m=audio 0 RTP/AVP " << pt_num << "\r\n";
            // The bandwidth line is emitted only for a non-zero bitrate.
            if (kbps) {
                _text << "b=AS:" << kbps << "\r\n";
            }
            _text << "a=rtpmap:" << pt_num << " MPA/90000/" << channel_count << "\r\n";
        }

        std::string getSdp() const override { return _text; }

    private:
        toolkit::_StrPrinter _text;
    };

    const auto channel_count = getAudioChannel();
    const auto kbps = getBitRate() >> 10;
    return std::make_shared<MpaSdp>(pt, channel_count, kbps);
}
// Return a copy of this track (copy-constructed) as a new shared object.
Track::Ptr MP2ATrack::clone() const {
return std::make_shared<MP2ATrack>(*this);
}
// File-local factory callbacks that make up the MP2A codec plugin.
namespace {
// Codec id served by this plugin.
CodecId getCodec() {
return CodecMP2A;
}
// Create a track from raw audio parameters; sample_bit is not used.
Track::Ptr getTrackByCodecId(int sample_rate, int channels, int sample_bit) {
return std::make_shared<MP2ATrack>(sample_rate, channels);
}
// Create a track from the sample rate and channel count carried by the SDP track.
Track::Ptr getTrackBySdp(const SdpTrack::Ptr &track) {
return std::make_shared<MP2ATrack>(track->_samplerate, track->_channel);
}
// Create the RTP packetizer; the payload type argument is not used here.
RtpCodec::Ptr getRtpEncoderByCodecId(uint8_t pt) {
return std::make_shared<MP2ARtpEncoder>();
}
// Create the RTP depacketizer.
RtpCodec::Ptr getRtpDecoderByCodecId() {
return std::make_shared<MP2ARtpDecoder>();
}
// RTMP muxing uses the generic CommonRtmpEncoder.
RtmpCodec::Ptr getRtmpEncoderByTrack(const Track::Ptr &track) {
return std::make_shared<CommonRtmpEncoder>(track);
}
// RTMP demuxing uses the generic CommonRtmpDecoder.
RtmpCodec::Ptr getRtmpDecoderByTrack(const Track::Ptr &track) {
return std::make_shared<CommonRtmpDecoder>(track);
}
// Wrap a caller-provided buffer in an MP2AFrameNoCacheAble.
Frame::Ptr getFrameFromPtr(const char *data, size_t bytes, uint64_t dts, uint64_t pts) {
return std::make_shared<MP2AFrameNoCacheAble>((char *)data, bytes, dts, pts);
}
} // namespace
// Plugin descriptor bundling the callbacks above.
CodecPlugin mp2a_plugin = { getCodec,
getTrackByCodecId,
getTrackBySdp,
getRtpEncoderByCodecId,
getRtpDecoderByCodecId,
getRtmpEncoderByTrack,
getRtmpDecoderByTrack,
getFrameFromPtr };
} // namespace mediakit

90
ext-codec/MP2A.h Normal file
View File

@@ -0,0 +1,90 @@
/*
* Copyright (c) 2016-present The ZLMediaKit project authors. All Rights Reserved.
*
* This file is part of ZLMediaKit(https://github.com/ZLMediaKit/ZLMediaKit).
*
* Use of this source code is governed by MIT-like license that can be found in the
* LICENSE file in the root of the source tree. All contributing project authors
* may be found in the AUTHORS file in the root of the source tree.
*/
#ifndef ZLMEDIAKIT_MP2A_H
#define ZLMEDIAKIT_MP2A_H
#include "Extension/Frame.h"
#include "Extension/Track.h"
namespace mediakit {
/**
* MPEG-1/2 Audio (Layer I/II) frame helper class template.
* Derives from any frame base class (Parent) and tags the frame as CodecMP2A.
*/
template <typename Parent>
class MP2AFrameHelper : public Parent {
public:
using Ptr = std::shared_ptr<MP2AFrameHelper>;
// Forward every constructor argument to Parent, then set the codec id.
template <typename... ARGS>
MP2AFrameHelper(ARGS &&...args)
: Parent(std::forward<ARGS>(args)...) {
this->_codec_id = CodecMP2A;
}
// These frames are never reported as key frames or config frames.
bool keyFrame() const override { return false; }
bool configFrame() const override { return false; }
};
/// MPEG-1/2 Audio frame types: MP2AFrame is built on FrameImp, MP2AFrameNoCacheAble on FrameFromPtr.
using MP2AFrame = MP2AFrameHelper<FrameImp>;
using MP2AFrameNoCacheAble = MP2AFrameHelper<FrameFromPtr>;
// MPEG Audio frame header parsing utility
struct MpegAudioFrameInfo {
int version = 0; // 1: MPEG-1, 2: MPEG-2, 3: MPEG-2.5
int layer = 0; // 1: Layer I, 2: Layer II, 3: Layer III
int bitrate = 0; // kbps
int sample_rate = 0; // Hz
int channels = 0; // 1: mono, 2: stereo
int frame_size = 0; // bytes per frame
int samples_per_frame = 0; // audio samples carried by one frame
/**
* Parse frame header info starting at the MPEG Audio sync word
* @param data pointer to the header, at least 4 bytes
* @param size number of bytes available at data
* @param info output structure that receives the parsed fields
* @return whether parsing succeeded
*/
static bool parse(const uint8_t *data, size_t size, MpegAudioFrameInfo &info);
};
/**
* MPEG-1/2 Audio (Layer I/II) Track
* Corresponds to CodecMP2A
*/
class MP2ATrack : public AudioTrackImp {
public:
using Ptr = std::shared_ptr<MP2ATrack>;
// Defaults to 44100 Hz, 2 channels; the sample bit depth handed to AudioTrackImp is always 16.
MP2ATrack(int sample_rate = 44100, int channels = 2)
: AudioTrackImp(CodecMP2A, sample_rate, channels, 16) {}
bool inputFrame(const Frame::Ptr &frame) override;
private:
/**
* RFC 2250/3551 specifies MPA RTP clock rate is fixed at 90000
*/
Sdp::Ptr getSdp(uint8_t payload_type) const override;
Track::Ptr clone() const override;
private:
// Set once inputFrame has successfully parsed a frame header.
bool _info_parsed = false;
};
} // namespace mediakit
#endif // ZLMEDIAKIT_MP2A_H

175
ext-codec/MP2ARtp.cpp Normal file
View File

@@ -0,0 +1,175 @@
/*
* Copyright (c) 2016-present The ZLMediaKit project authors. All Rights Reserved.
*
* This file is part of ZLMediaKit(https://github.com/ZLMediaKit/ZLMediaKit).
*
* Use of this source code is governed by MIT-like license that can be found in the
* LICENSE file in the root of the source tree. All contributing project authors
* may be found in the AUTHORS file in the root of the source tree.
*/
#include "MP2ARtp.h"
namespace mediakit {
// ======================== MP2ARtpEncoder ========================
/**
 * Emit one RTP packet: the 4-byte MPEG audio-specific header
 * (RFC 2250 Section 3.5) followed by `len` bytes of elementary-stream data.
 */
void MP2ARtpEncoder::outputRtp(const char *data, size_t len, size_t frag_offset, bool mark, uint64_t stamp) {
    // Allocate the packet without a payload copy; the payload is filled in below.
    auto packet = getRtpInfo().makeRtp(TrackAudio, nullptr, len + kMP2AHeaderSize, mark, stamp);
    auto out = packet->getPayload();

    // Bytes 0-1: MBZ (must be zero)
    out[0] = 0;
    out[1] = 0;
    // Bytes 2-3: Frag_offset, big-endian
    out[2] = (frag_offset >> 8) & 0xFF;
    out[3] = frag_offset & 0xFF;

    // Elementary-stream bytes follow the header.
    memcpy(out + kMP2AHeaderSize, data, len);
    RtpCodec::inputRtp(std::move(packet), false);
}
/**
 * Packetize the MPEG audio data carried by `frame`.
 *
 * A TS demuxer may hand over several complete MPEG audio frames in one
 * callback (one PES packet). Each audio frame is located by its sync word and
 * packetized on its own, so that every unfragmented RTP payload starts with a
 * sync word; otherwise receivers such as FFmpeg report "Header missing".
 *
 * @return false when the frame carries no payload, true otherwise
 */
bool MP2ARtpEncoder::inputFrame(const Frame::Ptr &frame) {
    auto data = (const uint8_t *)frame->data() + frame->prefixSize();
    auto total_size = (size_t)(frame->size() - frame->prefixSize());
    if (total_size == 0) {
        return false;
    }

    auto max_payload = getRtpInfo().getMaxSize() - kMP2AHeaderSize;
    auto base_dts = frame->dts();

    size_t pos = 0;
    int emitted = 0; // audio frames packetized so far
    while (pos + 4 <= total_size) {
        // Advance byte by byte until a sync word with a decodable header is found.
        MpegAudioFrameInfo info;
        const bool has_sync = data[pos] == 0xFF && (data[pos + 1] & 0xE0) == 0xE0;
        if (!has_sync || !MpegAudioFrameInfo::parse(data + pos, total_size - pos, info) || info.frame_size <= 0) {
            ++pos;
            continue;
        }

        // A frame cut short by the end of the buffer is sent with whatever bytes remain.
        size_t frame_size = (size_t)info.frame_size;
        if (pos + frame_size > total_size) {
            frame_size = total_size - pos;
        }

        // Timestamp (ms) of this frame: the base DTS plus the duration of the frames before it.
        uint64_t stamp = base_dts;
        if (emitted > 0 && info.sample_rate > 0) {
            stamp += (uint64_t)emitted * info.samples_per_frame * 1000 / info.sample_rate;
        }

        // Split the audio frame into payload-sized fragments; only the last one carries the mark bit.
        auto ptr = (const char *)(data + pos);
        size_t remain = frame_size;
        size_t frag_offset = 0;
        while (remain > 0) {
            const bool is_last = remain <= max_payload;
            const size_t chunk = is_last ? remain : max_payload;
            outputRtp(ptr, chunk, frag_offset, is_last, stamp);
            ptr += chunk;
            remain -= chunk;
            frag_offset += chunk;
        }

        pos += frame_size;
        ++emitted;
    }
    return true;
}
// ======================== MP2ARtpDecoder ========================
// Start with an empty frame ready to accumulate payload data.
MP2ARtpDecoder::MP2ARtpDecoder() {
obtainFrame();
}
// Replace _frame with a newly created MP2AFrame.
void MP2ARtpDecoder::obtainFrame() {
_frame = FrameImp::create<MP2AFrame>();
}
// Deliver the accumulated frame downstream, if it holds any data, and start a fresh one.
void MP2ARtpDecoder::flushData() {
    if (!_frame->_buffer.empty()) {
        RtpCodec::inputFrame(_frame);
        obtainFrame();
    }
}
/**
 * Consume one MPEG audio RTP packet (RFC 2250 Section 3.5) and reassemble
 * audio frames from it.
 *
 * @param rtp     incoming packet
 * @param key_pos not used
 * @return always false
 */
bool MP2ARtpDecoder::inputRtp(const RtpPacket::Ptr &rtp, bool key_pos) {
    auto payload_size = rtp->getPayloadSize();
    if (payload_size <= (ssize_t)kMP2AHeaderSize) {
        // Payload too small to hold any elementary-stream data.
        return false;
    }

    auto payload = rtp->getPayload();
    auto stamp = rtp->getStamp();
    auto seq = rtp->getSeq();

    // MPEG audio-specific header: MBZ (16 bits) + Frag_offset (16 bits, big-endian)
    uint16_t frag_offset = (payload[2] << 8) | payload[3];
    auto es_data = payload + kMP2AHeaderSize;
    auto es_size = payload_size - kMP2AHeaderSize;

    // Remember the previous sequence number for the continuity check, then record this packet.
    const uint16_t prev_seq = _last_seq;
    _last_seq = seq;
    _last_stamp = stamp;

    if (frag_offset == 0) {
        // Offset 0 starts a new (or complete) frame: emit whatever was buffered first.
        flushData();
        // The frame timestamp is the RTP timestamp converted to milliseconds.
        _frame->_dts = rtp->getStampMS();
        _frame->_pts = _frame->_dts;
    } else if (_frame->_buffer.empty()) {
        // Continuation fragment with nothing buffered: the first fragment was lost, discard.
        return false;
    } else if (seq != (uint16_t)(prev_seq + 1)) {
        // Gap in the sequence numbers inside a fragmented frame: discard the partial frame.
        WarnL << "mp2a rtp packet loss:" << prev_seq << " -> " << seq;
        _frame->_buffer.clear();
        return false;
    }

    // Append the elementary-stream bytes.
    _frame->_buffer.append((char *)es_data, es_size);

    // The mark bit flags the last packet of a frame, so the frame is emitted right away.
    if (rtp->getHeader()->mark) {
        flushData();
    }
    return false;
}
} // namespace mediakit

87
ext-codec/MP2ARtp.h Normal file
View File

@@ -0,0 +1,87 @@
/*
* Copyright (c) 2016-present The ZLMediaKit project authors. All Rights Reserved.
*
* This file is part of ZLMediaKit(https://github.com/ZLMediaKit/ZLMediaKit).
*
* Use of this source code is governed by MIT-like license that can be found in the
* LICENSE file in the root of the source tree. All contributing project authors
* may be found in the AUTHORS file in the root of the source tree.
*/
#ifndef ZLMEDIAKIT_MP2ARTP_H
#define ZLMEDIAKIT_MP2ARTP_H
#include "MP2A.h"
#include "Rtsp/RtpCodec.h"
namespace mediakit {
// RFC 2250 Section 3.5 MPEG Audio-specific header (4 bytes)
//
// 0 1 2 3
// 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// | MBZ | Frag_offset |
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
//
// MBZ: Must Be Zero (16 bits)
// Frag_offset: Byte offset into the audio frame for the data in this packet (16 bits)
// Size in bytes of the header above, which precedes the audio data in every RTP payload.
static constexpr size_t kMP2AHeaderSize = 4;
/**
* MP2A (MPEG-1/2 Audio Layer I/II) RTP encoder
* RFC 2250 Section 3.5
*/
class MP2ARtpEncoder : public RtpCodec {
public:
using Ptr = std::shared_ptr<MP2ARtpEncoder>;
/**
* Input MPEG Audio frame data and packetize it as RTP
* @param frame frame data
*/
bool inputFrame(const Frame::Ptr &frame) override;
private:
/**
* Output one RTP packet
* @param data ES data
* @param len data length in bytes
* @param frag_offset byte offset of this fragment within the audio frame
* @param mark whether this is the last packet of the frame
* @param stamp timestamp (ms)
*/
void outputRtp(const char *data, size_t len, size_t frag_offset, bool mark, uint64_t stamp);
};
/**
* MP2A (MPEG-1/2 Audio Layer I/II) RTP decoder
* RFC 2250 Section 3.5
*/
class MP2ARtpDecoder : public RtpCodec {
public:
using Ptr = std::shared_ptr<MP2ARtpDecoder>;
MP2ARtpDecoder();
/**
* Input an MPEG Audio RTP packet and depacketize it
* @param rtp rtp packet
* @param key_pos ignored for audio frames
*/
bool inputRtp(const RtpPacket::Ptr &rtp, bool key_pos = false) override;
private:
// Allocate a fresh frame to accumulate payload data into.
void obtainFrame();
// Output the accumulated frame (if not empty) and allocate a new one.
void flushData();
private:
// Sequence number and RTP timestamp of the most recently processed packet.
uint16_t _last_seq = 0;
uint32_t _last_stamp = 0;
// Frame currently being reassembled.
FrameImp::Ptr _frame;
};
} // namespace mediakit
#endif // ZLMEDIAKIT_MP2ARTP_H

116
ext-codec/MP2V.cpp Normal file
View File

@@ -0,0 +1,116 @@
/*
* Copyright (c) 2016-present The ZLMediaKit project authors. All Rights Reserved.
*
* This file is part of ZLMediaKit(https://github.com/ZLMediaKit/ZLMediaKit).
*
* Use of this source code is governed by MIT-like license that can be found in the
* LICENSE file in the root of the source tree. All contributing project authors
* may be found in the AUTHORS file in the root of the source tree.
*/
#include "MP2V.h"
#include "MP2VRtp.h"
#include "Extension/Factory.h"
#include "Rtsp/Rtsp.h"
using namespace std;
using namespace toolkit;
namespace mediakit {
// MPEG-2 sequence header frame rate table (ISO 13818-2 Table 6-4)
// Indexed by the 4-bit frame_rate_code; parseSequenceHeader only looks up codes 1..8.
static const float s_mp2v_frame_rate_table[] = {
0, // 0000 forbidden
24000.0 / 1001, // 0001 23.976
24.0, // 0010
25.0, // 0011
30000.0 / 1001, // 0100 29.97
30.0, // 0101
50.0, // 0110
60000.0 / 1001, // 0111 59.94
60.0, // 1000
};
/**
 * Scan `data` for the first sequence header start code (00 00 01 B3) and take
 * the picture width, height and frame rate from the bytes that follow it.
 * Nothing is changed when no start code with at least 4 trailing bytes is found.
 */
void MP2VTrack::parseSequenceHeader(const uint8_t *data, size_t size) {
    for (size_t pos = 0; pos + 7 < size; ++pos) {
        const uint8_t *p = data + pos;
        const bool is_seq_header = p[0] == 0x00 && p[1] == 0x00 && p[2] == 0x01 && p[3] == 0xB3;
        if (!is_seq_header) {
            continue;
        }

        // sequence_header() layout after the start code:
        //   horizontal_size_value    12 bits
        //   vertical_size_value      12 bits
        //   aspect_ratio_information  4 bits
        //   frame_rate_code           4 bits
        _width = (p[4] << 4) | ((p[5] >> 4) & 0x0F);
        _height = ((p[5] & 0x0F) << 8) | p[6];

        // Only codes 1..8 have a table entry; any other code leaves _fps untouched.
        const uint8_t rate_code = p[7] & 0x0F;
        if (rate_code > 0 && rate_code <= 8) {
            _fps = s_mp2v_frame_rate_table[rate_code];
        }
        _seq_header_parsed = true;
        return;
    }
}
/**
 * Feed one frame into the track. Frames are searched for a sequence header
 * until one has been parsed; every frame is then handed to VideoTrackImp.
 */
bool MP2VTrack::inputFrame(const Frame::Ptr &frame) {
    if (!_seq_header_parsed) {
        const auto es = (const uint8_t *)frame->data() + frame->prefixSize();
        const auto es_len = frame->size() - frame->prefixSize();
        parseSequenceHeader(es, es_len);
    }
    return VideoTrackImp::inputFrame(frame);
}
// Describe this track with a DefaultSdp built from the payload type and the track itself.
Sdp::Ptr MP2VTrack::getSdp(uint8_t pt) const {
return std::make_shared<DefaultSdp>(pt, *this);
}
// File-local factory callbacks that make up the MP2V codec plugin.
namespace {
// Codec id served by this plugin.
CodecId getCodec() {
return CodecMP2V;
}
// Create a track; the audio-style parameters are not used for video.
Track::Ptr getTrackByCodecId(int sample_rate, int channels, int sample_bit) {
return std::make_shared<MP2VTrack>();
}
// Create a track; nothing is read from the SDP track here.
Track::Ptr getTrackBySdp(const SdpTrack::Ptr &track) {
return std::make_shared<MP2VTrack>();
}
// Create the RTP packetizer; the payload type argument is not used here.
RtpCodec::Ptr getRtpEncoderByCodecId(uint8_t pt) {
return std::make_shared<MP2VRtpEncoder>();
}
// Create the RTP depacketizer.
RtpCodec::Ptr getRtpDecoderByCodecId() {
return std::make_shared<MP2VRtpDecoder>();
}
// RTMP muxing is not supported for MP2V: logs a warning and returns nullptr.
RtmpCodec::Ptr getRtmpEncoderByTrack(const Track::Ptr &track) {
WarnL << "Unsupported MP2V rtmp encoder";
return nullptr;
}
// RTMP demuxing is not supported for MP2V: logs a warning and returns nullptr.
RtmpCodec::Ptr getRtmpDecoderByTrack(const Track::Ptr &track) {
WarnL << "Unsupported MP2V rtmp decoder";
return nullptr;
}
// Wrap a caller-provided buffer in an MP2VFrameNoCacheAble with a prefix size of 0.
Frame::Ptr getFrameFromPtr(const char *data, size_t bytes, uint64_t dts, uint64_t pts) {
return std::make_shared<MP2VFrameNoCacheAble>((char *)data, bytes, dts, pts, 0);
}
} // namespace
// Plugin descriptor bundling the callbacks above.
CodecPlugin mp2v_plugin = { getCodec,
getTrackByCodecId,
getTrackBySdp,
getRtpEncoderByCodecId,
getRtpDecoderByCodecId,
getRtmpEncoderByTrack,
getRtmpDecoderByTrack,
getFrameFromPtr };
} // namespace mediakit

97
ext-codec/MP2V.h Normal file
View File

@@ -0,0 +1,97 @@
/*
* Copyright (c) 2016-present The ZLMediaKit project authors. All Rights Reserved.
*
* This file is part of ZLMediaKit(https://github.com/ZLMediaKit/ZLMediaKit).
*
* Use of this source code is governed by MIT-like license that can be found in the
* LICENSE file in the root of the source tree. All contributing project authors
* may be found in the AUTHORS file in the root of the source tree.
*/
#ifndef ZLMEDIAKIT_MP2V_H
#define ZLMEDIAKIT_MP2V_H
#include "Extension/Frame.h"
#include "Extension/Track.h"
namespace mediakit {
/**
 * MPEG-2 Video frame helper class template.
 * Derives from any frame base class (Parent), tags the frame as CodecMP2V and
 * adds key-frame detection based on the picture header.
 */
template <typename Parent>
class MP2VFrameHelper : public Parent {
public:
    using Ptr = std::shared_ptr<MP2VFrameHelper>;

    // Forward every constructor argument to Parent, then set the codec id.
    template <typename... ARGS>
    MP2VFrameHelper(ARGS &&...args)
        : Parent(std::forward<ARGS>(args)...) {
        this->_codec_id = CodecMP2V;
    }

    /**
     * A frame is a key frame when its first picture header
     * (start code 00 00 01 00) carries picture_coding_type == 1 (I-Picture).
     */
    bool keyFrame() const override {
        const auto es = (const uint8_t *)this->data() + this->prefixSize();
        const auto es_len = this->size() - this->prefixSize();
        return isMP2VKeyFrame(es, es_len);
    }

    bool configFrame() const override { return false; }

    /**
     * Find the first picture start code in `data` and report whether the
     * picture that follows is an I-Picture.
     * @return false when no picture start code with 2 trailing bytes is present
     */
    static bool isMP2VKeyFrame(const uint8_t *data, size_t size) {
        for (size_t pos = 0; pos + 5 < size; ++pos) {
            const uint8_t *p = data + pos;
            const bool is_picture_start = p[0] == 0x00 && p[1] == 0x00 && p[2] == 0x01 && p[3] == 0x00;
            if (!is_picture_start) {
                continue;
            }
            // Picture header: temporal_reference (10 bits) + picture_coding_type (3 bits)
            // picture_coding_type: 001 = I, 010 = P, 011 = B
            const uint8_t coding_type = (p[5] >> 3) & 0x07;
            return coding_type == 1;
        }
        return false;
    }
};

/// MPEG-2 Video frame types: MP2VFrame is built on FrameImp, MP2VFrameNoCacheAble on FrameFromPtr.
using MP2VFrame = MP2VFrameHelper<FrameImp>;
using MP2VFrameNoCacheAble = MP2VFrameHelper<FrameFromPtr>;
/**
* MPEG-2 Video Track
*/
class MP2VTrack : public VideoTrackImp {
public:
using Ptr = std::shared_ptr<MP2VTrack>;
MP2VTrack() : VideoTrackImp(CodecMP2V) {}
// Return a copy of this track (copy-constructed) as a new shared object.
Track::Ptr clone() const override { return std::make_shared<MP2VTrack>(*this); }
bool inputFrame(const Frame::Ptr &frame) override;
private:
Sdp::Ptr getSdp(uint8_t payload_type) const override;
/**
* Parse width, height and fps from sequence header
*/
void parseSequenceHeader(const uint8_t *data, size_t size);
private:
// Set once a sequence header has been found and parsed.
bool _seq_header_parsed = false;
};
} // namespace mediakit
#endif // ZLMEDIAKIT_MP2V_H

274
ext-codec/MP2VRtp.cpp Normal file
View File

@@ -0,0 +1,274 @@
/*
* Copyright (c) 2016-present The ZLMediaKit project authors. All Rights Reserved.
*
* This file is part of ZLMediaKit(https://github.com/ZLMediaKit/ZLMediaKit).
*
* Use of this source code is governed by MIT-like license that can be found in the
* LICENSE file in the root of the source tree. All contributing project authors
* may be found in the AUTHORS file in the root of the source tree.
*/
#include "MP2VRtp.h"
#include "Common/config.h"
namespace mediakit {
// ======================== MP2VRtpDecoder ========================
// Start with an empty frame ready to accumulate payload data.
MP2VRtpDecoder::MP2VRtpDecoder() {
obtainFrame();
}
// Replace _frame with a newly created MP2VFrame.
void MP2VRtpDecoder::obtainFrame() {
_frame = FrameImp::create<MP2VFrame>();
}
/**
 * Consume one MPEG-2 video RTP packet and track sequence-number continuity.
 * A gap in the sequence numbers switches the decoder into "drop GOP" mode.
 *
 * @param rtp     incoming packet
 * @param key_pos not used
 * @return true when this packet completed an I-picture and the decoder was not
 *         dropping a GOP when the packet arrived
 */
bool MP2VRtpDecoder::inputRtp(const RtpPacket::Ptr &rtp, bool key_pos) {
    const auto cur_seq = rtp->getSeq();
    const bool was_dropping = _gop_dropped;
    const bool gop_start = decodeRtp(rtp);

    // A previous sequence number of 0 is treated as "no previous packet".
    const bool seq_gap = _last_seq && cur_seq != (uint16_t)(_last_seq + 1);
    if (seq_gap && !_gop_dropped) {
        _gop_dropped = true;
        WarnL << "start drop mp2v gop, last seq:" << _last_seq << ", rtp:\r\n" << rtp->dumpString();
    }
    _last_seq = cur_seq;
    return gop_start && !was_dropping;
}
/**
* RFC 2250 MPEG Video-specific header (4 bytes):
*
* 0 1 2 3
* 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
* | MBZ |T| TR |AN|N|S|B|E| P | | BFC | | FFC |
* +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
* FBV FFV
*
* T: MPEG-2 specific header extension present (1 bit)
* TR: Temporal Reference (10 bits)
* AN: Active N bit (1 bit)
* N: New picture header (1 bit)
* S: Sequence-header-present (1 bit)
* B: Beginning-of-slice (1 bit)
* E: End-of-slice (1 bit)
* P: Picture-Type (3 bits): I(1), P(2), B(3), D(4)
* FBV: full_pel_backward_vector (1 bit)
* BFC: backward_f_code (3 bits)
* FFV: full_pel_forward_vector (1 bit)
* FFC: forward_f_code (3 bits)
*/
/**
 * Strip the RFC 2250 MPEG video-specific header from one RTP packet and
 * append its elementary-stream bytes to the picture being reassembled.
 *
 * A picture ends when the RTP timestamp changes or a packet carries the mark
 * bit. When a sequence-number gap is detected in the middle of a picture, the
 * partial data is discarded and every further packet with the same timestamp
 * is ignored, so that the tail of a damaged picture is never emitted as a
 * frame of its own.
 *
 * @param rtp incoming packet
 * @return true only when this packet carried the mark bit and the completed
 *         picture is an I-picture
 */
bool MP2VRtpDecoder::decodeRtp(const RtpPacket::Ptr &rtp) {
    auto payload_size = rtp->getPayloadSize();
    if (payload_size <= (ssize_t)kMP2VHeaderSize) {
        // Payload too small to contain any elementary-stream data.
        return false;
    }

    auto payload = rtp->getPayload();
    auto stamp = rtp->getStampMS();
    auto seq = rtp->getSeq();

    // RFC 2250 MPEG video-specific header: T is bit 2 of byte 0, P is the low 3 bits of byte 2.
    bool t_bit = (payload[0] >> 2) & 0x01;
    uint8_t picture_type = (payload[2] & 0x07);

    // When T is set, a 4-byte MPEG-2 header extension follows and has to be skipped too.
    size_t header_size = kMP2VHeaderSize + (t_bit ? 4 : 0);
    if (payload_size <= (ssize_t)header_size) {
        return false;
    }

    auto es_data = payload + header_size;
    auto es_size = payload_size - header_size;

    // A timestamp change means the buffered picture is complete: emit it first.
    if (!_frame->_buffer.empty() && stamp != _frame->_pts) {
        outputFrame(rtp);
    }

    if (_frame->_buffer.empty()) {
        // After a mid-picture loss the buffer is cleared but _frame->_pts still holds the
        // timestamp of the damaged picture; packets with that timestamp are its remaining
        // fragments and must not start a new frame.
        if (_drop_flag && stamp == _frame->_pts) {
            return false;
        }
        // First packet of a new picture.
        _frame->_pts = stamp;
        _drop_flag = false;
        _picture_type = picture_type;
    }

    // Sequence gap inside a picture: discard what has been collected so far.
    // A previous sequence number of 0 is treated as "no previous packet".
    if (!_frame->_buffer.empty() && seq != (uint16_t)(_last_seq + 1) && _last_seq) {
        _drop_flag = true;
        _frame->_buffer.clear();
        return false;
    }

    // Append the elementary-stream bytes.
    _frame->_buffer.append((char *)es_data, es_size);

    // The RTP mark bit flags the end of the picture.
    if (rtp->getHeader()->mark) {
        outputFrame(rtp);
        return _picture_type == 1; // I-Picture
    }
    return false;
}
/**
 * Emit the reassembled picture downstream, unless it is empty or belongs to a
 * GOP that is being dropped, and allocate a fresh frame.
 * @param rtp packet that triggered the output; used for logging only
 */
void MP2VRtpDecoder::outputFrame(const RtpPacket::Ptr &rtp) {
    if (_frame->_buffer.empty()) {
        return;
    }

    // MPEG-2 has B-frames, so PTS and DTS may differ: derive the DTS from the PTS.
    _dts_generator.getDts(_frame->_pts, _frame->_dts);

    // A key frame ends "drop GOP" mode.
    const bool resync = _frame->keyFrame() && _gop_dropped;
    if (resync) {
        _gop_dropped = false;
        InfoL << "new mp2v gop received, rtp:\r\n" << rtp->dumpString();
    }

    if (_gop_dropped) {
        obtainFrame();
        return;
    }
    RtpCodec::inputFrame(_frame);
    obtainFrame();
}
// ======================== MP2VRtpEncoder ========================
// Report whether `data` contains a sequence header start code (00 00 01 B3).
bool MP2VRtpEncoder::hasSequenceHeader(const uint8_t *data, size_t size) {
    for (size_t pos = 0; pos + 3 < size; ++pos) {
        const uint8_t *p = data + pos;
        const bool matched = p[0] == 0x00 && p[1] == 0x00 && p[2] == 0x01 && p[3] == 0xB3;
        if (matched) {
            return true;
        }
    }
    return false;
}
/**
 * Extract the per-picture values needed for the RFC 2250 MPEG video-specific
 * header from one frame of elementary-stream data.
 *
 * All fields are reset first. Then the sequence-header flag is computed and
 * the first picture header (start code 00 00 01 00) is decoded. With `i` the
 * offset of the start code, the picture header bits are laid out as:
 *
 *   byte i+4            temporal_reference[9:2]
 *   byte i+5  bits 7-6  temporal_reference[1:0]
 *             bits 5-3  picture_coding_type
 *             bits 2-0  vbv_delay[15:13]
 *   byte i+6            vbv_delay[12:5]
 *   byte i+7  bits 7-3  vbv_delay[4:0]
 *             bit  2    full_pel_forward_vector   (P and B pictures)
 *             bits 1-0  forward_f_code[2:1]
 *   byte i+8  bit  7    forward_f_code[0]
 *             bit  6    full_pel_backward_vector  (B pictures only)
 *             bits 5-3  backward_f_code
 *
 * @param data elementary-stream data of one frame
 * @param size number of bytes at data
 */
void MP2VRtpEncoder::parsePictureInfo(const uint8_t *data, size_t size) {
    _temporal_ref = 0;
    _picture_type = 0;
    _fbv = 0;
    _bfc = 0;
    _ffv = 0;
    _ffc = 0;
    _has_seq_header = hasSequenceHeader(data, size);

    // Look for the picture start code: 00 00 01 00
    for (size_t i = 0; i + 5 < size; ++i) {
        if (data[i] != 0x00 || data[i + 1] != 0x00 || data[i + 2] != 0x01 || data[i + 3] != 0x00) {
            continue;
        }

        // temporal_reference: 10 bits, picture_coding_type: 3 bits
        _temporal_ref = (data[i + 4] << 2) | ((data[i + 5] >> 6) & 0x03);
        _picture_type = (data[i + 5] >> 3) & 0x07;

        // temporal_reference(10) + picture_coding_type(3) + vbv_delay(16) = 29 bits, so the
        // motion vector codes begin in the low 3 bits of byte i+7 and continue in byte i+8.
        if (i + 7 < size) {
            const uint8_t b7 = data[i + 7];
            // A missing byte i+8 leaves the bits it would carry at zero.
            const uint8_t b8 = (i + 8 < size) ? data[i + 8] : 0;
            if (_picture_type == 2 /* P */ || _picture_type == 3 /* B */) {
                // full_pel_forward_vector(1) + forward_f_code(3)
                _ffv = (b7 >> 2) & 0x01;
                _ffc = ((b7 & 0x03) << 1) | ((b8 >> 7) & 0x01);
            }
            if (_picture_type == 3 /* B */) {
                // full_pel_backward_vector(1) + backward_f_code(3) directly follow the forward codes
                _fbv = (b8 >> 6) & 0x01;
                _bfc = (b8 >> 3) & 0x07;
            }
        }
        return;
    }
}
/**
 * Write the 4-byte MPEG video-specific header (RFC 2250 Section 3.4) into
 * `buf`, using the picture values stored by parsePictureInfo.
 *
 * @param buf               output buffer, at least 4 bytes
 * @param data              not used
 * @param size              not used
 * @param is_begin_of_slice value of the B bit
 * @param is_end_of_slice   value of the E bit
 */
void MP2VRtpEncoder::buildMpvHeader(uint8_t *buf, const uint8_t *data, size_t size,
                                    bool is_begin_of_slice, bool is_end_of_slice) {
    // Byte 0: MBZ(5) + T(1) + upper 2 bits of TR; T stays 0 (no MPEG-2 header extension is sent)
    buf[0] = (_temporal_ref >> 8) & 0x03;
    // Byte 1: lower 8 bits of TR
    buf[1] = _temporal_ref & 0xFF;

    // Byte 2: AN(1) + N(1) + S(1) + B(1) + E(1) + P(3); AN and N stay 0
    const uint8_t s_bit = _has_seq_header ? 0x20 : 0x00;
    const uint8_t b_bit = is_begin_of_slice ? 0x10 : 0x00;
    const uint8_t e_bit = is_end_of_slice ? 0x08 : 0x00;
    buf[2] = s_bit | b_bit | e_bit | (_picture_type & 0x07);

    // Byte 3: FBV(1) + BFC(3) + FFV(1) + FFC(3)
    buf[3] = ((_fbv & 0x01) << 7) | ((_bfc & 0x07) << 4) | ((_ffv & 0x01) << 3) | (_ffc & 0x07);
}
/**
 * Packetize one MPEG-2 video frame: the elementary-stream data is cut into
 * payload-sized pieces, each prefixed with the MPEG video-specific header.
 * The last packet carries the RTP mark bit.
 *
 * @return false when the frame carries no payload, true otherwise
 */
bool MP2VRtpEncoder::inputFrame(const Frame::Ptr &frame) {
    auto es = (const uint8_t *)frame->data() + frame->prefixSize();
    auto es_len = frame->size() - frame->prefixSize();
    if (es_len == 0) {
        return false;
    }

    // Collect picture type, temporal reference etc. for the header of every packet.
    parsePictureInfo(es, es_len);
    bool is_key = frame->keyFrame();
    auto max_payload = getRtpInfo().getMaxSize() - kMP2VHeaderSize;

    for (size_t sent = 0; sent < es_len;) {
        const bool head = (sent == 0);
        const bool tail = (es_len - sent <= max_payload);
        const size_t chunk = tail ? es_len - sent : max_payload;

        // MPEG video-specific header for this piece
        uint8_t mpv_header[kMP2VHeaderSize];
        buildMpvHeader(mpv_header, es + sent, chunk, head, tail);

        // RTP packet = MPEG header + ES data; the payload is filled in after allocation
        auto rtp = getRtpInfo().makeRtp(TrackVideo, nullptr, kMP2VHeaderSize + chunk, tail, frame->pts());
        auto out = rtp->getPayload();
        memcpy(out, mpv_header, kMP2VHeaderSize);
        memcpy(out + kMP2VHeaderSize, es + sent, chunk);

        // Only the first packet of a key frame is flagged as a key position.
        RtpCodec::inputRtp(rtp, is_key && head);
        sent += chunk;
    }
    return true;
}
} // namespace mediakit

112
ext-codec/MP2VRtp.h Normal file
View File

@@ -0,0 +1,112 @@
/*
* Copyright (c) 2016-present The ZLMediaKit project authors. All Rights Reserved.
*
* This file is part of ZLMediaKit(https://github.com/ZLMediaKit/ZLMediaKit).
*
* Use of this source code is governed by MIT-like license that can be found in the
* LICENSE file in the root of the source tree. All contributing project authors
* may be found in the AUTHORS file in the root of the source tree.
*/
#ifndef ZLMEDIAKIT_MP2VRTP_H
#define ZLMEDIAKIT_MP2VRTP_H
#include "MP2V.h"
#include "Common/Stamp.h"
#include "Rtsp/RtpCodec.h"
namespace mediakit {
// RFC 2250 MPEG Video-specific header (4 bytes)
//
// 0 1 2 3
// 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// | MBZ |T| TR |AN|N|S|B|E| P | | BFC | | FFC |
// +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
// FBV FFV
//
// Field widths in bits: MBZ 5, T 1, TR 10, AN 1, N 1, S 1, B 1, E 1, P 3,
// FBV 1, BFC 3, FFV 1, FFC 3.
// Size in bytes of the header above, which precedes the video data in every RTP payload.
static constexpr size_t kMP2VHeaderSize = 4;
/**
* MP2V (MPEG-2 Video) RTP decoder
* Depacketizes MPEG-2 Video over RTP into MP2V frames
* RFC 2250
*/
class MP2VRtpDecoder : public RtpCodec {
public:
using Ptr = std::shared_ptr<MP2VRtpDecoder>;
MP2VRtpDecoder();
/**
* Input an MPEG-2 Video RTP packet
* @param rtp rtp packet
* @param key_pos this parameter is ignored
*/
bool inputRtp(const RtpPacket::Ptr &rtp, bool key_pos = true) override;
private:
// Append one packet to the picture being reassembled; true when it completed an I-picture.
bool decodeRtp(const RtpPacket::Ptr &rtp);
// Emit the reassembled picture (unless its GOP is being dropped) and allocate a new frame.
void outputFrame(const RtpPacket::Ptr &rtp);
// Allocate a fresh frame to accumulate payload data into.
void obtainFrame();
private:
// True while frames are withheld until the next key frame; starts out true.
bool _gop_dropped = true;
// Set when a sequence gap is detected inside the current picture.
bool _drop_flag = false;
// Sequence number of the most recently processed packet.
uint16_t _last_seq = 0;
// Picture type (P field of the RTP header) of the picture being reassembled.
uint8_t _picture_type = 0;
// Picture currently being reassembled.
MP2VFrame::Ptr _frame;
// Derives DTS values from PTS values.
DtsGenerator _dts_generator;
};
/**
* MP2V (MPEG-2 Video) RTP encoder
* Packetizes MPEG-2 Video frames as RTP
* RFC 2250
*/
class MP2VRtpEncoder : public RtpCodec {
public:
using Ptr = std::shared_ptr<MP2VRtpEncoder>;
/**
* Input an MPEG-2 Video frame
* @param frame frame data
*/
bool inputFrame(const Frame::Ptr &frame) override;
private:
/**
* Build the RFC 2250 MPEG Video-specific header
* @param buf output buffer, at least 4 bytes
* @param data MPEG-2 ES data
* @param size data size
* @param is_begin_of_slice whether this is the beginning of a slice
* @param is_end_of_slice whether this is the end of a slice
*/
void buildMpvHeader(uint8_t *buf, const uint8_t *data, size_t size,
bool is_begin_of_slice, bool is_end_of_slice);
/**
* Parse information about the current frame (picture type, temporal reference, etc.)
*/
void parsePictureInfo(const uint8_t *data, size_t size);
/**
* Check whether a sequence header is present
*/
bool hasSequenceHeader(const uint8_t *data, size_t size);
private:
// Values taken from the picture header of the frame being packetized.
uint16_t _temporal_ref = 0;
uint8_t _picture_type = 0;
uint8_t _fbv = 0;
uint8_t _bfc = 0;
uint8_t _ffv = 0;
uint8_t _ffc = 0;
// Whether the frame being packetized contains a sequence header.
bool _has_seq_header = false;
};
} // namespace mediakit
#endif // ZLMEDIAKIT_MP2VRTP_H

View File

@@ -11,16 +11,32 @@
#include "Opus.h"
#include "Extension/Factory.h"
#include "Extension/CommonRtp.h"
#include "Extension/CommonRtmp.h"
#include "OpusRtmp.h"
#include "opus-head.h"
using namespace std;
using namespace toolkit;
namespace mediakit {
void OpusTrack::setExtraData(const uint8_t *data, size_t size) {
opus_head_t header;
if (opus_head_load(data, size, &header) > 0) {
// Successfully parsed Opus header
_sample_rate = header.input_sample_rate;
_channels = header.channels;
}
}
Sdp::Ptr OpusTrack::getSdp(uint8_t payload_type) const {
return std::make_shared<DefaultSdp>(payload_type, *this);
/**
 * Serialize an Opus header describing this track (version 1, the track's
 * channel count and sample rate, channel mapping family 0) into a new buffer.
 */
Buffer::Ptr OpusTrack::getExtraData() const {
    struct opus_head_t head {};
    head.version = 1;
    head.channels = getAudioChannel();
    head.input_sample_rate = getAudioSampleRate();
    // head.pre_skip = 120;
    head.channel_mapping_family = 0;

    auto buffer = BufferRaw::create(29);
    auto out = (uint8_t *)buffer->data();
    // The buffer size is set to whatever opus_head_save reports.
    buffer->setSize(opus_head_save(&head, out, buffer->getCapacity()));
    return buffer;
}
namespace {
@@ -46,11 +62,11 @@ RtpCodec::Ptr getRtpDecoderByCodecId() {
}
RtmpCodec::Ptr getRtmpEncoderByTrack(const Track::Ptr &track) {
return std::make_shared<CommonRtmpEncoder>(track);
return std::make_shared<OpusRtmpEncoder>(track);
}
RtmpCodec::Ptr getRtmpDecoderByTrack(const Track::Ptr &track) {
return std::make_shared<CommonRtmpDecoder>(track);
return std::make_shared<OpusRtmpDecoder>(track);
}
Frame::Ptr getFrameFromPtr(const char *data, size_t bytes, uint64_t dts, uint64_t pts) {

View File

@@ -19,23 +19,20 @@ namespace mediakit {
/**
* Opus帧音频通道
* Opus frame audio channel
* [AUTO-TRANSLATED:522e95da]
*/
class OpusTrack : public AudioTrackImp{
class OpusTrack : public AudioTrackImp {
public:
using Ptr = std::shared_ptr<OpusTrack>;
OpusTrack() : AudioTrackImp(CodecOpus,48000,2,16){}
private:
// 克隆该Track [AUTO-TRANSLATED:9a15682a]
// Clone this Track
Track::Ptr clone() const override {
return std::make_shared<OpusTrack>(*this);
}
// 生成sdp [AUTO-TRANSLATED:663a9367]
// Generate sdp
Sdp::Ptr getSdp(uint8_t payload_type) const override ;
toolkit::Buffer::Ptr getExtraData() const override;
void setExtraData(const uint8_t *data, size_t size) override;
};
}//namespace mediakit

113
ext-codec/OpusRtmp.cpp Normal file
View File

@@ -0,0 +1,113 @@
/*
* Copyright (c) 2016-present The ZLMediaKit project authors. All Rights Reserved.
*
* This file is part of ZLMediaKit(https://github.com/ZLMediaKit/ZLMediaKit).
*
* Use of this source code is governed by MIT-like license that can be found in the
* LICENSE file in the root of the source tree. All contributing project authors
* may be found in the AUTHORS file in the root of the source tree.
*/
#include "OpusRtmp.h"
#include "Rtmp/utils.h"
#include "Common/config.h"
#include "Extension/Factory.h"
using namespace std;
using namespace toolkit;
namespace mediakit {
/**
 * Demux one RTMP audio message.
 *
 * The first byte carries the codec id (high nibble) and a type (low nibble).
 * - ex_header codec: a 4-byte fourcc follows; a SequenceStart type delivers
 *   track extra data, every other type delivers a coded frame.
 * - aac codec: one packet-type byte follows; aac_config_header delivers
 *   extra data, otherwise the rest is a frame.
 * - anything else: the rest of the message is a frame.
 *
 * Messages too short to contain the headers they announce are dropped, so the
 * remaining length can never go negative before it is converted to size_t.
 *
 * @param pkt RTMP audio packet
 */
void OpusRtmpDecoder::inputRtmp(const RtmpPacket::Ptr &pkt) {
    auto data = pkt->data();
    int size = pkt->size();
    if (size < 1) {
        // Not even the audio tag byte is present
        return;
    }

    auto flags = (uint8_t)data[0];
    auto codec = (RtmpAudioCodec)(flags >> 4);
    auto type = flags & 0x0F;
    data++;
    size--;

    if (codec == RtmpAudioCodec::ex_header) {
        // @todo parse enhance audio header and check fourcc
        if (size < 4) {
            // Truncated message: the fourcc is missing
            return;
        }
        data += 4;
        size -= 4;
        if (type == (uint8_t)RtmpPacketType::PacketTypeSequenceStart) {
            getTrack()->setExtraData((uint8_t *)data, size);
        } else {
            outputFrame(data, size, pkt->time_stamp, pkt->time_stamp);
        }
        return;
    }

    if (codec == RtmpAudioCodec::aac) {
        if (size < 1) {
            // Truncated message: the AAC packet-type byte is missing
            return;
        }
        uint8_t pkt_type = *data;
        data++;
        size--;
        if (pkt_type == (uint8_t)RtmpAACPacketType::aac_config_header) {
            getTrack()->setExtraData((uint8_t *)data, size);
            return;
        }
    }
    outputFrame(data, size, pkt->time_stamp, pkt->time_stamp);
}
/**
 * Wrap a raw payload in a frame of the track's codec and pass it downstream.
 * @param data payload bytes
 * @param size payload length
 * @param dts  decode timestamp
 * @param pts  presentation timestamp
 */
void OpusRtmpDecoder::outputFrame(const char *data, size_t size, uint32_t dts, uint32_t pts) {
    const auto codec_id = getTrack()->getCodecId();
    auto frame = Factory::getFrameFromPtr(codec_id, data, size, dts, pts);
    RtmpCodec::inputFrame(frame);
}
////////////////////////////////////////////////////////////////////////
// Bind the encoder to its track and read the Rtmp::kEnhanced setting once at construction;
// _enhanced later selects between the ex_header and the legacy audio tag layout.
OpusRtmpEncoder::OpusRtmpEncoder(const Track::Ptr &track) : RtmpCodec(track) {
_enhanced = mINI::Instance()[Rtmp::kEnhanced];
}
/**
 * Pack one audio frame into an RTMP audio message and emit it.
 *
 * Enhanced mode writes an ex_header tag byte (CodedFrames) followed by the
 * codec fourcc in network byte order. Legacy mode writes the classic audio
 * flags byte, plus the aac_raw marker when the track is AAC.
 *
 * @param frame frame to pack
 * @return always true
 */
bool OpusRtmpEncoder::inputFrame(const Frame::Ptr &frame) {
    auto rtmp = RtmpPacket::create();
    auto &body = rtmp->buffer;

    if (!_enhanced) {
        uint8_t legacy_flags = getAudioRtmpFlags(getTrack());
        body.push_back(legacy_flags);
        if (getTrack()->getCodecId() == CodecAAC) {
            body.push_back((uint8_t)RtmpAACPacketType::aac_raw);
        }
    } else {
        uint8_t ex_flags = ((uint8_t)RtmpAudioCodec::ex_header << 4) | (uint8_t)RtmpPacketType::PacketTypeCodedFrames;
        body.push_back(ex_flags);
        uint32_t fourcc_be = htonl(getCodecFourCC(getTrack()->getCodecId()));
        body.append(reinterpret_cast<char *>(&fourcc_be), 4);
    }

    // NOTE(review): the whole frame buffer is copied, including any prefix bytes; confirm frames fed here carry none.
    body.append(frame->data(), frame->size());

    rtmp->body_size = body.size();
    rtmp->time_stamp = frame->dts();
    rtmp->type_id = MSG_AUDIO;
    rtmp->chunk_id = CHUNK_AUDIO;
    rtmp->stream_index = STREAM_MEDIA;

    // Output rtmp packet
    RtmpCodec::inputRtmp(rtmp);
    return true;
}
/**
 * Emit the codec configuration (track extra data) as an RTMP audio message.
 *
 * Nothing is sent when the track has no extra data. In legacy (non-enhanced)
 * mode a config message is produced for AAC only; other codecs return without
 * emitting anything.
 */
void OpusRtmpEncoder::makeConfigPacket() {
    auto config = getTrack()->getExtraData();
    const bool has_config = config && config->size();
    if (!has_config) {
        return;
    }

    auto rtmp = RtmpPacket::create();
    if (!_enhanced) {
        uint8_t legacy_flags = getAudioRtmpFlags(getTrack());
        rtmp->buffer.push_back(legacy_flags);
        if (getTrack()->getCodecId() != CodecAAC) {
            // No legacy config message is emitted for non-AAC codecs
            return;
        }
        rtmp->buffer.push_back((uint8_t)RtmpAACPacketType::aac_config_header);
    } else {
        uint8_t ex_flags = ((uint8_t)RtmpAudioCodec::ex_header << 4) | (uint8_t)RtmpPacketType::PacketTypeSequenceStart;
        rtmp->buffer.push_back(ex_flags);
        uint32_t fourcc_be = htonl(getCodecFourCC(getTrack()->getCodecId()));
        rtmp->buffer.append(reinterpret_cast<char *>(&fourcc_be), 4);
    }

    rtmp->buffer.append(config->data(), config->size());

    rtmp->body_size = rtmp->buffer.size();
    rtmp->time_stamp = 0;
    rtmp->type_id = MSG_AUDIO;
    rtmp->chunk_id = CHUNK_AUDIO;
    rtmp->stream_index = STREAM_MEDIA;
    RtmpCodec::inputRtmp(rtmp);
}
} // namespace mediakit

51
ext-codec/OpusRtmp.h Normal file
View File

@@ -0,0 +1,51 @@
/*
* Copyright (c) 2016-present The ZLMediaKit project authors. All Rights Reserved.
*
* This file is part of ZLMediaKit(https://github.com/ZLMediaKit/ZLMediaKit).
*
* Use of this source code is governed by MIT-like license that can be found in the
* LICENSE file in the root of the source tree. All contributing project authors
* may be found in the AUTHORS file in the root of the source tree.
*/
#ifndef ZLMEDIAKIT_OPUS_RTMPCODEC_H
#define ZLMEDIAKIT_OPUS_RTMPCODEC_H
#include "Rtmp/RtmpCodec.h"
#include "Extension/Track.h"
namespace mediakit {
/**
 * RTMP decoder class.
 * Demuxes Opus carried over RTMP into Opus frames.
 */
class OpusRtmpDecoder : public RtmpCodec {
public:
using Ptr = std::shared_ptr<OpusRtmpDecoder>;
// Bind the decoder to the track that receives extra data and supplies the codec id.
OpusRtmpDecoder(const Track::Ptr &track) : RtmpCodec(track) {}
// Consume one RTMP audio message: either updates the track's extra data or outputs a frame.
void inputRtmp(const RtmpPacket::Ptr &rtmp) override;
protected:
// Wrap a raw payload into a frame of the track's codec and forward it.
void outputFrame(const char *data, size_t size, uint32_t dts, uint32_t pts);
};
/**
 * RTMP packing class.
 * Packs audio frames (and the track's extra data) into RTMP audio messages.
 */
class OpusRtmpEncoder : public RtmpCodec {
// True when the enhanced (ex_header + fourcc) tag layout is used; read from configuration in the constructor.
bool _enhanced = false;
public:
using Ptr = std::shared_ptr<OpusRtmpEncoder>;
OpusRtmpEncoder(const Track::Ptr &track);
// Pack one frame into an RTMP audio message and emit it.
bool inputFrame(const Frame::Ptr &frame) override;
// Emit the track's extra data as a configuration message, when one applies.
void makeConfigPacket() override;
};
} // namespace mediakit
#endif // ZLMEDIAKIT_OPUS_RTMPCODEC_H

79
ext-codec/VP8.cpp Normal file
View File

@@ -0,0 +1,79 @@
#include "VP8.h"
#include "VP8Rtp.h"
#include "VpxRtmp.h"
#include "Extension/Factory.h"
using namespace std;
using namespace toolkit;
namespace mediakit {
/**
 * Feed a VP8 frame to the track.
 *
 * For key frames the 14-bit width and height are read from bytes 6..9 of the
 * payload (little-endian) and the codec configuration record is refreshed.
 * Key frames with fewer than 10 payload bytes are rejected.
 *
 * The header bytes are read as unsigned: reading them through a plain char*
 * sign-extends any byte >= 0x80 and corrupts the result (e.g. width 640,
 * stored as 0x80 0x02, would decode as 384).
 *
 * @param frame frame to process
 * @return false for a truncated key frame, otherwise the result of VideoTrackImp::inputFrame
 */
bool VP8Track::inputFrame(const Frame::Ptr &frame) {
    if (frame->keyFrame()) {
        const auto prefix = frame->prefixSize();
        const auto total = frame->size();
        // Also reject size < prefix, which would otherwise wrap the unsigned subtraction
        if (total < prefix || total - prefix < 10)
            return false;

        auto *hdr = reinterpret_cast<uint8_t *>(frame->data() + prefix);
        _width = ((hdr[7] << 8) | hdr[6]) & 0x3FFF;
        _height = ((hdr[9] << 8) | hdr[8]) & 0x3FFF;
        webm_vpx_codec_configuration_record_from_vp8(&_vpx, &_width, &_height, hdr, total - prefix);
        // InfoL << _width << "x" << _height;
    }
    return VideoTrackImp::inputFrame(frame);
}
/**
 * Serialize the stored VPx codec configuration record.
 * @return buffer sized to the byte count reported by webm_vpx_codec_configuration_record_save()
 */
Buffer::Ptr VP8Track::getExtraData() const {
    // 8 bytes plus the codec initialization data
    auto record = BufferRaw::create(8 + _vpx.codec_intialization_data_size);
    auto *dst = (uint8_t *)record->data();
    const auto written = webm_vpx_codec_configuration_record_save(&_vpx, dst, record->getCapacity());
    record->setSize(written);
    return record;
}
// Load a serialized VPx codec configuration record into _vpx.
// The loader's return value is not checked here.
void VP8Track::setExtraData(const uint8_t *data, size_t size) {
webm_vpx_codec_configuration_record_load(data, size, &_vpx);
}
// File-local factory functions that make up the VP8 codec plugin.
namespace {
// Codec id served by this plugin.
CodecId getCodec() {
return CodecVP8;
}
// Create a VP8 track; the audio-style parameters are not used.
Track::Ptr getTrackByCodecId(int sample_rate, int channels, int sample_bit) {
return std::make_shared<VP8Track>();
}
// Create a VP8 track for an SDP media description; the SDP content is not inspected.
Track::Ptr getTrackBySdp(const SdpTrack::Ptr &track) {
return std::make_shared<VP8Track>();
}
// Create the RTP packetizer; the payload type argument is not used here.
RtpCodec::Ptr getRtpEncoderByCodecId(uint8_t pt) {
return std::make_shared<VP8RtpEncoder>();
}
// Create the RTP depacketizer.
RtpCodec::Ptr getRtpDecoderByCodecId() {
return std::make_shared<VP8RtpDecoder>();
}
// Create the RTMP muxer bound to the given track.
RtmpCodec::Ptr getRtmpEncoderByTrack(const Track::Ptr &track) {
return std::make_shared<VpxRtmpEncoder>(track);
}
// Create the RTMP demuxer bound to the given track.
RtmpCodec::Ptr getRtmpDecoderByTrack(const Track::Ptr &track) {
return std::make_shared<VpxRtmpDecoder>(track);
}
// Wrap an existing buffer as a VP8 frame with a prefix size of 0.
Frame::Ptr getFrameFromPtr(const char *data, size_t bytes, uint64_t dts, uint64_t pts) {
return std::make_shared<VP8FrameNoCacheAble>((char *)data, bytes, dts, pts, 0);
}
} // namespace
// Plugin table for VP8; the initializer order is positional and must match CodecPlugin's member order.
CodecPlugin vp8_plugin = { getCodec,
getTrackByCodecId,
getTrackBySdp,
getRtpEncoderByCodecId,
getRtpDecoderByCodecId,
getRtmpEncoderByTrack,
getRtmpDecoderByTrack,
getFrameFromPtr };
} // namespace mediakit

49
ext-codec/VP8.h Normal file
View File

@@ -0,0 +1,49 @@
#ifndef ZLMEDIAKIT_VP8_H
#define ZLMEDIAKIT_VP8_H
#include "Extension/Frame.h"
#include "Extension/Track.h"
#include "webm-vpx.h"
namespace mediakit {
/**
 * Adds VP8 semantics to a frame base class.
 * @tparam Parent frame implementation to extend (e.g. FrameImp or FrameFromPtr)
 */
template <typename Parent>
class VP8FrameHelper : public Parent {
public:
    friend class FrameImp;
    //friend class toolkit::ResourcePool_l<VP8FrameHelper>;
    using Ptr = std::shared_ptr<VP8FrameHelper>;

    /// Forward all arguments to Parent and tag the frame as VP8.
    template <typename... ARGS>
    VP8FrameHelper(ARGS &&...args)
        : Parent(std::forward<ARGS>(args)...) {
        this->_codec_id = CodecVP8;
    }

    /**
     * A frame is a key frame when the lowest bit of its first payload byte is 0.
     * A frame without payload bytes is never a key frame; this also avoids
     * reading past an empty (or null) buffer.
     */
    bool keyFrame() const override {
        if (this->size() <= this->prefixSize()) {
            return false;
        }
        auto ptr = (const uint8_t *) this->data() + this->prefixSize();
        return !(*ptr & 0x01);
    }

    bool configFrame() const override { return false; }
    bool dropAble() const override { return false; }
    bool decodeAble() const override { return true; }
};
/// VP8 frame types: VP8Frame is built on FrameImp, VP8FrameNoCacheAble on FrameFromPtr.
using VP8Frame = VP8FrameHelper<FrameImp>;
using VP8FrameNoCacheAble = VP8FrameHelper<FrameFromPtr>;
/// VP8 video track: learns width/height from key frames and keeps a VPx configuration record.
class VP8Track : public VideoTrackImp {
public:
VP8Track() : VideoTrackImp(CodecVP8) {}
// Copy this track, including its stored configuration record.
Track::Ptr clone() const override { return std::make_shared<VP8Track>(*this); }
// Inspect key frames for resolution, then hand the frame to VideoTrackImp.
bool inputFrame(const Frame::Ptr &frame) override;
// Serialize the configuration record.
toolkit::Buffer::Ptr getExtraData() const override;
// Load a serialized configuration record.
void setExtraData(const uint8_t *data, size_t size) override;
private:
// Codec configuration record, zero-initialized until a key frame or extra data arrives.
webm_vpx_t _vpx {};
};
} // namespace mediakit
#endif

356
ext-codec/VP8Rtp.cpp Normal file
View File

@@ -0,0 +1,356 @@
/*
* Copyright (c) 2016-present The ZLMediaKit project authors. All Rights Reserved.
*
* This file is part of ZLMediaKit(https://github.com/ZLMediaKit/ZLMediaKit).
*
* Use of this source code is governed by MIT-like license that can be found in the
* LICENSE file in the root of the source tree. All contributing project authors
* may be found in the AUTHORS file in the root of the source tree.
*/
#include "VP8Rtp.h"
#include "Extension/Frame.h"
#include "Common/config.h"
namespace mediakit{
// Sentinel values meaning "this optional descriptor field is absent".
const int16_t kNoPictureId = -1;
const int8_t kNoTl0PicIdx = -1;
const uint8_t kNoTemporalIdx = 0xFF;
const int kNoKeyIdx = -1;
// internal bits
// Flags in the mandatory first descriptor byte (X, N, S).
constexpr int kXBit = 0x80;
constexpr int kNBit = 0x20;
constexpr int kSBit = 0x10;
// Mask of the KEYIDX field inside the T/K byte.
constexpr int kKeyIdxField = 0x1F;
// Presence flags in the optional X byte (I, L, T, K).
constexpr int kIBit = 0x80;
constexpr int kLBit = 0x40;
constexpr int kTBit = 0x20;
constexpr int kKBit = 0x10;
// Y (layer sync) flag inside the T/K byte.
constexpr int kYBit = 0x20;
// Value returned by RTPVideoHeaderVP8::Read() when the descriptor is truncated.
constexpr int kFailedToParse = 0;
// VP8 payload descriptor
// https://datatracker.ietf.org/doc/html/rfc7741#section-4.2
//
// 0 1 2 3 4 5 6 7
// +-+-+-+-+-+-+-+-+
// |X|R|N|S|R| PID | (REQUIRED)
// +-+-+-+-+-+-+-+-+
// X: |I|L|T|K| RSV | (OPTIONAL)
// +-+-+-+-+-+-+-+-+
// I: |M| PictureID | (OPTIONAL)
// +-+-+-+-+-+-+-+-+
// | PictureID |
// +-+-+-+-+-+-+-+-+
// L: | TL0PICIDX | (OPTIONAL)
// +-+-+-+-+-+-+-+-+
// T/K: |TID|Y| KEYIDX | (OPTIONAL)
// +-+-+-+-+-+-+-+-+
/**
 * In-memory form of the VP8 RTP payload descriptor.
 *
 * Every member has a default that means "not present", identical to what
 * InitRTPVideoHeaderVP8() assigns, so a default-constructed header is safe to
 * compare and inspect even before Read() or InitRTPVideoHeaderVP8() is called.
 */
struct RTPVideoHeaderVP8 {
    /// Reset every field to its "not present" default.
    void InitRTPVideoHeaderVP8();
    /// Number of bytes Write() produces for the current field set.
    int Size() const;
    /// Serialize the descriptor into data; returns the number of bytes written.
    int Write(uint8_t *data, int size) const;
    /// Parse a descriptor from data; returns the bytes consumed, or 0 on failure.
    int Read(const uint8_t *data, int data_length);

    /// True for the packet that starts partition 0, i.e. the first packet of a frame.
    bool isFirstPacket() const { return beginningOfPartition && partitionId == 0; }

    friend bool operator!=(const RTPVideoHeaderVP8 &lhs, const RTPVideoHeaderVP8 &rhs) { return !(lhs == rhs); }
    friend bool operator==(const RTPVideoHeaderVP8 &lhs, const RTPVideoHeaderVP8 &rhs) {
        return lhs.nonReference == rhs.nonReference && lhs.pictureId == rhs.pictureId && lhs.tl0PicIdx == rhs.tl0PicIdx && lhs.temporalIdx == rhs.temporalIdx
            && lhs.layerSync == rhs.layerSync && lhs.keyIdx == rhs.keyIdx && lhs.partitionId == rhs.partitionId
            && lhs.beginningOfPartition == rhs.beginningOfPartition;
    }

    bool nonReference = false;         // Frame is discardable.
    int16_t pictureId = -1;            // Picture ID index, 15 bits;
                                       // kNoPictureId (-1) if PictureID does not exist.
    int8_t tl0PicIdx = -1;             // TL0PIC_IDX, 8 bits;
                                       // kNoTl0PicIdx (-1) means no value provided.
    uint8_t temporalIdx = 0xFF;        // Temporal layer index, or kNoTemporalIdx (0xFF).
    bool layerSync = false;            // This frame is a layer sync frame.
                                       // Disabled if temporalIdx == kNoTemporalIdx.
    int8_t keyIdx = -1;                // 5 bits; kNoKeyIdx (-1) means not used.
    int8_t partitionId = 0;            // VP8 partition ID
    bool beginningOfPartition = false; // True if this packet is the first
                                       // in a VP8 partition. Otherwise false
};
/// Reset the descriptor: clear all flags and mark every optional field as absent.
void RTPVideoHeaderVP8::InitRTPVideoHeaderVP8() {
    // Flags and partition info from the mandatory byte
    nonReference = false;
    beginningOfPartition = false;
    partitionId = 0;

    // Optional fields: sentinel means "absent"
    pictureId = kNoPictureId;
    tl0PicIdx = kNoTl0PicIdx;
    temporalIdx = kNoTemporalIdx;
    keyIdx = kNoKeyIdx;
    layerSync = false;
}
int RTPVideoHeaderVP8::Size() const {
bool tid_present = this->temporalIdx != kNoTemporalIdx;
bool keyid_present = this->keyIdx != kNoKeyIdx;
bool tl0_pid_present = this->tl0PicIdx != kNoTl0PicIdx;
bool pid_present = this->pictureId != kNoPictureId;
int ret = 2;
if (pid_present)
ret += 2;
if (tl0_pid_present)
ret++;
if (tid_present || keyid_present)
ret++;
return ret == 2 ? 1 : ret;
}
int RTPVideoHeaderVP8::Write(uint8_t *data, int size) const {
int ret = 0;
bool tid_present = this->temporalIdx != kNoTemporalIdx;
bool keyid_present = this->keyIdx != kNoKeyIdx;
bool tl0_pid_present = this->tl0PicIdx != kNoTl0PicIdx;
bool pid_present = this->pictureId != kNoPictureId;
uint8_t x_field = 0;
if (pid_present)
x_field |= kIBit;
if (tl0_pid_present)
x_field |= kLBit;
if (tid_present)
x_field |= kTBit;
if (keyid_present)
x_field |= kKBit;
uint8_t flags = 0;
if (x_field != 0)
flags |= kXBit;
if (this->nonReference)
flags |= kNBit;
// Create header as first packet in the frame. NextPacket() will clear it
// after first use.
flags |= kSBit;
data[ret++] = flags;
if (x_field == 0) {
return ret;
}
data[ret++] = x_field;
if (pid_present) {
const uint16_t pic_id = static_cast<uint16_t>(this->pictureId);
data[ret++] = (0x80 | ((pic_id >> 8) & 0x7F));
data[ret++] = (pic_id & 0xFF);
}
if (tl0_pid_present) {
data[ret++] = this->tl0PicIdx;
}
if (tid_present || keyid_present) {
uint8_t data_field = 0;
if (tid_present) {
data_field |= this->temporalIdx << 6;
if (this->layerSync)
data_field |= kYBit;
}
if (keyid_present) {
data_field |= (this->keyIdx & kKeyIdxField);
}
data[ret++] = data_field;
}
return ret;
}
int RTPVideoHeaderVP8::Read(const uint8_t *data, int data_length) {
// RTC_DCHECK_GT(data_length, 0);
int parsed_bytes = 0;
// Parse mandatory first byte of payload descriptor.
bool extension = (*data & 0x80) ? true : false; // X bit
this->nonReference = (*data & 0x20) ? true : false; // N bit
this->beginningOfPartition = (*data & 0x10) ? true : false; // S bit
this->partitionId = (*data & 0x07); // PID field
data++;
parsed_bytes++;
data_length--;
if (!extension)
return parsed_bytes;
if (data_length == 0)
return kFailedToParse;
// Optional X field is present.
bool has_picture_id = (*data & 0x80) ? true : false; // I bit
bool has_tl0_pic_idx = (*data & 0x40) ? true : false; // L bit
bool has_tid = (*data & 0x20) ? true : false; // T bit
bool has_key_idx = (*data & 0x10) ? true : false; // K bit
// Advance data and decrease remaining payload size.
data++;
parsed_bytes++;
data_length--;
if (has_picture_id) {
if (data_length == 0)
return kFailedToParse;
this->pictureId = (*data & 0x7F);
if (*data & 0x80) {
data++;
parsed_bytes++;
if (--data_length == 0)
return kFailedToParse;
// PictureId is 15 bits
this->pictureId = (this->pictureId << 8) + *data;
}
data++;
parsed_bytes++;
data_length--;
}
if (has_tl0_pic_idx) {
if (data_length == 0)
return kFailedToParse;
this->tl0PicIdx = *data;
data++;
parsed_bytes++;
data_length--;
}
if (has_tid || has_key_idx) {
if (data_length == 0)
return kFailedToParse;
if (has_tid) {
this->temporalIdx = ((*data >> 6) & 0x03);
this->layerSync = (*data & 0x20) ? true : false; // Y bit
}
if (has_key_idx) {
this->keyIdx = *data & 0x1F;
}
data++;
parsed_bytes++;
data_length--;
}
return parsed_bytes;
}
/////////////////////////////////////////////////
// VP8RtpDecoder
// Create the first frame object so decodeRtp() always has a frame to fill.
VP8RtpDecoder::VP8RtpDecoder() {
obtainFrame();
}
// Replace _frame with a newly created VP8Frame.
void VP8RtpDecoder::obtainFrame() {
_frame = FrameImp::create<VP8Frame>();
}
/**
 * Feed one VP8 RTP packet.
 *
 * After decoding, a jump in sequence numbers (ignored while no previous
 * sequence number is recorded, i.e. _last_seq is 0) switches the decoder into
 * GOP-dropping mode.
 *
 * @param rtp     RTP packet
 * @param key_pos not used
 * @return whatever decodeRtp() reports for this packet
 */
bool VP8RtpDecoder::inputRtp(const RtpPacket::Ptr &rtp, bool key_pos) {
    const auto cur_seq = rtp->getSeq();
    const bool key_start = decodeRtp(rtp);

    const bool seq_gap = _last_seq && cur_seq != (uint16_t)(_last_seq + 1);
    if (seq_gap && !_gop_dropped) {
        _gop_dropped = true;
        WarnL << "start drop vp8 gop, last seq:" << _last_seq << ", rtp:\r\n" << rtp->dumpString();
    }

    _last_seq = cur_seq;
    return key_start;
}
/**
 * Depacketize one VP8 RTP packet into the frame being assembled.
 *
 * A packet that starts partition 0 begins a new frame. Any later packet must
 * directly follow the previous sequence number, otherwise the frame is
 * discarded. The RTP marker bit ends the frame and triggers output.
 *
 * The first byte after the descriptor is inspected only when the packet
 * really carries one; the original read payload[offset] even when the
 * descriptor used up the whole payload.
 *
 * @param rtp RTP packet
 * @return true when this packet starts a frame whose first byte has bit 0 clear (key frame)
 */
bool VP8RtpDecoder::decodeRtp(const RtpPacket::Ptr &rtp) {
    auto payload_size = rtp->getPayloadSize();
    if (payload_size <= 0) {
        // No actual payload
        return false;
    }
    auto payload = rtp->getPayload();
    auto stamp = rtp->getStampMS();
    auto seq = rtp->getSeq();

    RTPVideoHeaderVP8 info;
    int offset = info.Read(payload, payload_size);
    if (!offset) {
        //_frame_drop = true;
        return false;
    }

    // Read() never consumes more than it was given, so this cannot go negative
    const auto body_size = payload_size - offset;
    const bool start = info.isFirstPacket();
    const bool key_start = start && body_size > 0 && (payload[offset] & 0x01) == 0;

    if (start) {
        _frame->_pts = stamp;
        _frame->_buffer.clear();
        _frame_drop = false;
    }
    if (_frame_drop) {
        // This frame is incomplete
        return false;
    }
    if (!start && seq != (uint16_t)(_last_seq + 1)) {
        // Middle and trailing packets must have consecutive sequence numbers;
        // a gap means a packet was lost, so the incomplete frame is discarded
        _frame_drop = true;
        _frame->_buffer.clear();
        return false;
    }

    // Append data
    _frame->_buffer.append((char *)payload + offset, body_size);

    bool end = rtp->getHeader()->mark;
    if (end) {
        // Require the next frame to begin with a first packet
        _frame_drop = true;
        // The last rtp packet of this frame, output frame
        outputFrame(rtp);
    }
    return key_start;
}
/**
 * Finish the assembled frame: assign a dts, apply the GOP-drop policy,
 * deliver the frame if allowed, then start a new frame object.
 * @param rtp the packet that completed the frame (used for logging only)
 */
void VP8RtpDecoder::outputFrame(const RtpPacket::Ptr &rtp) {
    if (!_frame->dropAble()) {
        // Rtsp does not have dts, so dts is generated according to the pts sorting algorithm
        _dts_generator.getDts(_frame->_pts, _frame->_dts);
    } else {
        // Not involved in dts generation
        _frame->_dts = _frame->_pts;
    }

    // A key frame ends GOP dropping
    const bool gop_restart = _frame->keyFrame() && _gop_dropped;
    if (gop_restart) {
        _gop_dropped = false;
        InfoL << "new gop received, rtp:\r\n" << rtp->dumpString();
    }

    const bool deliver = !_gop_dropped || _frame->configFrame();
    if (deliver) {
        RtpCodec::inputFrame(_frame);
    }
    obtainFrame();
}
////////////////////////////////////////////////////////////////////////
/**
 * Packetize one VP8 frame into RTP packets.
 *
 * Each packet carries the payload descriptor followed by a slice of the
 * frame. The S bit is set on the first packet only, and the RTP marker is set
 * on the last packet.
 *
 * Fixes over the previous version:
 * - The N bit ("non-reference frame", i.e. discardable) is now set only when
 *   the frame reports dropAble(); it was inverted, which told receivers that
 *   every regular frame, key frames included, could be thrown away.
 * - If the descriptor cannot be written or the RTP size limit leaves no room
 *   for payload, the frame is rejected instead of looping forever or copying
 *   a negative length.
 *
 * @param frame frame to packetize
 * @return true when the frame was packetized, false when it was rejected
 */
bool VP8RtpEncoder::inputFrame(const Frame::Ptr &frame) {
    RTPVideoHeaderVP8 info;
    info.InitRTPVideoHeaderVP8();
    info.beginningOfPartition = true;
    info.nonReference = frame->dropAble();

    uint8_t header[20];
    const int header_size = info.Write(header, sizeof(header));
    int pdu_size = getRtpInfo().getMaxSize() - header_size;
    if (header_size <= 0 || pdu_size <= 0) {
        return false;
    }

    const char *ptr = frame->data() + frame->prefixSize();
    size_t len = frame->size() - frame->prefixSize();
    bool key = frame->keyFrame();
    bool mark = false;
    for (size_t pos = 0; pos < len; pos += pdu_size) {
        if (static_cast<int>(len - pos) <= pdu_size) {
            // Last slice: shrink to what is left and set the marker
            pdu_size = len - pos;
            mark = true;
        }
        auto rtp = getRtpInfo().makeRtp(TrackVideo, nullptr, pdu_size + header_size, mark, frame->pts());
        if (rtp) {
            uint8_t *payload = rtp->getPayload();
            memcpy(payload, header, header_size);
            memcpy(payload + header_size, ptr + pos, pdu_size);
            RtpCodec::inputRtp(rtp, key);
        }
        // Only the first packet of a key frame is flagged as a key position
        key = false;
        header[0] &= (~kSBit); // Clear 'Start of partition' bit.
    }
    return true;
}
} // namespace mediakit

63
ext-codec/VP8Rtp.h Normal file
View File

@@ -0,0 +1,63 @@
/*
* Copyright (c) 2016-present The ZLMediaKit project authors. All Rights Reserved.
*
* This file is part of ZLMediaKit(https://github.com/ZLMediaKit/ZLMediaKit).
*
* Use of this source code is governed by MIT-like license that can be found in the
* LICENSE file in the root of the source tree. All contributing project authors
* may be found in the AUTHORS file in the root of the source tree.
*/
#ifndef ZLMEDIAKIT_VP8RTPCODEC_H
#define ZLMEDIAKIT_VP8RTPCODEC_H
#include "VP8.h"
// for DtsGenerator
#include "Common/Stamp.h"
#include "Rtsp/RtpCodec.h"
namespace mediakit {
/**
 * VP8 RTP decoder class.
 * Demuxes VP8 carried over RTSP/RTP into VP8Frame objects.
 */
class VP8RtpDecoder : public RtpCodec {
public:
using Ptr = std::shared_ptr<VP8RtpDecoder>;
VP8RtpDecoder();
/**
 * Input a VP8 RTP packet.
 * @param rtp RTP packet
 * @param key_pos this parameter is ignored
 */
bool inputRtp(const RtpPacket::Ptr &rtp, bool key_pos = true) override;
private:
// Append one packet to the frame being assembled.
bool decodeRtp(const RtpPacket::Ptr &rtp);
// Deliver the assembled frame and start a new one.
void outputFrame(const RtpPacket::Ptr &rtp);
// Allocate a fresh frame object into _frame.
void obtainFrame();
private:
// True while frames are withheld after a sequence gap, until the next key frame.
bool _gop_dropped = false;
// True while packets are ignored until the next frame start.
bool _frame_drop = true;
// Sequence number of the previously received packet.
uint16_t _last_seq = 0;
// Frame currently being assembled.
VP8Frame::Ptr _frame;
// Derives dts values from pts values.
DtsGenerator _dts_generator;
};
/**
 * VP8 RTP packing class.
 * Splits VP8 frames into RTP packets.
 */
class VP8RtpEncoder : public RtpCodec {
public:
using Ptr = std::shared_ptr<VP8RtpEncoder>;
// Packetize one frame and emit the resulting RTP packets.
bool inputFrame(const Frame::Ptr &frame) override;
};
}//namespace mediakit
#endif //ZLMEDIAKIT_VP8RTPCODEC_H

76
ext-codec/VP9.cpp Normal file
View File

@@ -0,0 +1,76 @@
#include "VP9.h"
#include "VP9Rtp.h"
#include "VpxRtmp.h"
#include "Extension/Factory.h"
using namespace std;
using namespace toolkit;
namespace mediakit {
/**
 * Feed a VP9 frame to the track.
 * Key frames refresh the configuration record and width/height; key frames
 * with fewer than 10 payload bytes are rejected.
 * @param frame frame to process
 * @return false for a too-short key frame, otherwise the result of VideoTrackImp::inputFrame
 */
bool VP9Track::inputFrame(const Frame::Ptr &frame) {
    char *payload = frame->data() + frame->prefixSize();
    if (!frame->keyFrame()) {
        return VideoTrackImp::inputFrame(frame);
    }

    const auto payload_size = frame->size() - frame->prefixSize();
    if (payload_size < 10) {
        return false;
    }
    webm_vpx_codec_configuration_record_from_vp9(&_vpx, &_width, &_height, payload, payload_size);
    return VideoTrackImp::inputFrame(frame);
}
/**
 * Serialize the stored VPx codec configuration record.
 * @return buffer sized to the byte count reported by webm_vpx_codec_configuration_record_save()
 */
Buffer::Ptr VP9Track::getExtraData() const {
    // 8 bytes plus the codec initialization data
    auto record = BufferRaw::create(8 + _vpx.codec_intialization_data_size);
    auto *dst = (uint8_t *)record->data();
    const auto written = webm_vpx_codec_configuration_record_save(&_vpx, dst, record->getCapacity());
    record->setSize(written);
    return record;
}
// Load a serialized VPx codec configuration record into _vpx.
// The loader's return value is not checked here.
void VP9Track::setExtraData(const uint8_t *data, size_t size) {
webm_vpx_codec_configuration_record_load(data, size, &_vpx);
}
// File-local factory functions that make up the VP9 codec plugin.
namespace {
// Codec id served by this plugin.
CodecId getCodec() {
return CodecVP9;
}
// Create a VP9 track; the audio-style parameters are not used.
Track::Ptr getTrackByCodecId(int sample_rate, int channels, int sample_bit) {
return std::make_shared<VP9Track>();
}
// Create a VP9 track for an SDP media description; the SDP content is not inspected.
Track::Ptr getTrackBySdp(const SdpTrack::Ptr &track) {
return std::make_shared<VP9Track>();
}
// Create the RTP packetizer; the payload type argument is not used here.
RtpCodec::Ptr getRtpEncoderByCodecId(uint8_t pt) {
return std::make_shared<VP9RtpEncoder>();
}
// Create the RTP depacketizer.
RtpCodec::Ptr getRtpDecoderByCodecId() {
return std::make_shared<VP9RtpDecoder>();
}
// Create the RTMP muxer bound to the given track.
RtmpCodec::Ptr getRtmpEncoderByTrack(const Track::Ptr &track) {
return std::make_shared<VpxRtmpEncoder>(track);
}
// Create the RTMP demuxer bound to the given track.
RtmpCodec::Ptr getRtmpDecoderByTrack(const Track::Ptr &track) {
return std::make_shared<VpxRtmpDecoder>(track);
}
// Wrap an existing buffer as a VP9 frame with a prefix size of 0.
Frame::Ptr getFrameFromPtr(const char *data, size_t bytes, uint64_t dts, uint64_t pts) {
return std::make_shared<VP9FrameNoCacheAble>((char *)data, bytes, dts, pts, 0);
}
} // namespace
// Plugin table for VP9; the initializer order is positional and must match CodecPlugin's member order.
CodecPlugin vp9_plugin = { getCodec,
getTrackByCodecId,
getTrackBySdp,
getRtpEncoderByCodecId,
getRtpDecoderByCodecId,
getRtmpEncoderByTrack,
getRtmpDecoderByTrack,
getFrameFromPtr };
} // namespace mediakit

49
ext-codec/VP9.h Normal file
View File

@@ -0,0 +1,49 @@
#ifndef ZLMEDIAKIT_VP9_H
#define ZLMEDIAKIT_VP9_H
#include "Extension/Frame.h"
#include "Extension/Track.h"
#include "webm-vpx.h"
namespace mediakit {
/**
 * Adds VP9 semantics to a frame base class.
 * @tparam Parent frame implementation to extend (e.g. FrameImp or FrameFromPtr)
 */
template <typename Parent>
class VP9FrameHelper : public Parent {
public:
    friend class FrameImp;
    //friend class toolkit::ResourcePool_l<VP9FrameHelper>;
    using Ptr = std::shared_ptr<VP9FrameHelper>;

    /// Forward all arguments to Parent and tag the frame as VP9.
    template <typename... ARGS>
    VP9FrameHelper(ARGS &&...args)
        : Parent(std::forward<ARGS>(args)...) {
        this->_codec_id = CodecVP9;
    }

    /**
     * Key-frame test based on the first byte of the VP9 uncompressed header.
     *
     * Bit layout, most significant bit first:
     *   frame_marker(2) = 0b10, profile_low_bit, profile_high_bit,
     *   [reserved_zero, profile 3 only], show_existing_frame, frame_type, ...
     * A key frame has show_existing_frame == 0 and frame_type == 0.
     *
     * The previous test (*ptr & 0x80) only looked at the high frame_marker
     * bit, which is set on every VP9 frame, so every frame was reported as a
     * key frame. A frame without payload bytes is never a key frame.
     */
    bool keyFrame() const override {
        if (this->size() <= this->prefixSize()) {
            return false;
        }
        auto ptr = (const uint8_t *) this->data() + this->prefixSize();
        const uint8_t first = *ptr;
        if ((first >> 6) != 0x02) {
            // Not a valid frame marker
            return false;
        }
        const bool profile3 = (first & 0x30) == 0x30;
        // Profile 3 inserts a reserved bit, shifting the following flags down by one
        const uint8_t show_existing_mask = profile3 ? 0x04 : 0x08;
        const uint8_t frame_type_mask = profile3 ? 0x02 : 0x04;
        return !(first & show_existing_mask) && !(first & frame_type_mask);
    }

    bool configFrame() const override { return false; }
    bool dropAble() const override { return false; }
    bool decodeAble() const override { return true; }
};
/// VP9 frame types: VP9Frame is built on FrameImp, VP9FrameNoCacheAble on FrameFromPtr.
using VP9Frame = VP9FrameHelper<FrameImp>;
using VP9FrameNoCacheAble = VP9FrameHelper<FrameFromPtr>;
/// VP9 video track: refreshes its VPx configuration record and resolution from key frames.
class VP9Track : public VideoTrackImp {
public:
VP9Track() : VideoTrackImp(CodecVP9) {};
// Copy this track, including its stored configuration record.
Track::Ptr clone() const override { return std::make_shared<VP9Track>(*this); }
// Inspect key frames, then hand the frame to VideoTrackImp.
bool inputFrame(const Frame::Ptr &frame) override;
// Serialize the configuration record.
toolkit::Buffer::Ptr getExtraData() const override;
// Load a serialized configuration record.
void setExtraData(const uint8_t *data, size_t size) override;
private:
// Codec configuration record, zero-initialized until a key frame or extra data arrives.
webm_vpx_t _vpx {};
};
} // namespace mediakit
#endif

342
ext-codec/VP9Rtp.cpp Normal file
View File

@@ -0,0 +1,342 @@
/*
* Copyright (c) 2016-present The ZLMediaKit project authors. All Rights Reserved.
*
* This file is part of ZLMediaKit(https://github.com/ZLMediaKit/ZLMediaKit).
*
* Use of this source code is governed by MIT-like license that can be found in the
* LICENSE file in the root of the source tree. All contributing project authors
* may be found in the AUTHORS file in the root of the source tree.
*/
#include "VP9Rtp.h"
#include "Extension/Frame.h"
#include "Common/config.h"
namespace mediakit{
// "Field absent" sentinel values.
// NOTE(review): nothing in the visible part of this file references them — confirm whether they are still needed.
const int16_t kNoPictureId = -1;
const int8_t kNoTl0PicIdx = -1;
const uint8_t kNoTemporalIdx = 0xFF;
const int kNoKeyIdx = -1;
/// Frame size announced for one spatial layer in the scalability structure.
struct VP9ResolutionLayer {
    int width;
    int height;
};

/**
 * Parsed form of the VP9 RTP payload descriptor.
 * Integer fields hold -1 while the corresponding descriptor part is absent.
 */
struct RTPPayloadVP9 {
    bool hasPictureID = false;            // I bit
    bool interPicturePrediction = false;  // P bit
    bool hasLayerIndices = false;         // L bit
    bool flexibleMode = false;            // F bit
    bool beginningOfLayerFrame = false;   // B bit
    bool endingOfLayerFrame = false;      // E bit
    bool hasScalabilityStructure = false; // V bit
    bool largePictureID = false;          // M bit: picture id uses 15 bits
    int pictureID = -1;
    int temporalID = -1;
    bool isSwitchingUp = false;
    int spatialID = -1;
    bool isInterLayeredDepUsed = false;
    int tl0PicIdx = -1;                   // non-flexible mode only
    int referenceIdx = -1;                // last P_DIFF read (flexible mode)
    bool additionalReferenceIdx = false;  // N bit of the last P_DIFF read
    int spatialLayers = -1;               // N_S field (layer count minus one)
    bool hasResolution = false;           // Y bit
    bool hasGof = false;                  // G bit
    int numberOfFramesInGof = -1;         // N_G field
    std::vector<VP9ResolutionLayer> resolutions;

    /**
     * Parse the descriptor at the start of an RTP payload.
     * @return number of descriptor bytes consumed, or -1 on malformed/truncated input
     */
    int parse(unsigned char* data, int dataLength);

    /// A packet starts a key frame when it begins a layer frame that uses no inter-picture prediction.
    bool keyFrame() const { return beginningOfLayerFrame && !interPicturePrediction; }

    /// One-line summary: flag letters, then picture id, TL0PICIDX, spatial id and temporal id.
    std::string dump() const {
        char line[64] = {0};
        snprintf(line, sizeof(line), "%c%c%c%c%c%c%c- %d %d, %d %d",
            hasPictureID ? 'I' : ' ',
            interPicturePrediction ? 'P' : ' ',
            hasLayerIndices ? 'L' : ' ',
            flexibleMode ? 'F' : ' ',
            beginningOfLayerFrame ? 'B' : ' ',
            endingOfLayerFrame ? 'E' : ' ',
            hasScalabilityStructure ? 'V' : ' ',
            pictureID, tl0PicIdx,
            spatialID, temporalID);
        return line;
    }
};

//
// VP9 format:
//
// Payload descriptor (Flexible mode F = 1)
//       0 1 2 3 4 5 6 7
//      +-+-+-+-+-+-+-+-+
//      |I|P|L|F|B|E|V|-| (REQUIRED)
//      +-+-+-+-+-+-+-+-+
// I:   |M| PICTURE ID  | (REQUIRED)
//      +-+-+-+-+-+-+-+-+
// M:   | EXTENDED PID  | (RECOMMENDED)
//      +-+-+-+-+-+-+-+-+
// L:   |  T  |U|  S  |D| (CONDITIONALLY RECOMMENDED)
//      +-+-+-+-+-+-+-+-+                             -
// P,F: | P_DIFF      |N| (CONDITIONALLY REQUIRED)    - up to 3 times
//      +-+-+-+-+-+-+-+-+                             -
// V:   | SS            |
//      | ..            |
//      +-+-+-+-+-+-+-+-+
//
// Payload descriptor (Non flexible mode F = 0)
//
//       0 1 2 3 4 5 6 7
//      +-+-+-+-+-+-+-+-+
//      |I|P|L|F|B|E|V|-| (REQUIRED)
//      +-+-+-+-+-+-+-+-+
// I:   |M| PICTURE ID  | (RECOMMENDED)
//      +-+-+-+-+-+-+-+-+
// M:   | EXTENDED PID  | (RECOMMENDED)
//      +-+-+-+-+-+-+-+-+
// L:   |  T  |U|  S  |D| (CONDITIONALLY RECOMMENDED)
//      +-+-+-+-+-+-+-+-+
//      |   TL0PICIDX   | (CONDITIONALLY REQUIRED)
//      +-+-+-+-+-+-+-+-+
// V:   | SS            |
//      | ..            |
//      +-+-+-+-+-+-+-+-+
#define kIBit 0x80
#define kPBit 0x40
#define kLBit 0x20
#define kFBit 0x10
#define kBBit 0x08
#define kEBit 0x04
#define kVBit 0x02

/**
 * Parse a VP9 payload descriptor.
 *
 * All fields are reset first, so the object can be reused. Every read is
 * bounds-checked against dataLength.
 *
 * Fixes over the previous version:
 * - The 15-bit picture id is (high 7 bits << 8) | low byte; the old
 *   ntohs((id << 16) + low) expression produced a wrong value.
 * - In flexible mode the layer-indices part is a single byte. Reference
 *   indices (P_DIFF) exist only when both P and F are set and are parsed
 *   exactly once; the old code consumed them (or payload bytes) inside the
 *   layer-indices branch and then again in the P_DIFF loop, which shifted the
 *   returned payload offset.
 * - More than three P_DIFF entries are rejected, matching the "up to 3 times"
 *   rule in the format description above.
 *
 * @param data       start of the RTP payload
 * @param dataLength number of payload bytes
 * @return descriptor length in bytes, or -1 when the input is null, empty,
 *         truncated or malformed
 */
int RTPPayloadVP9::parse(unsigned char *data, int dataLength) {
    *this = RTPPayloadVP9();
    if (!data || dataLength <= 0) {
        return -1;
    }

    const unsigned char *cur = data;
    const unsigned char *const end = data + dataLength;
    // True when at least n more bytes can be read
    auto avail = [&](int n) { return n >= 0 && (end - cur) >= n; };

    // Mandatory first byte of payload descriptor
    const unsigned char flags = *cur++;
    hasPictureID = (flags & kIBit) != 0;            // I bit
    interPicturePrediction = (flags & kPBit) != 0;  // P bit
    hasLayerIndices = (flags & kLBit) != 0;         // L bit
    flexibleMode = (flags & kFBit) != 0;            // F bit
    beginningOfLayerFrame = (flags & kBBit) != 0;   // B bit
    endingOfLayerFrame = (flags & kEBit) != 0;      // E bit
    hasScalabilityStructure = (flags & kVBit) != 0; // V bit

    if (hasPictureID) {
        if (!avail(1)) return -1;
        largePictureID = (*cur & 0x80) != 0; // M bit
        pictureID = (*cur & 0x7F);
        ++cur;
        if (largePictureID) {
            if (!avail(1)) return -1;
            pictureID = (pictureID << 8) | *cur;
            ++cur;
        }
    }

    if (hasLayerIndices) {
        if (!avail(1)) return -1;
        temporalID = (*cur & 0xE0) >> 5;            // T bits
        isSwitchingUp = (*cur & 0x10) != 0;         // U bit
        spatialID = (*cur & 0x0E) >> 1;             // S bits
        isInterLayeredDepUsed = (*cur & 0x01) != 0; // D bit
        ++cur;
        if (!flexibleMode) {
            // TL0PICIDX follows the layer byte in non-flexible mode only
            if (!avail(1)) return -1;
            tl0PicIdx = *cur;
            ++cur;
        }
    }

    if (flexibleMode && interPicturePrediction) {
        // Reference indices: P_DIFF (7 bits) + N (more follow), at most three entries
        int count = 0;
        do {
            if (count == 3 || !avail(1)) return -1;
            referenceIdx = (*cur & 0xFE) >> 1;
            additionalReferenceIdx = (*cur & 0x01) != 0; // N bit
            ++cur;
            ++count;
        } while (additionalReferenceIdx);
    }

    if (hasScalabilityStructure) {
        if (!avail(1)) return -1;
        spatialLayers = (*cur & 0xE0) >> 5;  // N_S bits
        hasResolution = (*cur & 0x10) != 0;  // Y bit
        hasGof = (*cur & 0x08) != 0;         // G bit
        ++cur;

        if (hasResolution) {
            // N_S + 1 layers, each with 16-bit big-endian width and height
            for (int i = 0; i <= spatialLayers; i++) {
                if (!avail(4)) return -1;
                const int width = (cur[0] << 8) | cur[1];
                const int height = (cur[2] << 8) | cur[3];
                cur += 4;
                // InfoL << "got vp9 " << width << "x" << height;
                resolutions.push_back({ width, height });
            }
        }

        if (hasGof) {
            if (!avail(1)) return -1;
            numberOfFramesInGof = *cur; // N_G bits
            ++cur;
            for (int frame_index = 0; frame_index < numberOfFramesInGof; frame_index++) {
                // Per-frame byte: only R (number of P_DIFF bytes that follow) is needed here
                if (!avail(1)) return -1;
                const int reference_indices = (*cur & 0x0C) >> 2; // R bits
                ++cur;
                if (!avail(reference_indices)) return -1;
                cur += reference_indices;
            }
        }
    }

    return static_cast<int>(cur - data);
}
////////////////////////////////////////////////////
// Create the first frame object so decodeRtp() always has a frame to fill.
VP9RtpDecoder::VP9RtpDecoder() {
obtainFrame();
}
// Replace _frame with a newly created VP9Frame.
void VP9RtpDecoder::obtainFrame() {
_frame = FrameImp::create<VP9Frame>();
}
/**
 * Feed one VP9 RTP packet.
 *
 * After decoding, a jump in sequence numbers (ignored while no previous
 * sequence number is recorded, i.e. _last_seq is 0) switches the decoder into
 * GOP-dropping mode.
 *
 * @param rtp     RTP packet
 * @param key_pos not used
 * @return whatever decodeRtp() reports for this packet
 */
bool VP9RtpDecoder::inputRtp(const RtpPacket::Ptr &rtp, bool key_pos) {
    const auto cur_seq = rtp->getSeq();
    const bool key_start = decodeRtp(rtp);

    const bool seq_gap = _last_seq && cur_seq != (uint16_t)(_last_seq + 1);
    if (seq_gap && !_gop_dropped) {
        _gop_dropped = true;
        WarnL << "start drop VP9 gop, last seq:" << _last_seq << ", rtp:\r\n" << rtp->dumpString();
    }

    _last_seq = cur_seq;
    return key_start;
}
/**
 * Depacketize one VP9 RTP packet into the frame being assembled.
 *
 * A packet with the B flag begins a new frame. Any other packet must directly
 * follow the previous sequence number, otherwise the frame is discarded. A
 * packet with the E flag ends the frame and triggers output.
 *
 * @param rtp RTP packet
 * @return true when the packet's descriptor marks the start of a key frame
 */
bool VP9RtpDecoder::decodeRtp(const RtpPacket::Ptr &rtp) {
    auto total = rtp->getPayloadSize();
    if (total < 1) {
        // No actual payload
        return false;
    }
    auto body = rtp->getPayload();
    auto pts = rtp->getStampMS();
    auto cur_seq = rtp->getSeq();

    RTPPayloadVP9 desc;
    int desc_len = desc.parse(body, total);
    if (desc_len < 0) {
        WarnL << "VP9 RTP payload parse failed, seq:" << cur_seq;
        return false;
    }
    // InfoL << rtp->dumpString() << "\n" << desc.dump();

    const bool first = desc.beginningOfLayerFrame;
    if (first) {
        _frame->_pts = pts;
        _frame->_buffer.clear();
        _frame_drop = false;
    }
    if (_frame_drop) {
        // This frame is incomplete
        return false;
    }
    if (!first && cur_seq != (uint16_t)(_last_seq + 1)) {
        // Middle and trailing packets must have consecutive sequence numbers;
        // a gap means a packet was lost, so the incomplete frame is discarded
        _frame_drop = true;
        _frame->_buffer.clear();
        return false;
    }

    // Append data
    _frame->_buffer.append((char *)body + desc_len, total - desc_len);

    if (desc.endingOfLayerFrame) { // rtp->getHeader()->mark
        // Require the next packet to be a beginningOfLayerFrame
        _frame_drop = true;
        // Last rtp packet of this frame, output frame
        outputFrame(rtp);
    }
    return desc.keyFrame();
}
/**
 * Finish the assembled frame: assign a dts, apply the GOP-drop policy,
 * deliver the frame if allowed, then start a new frame object.
 * @param rtp the packet that completed the frame (used for logging only)
 */
void VP9RtpDecoder::outputFrame(const RtpPacket::Ptr &rtp) {
    if (!_frame->dropAble()) {
        // Rtsp does not have dts, so dts is generated according to the pts sorting algorithm
        _dts_generator.getDts(_frame->_pts, _frame->_dts);
    } else {
        // Not involved in dts generation
        _frame->_dts = _frame->_pts;
    }

    // A key frame ends GOP dropping
    const bool gop_restart = _frame->keyFrame() && _gop_dropped;
    if (gop_restart) {
        _gop_dropped = false;
        InfoL << "new gop received, rtp:\r\n" << rtp->dumpString();
    }

    const bool deliver = !_gop_dropped || _frame->configFrame();
    if (deliver) {
        // InfoL << _frame->pts() << " size=" << _frame->size();
        RtpCodec::inputFrame(_frame);
    }
    obtainFrame();
}
////////////////////////////////////////////////////////////////////////
/**
 * Packetize one VP9 frame into RTP packets.
 *
 * Every packet carries a three-byte descriptor: the flags byte plus a 15-bit
 * picture id that is incremented once per frame and wraps to 0 after 0x7FFF.
 * B is set on the first packet, E and the RTP marker on the last, and P on
 * every packet of a non-key frame.
 *
 * If the RTP size limit leaves no room for payload, the frame is rejected;
 * previously a zero budget looped forever and a negative one was passed to
 * memcpy as a length.
 *
 * @param frame frame to packetize
 * @return true when the frame was packetized, false when it was rejected
 */
bool VP9RtpEncoder::inputFrame(const Frame::Ptr &frame) {
    uint8_t header[20] = { 0 };
    int nheader = 1;
    header[0] = kBBit;
    bool key = frame->keyFrame();
    if (!key)
        header[0] |= kPBit;
#if 1
    header[0] |= kIBit;
    if (++_pic_id > 0x7FFF) {
        _pic_id = 0;
    }
    // M bit set: high 7 bits first, then the low byte
    header[1] = (0x80 | ((_pic_id >> 8) & 0x7F));
    header[2] = (_pic_id & 0xFF);
    nheader += 2;
#endif
    const char *ptr = frame->data() + frame->prefixSize();
    int len = frame->size() - frame->prefixSize();
    int pdu_size = getRtpInfo().getMaxSize() - nheader;
    if (pdu_size <= 0) {
        return false;
    }

    bool mark = false;
    for (int pos = 0; pos < len; pos += pdu_size) {
        if (len - pos <= pdu_size) {
            // Last slice: shrink to what is left, flag end of frame
            pdu_size = len - pos;
            header[0] |= kEBit;
            mark = true;
        }
        auto rtp = getRtpInfo().makeRtp(TrackVideo, nullptr, pdu_size + nheader, mark, frame->pts());
        if (rtp) {
            uint8_t *payload = rtp->getPayload();
            memcpy(payload, header, nheader);
            memcpy(payload + nheader, ptr + pos, pdu_size);
            RtpCodec::inputRtp(rtp, key);
        }
        // Only the first packet of a key frame is flagged as a key position
        key = false;
        header[0] &= (~kBBit); // Clear 'Begin of partition' bit.
    }
    return true;
}
} // namespace mediakit

64
ext-codec/VP9Rtp.h Normal file
View File

@@ -0,0 +1,64 @@
/*
* Copyright (c) 2016-present The ZLMediaKit project authors. All Rights Reserved.
*
* This file is part of ZLMediaKit(https://github.com/ZLMediaKit/ZLMediaKit).
*
* Use of this source code is governed by MIT-like license that can be found in the
* LICENSE file in the root of the source tree. All contributing project authors
* may be found in the AUTHORS file in the root of the source tree.
*/
#ifndef ZLMEDIAKIT_VP9RTPCODEC_H
#define ZLMEDIAKIT_VP9RTPCODEC_H
#include "VP9.h"
// for DtsGenerator
#include "Common/Stamp.h"
#include "Rtsp/RtpCodec.h"
namespace mediakit {
/**
 * VP9 rtp decoder.
 * Demultiplexes VP9 over rtsp-rtp into VP9Frame.
 */
class VP9RtpDecoder : public RtpCodec {
public:
using Ptr = std::shared_ptr<VP9RtpDecoder>;
VP9RtpDecoder();
/**
 * Input a VP9 rtp packet.
 * @param rtp the rtp packet
 * @param key_pos this parameter is ignored
 */
bool inputRtp(const RtpPacket::Ptr &rtp, bool key_pos = true) override;
private:
// Handles a single rtp packet; NOTE(review): implementation is not fully visible from this header, confirm details in VP9Rtp.cpp
bool decodeRtp(const RtpPacket::Ptr &rtp);
// Assigns dts to the assembled frame, forwards it via RtpCodec::inputFrame unless a gop is being dropped, then calls obtainFrame()
void outputFrame(const RtpPacket::Ptr &rtp);
// Called after a frame has been output; presumably prepares a fresh _frame - confirm in VP9Rtp.cpp
void obtainFrame();
private:
// While true, outputFrame() forwards config frames only; cleared when a key frame is output
bool _gop_dropped = false;
// Set to true once the last packet of a frame was handled, so the next packet must begin a new layer frame
bool _frame_drop = true;
// NOTE(review): presumably the sequence number of the previously received rtp packet - usage not visible here
uint16_t _last_seq = 0;
// The frame currently being assembled from rtp payloads
VP9Frame::Ptr _frame;
// Generates dts from pts for frames that are not droppable
DtsGenerator _dts_generator;
};
/**
 * VP9 rtp packetizer.
 * Splits VP9 frames into rtp packets.
 */
class VP9RtpEncoder : public RtpCodec {
public:
using Ptr = std::shared_ptr<VP9RtpEncoder>;
/**
 * Input a VP9 frame; it is split into one or more rtp packets which are passed to RtpCodec::inputRtp.
 * @param frame the frame to packetize
 * @return always true
 */
bool inputFrame(const Frame::Ptr &frame) override;
private:
// Picture id written into every packet; incremented once per frame and wrapped back to 0 after 0x7FFF
uint16_t _pic_id = 0;
};
}//namespace mediakit
#endif //ZLMEDIAKIT_VP9RTPCODEC_H

153
ext-codec/VpxRtmp.cpp Normal file
View File

@@ -0,0 +1,153 @@
/*
* Copyright (c) 2016-present The ZLMediaKit project authors. All Rights Reserved.
*
* This file is part of ZLMediaKit(https://github.com/ZLMediaKit/ZLMediaKit).
*
* Use of this source code is governed by MIT-like license that can be found in the
* LICENSE file in the root of the source tree. All contributing project authors
* may be found in the AUTHORS file in the root of the source tree.
*/
#include "VpxRtmp.h"
#include "Rtmp/utils.h"
#include "Common/config.h"
#include "Extension/Factory.h"
using namespace std;
using namespace toolkit;
namespace mediakit {
/**
 * Demultiplex one rtmp video packet.
 * Config packets update the extra data of the track, coded frames are forwarded
 * through outputFrame(). Both the enhanced-rtmp layout and the legacy layout
 * (5 byte header: flags, packet type, 3 byte cts) are handled.
 * @param pkt the rtmp packet to decode
 * @throws std::invalid_argument when an enhanced stream delivers a packet that is
 *         not enhanced or whose codec differs from the track codec
 */
void VpxRtmpDecoder::inputRtmp(const RtmpPacket::Ptr &pkt) {
    if (_info.codec == CodecInvalid) {
        // First, determine if it is an enhanced rtmp
        parseVideoRtmpPacket((uint8_t *)pkt->data(), pkt->size(), &_info);
    }

    if (!_info.is_enhanced) {
        // Domestic extension (12): Vpx over legacy rtmp
        CHECK_RET(pkt->size() > 5);
        if (pkt->isConfigFrame()) {
            getTrack()->setExtraData((uint8_t *)pkt->data() + 5, pkt->size() - 5);
            return;
        }
        // Sign-extend the 24 bit composition time offset stored after the two leading bytes
        auto cts_ptr = (uint8_t *)(pkt->buffer.data() + 2);
        int32_t cts = (load_be24(cts_ptr) + 0xff800000) ^ 0xff800000;
        outputFrame(pkt->data() + 5, pkt->size() - 5, pkt->time_stamp, pkt->time_stamp + cts);
        return;
    }

    // Enhanced rtmp: every packet is parsed again to refresh the packet type
    parseVideoRtmpPacket((uint8_t *)pkt->data(), pkt->size(), &_info);
    if (!_info.is_enhanced || _info.codec != getTrack()->getCodecId()) {
        throw std::invalid_argument("Invalid enhanced-rtmp packet!");
    }

    auto payload = (uint8_t *)pkt->data() + RtmpPacketInfo::kEnhancedRtmpHeaderSize;
    auto payload_size = pkt->size() - RtmpPacketInfo::kEnhancedRtmpHeaderSize;

    switch (_info.video.pkt_type) {
        case RtmpPacketType::PacketTypeSequenceStart: {
            getTrack()->setExtraData(payload, payload_size);
            break;
        }
        case RtmpPacketType::PacketTypeCodedFrames: {
            CHECK_RET(payload_size > 3);
            // SI24 = [CompositionTime Offset]
            int32_t cts = (load_be24(payload) + 0xff800000) ^ 0xff800000;
            outputFrame((char *)payload + 3, payload_size - 3, pkt->time_stamp, pkt->time_stamp + cts);
            break;
        }
        case RtmpPacketType::PacketTypeCodedFramesX: {
            // No composition time offset is carried, so pts equals dts
            outputFrame((char *)payload, payload_size, pkt->time_stamp, pkt->time_stamp);
            break;
        }
        default:
            WarnL << "Unknown pkt_type: " << (int)_info.video.pkt_type;
            break;
    }
}
/**
 * Build a frame of the track's codec from the given payload and pass it to RtmpCodec::inputFrame.
 * @param data start of the frame payload
 * @param size payload size in bytes
 * @param dts decoding timestamp
 * @param pts presentation timestamp
 */
void VpxRtmpDecoder::outputFrame(const char *data, size_t size, uint32_t dts, uint32_t pts) {
    auto frame = Factory::getFrameFromPtr(getTrack()->getCodecId(), data, size, dts, pts);
    RtmpCodec::inputFrame(frame);
}
////////////////////////////////////////////////////////////////////////
/**
 * Create the encoder for the given track.
 * The Rtmp::kEnhanced configuration value is read once here and decides which
 * header layout the encoder writes.
 * @param track the track this encoder packs
 */
VpxRtmpEncoder::VpxRtmpEncoder(const Track::Ptr &track) : RtmpCodec(track) {
    const bool use_enhanced = mINI::Instance()[Rtmp::kEnhanced];
    _enhanced = use_enhanced;
}
/**
 * Wrap one Vpx frame into an rtmp video packet and pass it to RtmpCodec::inputRtmp.
 * In enhanced mode the enhanced header is written, followed by a 3 byte cts only when
 * cts is non-zero; otherwise the legacy header (flags, packet type, 3 byte cts) is written.
 * The packet buffer is trimmed to the bytes actually written, so buffer size and
 * body_size always agree.
 * @param frame the frame to pack
 * @return always true
 */
bool VpxRtmpEncoder::inputFrame(const Frame::Ptr &frame) {
    auto packet = RtmpPacket::create();
    // Reserve room for the largest header that can be written below (8 bytes) plus the frame
    packet->buffer.resize(8 + frame->size());
    char *buff = packet->data();
    int32_t cts = frame->pts() - frame->dts();
    if (_enhanced) {
        auto header = (RtmpVideoHeaderEnhanced *)buff;
        header->enhanced = 1;
        header->frame_type = frame->keyFrame() ? (int)RtmpFrameType::key_frame : (int)RtmpFrameType::inter_frame;
        header->fourcc = htonl(getCodecFourCC(frame->getCodecId()));
        buff += RtmpPacketInfo::kEnhancedRtmpHeaderSize;
        if (cts) {
            // A composition time offset is only carried by PacketTypeCodedFrames
            header->pkt_type = (uint8_t)RtmpPacketType::PacketTypeCodedFrames;
            set_be24(buff, cts);
            buff += 3;
        } else {
            header->pkt_type = (uint8_t)RtmpPacketType::PacketTypeCodedFramesX;
        }
    } else {
        // flags
        uint8_t flags = getCodecFlags(frame->getCodecId());
        flags |= (uint8_t)(frame->keyFrame() ? RtmpFrameType::key_frame : RtmpFrameType::inter_frame) << 4;
        buff[0] = flags;
        buff[1] = (uint8_t)RtmpH264PacketType::h264_nalu;
        // cts
        set_be24(&buff[2], cts);
        buff += 5;
    }
    packet->time_stamp = frame->dts();
    memcpy(buff, frame->data(), frame->size());
    buff += frame->size();
    packet->body_size = buff - packet->data();
    // The header may be shorter than the 8 bytes reserved above; drop the unused tail,
    // otherwise the buffer would keep trailing bytes that are not part of the frame
    packet->buffer.resize(packet->body_size);
    packet->chunk_id = CHUNK_VIDEO;
    packet->stream_index = STREAM_MEDIA;
    packet->type_id = MSG_VIDEO;
    // Output rtmp packet
    RtmpCodec::inputRtmp(packet);
    return true;
}
/**
 * Build the rtmp config packet from the extra data of the track and pass it to
 * RtmpCodec::inputRtmp. Nothing is emitted when the track has no extra data.
 * The packet consists of a 5 byte header (enhanced or legacy layout, depending on
 * _enhanced) followed by the extra data, and uses timestamp 0.
 */
void VpxRtmpEncoder::makeConfigPacket() {
    auto config = getTrack()->getExtraData();
    const bool has_config = config && config->size();
    if (!has_config) {
        return;
    }

    auto packet = RtmpPacket::create();
    packet->body_size = 5 + config->size();
    packet->buffer.resize(packet->body_size);
    packet->chunk_id = CHUNK_VIDEO;
    packet->stream_index = STREAM_MEDIA;
    packet->time_stamp = 0;
    packet->type_id = MSG_VIDEO;

    auto out = packet->buffer.data();
    if (!_enhanced) {
        // Legacy layout: flags byte, config packet type, zero cts
        uint8_t flags = getCodecFlags(getTrack()->getCodecId());
        flags |= ((uint8_t)RtmpFrameType::key_frame << 4);
        out[0] = flags;
        out[1] = (uint8_t)RtmpH264PacketType::h264_config_header;
        memset(out + 2, 0, 3);
    } else {
        // Enhanced layout: sequence start packet flagged as key frame
        auto header = (RtmpVideoHeaderEnhanced *)out;
        header->enhanced = 1;
        header->pkt_type = (int)RtmpPacketType::PacketTypeSequenceStart;
        header->frame_type = (int)RtmpFrameType::key_frame;
        header->fourcc = htonl(getCodecFourCC(getTrack()->getCodecId()));
    }

    // The extra data follows the 5 byte header in both layouts
    memcpy(out + 5, config->data(), config->size());
    RtmpCodec::inputRtmp(packet);
}
} // namespace mediakit

54
ext-codec/VpxRtmp.h Normal file
View File

@@ -0,0 +1,54 @@
/*
* Copyright (c) 2016-present The ZLMediaKit project authors. All Rights Reserved.
*
* This file is part of ZLMediaKit(https://github.com/ZLMediaKit/ZLMediaKit).
*
* Use of this source code is governed by MIT-like license that can be found in the
* LICENSE file in the root of the source tree. All contributing project authors
* may be found in the AUTHORS file in the root of the source tree.
*/
#ifndef ZLMEDIAKIT_VPX_RTMPCODEC_H
#define ZLMEDIAKIT_VPX_RTMPCODEC_H
#include "Rtmp/RtmpCodec.h"
#include "Extension/Track.h"
namespace mediakit {
/**
 * Rtmp decoder.
 * Demultiplexes Vpx over rtmp into Vpx frames.
 */
class VpxRtmpDecoder : public RtmpCodec {
public:
using Ptr = std::shared_ptr<VpxRtmpDecoder>;
VpxRtmpDecoder(const Track::Ptr &track) : RtmpCodec(track) {}
/**
 * Input one rtmp video packet; config packets update the extra data of the track,
 * coded frames are forwarded through outputFrame().
 * @param rtmp the rtmp packet to decode
 */
void inputRtmp(const RtmpPacket::Ptr &rtmp) override;
protected:
/**
 * Create a frame of the track's codec from the payload and pass it to RtmpCodec::inputFrame.
 * @param data start of the frame payload
 * @param size payload size in bytes
 * @param dts decoding timestamp
 * @param pts presentation timestamp
 */
void outputFrame(const char *data, size_t size, uint32_t dts, uint32_t pts);
protected:
// Result of parsing the rtmp video packet header (codec, enhanced flag, packet type)
RtmpPacketInfo _info;
};
/**
 * Rtmp packetizer.
 * Packs Vpx frames into rtmp video packets.
 */
class VpxRtmpEncoder : public RtmpCodec {
// Whether the enhanced-rtmp header layout is written; read from the Rtmp::kEnhanced config value in the constructor
bool _enhanced = false;
public:
using Ptr = std::shared_ptr<VpxRtmpEncoder>;
VpxRtmpEncoder(const Track::Ptr &track);
/**
 * Input one frame; it is wrapped into an rtmp video packet and passed to RtmpCodec::inputRtmp.
 * @param frame the frame to pack
 * @return always true
 */
bool inputFrame(const Frame::Ptr &frame) override;
/**
 * Build and output the config packet from the extra data of the track;
 * does nothing when the track has no extra data.
 */
void makeConfigPacket() override;
};
} // namespace mediakit
#endif // ZLMEDIAKIT_VPX_RTMPCODEC_H