add VideoDecoder/Encoder for blocky CLI tool

This commit is contained in:
falsycat 2022-07-04 17:50:55 +09:00
parent b88b2fe47e
commit c9f19b960f
4 changed files with 499 additions and 235 deletions

View File

@ -7,9 +7,13 @@ target_sources(blocky
bytes.hh bytes.hh
common.hh common.hh
video_encoder.hh
video_decoder.hh
) )
target_link_libraries(blocky target_link_libraries(blocky
PUBLIC PUBLIC
args args
liblocky liblocky
minimp4
openh264
) )

View File

@ -2,6 +2,7 @@
#include <exception> #include <exception>
#include <iomanip> #include <iomanip>
#include <iostream> #include <iostream>
#include <optional>
#include <tuple> #include <tuple>
#include "common.hh" #include "common.hh"
@ -10,267 +11,278 @@
#include <args.hxx> #include <args.hxx>
#include "video_decoder.hh"
#include "video_encoder.hh"
using namespace blky; using namespace blky;
int main(int argc, char** argv) {
args::ArgumentParser parser(
"liblocky command line tool",
"liblocky allow you to embed bits into video data secretly");
args::MapFlag<std::string, DataFlow> from { args::ArgumentParser parser(
parser, kDataFlowList, "input layer specifier", {"from"}, kDataFlowMap, args::Options::Required}; "liblocky command line tool",
"liblocky allow you to embed a bit array into video data secretly");
args::MapFlag<std::string, DataFlow> to { args::MapFlag<std::string, DataFlow> from {
parser, kDataFlowList, "output layer specifier", {"to"}, kDataFlowMap, args::Options::Required}; parser, kDataFlowList, "input layer specifier", {"from"}, kDataFlowMap, args::Options::Required};
args::Group src_group { args::MapFlag<std::string, DataFlow> to {
parser, "source specifier", args::Group::Validators::Xor, args::Options::Required}; parser, kDataFlowList, "output layer specifier", {"to"}, kDataFlowMap, args::Options::Required};
args::Flag src_stdin {src_group, "src-stdin", "read from stdin", {"src-stdin", "stdin"}};
args::Flag src_stdin_hex {src_group, "src-stdin-hex", "read hex text from stdin", {"src-stdin-hex", "stdin-hex"}};
args::Group dst_group { args::Group src_group {
parser, "destination specifier", args::Group::Validators::Xor, args::Options::Required}; parser, "source specifier", args::Group::Validators::Xor, args::Options::Required};
args::Flag dst_stdout {dst_group, "dst-stdout", "write to stdout", {"dst-stdout", "stdout"}}; args::Flag src_stdin {src_group, "src-stdin", "read from stdin", {"src-stdin", "stdin"}};
args::Flag dst_stdout_hex {dst_group, "dst-stdout-hex", "write to stdout as hex text", {"dst-stdout-hex", "stdout-hex"}}; args::Flag src_stdin_hex {src_group, "src-stdin-hex", "read hex text from stdin", {"src-stdin-hex", "stdin-hex"}};
args::ValueFlag<std::string> src_video {src_group, "path", "video input", {"src-video"}};
args::Group param_group { args::Group dst_group {
parser, "general parameters", args::Group::Validators::DontCare parser, "destination specifier", args::Group::Validators::Xor, args::Options::Required};
}; args::Flag dst_stdout {dst_group, "dst-stdout", "write to stdout", {"dst-stdout", "stdout"}};
args::ValueFlag<std::tuple<uint32_t, uint32_t>> param_block_num { args::Flag dst_stdout_hex {dst_group, "dst-stdout-hex", "write to stdout as hex text", {"dst-stdout-hex", "stdout-hex"}};
param_group, args::ValueFlag<std::string> dst_video {dst_group, "path", "video output", {"dst-video"}};
"int>0,int>0",
"number of features",
{"feature-num"},
{16, 16}
};
args::ValueFlag<uint32_t> param_block_first {
param_group,
"int>=0",
"an index of first block where feature will be embedded. used when encoding",
{"feature-first-index"},
0
};
args::ValueFlag<uint8_t> param_feat_bits {
param_group,
"int>0",
"number of bits that can be represented by a single feature",
{"feature-bits"},
1
};
args::ValueFlag<uint8_t> param_seed {
param_group,
"int>0",
"seed number for hopping randomization",
{"seed"},
123
};
args::Group probgen_group { args::Group param_group {
parser, "params for feature probability generator", args::Group::Validators::DontCare parser, "general parameters", args::Group::Validators::DontCare
}; };
args::ValueFlag<double> probgen_false_positive { args::ValueFlag<std::tuple<uint32_t, uint32_t>> param_block_num {
probgen_group, param_group,
"0<=double<=1", "int>0,int>0",
"false positive ratio in feature probability generation", "number of features",
{"probgen-false-positive"}, {"feature-num"},
0 {16, 16}
}; };
args::ValueFlag<double> probgen_false_negative { args::ValueFlag<uint32_t> param_block_first {
probgen_group, param_group,
"0<=double<=1", "int>=0",
"false negative ratio in feature probability generation", "an index of first block where feature will be embedded. used when encoding",
{"probgen-false-negative"}, {"feature-first-index"},
0 0
}; };
args::ValueFlag<uint64_t> probgen_seed { args::ValueFlag<uint8_t> param_feat_bits {
probgen_group, param_group,
"int>0", "int>0",
"random seed", "number of bits that can be represented by a single feature",
{"probgen-seed"}, {"feature-bits"},
1 1
}; };
args::Flag probgen_normalize { args::ValueFlag<uint8_t> param_seed {
probgen_group, param_group,
"probgen-normalize", "int>0",
"normalize probabilities", "seed number for hopping randomization",
{"probgen-normalize"}, {"seed"},
}; 123
};
try { args::Group probgen_group {
parser.ParseCLI(argc, argv); parser, "params for feature probability generator", args::Group::Validators::DontCare
};
args::ValueFlag<double> probgen_false_positive {
probgen_group,
"0<=double<=1",
"false positive ratio in feature probability generation",
{"probgen-false-positive"},
0
};
args::ValueFlag<double> probgen_false_negative {
probgen_group,
"0<=double<=1",
"false negative ratio in feature probability generation",
{"probgen-false-negative"},
0
};
args::ValueFlag<uint64_t> probgen_seed {
probgen_group,
"int>0",
"random seed",
{"probgen-seed"},
1
};
args::Flag probgen_normalize {
probgen_group,
"probgen-normalize",
"normalize probabilities",
{"probgen-normalize"},
};
std::vector<uint8_t> bytes;
std::vector<uint32_t> features;
std::vector<double> feature_probs;
// read input std::vector<uint8_t> bytes;
std::vector<uint32_t> features;
std::vector<double> feature_probs;
std::optional<VideoDecoder> decoder;
int main(int argc, char** argv)
try {
parser.ParseCLI(argc, argv);
// read input
switch (args::get(from)) {
case kBytes:
if (src_stdin) {
std::string temp;
std::cin >> temp;
bytes = {temp.begin(), temp.end()};
} else if (src_stdin_hex) {
for (;;) {
char buf[2];
std::cin >> buf[0] >> buf[1];
if (std::cin.eof()) break;
bytes.push_back(ToHex(buf[0]) << 4 | ToHex(buf[1]));
}
} else {
throw std::runtime_error {"invalid source format for bytes"};
}
break;
case kFeatures:
if (src_stdin) {
features = ReadAll<uint32_t>(std::cin);
} else {
throw std::runtime_error {"invalid source format for features"};
}
break;
case kFeatureProbs:
if (src_stdin) {
feature_probs = ReadAll<double>(std::cin);
} else {
throw std::runtime_error {"invalid source format for feature probs"};
}
break;
case kVideo:
if (src_video) {
decoder.emplace(args::get(src_video));
} else {
throw std::runtime_error {"invalid source format for video"};
}
}
if (args::get(from) < args::get(to)) {
// execute encoding
switch (args::get(from)) { switch (args::get(from)) {
case kBytes: case kBytes:
if (src_stdin) { if (args::get(to) == kBytes) break;
std::string temp; features = BytesEncoder(
std::cin >> temp; bytes,
bytes = {temp.begin(), temp.end()}; args::get(param_block_num),
} else if (src_stdin_hex) { args::get(param_feat_bits),
for (;;) { args::get(param_block_first),
char buf[2]; args::get(param_seed));
std::cin >> buf[0] >> buf[1]; /* fallthrough */
if (std::cin.eof()) break;
bytes.push_back(ToHex(buf[0]) << 4 | ToHex(buf[1]));
}
} else {
throw std::runtime_error {"invalid source format for bytes"};
}
break;
case kFeatures: case kFeatures:
if (src_stdin) { if (args::get(to) == kFeatures) break;
features = ReadAll<uint32_t>(std::cin); if (args::get(to) == kFeatureProbs) {
} else { feature_probs = GenerateFeatureProbs(
throw std::runtime_error {"invalid source format for features"};
}
break;
case kFeatureProbs:
if (src_stdin) {
feature_probs = ReadAll<double>(std::cin);
} else {
throw std::runtime_error {"invalid source format for feature probs"};
}
break;
case kVideo:
assert(false);
}
if (args::get(from) < args::get(to)) {
// execute encoding
switch (args::get(from)) {
case kBytes:
if (args::get(to) == kBytes) break;
features = BytesEncoder(
bytes,
args::get(param_block_num),
args::get(param_feat_bits),
args::get(param_block_first),
args::get(param_seed));
/* fallthrough */
case kFeatures:
if (args::get(to) == kFeatures) break;
if (args::get(to) == kFeatureProbs) {
feature_probs = GenerateFeatureProbs(
features,
args::get(param_block_num),
args::get(probgen_false_positive),
args::get(probgen_false_negative),
args::get(probgen_seed),
probgen_normalize);
break;
}
// TODO embed into video
assert(false);
/* fallthrough */
case kVideo:
if (args::get(to) == kVideo) break;
assert(false);
case kFeatureProbs:
throw std::runtime_error("couldn't start flow from the data");
}
} else if (args::get(from) > args::get(to)) {
// execute decoding
switch (args::get(from)) {
case kVideo:
if (args::get(to) == kVideo) break;
// TODO extract feature probs // features = XX
assert(false);
case kFeatureProbs:
if (args::get(to) == kFeatureProbs) break;
features = PathfindFeatures(
feature_probs,
args::get(param_block_num),
args::get(param_feat_bits),
args::get(param_seed));
/* fallthrough */
case kFeatures:
if (args::get(to) == kFeatures) break;
bytes = BytesDecoder(
features, features,
args::get(param_block_num), args::get(param_block_num),
args::get(param_feat_bits), args::get(probgen_false_positive),
args::get(param_seed)); args::get(probgen_false_negative),
/* fallthrough */ args::get(probgen_seed),
probgen_normalize);
case kBytes: break;
if (args::get(to) == kBytes) break;
assert(false);
} }
} // TODO embed into video
assert(false);
// output /* fallthrough */
switch (args::get(to)) {
case kBytes:
if (dst_stdout) {
std::cout << std::string {bytes.begin(), bytes.end()} << std::endl;
} else if (dst_stdout_hex) {
for (auto c : bytes) {
std::cout << std::hex << (int) c;
}
std::cout << std::endl;
} else {
throw std::runtime_error {"invalid destination format for bytes"};
}
break;
case kFeatures:
if (dst_stdout) {
for (auto& f : features) std::cout << f << "\n";
} else {
throw std::runtime_error {"invalid destination format for features"};
}
break;
case kFeatureProbs:
if (dst_stdout) {
const auto size = args::get(param_block_num);
const auto cols = std::get<0>(size) * std::get<1>(size);
for (size_t i = 0; i < feature_probs.size();) {
for (size_t j = 0; i < feature_probs.size() && j < cols; ++i, ++j) {
std::cout << feature_probs[i] << " ";
}
std::cout << "\n";
}
} else {
throw std::runtime_error {"invalid destination format for feature probs"};
}
break;
case kVideo: case kVideo:
break; if (args::get(to) == kVideo) break;
assert(false);
case kFeatureProbs:
throw std::runtime_error("couldn't start flow from the data");
} }
} catch (const args::Help&) { } else if (args::get(from) > args::get(to)) {
std::cout << parser << std::endl; // execute decoding
return 0; switch (args::get(from)) {
case kVideo:
if (args::get(to) == kVideo) break;
// TODO extract feature probs // features = XX
assert(false);
} catch (const args::ParseError& e) { case kFeatureProbs:
std::cerr << e.what() << std::endl; if (args::get(to) == kFeatureProbs) break;
std::cerr << parser << std::endl; features = PathfindFeatures(
return 1; feature_probs,
args::get(param_block_num),
args::get(param_feat_bits),
args::get(param_seed));
/* fallthrough */
} catch (const args::ValidationError& e) { case kFeatures:
std::cerr << e.what() << std::endl; if (args::get(to) == kFeatures) break;
std::cerr << parser << std::endl; bytes = BytesDecoder(
return 1; features,
args::get(param_block_num),
args::get(param_feat_bits),
args::get(param_seed));
/* fallthrough */
} catch (const std::runtime_error& e) { case kBytes:
std::cerr << "runtime error: " << e.what() << std::endl; if (args::get(to) == kBytes) break;
return 1; assert(false);
}
}
// output
switch (args::get(to)) {
case kBytes:
if (dst_stdout) {
std::cout << std::string {bytes.begin(), bytes.end()} << std::endl;
} else if (dst_stdout_hex) {
for (auto c : bytes) {
std::cout << std::hex << (int) c;
}
std::cout << std::endl;
} else {
throw std::runtime_error {"invalid destination format for bytes"};
}
break;
case kFeatures:
if (dst_stdout) {
for (auto& f : features) std::cout << f << "\n";
} else {
throw std::runtime_error {"invalid destination format for features"};
}
break;
case kFeatureProbs:
if (dst_stdout) {
const auto size = args::get(param_block_num);
const auto cols = std::get<0>(size) * std::get<1>(size);
for (size_t i = 0; i < feature_probs.size();) {
for (size_t j = 0; i < feature_probs.size() && j < cols; ++i, ++j) {
std::cout << feature_probs[i] << " ";
}
std::cout << "\n";
}
} else {
throw std::runtime_error {"invalid destination format for feature probs"};
}
break;
case kVideo:
break;
} }
return 0; return 0;
} catch (const args::Help&) {
std::cout << parser << std::endl;
return 0;
} catch (const args::ParseError& e) {
std::cerr << e.what() << std::endl;
std::cerr << parser << std::endl;
return 1;
} catch (const args::ValidationError& e) {
std::cerr << e.what() << std::endl;
std::cerr << parser << std::endl;
return 1;
} catch (const std::runtime_error& e) {
std::cerr << "runtime error: " << e.what() << std::endl;
return 1;
} }

169
blocky/video_decoder.hh Normal file
View File

@ -0,0 +1,169 @@
#pragma once
#include <cassert>
#include <cstdint>
#include <cstring>
#include <exception>
#include <fstream>
#include <string_view>
#include <vector>
#include <minimp4.h>
#include <wels/codec_api.h>
class VideoDecoder final {
public:
VideoDecoder() = delete;
VideoDecoder(const std::string& path) :
file_(path, std::ifstream::binary | std::ifstream::ate),
size_(static_cast<size_t>(file_.tellg())) {
// init objects
SDecodingParam dparam = {};
dparam.sVideoProperty.eVideoBsType = VIDEO_BITSTREAM_DEFAULT;
dparam.eEcActiveIdc = ERROR_CON_SLICE_COPY;
WelsCreateDecoder(&decoder_);
decoder_->Initialize(&dparam);
int lv = WELS_LOG_DEBUG;
decoder_->SetOption(DECODER_OPTION_TRACE_LEVEL, &lv);
demuxer_ = {};
MP4D_open(&demuxer_, ReadCallback, this, static_cast<int64_t>(size_));
// find video track
track_ = SIZE_MAX;
for (size_t i = 0; i < demuxer_.track_count; ++i) {
if (demuxer_.track[i].handler_type == MP4D_HANDLER_TYPE_VIDE) {
if (track_ != SIZE_MAX) {
throw std::runtime_error {"there are many video tracks"};
}
track_ = i;
}
}
if (track_ == SIZE_MAX) {
throw std::runtime_error {"there is no video track"};
}
// setup decoder
std::vector<uint8_t> temp_;
for (size_t i = 0;; ++i) {
int size;
auto sps = static_cast<const uint8_t*>(MP4D_read_sps(
&demuxer_,
static_cast<unsigned int>(track_),
static_cast<int>(i),
&size));
if (!sps) break;
temp_.resize(static_cast<size_t>(4+size));
temp_[0] = 0, temp_[1] = 0, temp_[2] = 0, temp_[3] = 1;
std::memcpy(&temp_[4], sps, static_cast<size_t>(size));
if (decoder_->DecodeFrameNoDelay(temp_.data(), static_cast<int>(temp_.size()), yuv_, &frame_)) {
throw std::runtime_error {"failed to decode SPS"};
}
}
for (size_t i = 0;; ++i) {
int size;
auto pps = static_cast<const uint8_t*>(MP4D_read_pps(
&demuxer_,
static_cast<unsigned int>(track_),
static_cast<int>(i),
&size));
if (!pps) break;
temp_.resize(static_cast<size_t>(4+size));
temp_[0] = 0, temp_[1] = 0, temp_[2] = 0, temp_[3] = 1;
std::memcpy(&temp_[4], pps, static_cast<size_t>(size));
if (decoder_->DecodeFrameNoDelay(temp_.data(), static_cast<int>(temp_.size()), yuv_, &frame_)) {
throw std::runtime_error {"failed to decode SPS"};
}
}
temp_.clear();
}
~VideoDecoder() noexcept {
decoder_->Uninitialize();
WelsDestroyDecoder(decoder_);
MP4D_close(&demuxer_);
}
VideoDecoder(const VideoDecoder&) = delete;
VideoDecoder(VideoDecoder&&) = delete;
VideoDecoder& operator=(const VideoDecoder&) = delete;
VideoDecoder& operator=(VideoDecoder&&) = delete;
void Decode() {
if (temp_consumed_ >= temp_.size()) {
unsigned size, time, dur;
const auto off = MP4D_frame_offset(
&demuxer_,
static_cast<unsigned int>(track_),
static_cast<unsigned int>(count_),
&size, &time, &dur);
assert(size > 0);
temp_.resize(size);
temp_consumed_ = 0;
file_.seekg(static_cast<std::streamoff>(off));
assert(file_);
file_.read((char*) temp_.data(), size);
assert(file_);
Decode();
} else {
auto& i = temp_consumed_;
const uint32_t nal_size = 4 +
static_cast<uint32_t>((temp_[i+0] << 24) |
(temp_[i+1] << 16) |
(temp_[i+2] << 8) |
(temp_[i+3] << 0));
temp_[i ] = 0;
temp_[i+1] = 0;
temp_[i+2] = 0;
temp_[i+3] = 1;
if (decoder_->DecodeFrameNoDelay(temp_.data(), static_cast<int>(nal_size), yuv_, &frame_)) {
throw std::runtime_error {"failed to decode a frame"};
}
i += nal_size;
}
}
private:
std::ifstream file_;
size_t size_;
ISVCDecoder* decoder_;
MP4D_demux_t demuxer_;
size_t track_;
uint8_t* yuv_[3] = {0};
SBufferInfo frame_ = {};
size_t count_ = 0;
size_t temp_consumed_ = 0;
std::vector<uint8_t> temp_;
static int ReadCallback(int64_t off, void* buf, size_t size, void* ptr) noexcept {
auto self = (VideoDecoder*) ptr;
auto n = self->size_ - static_cast<size_t>(off) - size;
if (size < n) n = size;
self->file_.seekg(off);
assert(self->file_);
self->file_.read((char*) buf, static_cast<std::streamsize>(n));
assert(self->file_);
return 0;
}
};

79
blocky/video_encoder.hh Normal file
View File

@ -0,0 +1,79 @@
#pragma once
#include <cassert>
#include <cstdint>
#include <cstring>
#include <exception>
#include <fstream>
#include <string>
#include <minimp4.h>
#include <wels/codec_api.h>
/// Encodes raw pictures with OpenH264 and muxes the resulting NAL units into
/// an MP4 file through minimp4.
///
/// All failures are reported by throwing std::runtime_error.
class VideoEncoder final {
 public:
  VideoEncoder() = delete;

  /// @param path path of the MP4 file to write
  /// @param p    OpenH264 base encoding parameters; the picture size and
  ///             fMaxFrameRate are also used to set up the MP4 track
  /// @throws std::runtime_error when the file cannot be opened, the frame
  ///         rate is not positive, or the encoder/muxer fails to initialize
  VideoEncoder(const std::string& path, const SEncParamBase& p) :
      file_(path, std::ofstream::binary), fps_(p.fMaxFrameRate) {
    if (!file_) {
      throw std::runtime_error {"failed to open the output video file"};
    }
    // the frame duration is computed as 90000/fps, so fps must be positive
    if (!(fps_ > 0)) {
      throw std::runtime_error {"frame rate must be greater than zero"};
    }

    // the destructor does not run when a constructor throws, so everything
    // acquired so far has to be released by hand before rethrowing
    try {
      if (WelsCreateSVCEncoder(&encoder_) || !encoder_) {
        encoder_ = nullptr;
        throw std::runtime_error {"failed to init openh264 encoder"};
      }

      muxer_ = MP4E_open(false, false, this, WriteCallback);
      if (!muxer_) {
        throw std::runtime_error {"failed to init mp4 muxer"};
      }
      if (MP4E_STATUS_OK != mp4_h26x_write_init(&writer_, muxer_, p.iPicWidth, p.iPicHeight, false)) {
        throw std::runtime_error {"failed to init h26x writer"};
      }
      writer_ready_ = true;

      if (cmResultSuccess != encoder_->Initialize(&p)) {
        throw std::runtime_error {"failed to init openh264 encoder"};
      }

      int lv = WELS_LOG_DEBUG;
      encoder_->SetOption(ENCODER_OPTION_TRACE_LEVEL, &lv);

      int fmt = videoFormatI420;
      encoder_->SetOption(ENCODER_OPTION_DATAFORMAT, &fmt);
    } catch (...) {
      Release();
      throw;
    }
  }

  ~VideoEncoder() noexcept {
    Release();
  }

  // the object owns raw library handles and passes `this` to minimp4 as the
  // callback token, so it must be neither copied nor moved
  VideoEncoder(const VideoEncoder&) = delete;
  VideoEncoder(VideoEncoder&&) = delete;
  VideoEncoder& operator=(const VideoEncoder&) = delete;
  VideoEncoder& operator=(VideoEncoder&&) = delete;

  /// Encodes one picture and appends every produced NAL unit to the MP4 file.
  /// Nothing is written when the encoder decides to skip the frame.
  ///
  /// @throws std::runtime_error when encoding or muxing fails
  void Encode(const SSourcePicture& pic) {
    SFrameBSInfo info = {};
    if (cmResultSuccess != encoder_->EncodeFrame(&pic, &info)) {
      throw std::runtime_error {"failed to encode a frame"};
    }
    if (info.eFrameType == videoFrameTypeSkip) return;

    // duration of a frame in 90 kHz ticks
    const auto duration = static_cast<unsigned int>(90000./fps_);

    for (int i = 0; i < info.iLayerNum; ++i) {
      const auto& lay = info.sLayerInfo[i];

      // NAL units of a layer are stored back to back in pBsBuf
      uint8_t* buf = lay.pBsBuf;
      for (int j = 0; j < lay.iNalCount; ++j) {
        const int len = lay.pNalLengthInByte[j];
        if (MP4E_STATUS_OK != mp4_h26x_write_nal(&writer_, buf, len, duration)) {
          throw std::runtime_error {"failed to write a NAL unit"};
        }
        buf += len;
      }
    }
  }

 private:
  std::ofstream file_;

  ISVCEncoder* encoder_ = nullptr;

  MP4E_mux_t* muxer_ = nullptr;
  mp4_h26x_writer_t writer_ = {};
  bool writer_ready_ = false;

  double fps_;

  /// Releases the encoder, the muxer and the writer; safe to call more than
  /// once. file_ is still open here, so the muxer can flush through
  /// WriteCallback while closing.
  void Release() noexcept {
    if (encoder_) {
      encoder_->Uninitialize();
      WelsDestroySVCEncoder(encoder_);
      encoder_ = nullptr;
    }
    if (muxer_) {
      MP4E_close(muxer_);
      muxer_ = nullptr;
    }
    if (writer_ready_) {
      mp4_h26x_write_close(&writer_);
      writer_ready_ = false;
    }
  }

  /// minimp4 write callback: stores `size` bytes of `buf` at offset `off` of
  /// the output file. Returns non-zero when seeking or writing failed.
  static int WriteCallback(int64_t off, const void* buf, size_t size, void* ptr) noexcept {
    auto self = static_cast<VideoEncoder*>(ptr);
    self->file_.seekp(static_cast<std::streamoff>(off));
    if (!self->file_) return 1;

    self->file_.write(static_cast<const char*>(buf), static_cast<std::streamsize>(size));
    return !self->file_;
  }
};