add new converter: video -> block probabilities

This commit is contained in:
falsycat 2022-09-05 10:28:14 +09:00
parent 92884041bb
commit de9eb51fbc
8 changed files with 524 additions and 0 deletions

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
/build/

28
CMakeLists.txt Normal file
View File

@ -0,0 +1,28 @@
# Top-level build script: declares the project, requires C++20, prepares
# per-compiler warning flag lists and descends into thirdparty/ and conv/.
cmake_minimum_required(VERSION 3.18)
project(blocky C CXX)
# NOTE(review): option() is documented as option(<variable> "<help_text>" [value]),
# so "OFF" is taken as the help text here; the value still defaults to OFF.
# BLOCKY_STATIC is not read anywhere in the visible build scripts.
option(BLOCKY_STATIC OFF)
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
# Warning flags selected by generator expressions (GNU-like compilers vs MSVC).
# NOTE(review): this list is keyed on CXX_COMPILER_ID although it is named for C,
# and nothing in the visible build scripts reads it — confirm whether it is needed.
set(BLOCKY_C_FLAGS
$<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>,$<CXX_COMPILER_ID:GNU>>:
-Wall -Werror -pedantic-errors -Wextra -Wconversion -Wsign-conversion>
$<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>>:
-Wno-overloaded-virtual>
$<$<CXX_COMPILER_ID:MSVC>:
/W4 /WX>
)
# Same flag selection for C++.
# NOTE(review): conv/CMakeLists.txt expands BLKY_CXX_FLAGS (different spelling),
# so this list currently does not reach any target.
set(BLOCKY_CXX_FLAGS
$<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>,$<CXX_COMPILER_ID:GNU>>:
-Wall -Werror -pedantic-errors -Wextra -Wconversion -Wsign-conversion>
$<$<OR:$<CXX_COMPILER_ID:Clang>,$<CXX_COMPILER_ID:AppleClang>>:
-Wno-overloaded-virtual>
$<$<CXX_COMPILER_ID:MSVC>:
/W4 /WX>
)
# thirdparty/ must come first: conv/ links against the targets it creates.
add_subdirectory(thirdparty)
add_subdirectory(conv)

23
README.md Normal file
View File

@ -0,0 +1,23 @@
blocky
====
## CMake command
If you have built openh264 in `/home/user/openh264`, configure this project with:
```
cmake -DCMAKE_CXX_FLAGS=-isystem\ /home/user/openh264/codec/api\ -L/home/user/openh264 ..
```
## Useful ffmpeg commands
```
ffmpeg -i in.mp4 -vframes 300 "%d.png"
```
```
ffmpeg -r 30 -i "%d.png" -vcodec libx264 -pix_fmt yuv420p out.mp4
```
```
ffmpeg -i src.mp4 -t 10s cut.mp4
```

4
conv/CMakeLists.txt Normal file
View File

@ -0,0 +1,4 @@
# Builds the video_bprob converter and adds the project root to the include
# path so that sources can write #include "conv/...".
# NOTE(review): BLKY_CXX_FLAGS is not set by the top-level CMakeLists.txt, which
# defines BLOCKY_CXX_FLAGS instead, so it expands to nothing here. The BLOCKY
# list is made of generator expressions, which are not evaluated inside
# CMAKE_CXX_FLAGS; target_compile_options() would be the place for it. Enabling
# it turns on -Werror, so confirm the sources build cleanly before switching.
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${BLKY_CXX_FLAGS} -I ${PROJECT_SOURCE_DIR}")
add_executable(video_bprob video_bprob.cc)
# args and minimp4 are targets created in thirdparty/. No openh264 target is
# defined in the visible scripts, so it is presumably resolved as a plain
# library name using the -L path described in README.md.
target_link_libraries(video_bprob PRIVATE args minimp4 openh264)

75
conv/common.hh Normal file
View File

@ -0,0 +1,75 @@
#pragma once
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <stdexcept>
#include <string>
#include <vector>

#include <codec/api/wels/codec_api.h>
// Runtime precondition check: does nothing when `eval` is true, otherwise
// throws std::runtime_error whose what() is `msg`.
inline void Enforce(bool eval, const std::string& msg) {
  if (eval) {
    return;
  }
  throw std::runtime_error(msg);
}
// Replaces the content of `v` with the 4-byte prefix 00 00 00 01 followed by
// the `sz` bytes starting at `buf`.
//
//   v   : destination buffer; its previous content is discarded
//   buf : payload to append after the prefix (may be null only when sz == 0)
//   sz  : payload size in bytes
//
// An empty payload is valid and leaves just the 4-byte prefix; the previous
// implementation formed `&v[4]` in that case, which is an out-of-range
// subscript. The function is noexcept, so an allocation failure terminates
// the program rather than propagating.
inline void CopyNal(std::vector<uint8_t>& v, const uint8_t* buf, size_t sz) noexcept {
  v.assign({0, 0, 0, 1});
  v.insert(v.end(), buf, buf + sz);
}
// Owning copy of one decoded picture, stored as three tightly packed planes
// (row length == plane width, no stride padding).
//
//   Y      : luma plane, w*h bytes
//   U, V   : chroma planes, hw*hh bytes each
//   w, h   : picture size in pixels
//   hw, hh : chroma plane size (w/2, h/2, rounded down)
//
// All size fields start at zero so that a default-constructed Frame is a
// well-defined empty picture.
struct Frame {
  std::vector<uint8_t> Y;
  std::vector<uint8_t> U;
  std::vector<uint8_t> V;
  int32_t w = 0;
  int32_t h = 0;
  int32_t hw = 0;
  int32_t hh = 0;

  Frame() = default;

  // Copies the picture described by `frame` out of the decoder-owned planes
  // `yuv` (Y, U, V in that order). iStride[0] is used as the luma row pitch
  // and iStride[1] as the pitch of both chroma planes.
  Frame(uint8_t* yuv[3], const SBufferInfo& frame) {
    const auto& sys = frame.UsrData.sSystemBuffer;
    w  = static_cast<int32_t>(sys.iWidth);
    h  = static_cast<int32_t>(sys.iHeight);
    hw = w/2;
    hh = h/2;

    const auto ystride  = static_cast<int32_t>(sys.iStride[0]);
    const auto uvstride = static_cast<int32_t>(sys.iStride[1]);

    // Copies `ph` rows of `pw` bytes from a strided source into a packed plane.
    const auto copy_plane = [](std::vector<uint8_t>& dst, const uint8_t* src,
                               int32_t stride, int32_t pw, int32_t ph) {
      const auto row = static_cast<size_t>(pw);
      dst.resize(row * static_cast<size_t>(ph));
      if (dst.empty()) {
        return;  // nothing to copy; avoids memcpy on a null data() pointer
      }
      for (int32_t y = 0; y < ph; ++y) {
        std::memcpy(dst.data() + static_cast<size_t>(y)*row, src + y*stride, row);
      }
    };
    copy_plane(Y, yuv[0], ystride,  w,  h);
    copy_plane(U, yuv[1], uvstride, hw, hh);
    copy_plane(V, yuv[2], uvstride, hw, hh);
  }

  // Describes this frame as an I420 source picture. The returned structure
  // points into Y/U/V, so it is only valid while this Frame is alive and its
  // planes are not resized.
  SSourcePicture GetSourcePic() noexcept {
    // Value-initialize so that any field not assigned below is zero instead
    // of indeterminate.
    SSourcePicture ret = {};
    ret.iPicWidth    = w;
    ret.iPicHeight   = h;
    ret.iColorFormat = videoFormatI420;
    ret.iStride[0]   = w;
    ret.iStride[1]   = hw;
    ret.iStride[2]   = hw;
    ret.pData[0]     = Y.data();
    ret.pData[1]     = U.data();
    ret.pData[2]     = V.data();
    return ret;
  }
};

355
conv/video_bprob.cc Normal file
View File

@ -0,0 +1,355 @@
#include <cmath>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <vector>
#include <args.hxx>
#include <minimp4.h>
#include <codec/api/wels/codec_api.h>
#include "conv/common.hh"
// Command line interface of the converter. Every flag is a namespace-scope
// object registered with `parser`; the rest of the file reads the parsed
// values through args::get(param::<flag>).
namespace param {
using namespace ::args;

ArgumentParser parser {
  "converter: video -> block probability matrix"
};
HelpFlag help {
  parser, "help", "display this menu", {'h', "help"},
};

// size of the blocks the frame is divided into (one output value per block)
ValueFlag<int32_t> bw {
  parser, "128", "width of blocks (px)", {"block-w"}, 128,
};
ValueFlag<int32_t> bh {
  parser, "128", "height of blocks (px)", {"block-h"}, 128,
};

// number of frames that are aggregated into one output row
ValueFlag<int32_t> utime {
  parser, "10", "duration of each feature (frame)", {"utime"}, 10,
};

// size of the pixel region compared during block matching
ValueFlag<int32_t> bmw {
  parser, "16", "width of blockmatch region (px)", {"bm-w"}, 16,
};
ValueFlag<int32_t> bmh {
  parser, "16", "height of blockmatch region (px)", {"bm-h"}, 16,
};

// how far the region is shifted while searching for the best match
ValueFlag<int32_t> bmsw {
  parser, "4", "width of blockmatch search region (px)", {"bm-sw"}, 4,
};
ValueFlag<int32_t> bmsh {
  parser, "4", "height of blockmatch search region (px)", {"bm-sh"}, 4,
};

// What gets written to stdout. kProb is the first enumerator, so it is also
// the value of a value-initialized Output.
enum Output {
  kProb,
  kLen,
  kVec,
  kNull,
};
// accepted spellings of --output
const std::unordered_map<std::string, Output> kOutput = {
  {"default", kProb},
  {"prob",    kProb},
  {"len",     kLen},
  {"vec",     kVec},
  {"null",    kNull},
};
// The help text now lists `prob` as well, since the map above accepts it.
MapFlag<std::string, Output> output {
  parser, "prob", "output type (prob, len, vec, null)", {"output"}, kOutput,
};

Positional<std::string> vpath {
  parser, "path", "video file path",
};
}  // namespace param
// Owning copy of one decoded picture: Y holds the luma plane, U and V the
// chroma planes; w/h are the picture size and hw/hh the chroma plane size.
// The size fields default to zero so that a default-constructed Frame is a
// well-defined empty picture.
//
// NOTE(review): conv/common.hh, which is included at the top of this file,
// also defines a `struct Frame` with the same data members — only one of the
// two definitions can stay.
struct Frame {
  std::vector<uint8_t> Y;
  std::vector<uint8_t> U;
  std::vector<uint8_t> V;
  int32_t w = 0;
  int32_t h = 0;
  int32_t hw = 0;
  int32_t hh = 0;

  Frame() = default;
  // Copies the decoder output planes `yuv` described by `frame`
  // (defined near the end of this file).
  Frame(uint8_t* yuv[3], const SBufferInfo& frame);
};
// Plain pair of doubles; BlockMatching returns its displacement in this type.
struct Vec {
  double x;
  double y;
};
// utilities
// Forward declaration; the definition is at the end of this file.
// NOTE(review): conv/common.hh (included above) declares an `inline` CopyNal
// with the same parameter list — confirm which of the two is meant to stay.
static void CopyNal(std::vector<uint8_t>&, const uint8_t* buf, size_t sz) noexcept;
// Estimates how the luma region whose top-left corner is (bx, by) in `cf`
// moved relative to `pf` by exhaustive block matching.
//
// The region is --bm-w x --bm-h pixels (clipped to the frame). It is compared
// against `pf` at every shift sx in [-bm-sw, bm-sw) and sy in [-bm-sh, bm-sh)
// using the sum of squared differences; the first shift with the lowest score
// wins. Shifts that would read outside `pf` are skipped.
//
// Returns the winning shift divided by the search size, i.e. each component
// lies in [-1, 1). A zero vector is returned when the inputs leave nothing to
// compare.
//
// NOTE(review): the search range excludes +bm-sw / +bm-sh although the
// caller normalizes lengths by sqrt(2); confirm whether an inclusive range
// was intended.
static Vec BlockMatching(const Frame& cf, const Frame& pf, int32_t bx, int32_t by) {
  const auto bmw  = args::get(param::bmw);
  const auto bmh  = args::get(param::bmh);
  const auto bmsw = args::get(param::bmsw);
  const auto bmsh = args::get(param::bmsh);

  // clip the compared region so that it never leaves the current frame
  const auto rw = bx+bmw <= cf.w ? bmw : cf.w-bx;
  const auto rh = by+bmh <= cf.h ? bmh : cf.h-by;
  if (bx < 0 || by < 0 || rw <= 0 || rh <= 0 || bmsw <= 0 || bmsh <= 0) {
    return { .x = 0, .y = 0, };
  }

  const auto cw = static_cast<size_t>(cf.w);
  const auto pw = static_cast<size_t>(pf.w);

  int32_t min_sx = 0, min_sy = 0;
  double min_score = 1e+100;  // INF

  for (int32_t sy = -bmsh; sy < bmsh; ++sy) {
    const auto py = by+sy;
    if (py < 0 || py+rh > pf.h) continue;  // shifted rows leave the frame

    for (int32_t sx = -bmsw; sx < bmsw; ++sx) {
      const auto px = bx+sx;
      if (px < 0 || px+rw > pf.w) continue;  // shifted columns leave the frame

      double score = 0;
      // rows run over the region height and columns over its width
      // (the bounds used to be swapped)
      for (int32_t y = 0; y < rh; ++y) {
        const auto crow = cf.Y.data() +
            static_cast<size_t>(by+y)*cw + static_cast<size_t>(bx);
        const auto prow = pf.Y.data() +
            static_cast<size_t>(py+y)*pw + static_cast<size_t>(px);
        for (int32_t x = 0; x < rw; ++x) {
          const auto diff = static_cast<double>(crow[x] - prow[x]);
          score += diff*diff;
        }
      }
      if (score < min_score) {
        min_score = score;
        min_sx    = sx;
        min_sy    = sy;
      }
    }
  }

  const auto sxf = static_cast<double>(min_sx) / static_cast<double>(bmsw);
  const auto syf = static_cast<double>(min_sy) / static_cast<double>(bmsh);
  return { .x = sxf, .y = syf, };
}
static double EachBlock(const Frame& cf, const Frame& pf, int32_t bx, int32_t by) {
const auto v = BlockMatching(cf, pf, bx, by);
const auto len = std::sqrt(v.x*v.x + v.y*v.y);
switch (args::get(param::output)) {
case param::kLen:
std::cout << len << '\n';
break;
case param::kVec:
std::cout << bx << " " << by << " " << v.x << " " << v.y << '\n';
break;
default:
break;
}
return len;
}
// Processes one pair of consecutive frames: runs EachBlock on every
// --block-w x --block-h block that fits completely into the frame (row-major
// order) and accumulates the returned lengths per block.
//
// The function keeps state in function-local statics (`cnt`, `probs`). The
// caller skips the first frame of every --utime sized feature, so one feature
// consists of utime-1 calls; the accumulator is cleared on the first of them
// and, in prob mode, printed on the last one as the per-block mean divided by
// sqrt(2). In len/vec mode an empty line terminates the per-block output of
// every call.
//
// Throws std::runtime_error (via Enforce) when the two frames differ in size
// or the block size is not positive and smaller than the frame.
static void EachFrame(const Frame& cf, const Frame& pf) {
  const auto bw = args::get(param::bw);
  const auto bh = args::get(param::bh);  // used to read param::bw by mistake
  const auto ut = args::get(param::utime);

  Enforce(bw > 0 && bh > 0, "block size must be greater than 0");
  Enforce(cf.w == pf.w && cf.h == pf.h, "variable frame size is not allowed");
  Enforce(cf.w > bw && cf.h > bh, "block size must be less than frame size");

  // number of calls that make up one feature (never 0, it is used as divisor)
  const auto period = static_cast<size_t>(ut > 1 ? ut-1 : 1);

  static size_t cnt = 0;
  static std::vector<double> probs;
  if (cnt%period == 0) {
    const auto cols = static_cast<size_t>(cf.w/bw);
    const auto rows = static_cast<size_t>(cf.h/bh);
    probs.assign(cols*rows, 0.0);
  }

  // `<=` so that the last row/column is visited when the frame size is an
  // exact multiple of the block size; this keeps the number of visited blocks
  // equal to probs.size().
  size_t idx = 0;
  for (int32_t by = 0; by+bh <= cf.h; by += bh) {
    for (int32_t bx = 0; bx+bw <= cf.w; bx += bw) {
      probs[idx++] += EachBlock(cf, pf, bx, by);
    }
  }

  switch (args::get(param::output)) {
  case param::kLen:
  case param::kVec:
    std::cout << std::endl;
    break;
  case param::kProb:
    if ((cnt+1)%period == 0) {
      for (const auto sum : probs) {
        std::cout << sum/static_cast<double>(period)/std::sqrt(2.0) << ' ';
      }
      std::cout << std::endl;
    }
    break;
  default:
    break;
  }
  ++cnt;
}
static void Exec() {
const auto bw = args::get(param::bw);
const auto bh = args::get(param::bw);
const auto ut = args::get(param::utime);
Enforce(bw > 0 && bh > 0, "block size must be greater than 0");
Enforce(ut > 0, "utime must be greater than 0");
const auto bmw = args::get(param::bmw);
const auto bmh = args::get(param::bmh);
const auto bmsw = args::get(param::bmw);
const auto bmsh = args::get(param::bmh);
Enforce(bmw > 0 && bmh > 0, "block matching region size must be greater than 0");
Enforce(bmsw > 0 && bmsh > 0, "block matching search region size must be greater than 0");
// open video stream
const auto vpath = args::get(param::vpath);
std::ifstream vst {vpath.c_str(), std::ifstream::binary | std::ifstream::ate};
Enforce(!!vst, "video stream is invalid");
const auto vsz = vst.tellg();
// init decoder
ISVCDecoder* dec;
WelsCreateDecoder(&dec);
SDecodingParam decp = {};
decp.sVideoProperty.eVideoBsType = VIDEO_BITSTREAM_DEFAULT;
decp.eEcActiveIdc = ERROR_CON_SLICE_COPY;
dec->Initialize(&decp);
int declv = WELS_LOG_INFO;
dec->SetOption(DECODER_OPTION_TRACE_LEVEL, &declv);
uint8_t* yuv[3] = {0};
SBufferInfo frame = {};
// demux
MP4D_demux_t dem = {};
MP4D_open(&dem, [](int64_t off, void* buf, size_t sz, void* ptr) {
auto& vst = *reinterpret_cast<std::ifstream*>(ptr);
vst.seekg(off);
Enforce(!!vst, "seek failure");
vst.read(reinterpret_cast<char*>(buf), sz);
Enforce(!!vst, "read failure");
return 0;
}, &vst, vsz);
// find video track
int ti;
for (ti = 0; ti < dem.track_count; ++ti) {
const auto& t = dem.track[ti];
if (t.handler_type == MP4D_HANDLER_TYPE_VIDE) {
break;
}
}
Enforce(ti < dem.track_count, "no video track");
const auto& t = dem.track[ti];
// consume SPS
std::vector<uint8_t> nal;
for (size_t si = 0;; ++si) {
int sz;
auto sps = reinterpret_cast<const uint8_t*>(MP4D_read_sps(&dem, ti, si, &sz));
if (!sps) break;
CopyNal(nal, sps, sz);
const auto ret = dec->DecodeFrameNoDelay(nal.data(), nal.size(), yuv, &frame);
Enforce(ret == 0, "SPS decode failure");
}
// consume PPS
for (size_t si = 0;; ++si) {
int sz;
auto pps = reinterpret_cast<const uint8_t*>(MP4D_read_pps(&dem, ti, si, &sz));
if (!pps) break;
CopyNal(nal, pps, sz);
const auto ret = dec->DecodeFrameNoDelay(nal.data(), nal.size(), yuv, &frame);
Enforce(ret == 0, "PPS decode failure");
}
// decode frame
Frame pf = {};
size_t fidx = 0;
for (size_t si = 0; si < t.sample_count; ++si) {
unsigned fsz, time, dur;
const auto off = MP4D_frame_offset(&dem, ti, si, &fsz, &time, &dur);
vst.seekg(off);
Enforce(!!vst, "NAL seek failure");
nal.resize(fsz);
vst.read(reinterpret_cast<char*>(nal.data()), fsz);
Enforce(!!vst, "NAL read failure");
for (size_t i = 0; i < nal.size();) {
uint32_t sz =
(nal[i] << 24) | (nal[i+1] << 16) | (nal[i+2] << 8) | nal[i+3];
nal[i+0] = 0;
nal[i+1] = 0;
nal[i+2] = 0;
nal[i+3] = 1;
sz += 4;
const auto ret = dec->DecodeFrameNoDelay(&nal[i], fsz, yuv, &frame);
Enforce(ret == 0, "frame decode failure");
Frame cf = {yuv, frame};
if (fidx%ut > 0) {
EachFrame(cf, pf);
}
pf = std::move(cf);
++fidx;
i += sz;
}
}
}
int main(int argc, char** argv)
try {
param::parser.ParseCLI(argc, argv);
Exec();
return EXIT_SUCCESS;
} catch (const args::Help&) {
std::cout << param::parser << std::endl;
return EXIT_SUCCESS;
} catch (const std::exception& e) {
std::cerr << e.what() << std::endl;
return EXIT_FAILURE;
}
// Copies the picture described by `frame` out of the decoder-owned planes
// `yuv` (Y, U, V in that order) into tightly packed vectors.
// iStride[0] is used as the luma row pitch and iStride[1] as the pitch of
// both chroma planes; the chroma planes are w/2 x h/2 (rounded down).
Frame::Frame(uint8_t* yuv[3], const SBufferInfo& frame) {
  const auto& sys = frame.UsrData.sSystemBuffer;
  w  = static_cast<int32_t>(sys.iWidth);
  h  = static_cast<int32_t>(sys.iHeight);
  hw = w/2;
  hh = h/2;

  const auto ystride  = static_cast<int32_t>(sys.iStride[0]);
  const auto uvstride = static_cast<int32_t>(sys.iStride[1]);

  // Copies `ph` rows of `pw` bytes from a strided source into a packed plane.
  const auto copy_plane = [](std::vector<uint8_t>& dst, const uint8_t* src,
                             int32_t stride, int32_t pw, int32_t ph) {
    const auto row = static_cast<size_t>(pw);
    dst.resize(row * static_cast<size_t>(ph));
    if (dst.empty()) {
      return;  // nothing to copy; avoids memcpy on a null data() pointer
    }
    for (int32_t y = 0; y < ph; ++y) {
      std::memcpy(dst.data() + static_cast<size_t>(y)*row, src + y*stride, row);
    }
  };

  copy_plane(Y, yuv[0], ystride, w, h);
  // U and V live in yuv[1] and yuv[2]; they used to be read from yuv[0] and
  // yuv[1], which copied luma data into U and the U plane into V.
  copy_plane(U, yuv[1], uvstride, hw, hh);
  copy_plane(V, yuv[2], uvstride, hw, hh);
}
// Replaces the content of `v` with the 4-byte prefix 00 00 00 01 followed by
// the `sz` bytes starting at `buf`.
//
//   v   : destination buffer; its previous content is discarded
//   buf : payload to append after the prefix (may be null only when sz == 0)
//   sz  : payload size in bytes
//
// An empty payload is valid and leaves just the 4-byte prefix; the previous
// implementation formed `&v[4]` in that case, which is an out-of-range
// subscript. The function is noexcept, so an allocation failure terminates
// the program rather than propagating.
static void CopyNal(std::vector<uint8_t>& v, const uint8_t* buf, size_t sz) noexcept {
  v.assign({0, 0, 0, 1});
  v.insert(v.end(), buf, buf + sz);
}

36
thirdparty/CMakeLists.txt vendored Normal file
View File

@ -0,0 +1,36 @@
# Third-party dependencies, downloaded at configure time with FetchContent.
# CMP0077 NEW lets the plain set() calls below override option() defaults of
# the fetched projects.
set(CMAKE_POLICY_DEFAULT_CMP0077 NEW)
include(FetchContent)

# ---- args ----
# repository: https://github.com/Taywee/args
# license : MIT
FetchContent_Declare(
  args
  URL "https://github.com/Taywee/args/archive/refs/tags/6.3.0.zip"
)
set(ARGS_BUILD_EXAMPLE OFF)
set(ARGS_BUILD_UNITTESTS OFF)
FetchContent_MakeAvailable(args)

# ---- minimp4 ----
# repository: https://github.com/lieff/minimp4
# license : CC0
FetchContent_Declare(
  minimp4
  URL "https://github.com/lieff/minimp4/archive/4575afb4f69ace25a1a048e25cc86bf8c8d14f2b.zip"
)
# Only the sources are fetched; the library target is defined by hand below
# and compiled from the local minimp4.c.
FetchContent_Populate(minimp4)
add_library(minimp4)
# SYSTEM has to precede the scope keyword. Written as "PUBLIC SYSTEM" it was
# parsed as a directory named SYSTEM and the headers were not marked as system
# headers.
target_include_directories(minimp4 SYSTEM PUBLIC ${minimp4_SOURCE_DIR})
target_sources(minimp4
  PUBLIC
    "${minimp4_SOURCE_DIR}/minimp4.h"
  PRIVATE
    minimp4.c
)

2
thirdparty/minimp4.c vendored Normal file
View File

@ -0,0 +1,2 @@
/* Translation unit that gives the header-only minimp4 library a place to be
 * compiled: presumably minimp4.h emits its function definitions only when
 * MINIMP4_IMPLEMENTATION is defined before the include — confirm against the
 * header. Every other file should include minimp4.h without the macro. */
#define MINIMP4_IMPLEMENTATION
#include <minimp4.h>