diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..84c048a --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/build/ diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..da78078 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,28 @@ +cmake_minimum_required(VERSION 3.18) + +project(blocky C CXX) + +option(BLOCKY_STATIC OFF) + +set(CMAKE_CXX_STANDARD 20) +set(CMAKE_CXX_STANDARD_REQUIRED ON) + +set(BLOCKY_C_FLAGS + $<$,$,$>: + -Wall -Werror -pedantic-errors -Wextra -Wconversion -Wsign-conversion> + $<$,$>: + -Wno-overloaded-virtual> + $<$: + /W4 /WX> +) +set(BLOCKY_CXX_FLAGS + $<$,$,$>: + -Wall -Werror -pedantic-errors -Wextra -Wconversion -Wsign-conversion> + $<$,$>: + -Wno-overloaded-virtual> + $<$: + /W4 /WX> +) + +add_subdirectory(thirdparty) +add_subdirectory(conv) diff --git a/README.md b/README.md new file mode 100644 index 0000000..95c2099 --- /dev/null +++ b/README.md @@ -0,0 +1,23 @@ +blocky +==== + +## CMake command + +If openh264 was built in `/home/user/openh264`, configure with: +``` +cmake -DCMAKE_CXX_FLAGS=-isystem\ /home/user/openh264/codec/api\ -L/home/user/openh264 .. 
+``` + +## ffmpeg useful commands + +``` +ffmpeg -i in.mp4 -vframes 300 "%d.png" +``` + +``` +ffmpeg -r 30 -i "%d.png" -vcodec libx264 -pix_fmt yuv420p out.mp4 +``` + +``` +ffmpeg -i src.mp4 -t 10s cut.mp4 +``` diff --git a/conv/CMakeLists.txt b/conv/CMakeLists.txt new file mode 100644 index 0000000..2d2e5fc --- /dev/null +++ b/conv/CMakeLists.txt @@ -0,0 +1,4 @@ +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${BLKY_CXX_FLAGS} -I ${PROJECT_SOURCE_DIR}") + +add_executable(video_bprob video_bprob.cc) +target_link_libraries(video_bprob PRIVATE args minimp4 openh264) diff --git a/conv/common.hh b/conv/common.hh new file mode 100644 index 0000000..d9236ec --- /dev/null +++ b/conv/common.hh @@ -0,0 +1,75 @@ +#pragma once + +#include +#include + + +inline void Enforce(bool eval, const std::string& msg) { // throws std::runtime_error carrying msg when eval is false + if (!eval) { + throw std::runtime_error {msg}; + } +} + +inline void CopyNal(std::vector& v, const uint8_t* buf, size_t sz) noexcept { // v becomes the bytes 00 00 00 01 followed by the sz bytes at buf + v.resize(sz+4); + v[0] = 0; + v[1] = 0; + v[2] = 0; + v[3] = 1; + std::memcpy(v.data()+4, buf, sz); // data()+4 stays valid for sz == 0, unlike &v[4] +} + + +struct Frame { // owning copy of one decoded picture, stored as three tightly packed planes + std::vector Y; + std::vector U; + std::vector V; + + int32_t w, h; + int32_t hw, hh; // chroma plane size: half of w and h + + Frame() = default; + Frame(uint8_t* yuv[3], const SBufferInfo& frame) { // copies each plane row by row out of the strided decoder buffers + w = static_cast(frame.UsrData.sSystemBuffer.iWidth); + h = static_cast(frame.UsrData.sSystemBuffer.iHeight); + hw = w/2; + hh = h/2; + + const auto ystride = static_cast(frame.UsrData.sSystemBuffer.iStride[0]); + const auto uvstride = static_cast(frame.UsrData.sSystemBuffer.iStride[1]); + + Y.resize(w*h); + for (int32_t y = 0; y < h; ++y) { + const auto src = yuv[0] + y*ystride; + const auto dst = Y.data() + y*w; + std::memcpy(dst, src, w); + } + + U.resize(hw*hh); + V.resize(hw*hh); + for (int32_t y = 0; y < hh; ++y) { + const auto offset = y*uvstride; // U and V rows share the chroma stride + const auto srcu = yuv[1] + offset; + const auto srcv = yuv[2] + offset; + const auto dstu = U.data() + y*hw; + const auto dstv = V.data() + y*hw; + std::memcpy(dstu, srcu, hw); + std::memcpy(dstv, srcv, 
hw); + } + } + + SSourcePicture GetSourcePic() noexcept { // I420 description of the planes owned by this Frame; the pointers refer to its vectors + SSourcePicture ret = {}; // zero the fields that are not assigned below + ret.iPicWidth = w; + ret.iPicHeight = h; + ret.iColorFormat = videoFormatI420; + ret.iStride[0] = w; + ret.iStride[1] = hw; + ret.iStride[2] = hw; + + ret.pData[0] = Y.data(); + ret.pData[1] = U.data(); + ret.pData[2] = V.data(); + return ret; + } +}; diff --git a/conv/video_bprob.cc b/conv/video_bprob.cc new file mode 100644 index 0000000..c99f98d --- /dev/null +++ b/conv/video_bprob.cc @@ -0,0 +1,355 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +#include "conv/common.hh" + + +namespace param { +using namespace ::args; + +ArgumentParser parser { + "converter: video -> block probability matrix" +}; +HelpFlag help { + parser, "help", "display this menu", {'h', "help"}, +}; + +ValueFlag bw { + parser, "128", "width of blocks (px)", {"block-w"}, 128, +}; +ValueFlag bh { + parser, "128", "height of blocks (px)", {"block-h"}, 128, +}; +ValueFlag utime { + parser, "10", "duration of each feature (frame)", {"utime"}, 10, +}; + +ValueFlag bmw { + parser, "16", "width of blockmatch region (px)", {"bm-w"}, 16, +}; +ValueFlag bmh { + parser, "16", "height of blockmatch region (px)", {"bm-h"}, 16, +}; +ValueFlag bmsw { + parser, "4", "width of blockmatch search region (px)", {"bm-sw"}, 4, +}; +ValueFlag bmsh { + parser, "4", "height of blockmatch search region (px)", {"bm-sh"}, 4, +}; + +enum Output { + kProb, + kLen, + kVec, + kNull, +}; +const std::unordered_map kOutput = { + {"default", kProb}, + {"prob", kProb}, + {"len", kLen}, + {"vec", kVec}, + {"null", kNull}, +}; +MapFlag output { + parser, "prob", "output type (prob, len, vec, null)", {"output"}, kOutput, +}; + +Positional vpath { + parser, "path", "video file path", +}; + +} // namespace param + + +struct Frame { + std::vector Y; + std::vector U; + std::vector V; + + int32_t w, h; + int32_t hw, hh; + + Frame() = default; + Frame(uint8_t* yuv[3], const 
SBufferInfo& frame); +}; + +struct Vec { + double x, y; +}; + +// utilities +static void CopyNal(std::vector&, const uint8_t* buf, size_t sz) noexcept; + + +static Vec BlockMatching(const Frame& cf, const Frame& pf, int32_t bx, int32_t by) { // finds the shift of pf that best matches the cf region at (bx, by); each axis is divided by its search radius + const auto bmw = args::get(param::bmw); + const auto bmh = args::get(param::bmh); + const auto bmsw = args::get(param::bmsw); + const auto bmsh = args::get(param::bmsh); + const auto clip = [](int32_t v, int32_t n) { return v < 0 ? 0 : (v < n ? v : n-1); }; // clamps a coordinate into [0, n) so shifted reads stay inside the plane + int32_t min_sx = 0, min_sy = 0; + double min_score = 1e+100; // INF + for (int32_t sy = -bmsh; sy < bmsh; ++sy) { + for (int32_t sx = -bmsw; sx < bmsw; ++sx) { + double score = 0; // sum of squared luma differences for this shift + for (int32_t y = 0; y < bmh; ++y) { + for (int32_t x = 0; x < bmw; ++x) { + const auto c_off = clip(bx+x, cf.w) + clip(by+y, cf.h)*cf.w; + const auto p_off = clip(bx+x+sx, pf.w) + clip(by+y+sy, pf.h)*pf.w; + const auto diff = static_cast(cf.Y[c_off] - pf.Y[p_off]); + score += diff*diff; + } + } + if (score < min_score) { + min_score = score; + min_sx = sx; + min_sy = sy; + } + } + } + + const auto sxf = static_cast(min_sx) / static_cast(bmsw); + const auto syf = static_cast(min_sy) / static_cast(bmsh); + return { .x = sxf, .y = syf, }; +} + +static double EachBlock(const Frame& cf, const Frame& pf, int32_t bx, int32_t by) { // returns the length of the block's motion vector, printing it first in the len and vec output modes + const auto v = BlockMatching(cf, pf, bx, by); + + const auto len = std::sqrt(v.x*v.x + v.y*v.y); + switch (args::get(param::output)) { + case param::kLen: + std::cout << len << '\n'; + break; + case param::kVec: + std::cout << bx << " " << by << " " << v.x << " " << v.y << '\n'; + break; + default: + break; + } + return len; +} + +static void EachFrame(const Frame& cf, const Frame& pf) { // adds each block's motion length to a running per-block sum that is reset every utime calls + const auto bw = args::get(param::bw); + const auto bh = args::get(param::bh); + const auto ut = args::get(param::utime); + + Enforce(cf.w == pf.w && cf.h == pf.h, "variable frame size is not allowed"); + Enforce(cf.w > bw && cf.h > bh, "block size must be less than frame size"); + + static size_t cnt = 0; + static std::vector probs; + if (cnt%ut == 0) { + probs.clear(); + probs.resize((cf.w/bw) * (cf.h/bh)); 
+ } + + double* prob = probs.data(); + for (int32_t by = 0; by+bh <= cf.h; by+=bh) { // <= visits every whole block, matching the (w/bw)*(h/bh) slots in probs + for (int32_t bx = 0; bx+bw <= cf.w; bx+=bw) { + *(prob++) += EachBlock(cf, pf, bx, by); + } + } + + switch (args::get(param::output)) { + case param::kLen: + case param::kVec: + std::cout << std::endl; + break; + case param::kProb: + if ((cnt+1)%ut == 0) { + for (const auto prob : probs) { + std::cout << prob/(ut-1)/std::sqrt(2) << ' '; + } + std::cout << std::endl; + } + break; + default: + break; + } + ++cnt; +} + +static void Exec() { // validates the flags, then demuxes and decodes the video and passes consecutive frame pairs to EachFrame + const auto bw = args::get(param::bw); + const auto bh = args::get(param::bh); + const auto ut = args::get(param::utime); + Enforce(bw > 0 && bh > 0, "block size must be greater than 0"); + Enforce(ut > 0, "utime must be greater than 0"); + + const auto bmw = args::get(param::bmw); + const auto bmh = args::get(param::bmh); + const auto bmsw = args::get(param::bmsw); + const auto bmsh = args::get(param::bmsh); + Enforce(bmw > 0 && bmh > 0, "block matching region size must be greater than 0"); + Enforce(bmsw > 0 && bmsh > 0, "block matching search region size must be greater than 0"); + + // open video stream + const auto vpath = args::get(param::vpath); + std::ifstream vst {vpath.c_str(), std::ifstream::binary | std::ifstream::ate}; + Enforce(!!vst, "video stream is invalid"); + const auto vsz = vst.tellg(); + + // init decoder + ISVCDecoder* dec = nullptr; + Enforce(WelsCreateDecoder(&dec) == 0 && dec != nullptr, "decoder creation failure"); + + SDecodingParam decp = {}; + decp.sVideoProperty.eVideoBsType = VIDEO_BITSTREAM_DEFAULT; + decp.eEcActiveIdc = ERROR_CON_SLICE_COPY; + Enforce(dec->Initialize(&decp) == 0, "decoder initialization failure"); + + int declv = WELS_LOG_INFO; + dec->SetOption(DECODER_OPTION_TRACE_LEVEL, &declv); + + uint8_t* yuv[3] = {0}; + SBufferInfo frame = {}; + + // demux + MP4D_demux_t dem = {}; + MP4D_open(&dem, [](int64_t off, void* buf, size_t sz, void* ptr) { + auto& vst = *reinterpret_cast(ptr); + vst.seekg(off); + Enforce(!!vst, "seek failure"); + vst.read(reinterpret_cast(buf), sz); + Enforce(!!vst, "read failure"); + return 0; + }, 
&vst, vsz); + + // find video track + int ti; + for (ti = 0; ti < dem.track_count; ++ti) { + const auto& t = dem.track[ti]; + if (t.handler_type == MP4D_HANDLER_TYPE_VIDE) { + break; + } + } + Enforce(ti < dem.track_count, "no video track"); + const auto& t = dem.track[ti]; + + // consume SPS + std::vector nal; + for (size_t si = 0;; ++si) { + int sz; + auto sps = reinterpret_cast(MP4D_read_sps(&dem, ti, si, &sz)); + if (!sps) break; + CopyNal(nal, sps, sz); + + const auto ret = dec->DecodeFrameNoDelay(nal.data(), nal.size(), yuv, &frame); + Enforce(ret == 0, "SPS decode failure"); + } + + // consume PPS + for (size_t si = 0;; ++si) { + int sz; + auto pps = reinterpret_cast(MP4D_read_pps(&dem, ti, si, &sz)); + if (!pps) break; + CopyNal(nal, pps, sz); + + const auto ret = dec->DecodeFrameNoDelay(nal.data(), nal.size(), yuv, &frame); + Enforce(ret == 0, "PPS decode failure"); + } + + // decode frame + Frame pf = {}; + size_t fidx = 0; + for (size_t si = 0; si < t.sample_count; ++si) { + unsigned fsz, time, dur; + const auto off = MP4D_frame_offset(&dem, ti, si, &fsz, &time, &dur); + + vst.seekg(off); + Enforce(!!vst, "NAL seek failure"); + + nal.resize(fsz); + vst.read(reinterpret_cast(nal.data()), fsz); + Enforce(!!vst, "NAL read failure"); + + for (size_t i = 0; i+4 <= nal.size();) { // assumes every NAL carries a 4-byte big-endian length prefix + uint32_t sz = + (nal[i] << 24) | (nal[i+1] << 16) | (nal[i+2] << 8) | nal[i+3]; + Enforce(sz <= nal.size()-i-4, "NAL size exceeds sample size"); + nal[i+0] = 0; + nal[i+1] = 0; + nal[i+2] = 0; + nal[i+3] = 1; + sz += 4; + + const auto ret = dec->DecodeFrameNoDelay(&nal[i], static_cast<int>(sz), yuv, &frame); // pass this NAL only, not the whole sample + Enforce(ret == 0, "frame decode failure"); + if (frame.iBufferStatus != 1) { i += sz; continue; } // the decoder has no picture ready for this NAL + Frame cf = {yuv, frame}; + if (fidx%ut > 0) { + EachFrame(cf, pf); + } + pf = std::move(cf); + + ++fidx; + i += sz; + } + } +} + +int main(int argc, char** argv) +try { + param::parser.ParseCLI(argc, argv); + Exec(); + return EXIT_SUCCESS; +} catch (const args::Help&) { + std::cout << param::parser << std::endl; + return EXIT_SUCCESS; +} catch (const std::exception& e) { + std::cerr << e.what() << std::endl; + 
return EXIT_FAILURE; +} + + +Frame::Frame(uint8_t* yuv[3], const SBufferInfo& frame) { // copies each plane row by row out of the strided decoder buffers + w = static_cast(frame.UsrData.sSystemBuffer.iWidth); + h = static_cast(frame.UsrData.sSystemBuffer.iHeight); + hw = w/2; + hh = h/2; + + const auto ystride = static_cast(frame.UsrData.sSystemBuffer.iStride[0]); + const auto uvstride = static_cast(frame.UsrData.sSystemBuffer.iStride[1]); + + Y.resize(w*h); + for (int32_t y = 0; y < h; ++y) { + const auto src = yuv[0] + y*ystride; + const auto dst = Y.data() + y*w; + std::memcpy(dst, src, w); + } + + U.resize(hw*hh); + V.resize(hw*hh); + for (int32_t y = 0; y < hh; ++y) { + const auto offset = y*uvstride; // U and V rows share the chroma stride + const auto srcu = yuv[1] + offset; // plane 1 feeds U; plane 0 is luma + const auto srcv = yuv[2] + offset; // plane 2 feeds V + const auto dstu = U.data() + y*hw; + const auto dstv = V.data() + y*hw; + std::memcpy(dstu, srcu, hw); + std::memcpy(dstv, srcv, hw); + } +} + +static void CopyNal(std::vector& v, const uint8_t* buf, size_t sz) noexcept { // v becomes the bytes 00 00 00 01 followed by the sz bytes at buf + v.resize(sz+4); + v[0] = 0; + v[1] = 0; + v[2] = 0; + v[3] = 1; + std::memcpy(v.data()+4, buf, sz); // data()+4 stays valid for sz == 0, unlike &v[4] +} diff --git a/thirdparty/CMakeLists.txt b/thirdparty/CMakeLists.txt new file mode 100644 index 0000000..259af91 --- /dev/null +++ b/thirdparty/CMakeLists.txt @@ -0,0 +1,36 @@ +set(CMAKE_POLICY_DEFAULT_CMP0077 NEW) +include(FetchContent) + + +# ---- args ---- +# repository: https://github.com/Taywee/args +# license : MIT + +FetchContent_Declare( + args + URL "https://github.com/Taywee/args/archive/refs/tags/6.3.0.zip" +) + +set(ARGS_BUILD_EXAMPLE OFF) +set(ARGS_BUILD_UNITTESTS OFF) +FetchContent_MakeAvailable(args) + + +# ---- minimp4 ---- +# repository: https://github.com/lieff/minimp4 +# license : CC0 + +FetchContent_Declare( + minimp4 + URL "https://github.com/lieff/minimp4/archive/4575afb4f69ace25a1a048e25cc86bf8c8d14f2b.zip" +) +FetchContent_Populate(minimp4) + +add_library(minimp4) +target_include_directories(minimp4 SYSTEM PUBLIC ${minimp4_SOURCE_DIR}) +target_sources(minimp4 + PUBLIC + "${minimp4_SOURCE_DIR}/minimp4.h" + 
PRIVATE + minimp4.c +) diff --git a/thirdparty/minimp4.c b/thirdparty/minimp4.c new file mode 100644 index 0000000..909b403 --- /dev/null +++ b/thirdparty/minimp4.c @@ -0,0 +1,2 @@ +#define MINIMP4_IMPLEMENTATION +#include