#include #include #include #include #include #include #include #include #include #include #include #include #include "conv/common.hh" namespace param { using namespace ::args; ArgumentParser parser { "converter: video -> block probability matrix" }; HelpFlag help { parser, "help", "display this menu", {'h', "help"}, }; ValueFlag bw { parser, "128", "width of blocks (px)", {"block-w"}, 128, }; ValueFlag bh { parser, "128", "height of blocks (px)", {"block-h"}, 128, }; ValueFlag utime { parser, "10", "duration of each feature (frame)", {"utime"}, 10, }; ValueFlag bmw { parser, "16", "width of blockmatch region (px)", {"bm-w"}, 16, }; ValueFlag bmh { parser, "16", "height of blockmatch region (px)", {"bm-h"}, 16, }; ValueFlag bmsw { parser, "4", "width of blockmatch search region (px)", {"bm-sw"}, 4, }; ValueFlag bmsh { parser, "4", "height of blockmatch search region (px)", {"bm-sh"}, 4, }; enum Output { kProb, kLen, kVec, kNull, }; const std::unordered_map kOutput = { {"default", kProb}, {"prob", kProb}, {"len", kLen}, {"vec", kVec}, {"null", kNull}, }; MapFlag output { parser, "prob", "output type (len, vec, null)", {"output"}, kOutput, }; Positional vpath { parser, "path", "video file path", }; } // namespace param struct Frame { std::vector Y; std::vector U; std::vector V; int32_t w, h; int32_t hw, hh; Frame() = default; Frame(uint8_t* yuv[3], const SBufferInfo& frame); }; struct Vec { double x, y; }; // utilities static void CopyNal(std::vector&, const uint8_t* buf, size_t sz) noexcept; static Vec BlockMatching(const Frame& cf, const Frame& pf, int32_t bx, int32_t by) { const auto bmw = args::get(param::bmw); const auto bmh = args::get(param::bmh); const auto bmsw = args::get(param::bmsw); const auto bmsh = args::get(param::bmsh); int32_t min_sx = 0, min_sy = 0; double min_score = 1e+100; // INF for (int32_t sy = -bmsh; sy < bmsh; ++sy) { for (int32_t sx = -bmsw; sx < bmsw; ++sx) { double score = 0; for (int32_t y = 0; y < bmw; ++y) { for (int32_t x = 0; x < bmh; ++x) { const auto c_off = (bx+x) + (by+y)*cf.w; const auto p_off = (bx+x+sx) + (by+y+sy)*cf.w; const auto diff = static_cast(cf.Y[c_off] - pf.Y[p_off]); score += diff*diff; } } if (score < min_score) { min_score = score; min_sx = sx; min_sy = sy; } } } const auto sxf = static_cast(min_sx) / static_cast(bmsw); const auto syf = static_cast(min_sy) / static_cast(bmsh); return { .x = sxf, .y = syf, }; } static double EachBlock(const Frame& cf, const Frame& pf, int32_t bx, int32_t by) { const auto v = BlockMatching(cf, pf, bx, by); const auto len = std::sqrt(v.x*v.x + v.y*v.y); switch (args::get(param::output)) { case param::kLen: std::cout << len << '\n'; break; case param::kVec: std::cout << bx << " " << by << " " << v.x << " " << v.y << '\n'; break; default: break; } return len; } static void EachFrame(const Frame& cf, const Frame& pf) { const auto bw = args::get(param::bw); const auto bh = args::get(param::bw); const auto ut = args::get(param::utime); Enforce(cf.w == pf.w && cf.h == pf.h, "variable frame size is not allowed"); Enforce(cf.w > bw && cf.h > bh, "block size must be less than frame size"); static size_t cnt = 0; static std::vector probs; if (cnt%ut == 0) { probs.clear(); probs.resize((cf.w/bw) * (cf.h/bh)); } double* prob = probs.data(); for (int32_t by = 0; by+bh < cf.h; by+=bh) { for (int32_t bx = 0; bx+bw < cf.w; bx+=bw) { *(prob++) += EachBlock(cf, pf, bx, by); } } switch (args::get(param::output)) { case param::kLen: case param::kVec: std::cout << std::endl; break; case param::kProb: if ((cnt+1)%ut == 0) { for (const auto prob : probs) { std::cout << prob/(ut-1)/std::sqrt(2) << ' '; } std::cout << std::endl; } break; default: break; } ++cnt; } static void Exec() { const auto bw = args::get(param::bw); const auto bh = args::get(param::bw); const auto ut = args::get(param::utime); Enforce(bw > 0 && bh > 0, "block size must be greater than 0"); Enforce(ut > 0, "utime must be greater than 0"); const auto bmw = args::get(param::bmw); const auto bmh = args::get(param::bmh); const auto bmsw = args::get(param::bmw); const auto bmsh = args::get(param::bmh); Enforce(bmw > 0 && bmh > 0, "block matching region size must be greater than 0"); Enforce(bmsw > 0 && bmsh > 0, "block matching search region size must be greater than 0"); // open video stream const auto vpath = args::get(param::vpath); std::ifstream vst {vpath.c_str(), std::ifstream::binary | std::ifstream::ate}; Enforce(!!vst, "video stream is invalid"); const auto vsz = vst.tellg(); // init decoder ISVCDecoder* dec; WelsCreateDecoder(&dec); SDecodingParam decp = {}; decp.sVideoProperty.eVideoBsType = VIDEO_BITSTREAM_DEFAULT; decp.eEcActiveIdc = ERROR_CON_SLICE_COPY; dec->Initialize(&decp); int declv = WELS_LOG_INFO; dec->SetOption(DECODER_OPTION_TRACE_LEVEL, &declv); uint8_t* yuv[3] = {0}; SBufferInfo frame = {}; // demux MP4D_demux_t dem = {}; MP4D_open(&dem, [](int64_t off, void* buf, size_t sz, void* ptr) { auto& vst = *reinterpret_cast(ptr); vst.seekg(off); Enforce(!!vst, "seek failure"); vst.read(reinterpret_cast(buf), sz); Enforce(!!vst, "read failure"); return 0; }, &vst, vsz); // find video track int ti; for (ti = 0; ti < dem.track_count; ++ti) { const auto& t = dem.track[ti]; if (t.handler_type == MP4D_HANDLER_TYPE_VIDE) { break; } } Enforce(ti < dem.track_count, "no video track"); const auto& t = dem.track[ti]; // consume SPS std::vector nal; for (size_t si = 0;; ++si) { int sz; auto sps = reinterpret_cast(MP4D_read_sps(&dem, ti, si, &sz)); if (!sps) break; CopyNal(nal, sps, sz); const auto ret = dec->DecodeFrameNoDelay(nal.data(), nal.size(), yuv, &frame); Enforce(ret == 0, "SPS decode failure"); } // consume PPS for (size_t si = 0;; ++si) { int sz; auto pps = reinterpret_cast(MP4D_read_pps(&dem, ti, si, &sz)); if (!pps) break; CopyNal(nal, pps, sz); const auto ret = dec->DecodeFrameNoDelay(nal.data(), nal.size(), yuv, &frame); Enforce(ret == 0, "PPS decode failure"); } // decode frame Frame pf = {}; size_t fidx = 0; for (size_t si = 0; si < t.sample_count; ++si) { unsigned fsz, time, dur; const auto off = MP4D_frame_offset(&dem, ti, si, &fsz, &time, &dur); vst.seekg(off); Enforce(!!vst, "NAL seek failure"); nal.resize(fsz); vst.read(reinterpret_cast(nal.data()), fsz); Enforce(!!vst, "NAL read failure"); for (size_t i = 0; i < nal.size();) { uint32_t sz = (nal[i] << 24) | (nal[i+1] << 16) | (nal[i+2] << 8) | nal[i+3]; nal[i+0] = 0; nal[i+1] = 0; nal[i+2] = 0; nal[i+3] = 1; sz += 4; const auto ret = dec->DecodeFrameNoDelay(&nal[i], fsz, yuv, &frame); Enforce(ret == 0, "frame decode failure"); Frame cf = {yuv, frame}; if (fidx%ut > 0) { EachFrame(cf, pf); } pf = std::move(cf); ++fidx; i += sz; } } } int main(int argc, char** argv) try { param::parser.ParseCLI(argc, argv); Exec(); return EXIT_SUCCESS; } catch (const args::Help&) { std::cout << param::parser << std::endl; return EXIT_SUCCESS; } catch (const std::exception& e) { std::cerr << e.what() << std::endl; return EXIT_FAILURE; } Frame::Frame(uint8_t* yuv[3], const SBufferInfo& frame) { w = static_cast(frame.UsrData.sSystemBuffer.iWidth); h = static_cast(frame.UsrData.sSystemBuffer.iHeight); hw = w/2; hh = h/2; const auto ystride = static_cast(frame.UsrData.sSystemBuffer.iStride[0]); const auto uvstride = static_cast(frame.UsrData.sSystemBuffer.iStride[1]); Y.resize(w*h); for (int32_t y = 0; y < h; ++y) { const auto src = yuv[0] + y*ystride; const auto dst = Y.data() + y*w; std::memcpy(dst, src, w); } U.resize(hw*hh); V.resize(hw*hh); for (int32_t y = 0; y < hh; ++y) { const auto offset = y*uvstride; const auto srcu = yuv[0] + y*uvstride; const auto srcv = yuv[1] + y*uvstride; const auto dstu = U.data() + y*hw; const auto dstv = V.data() + y*hw; std::memcpy(dstu, srcu, hw); std::memcpy(dstv, srcv, hw); } } static void CopyNal(std::vector& v, const uint8_t* buf, size_t sz) noexcept { v.resize(sz+4); v[0] = 0; v[1] = 0; v[2] = 0; v[3] = 1; std::memcpy(&v[4], buf, sz); }