add VideoDecoder/Encoder for blocky CLI tool

2022-07-04 17:50:55 +09:00 · 2022-07-04 17:50:55 +09:00 · c9f19b960f
commit c9f19b960f
parent b88b2fe47e
4 changed files with 499 additions and 235 deletions
--- a/blocky/CMakeLists.txt
+++ b/blocky/CMakeLists.txt
@ -7,9 +7,13 @@ target_sources(blocky
    bytes.hh
    common.hh
    video_encoder.hh
    video_decoder.hh
 )
 target_link_libraries(blocky
  PUBLIC
    args
    liblocky
    minimp4
    openh264
 )
--- a/blocky/main.cc
+++ b/blocky/main.cc
@ -2,6 +2,7 @@
 #include <exception>
 #include <iomanip>
 #include <iostream>
 #include <optional>
 #include <tuple>
 #include "common.hh"
@ -10,267 +11,278 @@
 #include <args.hxx>
 #include "video_decoder.hh"
 #include "video_encoder.hh"
 using namespace blky;
 int main(int argc, char** argv) {
  args::ArgumentParser parser(
      "liblocky command line tool",
      "liblocky allow you to embed bits into video data secretly");
-  args::MapFlag<std::string, DataFlow> from {
+args::ArgumentParser parser(
-      parser, kDataFlowList, "input layer specifier", {"from"}, kDataFlowMap, args::Options::Required};
+    "liblocky command line tool",
    "liblocky allow you to embed a bit array into video data secretly");
-  args::MapFlag<std::string, DataFlow> to {
+args::MapFlag<std::string, DataFlow> from {
-      parser, kDataFlowList, "output layer specifier", {"to"}, kDataFlowMap, args::Options::Required};
+    parser, kDataFlowList, "input layer specifier", {"from"}, kDataFlowMap, args::Options::Required};
-  args::Group src_group {
+args::MapFlag<std::string, DataFlow> to {
-    parser, "source specifier", args::Group::Validators::Xor, args::Options::Required};
+    parser, kDataFlowList, "output layer specifier", {"to"}, kDataFlowMap, args::Options::Required};
  args::Flag src_stdin {src_group, "src-stdin", "read from stdin", {"src-stdin", "stdin"}};
  args::Flag src_stdin_hex {src_group, "src-stdin-hex", "read hex text from stdin", {"src-stdin-hex", "stdin-hex"}};
-  args::Group dst_group  {
+args::Group src_group {
-    parser, "destination specifier", args::Group::Validators::Xor, args::Options::Required};
+  parser, "source specifier", args::Group::Validators::Xor, args::Options::Required};
-  args::Flag dst_stdout {dst_group, "dst-stdout", "write to stdout", {"dst-stdout", "stdout"}};
+args::Flag src_stdin {src_group, "src-stdin", "read from stdin", {"src-stdin", "stdin"}};
-  args::Flag dst_stdout_hex {dst_group, "dst-stdout-hex", "write to stdout as hex text", {"dst-stdout-hex", "stdout-hex"}};
+args::Flag src_stdin_hex {src_group, "src-stdin-hex", "read hex text from stdin", {"src-stdin-hex", "stdin-hex"}};
 args::ValueFlag<std::string> src_video {src_group, "path", "video input", {"src-video"}};
-  args::Group param_group {
+args::Group dst_group  {
-    parser, "general parameters", args::Group::Validators::DontCare
+  parser, "destination specifier", args::Group::Validators::Xor, args::Options::Required};
-  };
+args::Flag dst_stdout {dst_group, "dst-stdout", "write to stdout", {"dst-stdout", "stdout"}};
-  args::ValueFlag<std::tuple<uint32_t, uint32_t>> param_block_num {
+args::Flag dst_stdout_hex {dst_group, "dst-stdout-hex", "write to stdout as hex text", {"dst-stdout-hex", "stdout-hex"}};
-    param_group,
+args::ValueFlag<std::string> dst_video {dst_group, "path", "video output", {"dst-video"}};
    "int>0,int>0",
    "number of features",
    {"feature-num"},
    {16, 16}
  };
  args::ValueFlag<uint32_t> param_block_first {
    param_group,
    "int>=0",
    "an index of first block where feature will be embedded. used when encoding",
    {"feature-first-index"},
    0
  };
  args::ValueFlag<uint8_t> param_feat_bits {
    param_group,
    "int>0",
    "number of bits that can be represented by a single feature",
    {"feature-bits"},
    1
  };
  args::ValueFlag<uint8_t> param_seed {
    param_group,
    "int>0",
    "seed number for hopping randomization",
    {"seed"},
    123
  };
-  args::Group probgen_group {
+args::Group param_group {
-    parser, "params for feature probability generator", args::Group::Validators::DontCare
+  parser, "general parameters", args::Group::Validators::DontCare
-  };
+};
-  args::ValueFlag<double> probgen_false_positive {
+args::ValueFlag<std::tuple<uint32_t, uint32_t>> param_block_num {
-    probgen_group,
+  param_group,
-    "0<=double<=1",
+  "int>0,int>0",
-    "false positive ratio in feature probability generation",
+  "number of features",
-    {"probgen-false-positive"},
+  {"feature-num"},
-    0
+  {16, 16}
-  };
+};
-  args::ValueFlag<double> probgen_false_negative {
+args::ValueFlag<uint32_t> param_block_first {
-    probgen_group,
+  param_group,
-    "0<=double<=1",
+  "int>=0",
-    "false negative ratio in feature probability generation",
+  "an index of first block where feature will be embedded. used when encoding",
-    {"probgen-false-negative"},
+  {"feature-first-index"},
-    0
+  0
-  };
+};
-  args::ValueFlag<uint64_t> probgen_seed {
+args::ValueFlag<uint8_t> param_feat_bits {
-    probgen_group,
+  param_group,
-    "int>0",
+  "int>0",
-    "random seed",
+  "number of bits that can be represented by a single feature",
-    {"probgen-seed"},
+  {"feature-bits"},
-    1
+  1
-  };
+};
-  args::Flag probgen_normalize {
+args::ValueFlag<uint8_t> param_seed {
-    probgen_group,
+  param_group,
-    "probgen-normalize",
+  "int>0",
-    "normalize probabilities",
+  "seed number for hopping randomization",
-    {"probgen-normalize"},
+  {"seed"},
-  };
+  123
 };
-  try {
+args::Group probgen_group {
-    parser.ParseCLI(argc, argv);
+  parser, "params for feature probability generator", args::Group::Validators::DontCare
 };
 args::ValueFlag<double> probgen_false_positive {
  probgen_group,
  "0<=double<=1",
  "false positive ratio in feature probability generation",
  {"probgen-false-positive"},
  0
 };
 args::ValueFlag<double> probgen_false_negative {
  probgen_group,
  "0<=double<=1",
  "false negative ratio in feature probability generation",
  {"probgen-false-negative"},
  0
 };
 args::ValueFlag<uint64_t> probgen_seed {
  probgen_group,
  "int>0",
  "random seed",
  {"probgen-seed"},
  1
 };
 args::Flag probgen_normalize {
  probgen_group,
  "probgen-normalize",
  "normalize probabilities",
  {"probgen-normalize"},
 };
    std::vector<uint8_t>  bytes;
    std::vector<uint32_t> features;
    std::vector<double>   feature_probs;
-    // read input
+std::vector<uint8_t>        bytes;
 std::vector<uint32_t>       features;
 std::vector<double>         feature_probs;
 std::optional<VideoDecoder> decoder;
 int main(int argc, char** argv)
 try {
  parser.ParseCLI(argc, argv);
  // read input
  switch (args::get(from)) {
  case kBytes:
    if (src_stdin) {
      std::string temp;
      std::cin >> temp;
      bytes = {temp.begin(), temp.end()};
    } else if (src_stdin_hex) {
      for (;;) {
        char buf[2];
        std::cin >> buf[0] >> buf[1];
        if (std::cin.eof()) break;
        bytes.push_back(ToHex(buf[0]) << 4 | ToHex(buf[1]));
      }
    } else {
      throw std::runtime_error {"invalid source format for bytes"};
    }
    break;
  case kFeatures:
    if (src_stdin) {
      features = ReadAll<uint32_t>(std::cin);
    } else {
      throw std::runtime_error {"invalid source format for features"};
    }
    break;
  case kFeatureProbs:
    if (src_stdin) {
      feature_probs = ReadAll<double>(std::cin);
    } else {
      throw std::runtime_error {"invalid source format for feature probs"};
    }
    break;
  case kVideo:
    if (src_video) {
      decoder.emplace(args::get(src_video));
    } else {
      throw std::runtime_error {"invalid source format for video"};
    }
  }
  if (args::get(from) < args::get(to)) {
    // execute encoding
    switch (args::get(from)) {
    case kBytes:
-      if (src_stdin) {
+      if (args::get(to) == kBytes) break;
-        std::string temp;
+      features = BytesEncoder(
-        std::cin >> temp;
+          bytes,
-        bytes = {temp.begin(), temp.end()};
+          args::get(param_block_num),
-      } else if (src_stdin_hex) {
+          args::get(param_feat_bits),
-        for (;;) {
+          args::get(param_block_first),
-          char buf[2];
+          args::get(param_seed));
-          std::cin >> buf[0] >> buf[1];
+      /* fallthrough */
          if (std::cin.eof()) break;
          bytes.push_back(ToHex(buf[0]) << 4 | ToHex(buf[1]));
        }
      } else {
        throw std::runtime_error {"invalid source format for bytes"};
      }
      break;
    case kFeatures:
-      if (src_stdin) {
+      if (args::get(to) == kFeatures) break;
-        features = ReadAll<uint32_t>(std::cin);
+      if (args::get(to) == kFeatureProbs) {
-      } else {
+        feature_probs = GenerateFeatureProbs(
        throw std::runtime_error {"invalid source format for features"};
      }
      break;
    case kFeatureProbs:
      if (src_stdin) {
        feature_probs = ReadAll<double>(std::cin);
      } else {
        throw std::runtime_error {"invalid source format for feature probs"};
      }
      break;
    case kVideo:
      assert(false);
    }
    if (args::get(from) < args::get(to)) {
      // execute encoding
      switch (args::get(from)) {
      case kBytes:
        if (args::get(to) == kBytes) break;
        features = BytesEncoder(
            bytes,
            args::get(param_block_num),
            args::get(param_feat_bits),
            args::get(param_block_first),
            args::get(param_seed));
        /* fallthrough */
      case kFeatures:
        if (args::get(to) == kFeatures) break;
        if (args::get(to) == kFeatureProbs) {
          feature_probs = GenerateFeatureProbs(
              features,
              args::get(param_block_num),
              args::get(probgen_false_positive),
              args::get(probgen_false_negative),
              args::get(probgen_seed),
              probgen_normalize);
          break;
        }
        // TODO embed into video
        assert(false);
        /* fallthrough */
      case kVideo:
        if (args::get(to) == kVideo) break;
        assert(false);
      case kFeatureProbs:
        throw std::runtime_error("couldn't start flow from the data");
      }
    } else if (args::get(from) > args::get(to)) {
      // execute decoding
      switch (args::get(from)) {
      case kVideo:
        if (args::get(to) == kVideo) break;
        // TODO extract feature probs // features = XX
        assert(false);
      case kFeatureProbs:
        if (args::get(to) == kFeatureProbs) break;
        features = PathfindFeatures(
            feature_probs,
            args::get(param_block_num),
            args::get(param_feat_bits),
            args::get(param_seed));
        /* fallthrough */
      case kFeatures:
        if (args::get(to) == kFeatures) break;
        bytes = BytesDecoder(
            features,
            args::get(param_block_num),
-            args::get(param_feat_bits),
+            args::get(probgen_false_positive),
-            args::get(param_seed));
+            args::get(probgen_false_negative),
-        /* fallthrough */
+            args::get(probgen_seed),
-
+            probgen_normalize);
-      case kBytes:
+        break;
        if (args::get(to) == kBytes) break;
        assert(false);
      }
-    }
+      // TODO embed into video
-
+      assert(false);
-    // output
+      /* fallthrough */
    switch (args::get(to)) {
    case kBytes:
      if (dst_stdout) {
        std::cout << std::string {bytes.begin(), bytes.end()} << std::endl;
      } else if (dst_stdout_hex) {
        for (auto c : bytes) {
          std::cout << std::hex << (int) c;
        }
        std::cout << std::endl;
      } else {
        throw std::runtime_error {"invalid destination format for bytes"};
      }
      break;
    case kFeatures:
      if (dst_stdout) {
        for (auto& f : features) std::cout << f << "\n";
      } else {
        throw std::runtime_error {"invalid destination format for features"};
      }
      break;
    case kFeatureProbs:
      if (dst_stdout) {
        const auto size = args::get(param_block_num);
        const auto cols = std::get<0>(size) * std::get<1>(size);
        for (size_t i = 0; i < feature_probs.size();) {
          for (size_t j = 0; i < feature_probs.size() && j < cols; ++i, ++j) {
            std::cout << feature_probs[i] << " ";
          }
          std::cout << "\n";
        }
      } else {
        throw std::runtime_error {"invalid destination format for feature probs"};
      }
      break;
    case kVideo:
-      break;
+      if (args::get(to) == kVideo) break;
      assert(false);
    case kFeatureProbs:
      throw std::runtime_error("couldn't start flow from the data");
    }
-  } catch (const args::Help&) {
+  } else if (args::get(from) > args::get(to)) {
-    std::cout << parser << std::endl;
+    // execute decoding
-    return 0;
+    switch (args::get(from)) {
    case kVideo:
      if (args::get(to) == kVideo) break;
      // TODO extract feature probs // features = XX
      assert(false);
-  } catch (const args::ParseError& e) {
+    case kFeatureProbs:
-    std::cerr << e.what() << std::endl;
+      if (args::get(to) == kFeatureProbs) break;
-    std::cerr << parser   << std::endl;
+      features = PathfindFeatures(
-    return 1;
+          feature_probs,
          args::get(param_block_num),
          args::get(param_feat_bits),
          args::get(param_seed));
      /* fallthrough */
-  } catch (const args::ValidationError& e) {
+    case kFeatures:
-    std::cerr << e.what() << std::endl;
+      if (args::get(to) == kFeatures) break;
-    std::cerr << parser   << std::endl;
+      bytes = BytesDecoder(
-    return 1;
+          features,
          args::get(param_block_num),
          args::get(param_feat_bits),
          args::get(param_seed));
      /* fallthrough */
-  } catch (const std::runtime_error& e) {
+    case kBytes:
-    std::cerr << "runtime error: " << e.what() << std::endl;
+      if (args::get(to) == kBytes) break;
-    return 1;
+      assert(false);
    }
  }
  // output
  switch (args::get(to)) {
  case kBytes:
    if (dst_stdout) {
      std::cout << std::string {bytes.begin(), bytes.end()} << std::endl;
    } else if (dst_stdout_hex) {
      for (auto c : bytes) {
        std::cout << std::hex << (int) c;
      }
      std::cout << std::endl;
    } else {
      throw std::runtime_error {"invalid destination format for bytes"};
    }
    break;
  case kFeatures:
    if (dst_stdout) {
      for (auto& f : features) std::cout << f << "\n";
    } else {
      throw std::runtime_error {"invalid destination format for features"};
    }
    break;
  case kFeatureProbs:
    if (dst_stdout) {
      const auto size = args::get(param_block_num);
      const auto cols = std::get<0>(size) * std::get<1>(size);
      for (size_t i = 0; i < feature_probs.size();) {
        for (size_t j = 0; i < feature_probs.size() && j < cols; ++i, ++j) {
          std::cout << feature_probs[i] << " ";
        }
        std::cout << "\n";
      }
    } else {
      throw std::runtime_error {"invalid destination format for feature probs"};
    }
    break;
  case kVideo:
    break;
  }
  return 0;
 } catch (const args::Help&) {
  std::cout << parser << std::endl;
  return 0;
 } catch (const args::ParseError& e) {
  std::cerr << e.what() << std::endl;
  std::cerr << parser   << std::endl;
  return 1;
 } catch (const args::ValidationError& e) {
  std::cerr << e.what() << std::endl;
  std::cerr << parser   << std::endl;
  return 1;
 } catch (const std::runtime_error& e) {
  std::cerr << "runtime error: " << e.what() << std::endl;
  return 1;
 }
--- a/blocky/video_decoder.hh
+++ b/blocky/video_decoder.hh
@ -0,0 +1,169 @@
 #pragma once
 #include <cassert>
 #include <cstdint>
 #include <cstring>
 #include <exception>
 #include <fstream>
 #include <string_view>
 #include <vector>
 #include <minimp4.h>
 #include <wels/codec_api.h>
 // Reads an MP4 file through minimp4 and pushes the H.264 stream of its only
 // video track into an OpenH264 decoder.
 //
 // The object owns the input stream, the demuxer index and the decoder
 // instance, so it is neither copyable nor movable.
 class VideoDecoder final {
 public:
  VideoDecoder() = delete;
  // Opens `path`, locates the video track and hands every SPS/PPS stored in
  // the container to the decoder.
  //
  // Throws std::runtime_error when the file cannot be opened or parsed, when
  // it does not hold exactly one video track, or when the decoder rejects a
  // parameter set.
  VideoDecoder(const std::string& path) :
      file_(path, std::ifstream::binary | std::ifstream::ate),
      size_(file_ ? static_cast<size_t>(file_.tellg()) : 0) {
    if (!file_) {
      throw std::runtime_error {"failed to open the video file"};
    }
    try {
      Setup();
    } catch (...) {
      // the destructor does not run for a constructor that throws, so the
      // resources acquired so far must be released here
      Release();
      throw;
    }
  }
  ~VideoDecoder() noexcept {
    Release();
  }
  VideoDecoder(const VideoDecoder&) = delete;
  VideoDecoder(VideoDecoder&&) = delete;
  VideoDecoder& operator=(const VideoDecoder&) = delete;
  VideoDecoder& operator=(VideoDecoder&&) = delete;
  // Feeds the next NAL unit of the video track to the decoder, loading the
  // next sample from the file whenever the current one is used up.
  //
  // Throws std::runtime_error when no sample is left, when a sample cannot be
  // read, when a NAL unit is truncated, or when the decoder reports an error.
  void Decode() {
    if (temp_consumed_ >= temp_.size()) {
      LoadSample();
    }
    // NOTE(review): a 4-byte big-endian length prefix is assumed for every
    // NAL unit, as in the original code -- confirm against the track's avcC
    const size_t pos = temp_consumed_;
    if (temp_.size() - pos < 4) {
      throw std::runtime_error {"truncated NAL unit"};
    }
    // widen before shifting so the top byte never lands in a sign bit
    const uint32_t payload =
        (static_cast<uint32_t>(temp_[pos+0]) << 24) |
        (static_cast<uint32_t>(temp_[pos+1]) << 16) |
        (static_cast<uint32_t>(temp_[pos+2]) <<  8) |
        (static_cast<uint32_t>(temp_[pos+3]) <<  0);
    if (payload > temp_.size() - pos - 4) {
      throw std::runtime_error {"truncated NAL unit"};
    }
    const size_t nal_size = 4 + static_cast<size_t>(payload);
    // replace the length prefix by an Annex-B start code
    temp_[pos+0] = 0;
    temp_[pos+1] = 0;
    temp_[pos+2] = 0;
    temp_[pos+3] = 1;
    // advance first, so a failing unit is not parsed again with its length
    // prefix already overwritten
    temp_consumed_ = pos + nal_size;
    // decode from the current unit, not from the beginning of the sample
    if (decoder_->DecodeFrameNoDelay(temp_.data() + pos, static_cast<int>(nal_size), yuv_, &frame_)) {
      throw std::runtime_error {"failed to decode a frame"};
    }
  }
 private:
  std::ifstream file_;
  size_t size_;
  ISVCDecoder* decoder_ = nullptr;
  MP4D_demux_t demuxer_ = {};
  size_t track_ = SIZE_MAX;
  uint8_t*    yuv_[3] = {0};
  SBufferInfo frame_  = {};
  size_t      count_  = 0;
  size_t temp_consumed_ = 0;
  std::vector<uint8_t> temp_;
  // set once the matching init call succeeded, so Release() only undoes what
  // was actually done
  bool decoder_ready_ = false;
  bool demuxer_open_  = false;
  // Creates the decoder, parses the container index, selects the video track
  // and decodes the stored parameter sets.
  void Setup() {
    // init objects
    SDecodingParam dparam = {};
    dparam.sVideoProperty.eVideoBsType = VIDEO_BITSTREAM_DEFAULT;
    dparam.eEcActiveIdc = ERROR_CON_SLICE_COPY;
    if (WelsCreateDecoder(&decoder_) || !decoder_) {
      throw std::runtime_error {"failed to create openh264 decoder"};
    }
    if (decoder_->Initialize(&dparam)) {
      throw std::runtime_error {"failed to init openh264 decoder"};
    }
    decoder_ready_ = true;
    int lv = WELS_LOG_DEBUG;
    decoder_->SetOption(DECODER_OPTION_TRACE_LEVEL, &lv);
    // NOTE(review): minimp4 appears to free its partial index by itself when
    // MP4D_open fails, hence no MP4D_close on that path -- confirm
    if (!MP4D_open(&demuxer_, ReadCallback, this, static_cast<int64_t>(size_))) {
      throw std::runtime_error {"failed to parse the video file"};
    }
    demuxer_open_ = true;
    // find video track
    for (size_t i = 0; i < demuxer_.track_count; ++i) {
      if (demuxer_.track[i].handler_type != MP4D_HANDLER_TYPE_VIDE) continue;
      if (track_ != SIZE_MAX) {
        throw std::runtime_error {"there are many video tracks"};
      }
      track_ = i;
    }
    if (track_ == SIZE_MAX) {
      throw std::runtime_error {"there is no video track"};
    }
    // setup decoder
    const auto track = static_cast<unsigned int>(track_);
    for (int i = 0;; ++i) {
      int size = 0;
      const void* sps = MP4D_read_sps(&demuxer_, track, i, &size);
      if (!sps) break;
      DecodeParameterSet(sps, size, "failed to decode SPS");
    }
    for (int i = 0;; ++i) {
      int size = 0;
      const void* pps = MP4D_read_pps(&demuxer_, track, i, &size);
      if (!pps) break;
      DecodeParameterSet(pps, size, "failed to decode PPS");
    }
  }
  // Prepends an Annex-B start code to a raw parameter set and decodes it;
  // `err` is the message of the exception thrown on failure.
  void DecodeParameterSet(const void* data, int size, const char* err) {
    if (size < 0) {
      throw std::runtime_error {err};
    }
    // the vector is zero-filled, so setting the 4th byte completes 00 00 00 01
    std::vector<uint8_t> nal(4 + static_cast<size_t>(size));
    nal[3] = 1;
    if (size > 0) {
      std::memcpy(&nal[4], data, static_cast<size_t>(size));
    }
    if (decoder_->DecodeFrameNoDelay(nal.data(), static_cast<int>(nal.size()), yuv_, &frame_)) {
      throw std::runtime_error {err};
    }
  }
  // Loads sample number `count_` of the video track into `temp_` and moves on
  // to the following sample.
  void LoadSample() {
    if (count_ >= demuxer_.track[track_].sample_count) {
      throw std::runtime_error {"no more frames to decode"};
    }
    unsigned size = 0;
    unsigned time = 0;
    unsigned dur  = 0;
    const auto off = MP4D_frame_offset(
        &demuxer_,
        static_cast<unsigned int>(track_),
        static_cast<unsigned int>(count_),
        &size, &time, &dur);
    if (size == 0) {
      throw std::runtime_error {"empty sample"};
    }
    temp_.resize(size);
    temp_consumed_ = 0;
    // an earlier short read leaves failbit set, which would make seekg fail
    file_.clear();
    file_.seekg(static_cast<std::streamoff>(off));
    file_.read(reinterpret_cast<char*>(temp_.data()), static_cast<std::streamsize>(size));
    if (!file_) {
      temp_.clear();
      throw std::runtime_error {"failed to read a sample"};
    }
    // without this the same sample would be decoded over and over
    ++count_;
  }
  // Releases the decoder and the demuxer index; does nothing for resources
  // that were never acquired.
  void Release() noexcept {
    if (decoder_) {
      if (decoder_ready_) decoder_->Uninitialize();
      WelsDestroyDecoder(decoder_);
      decoder_ = nullptr;
      decoder_ready_ = false;
    }
    if (demuxer_open_) {
      MP4D_close(&demuxer_);
      demuxer_open_ = false;
    }
  }
  // minimp4 read callback: copies `size` bytes located at file offset `off`
  // into `buf`. Returns 0 on success and non-zero when the request cannot be
  // served completely; a request crossing the end of the file still copies
  // the bytes that are available.
  static int ReadCallback(int64_t off, void* buf, size_t size, void* ptr) noexcept {
    auto self = static_cast<VideoDecoder*>(ptr);
    if (off < 0 || static_cast<uint64_t>(off) > self->size_) {
      return 1;
    }
    const size_t avail = self->size_ - static_cast<size_t>(off);
    const size_t n = size < avail ? size : avail;
    // drop the failure state a previous short read may have left behind
    self->file_.clear();
    self->file_.seekg(static_cast<std::streamoff>(off));
    if (!self->file_) {
      return 1;
    }
    self->file_.read(static_cast<char*>(buf), static_cast<std::streamsize>(n));
    if (!self->file_) {
      return 1;
    }
    return n != size;
  }
 };
--- a/blocky/video_encoder.hh
+++ b/blocky/video_encoder.hh
@ -0,0 +1,79 @@
 #pragma once
 #include <cassert>
 #include <cstdint>
 #include <cstring>
 #include <exception>
 #include <fstream>
 #include <string>
 #include <minimp4.h>
 #include <wels/codec_api.h>
 // Compresses raw pictures with OpenH264 and stores the resulting H.264
 // stream in an MP4 file through minimp4.
 //
 // The object owns the output stream, the encoder and the muxer, so it is
 // neither copyable nor movable.
 class VideoEncoder final {
 public:
  VideoEncoder() = delete;
  // Opens `path` for writing and initializes the encoder and the muxer from
  // the parameters in `p`.
  //
  // Throws std::runtime_error when the file cannot be opened, when the frame
  // rate is not positive, or when the encoder or the muxer fails to start.
  VideoEncoder(const std::string& path, const SEncParamBase& p) :
      file_(path, std::ofstream::binary), fps_(p.fMaxFrameRate) {
    if (!file_) {
      throw std::runtime_error {"failed to open the output file"};
    }
    // the sample duration is derived from 90000/fps, which is meaningless
    // (and undefined when converted to an integer) for fps <= 0
    if (!(fps_ > 0)) {
      throw std::runtime_error {"frame rate must be positive"};
    }
    if (WelsCreateSVCEncoder(&encoder_)) {
      throw std::runtime_error {"failed to init openh264 encoder"};
    }
    try {
      muxer_ = MP4E_open(false, false, this, WriteCallback);
      if (!muxer_) {
        throw std::runtime_error {"failed to init mp4 muxer"};
      }
      if (MP4E_STATUS_OK != mp4_h26x_write_init(&writer_, muxer_, p.iPicWidth, p.iPicHeight, false)) {
        throw std::runtime_error {"failed to init h26x writer"};
      }
      writer_ready_ = true;
      if (cmResultSuccess != encoder_->Initialize(&p)) {
        throw std::runtime_error {"failed to init openh264 encoder"};
      }
      encoder_ready_ = true;
      int lv = WELS_LOG_DEBUG;
      encoder_->SetOption(ENCODER_OPTION_TRACE_LEVEL, &lv);
      int fmt = videoFormatI420;
      encoder_->SetOption(ENCODER_OPTION_DATAFORMAT, &fmt);
    } catch (...) {
      // the destructor does not run for a constructor that throws, so the
      // resources acquired so far must be released here
      Release();
      throw;
    }
  }
  ~VideoEncoder() noexcept {
    Release();
  }
  VideoEncoder(const VideoEncoder&) = delete;
  VideoEncoder(VideoEncoder&&) = delete;
  VideoEncoder& operator=(const VideoEncoder&) = delete;
  VideoEncoder& operator=(VideoEncoder&&) = delete;
  // Encodes one picture and appends every NAL unit produced for it to the
  // MP4 file. A picture the encoder decides to skip writes nothing.
  //
  // Throws std::runtime_error when encoding or writing fails.
  void Encode(const SSourcePicture& pic) {
    // zero-filled so that no field is read uninitialized when the encoder
    // leaves part of the structure untouched
    SFrameBSInfo info = {};
    if (cmResultSuccess != encoder_->EncodeFrame(&pic, &info)) {
      throw std::runtime_error {"failed to encode a frame"};
    }
    if (info.eFrameType == videoFrameTypeSkip) return;
    // duration handed to the writer, in 90 kHz ticks
    const auto duration = static_cast<unsigned int>(90000./fps_);
    for (int i = 0; i < info.iLayerNum; ++i) {
      const auto& lay = info.sLayerInfo[i];
      // the NAL units of a layer are stored back to back in pBsBuf
      const uint8_t* buf = lay.pBsBuf;
      for (int j = 0; j < lay.iNalCount; ++j) {
        const int len = lay.pNalLengthInByte[j];
        if (MP4E_STATUS_OK != mp4_h26x_write_nal(&writer_, buf, len, duration)) {
          throw std::runtime_error {"failed to write a NAL unit"};
        }
        buf += len;
      }
    }
  }
 private:
  std::ofstream file_;
  ISVCEncoder*      encoder_ = nullptr;
  MP4E_mux_t*       muxer_   = nullptr;
  mp4_h26x_writer_t writer_  = {};
  double fps_;
  // set once the matching init call succeeded, so Release() only undoes what
  // was actually done
  bool encoder_ready_ = false;
  bool writer_ready_  = false;
  // Releases the encoder, the muxer and the writer in the order the original
  // destructor used; does nothing for resources that were never acquired.
  void Release() noexcept {
    if (encoder_) {
      if (encoder_ready_) encoder_->Uninitialize();
      WelsDestroySVCEncoder(encoder_);
      encoder_ = nullptr;
      encoder_ready_ = false;
    }
    if (muxer_) {
      MP4E_close(muxer_);
      muxer_ = nullptr;
    }
    if (writer_ready_) {
      mp4_h26x_write_close(&writer_);
      writer_ready_ = false;
    }
  }
  // minimp4 write callback: stores `size` bytes from `buf` at file offset
  // `off`. Returns 0 on success and non-zero on failure.
  static int WriteCallback(int64_t off, const void* buf, size_t size, void* ptr) noexcept {
    auto self = static_cast<VideoEncoder*>(ptr);
    self->file_.seekp(off);
    if (!self->file_) {
      return 1;
    }
    self->file_.write(static_cast<const char*>(buf), static_cast<std::streamsize>(size));
    return !self->file_;
  }
 };