diff --git a/README.md b/README.md
index e302bab..95c2099 100644
--- a/README.md
+++ b/README.md
@@ -17,3 +17,7 @@ ffmpeg -i in.mp4 -vframes 300 "%d.png"
 ```
 ffmpeg -r 30 -i "%d.png" -vcodec libx264 -pix_fmt yuv420p out.mp4
 ```
+
+```
+ffmpeg -i src.mp4 -t 10s cut.mp4
+```
diff --git a/blocky/embed.hh b/blocky/embed.hh
new file mode 100644
index 0000000..af8712f
--- /dev/null
+++ b/blocky/embed.hh
@@ -0,0 +1,107 @@
+#pragma once
+
+#include <cassert>
+#include <cstring>
+#include <optional>
+
+#include "video_decoder.hh"
+#include "video_encoder.hh"
+
+
+void Embed(const std::vector<uint32_t>& features,  // Re-encodes `video_path` into `dst_path`, marking one grid cell per frame.
+           const std::string& dst_path,            // output path handed to VideoEncoder
+           const std::string& video_path,          // input video consumed by VideoDecoder
+           const std::tuple<uint32_t, uint32_t>& div,  // (div_x, div_y): grid the frame is split into
+           uint32_t                              utime) {  // frames per feature; NOTE(review): --utime help says milliseconds, code counts frames — confirm
+  assert(features.size());  // programmer error to call with an empty feature list
+
+  const uint32_t div_x = std::get<0>(div);
+  const uint32_t div_y = std::get<1>(div);
+
+  VideoDecoder dec {video_path};
+  std::optional<VideoEncoder> enc;  // created lazily once the first frame reveals the size
+
+  std::vector<uint8_t> Y;  // planar I420 working buffers, reused across frames
+  std::vector<uint8_t> U;
+  std::vector<uint8_t> V;
+
+  uint32_t last_feat = UINT32_MAX;  // sentinel: no feature snapshot captured yet
+  std::vector<uint8_t> feat_pix;    // saved cell chroma: U plane in first half, V plane in second
+  for (uint32_t time = 0; dec.Decode();) {  // `time` counts decoded pictures, not wall-clock time
+    const auto& src = dec.frame();
+    if (src.iBufferStatus != 1) continue;  // no picture ready for this NAL; keep decoding
+    ++time;
+
+    const uint32_t w = static_cast<uint32_t>(src.UsrData.sSystemBuffer.iWidth);
+    const uint32_t h = static_cast<uint32_t>(src.UsrData.sSystemBuffer.iHeight);
+
+    const uint32_t stride_y  = static_cast<uint32_t>(src.UsrData.sSystemBuffer.iStride[0]);
+    const uint32_t stride_uv = static_cast<uint32_t>(src.UsrData.sSystemBuffer.iStride[1]);
+
+    const uint8_t* const* srcp = src.pDst;  // decoder-owned Y/U/V planes
+
+    // copy buffer to modify
+    Y.resize(w*h);
+    U.resize(w*h/2/2);  // 4:2:0: chroma planes hold a quarter of the luma samples
+    V.resize(w*h/2/2);
+    for (uint32_t y = 0; y < h; ++y) {  // row-by-row copy strips the decoder's stride padding
+      std::memcpy(Y.data()+y*w, srcp[0]+stride_y*y, w);
+    }
+    for (uint32_t y = 0; y < h/2; ++y) {
+      std::memcpy(U.data()+y*(w/2), srcp[1]+stride_uv*y, w/2);
+      std::memcpy(V.data()+y*(w/2), srcp[2]+stride_uv*y, w/2);
+    }
+
+    // embed a feature to the buffer
+    const uint32_t feat = features[(time/utime)%features.size()];  // advance one feature every `utime` frames, wrapping around
+
+    const uint32_t feat_x = feat%div_x;  // feature id selects a grid cell, row-major
+    const uint32_t feat_y = feat/div_x;
+
+    const uint32_t feat_size_x = w/div_x;  // cell size in pixels; assumes div_x/div_y divide w/h evenly — TODO confirm
+    const uint32_t feat_size_y = h/div_y;
+
+    const uint32_t feat_offset_x = feat_x*feat_size_x;
+    const uint32_t feat_offset_y = feat_y*feat_size_y;
+
+    if (feat != last_feat) {  // feature changed: snapshot the new cell's chroma from this frame
+      feat_pix.resize(feat_size_x*feat_size_y);  // U rows in the first half, V rows in the second
+      for (uint32_t y = 0; y < feat_size_y; ++y) {  // y/2 indexing revisits each chroma row twice (redundant but harmless)
+        const uint32_t ay = y+feat_offset_y;  // absolute luma row within the frame
+        std::memcpy(feat_pix.data()+(y/2)*(feat_size_x/2), U.data()+(ay/2)*(w/2)+feat_offset_x/2, feat_size_x/2);
+        std::memcpy(feat_pix.data()+(y/2)*(feat_size_x/2)+feat_pix.size()/2, V.data()+(ay/2)*(w/2)+feat_offset_x/2, feat_size_x/2);
+      }
+      last_feat = feat;
+    }
+    for (uint32_t y = 0; y < feat_size_y; ++y) {  // write the snapshot back every frame: freezes the cell's chroma while `feat` is active
+      const uint32_t ay = y+feat_offset_y;
+      std::memcpy(U.data()+(ay/2)*(w/2)+feat_offset_x/2, feat_pix.data()+(y/2)*(feat_size_x/2), feat_size_x/2);
+      std::memcpy(V.data()+(ay/2)*(w/2)+feat_offset_x/2, feat_pix.data()+(y/2)*(feat_size_x/2)+feat_pix.size()/2, feat_size_x/2);
+    }
+
+    // create an encoder if not yet
+    if (!enc) {  // deferred construction: w/h are only known after the first decoded frame
+      SEncParamBase param = {};
+      param.iUsageType     = SCREEN_CONTENT_REAL_TIME;
+      param.iPicWidth      = static_cast<int>(w);
+      param.iPicHeight     = static_cast<int>(h);
+      param.fMaxFrameRate  = 30;  // NOTE(review): hard-coded; the source frame rate is not propagated — confirm intended
+      param.iTargetBitrate = 5000000;  // 5 Mbps
+      enc.emplace(dst_path, param);
+    }
+
+    // encode
+    SSourcePicture dst = {};
+    dst.iColorFormat = videoFormatI420;
+    dst.pData[0]     = Y.data();
+    dst.pData[1]     = U.data();
+    dst.pData[2]     = V.data();
+    dst.iStride[0]   = static_cast<int>(w);  // tight strides: working buffers were copied without padding
+    dst.iStride[1]   = static_cast<int>(w/2);
+    dst.iStride[2]   = static_cast<int>(w/2);
+    dst.iPicWidth    = static_cast<int>(w);
+    dst.iPicHeight   = static_cast<int>(h);
+    dst.uiTimeStamp  = static_cast<int64_t>(src.uiOutYuvTimeStamp);  // preserve the source presentation timestamp
+    enc->Encode(dst);
+  }
+}
diff --git a/blocky/main.cc b/blocky/main.cc
index 56ed901..37259d9 100644
--- a/blocky/main.cc
+++ b/blocky/main.cc
@@ -7,6 +7,7 @@
 
 #include "common.hh"
 #include "bytes.hh"
+#include "embed.hh"
 #include "features.hh"
 
 #include <args.hxx>
@@ -71,6 +72,19 @@ args::ValueFlag<uint8_t> param_seed {
   {"seed"},
   123
 };
+args::ValueFlag<std::string> param_video {
+  param_group,
+  "path",
+  "a video file where information is embedded",
+  {"path"},
+};
+args::ValueFlag<uint32_t> param_utime {
+  param_group,
+  "int>0",
+  "a duration (frames) of features",
+  {"utime"},
+  10
+};
 
 args::Group probgen_group {
   parser, "params for feature probability generator", args::Group::Validators::DontCare
@@ -125,7 +139,7 @@ try {
         char buf[2];
         std::cin >> buf[0] >> buf[1];
         if (std::cin.eof()) break;
-        bytes.push_back(ToHex(buf[0]) << 4 | ToHex(buf[1]));
+        bytes.push_back(static_cast<uint8_t>((ToHex(buf[0]) << 4) | ToHex(buf[1])));
       }
     } else {
       throw std::runtime_error {"invalid source format for bytes"};
@@ -181,8 +195,13 @@ try {
             probgen_normalize);
         break;
       }
-      // TODO embed into video
-      assert(false);
+      assert(dst_video);
+      Embed(
+          features,
+          args::get(dst_video),
+          args::get(param_video),
+          args::get(param_block_num),
+          args::get(param_utime));
       /* fallthrough */
 
     case kVideo:
diff --git a/blocky/video_decoder.hh b/blocky/video_decoder.hh
index 762dc5f..7e5becc 100644
--- a/blocky/video_decoder.hh
+++ b/blocky/video_decoder.hh
@@ -97,8 +97,12 @@ class VideoDecoder final {
   VideoDecoder& operator=(const VideoDecoder&) = delete;
   VideoDecoder& operator=(VideoDecoder&&) = delete;
 
-  void Decode() {
+  bool Decode() {
     if (temp_consumed_ >= temp_.size()) {
+      if (count_ >= demuxer_.track[track_].sample_count) {
+        return false;
+      }
+
       unsigned size, time, dur;
       const auto off = MP4D_frame_offset(
           &demuxer_,
@@ -116,6 +120,8 @@ class VideoDecoder final {
       assert(file_);
 
       Decode();
+      ++count_;
+      return true;
 
     } else {
       auto& i = temp_consumed_;
@@ -129,13 +135,17 @@ class VideoDecoder final {
       temp_[i+1] = 0;
       temp_[i+2] = 0;
       temp_[i+3] = 1;
-      if (decoder_->DecodeFrameNoDelay(temp_.data(), static_cast<int>(nal_size), yuv_, &frame_)) {
+      if (decoder_->DecodeFrameNoDelay(temp_.data()+i, static_cast<int>(nal_size), yuv_, &frame_)) {
         throw std::runtime_error {"failed to decode a frame"};
       }
       i += nal_size;
+      return true;
     }
   }
 
+  const SBufferInfo& frame() const noexcept { return frame_; }
+  const uint8_t* const* yuv() const noexcept { return yuv_; }
+
  private:
   std::ifstream file_;
   size_t size_;