diff --git a/blocky/bytes.hh b/blocky/bytes.hh index d589f96..95adcd9 100644 --- a/blocky/bytes.hh +++ b/blocky/bytes.hh @@ -9,18 +9,67 @@ namespace blky { -std::vector BytesEncoder( +std::vector BytesEncoder( const std::vector& bytes, - std::tuple& feats_num, + std::tuple& block_num, uint8_t feat_bits, - uint32_t first, + uint32_t block_idx, uint64_t seed) { - (void) bytes; - (void) feats_num; - (void) feat_bits; - (void) first; - (void) seed; - return {}; + const uint32_t block_num_all = std::get<0>(block_num)*std::get<1>(block_num); + if (block_num_all == 0) throw std::runtime_error {"block num is zero"}; + + blky_encoder_t enc = {}; + enc.block_num = block_num_all; + enc.feat_bits = feat_bits; + enc.block_index = block_idx; + enc.seed = seed; + + std::vector ret; + for (auto c : bytes) { + blky_encoder_feed(&enc, c); + + uint32_t feat; + while (blky_encoder_pop(&enc, &feat, false)) { + ret.push_back(feat); + } + } + + uint32_t feat; + if (blky_encoder_pop(&enc, &feat, true)) { + ret.push_back(feat); + } + return ret; +} + +std::vector BytesDecoder( + const std::vector& features, + std::tuple& block_num, + uint8_t feat_bits, + uint64_t seed) { + const uint32_t block_num_all = std::get<0>(block_num)*std::get<1>(block_num); + if (block_num_all == 0) throw std::runtime_error {"block num is zero"}; + + blky_decoder_t de = {}; + de.block_num = block_num_all; + de.feat_bits = feat_bits; + de.seed = seed; + + std::vector ret; + for (auto c : features) { + if (!blky_decoder_feed(&de, c)) { + throw std::runtime_error {"path corruption"}; + } + + uint8_t feat; + while (blky_decoder_pop(&de, &feat, false)) { + ret.push_back(feat); + } + } + uint8_t feat; + if (blky_decoder_pop(&de, &feat, true)) { + ret.push_back(feat); + } + return ret; } } // namespace blky diff --git a/blocky/common.hh b/blocky/common.hh index fa17d6b..9788a04 100644 --- a/blocky/common.hh +++ b/blocky/common.hh @@ -4,6 +4,7 @@ extern "C" { # include } +#include #include #include #include @@ -16,31 +17,33 @@ namespace blky { enum DataFlow { kBytes, kFeatures, + kFeatureProbs, kVideo, }; static inline const std::string kDataFlowList = "bytes/features/video"; static inline const std::unordered_map kDataFlowMap = { - {"bytes", kBytes}, - {"features", kFeatures}, - {"video", kVideo}, + {"bytes", kBytes}, + {"features", kFeatures}, + {"feature-probs", kFeatureProbs}, + {"video", kVideo}, }; -struct Feature final { - uint64_t index; - uint64_t begin; - uint64_t end; -}; - - -auto& operator<<(auto& ost, const Feature& feat) { - ost << feat.index << " " << feat.begin << " " << feat.end; - return ost; +template +std::vector ReadAll(auto& ist) noexcept { + std::vector ret; + for (;;) { + T v; + ist >> v; + if (ist.eof()) return ret; + ret.push_back(v); + } } -auto& operator>>(auto& ist, Feature& feat) { - ist >> feat.index >> feat.begin >> feat.end; - return ist; + +static uint8_t ToHex(char c) { + if (!std::isxdigit(c)) throw std::runtime_error {"not xdigit"}; + return static_cast(std::isalpha(c)? std::tolower(c)-'a'+0xA: c-'0'); } } // namespace blky diff --git a/blocky/features.hh b/blocky/features.hh new file mode 100644 index 0000000..91dd697 --- /dev/null +++ b/blocky/features.hh @@ -0,0 +1,105 @@ +#pragma once + +#include +#include +#include + +#include "common.hh" + + +namespace blky { + +std::vector PathfindFeatures( + const std::vector& probs, + std::tuple& block_num, + uint8_t feat_bits, + uint64_t seed) { + const uint32_t num = std::get<0>(block_num) * std::get<1>(block_num); + if (num == 0) throw std::runtime_error {"number of blocks is zero"}; + + const uint64_t dur = probs.size()/num; + if (probs.size() == 0) { + throw std::runtime_error {"size of probability matrix is empty"}; + } + if (dur*num != probs.size()) { + throw std::runtime_error {"size of probability matrix is mismatch"}; + } + + blky_pathfinder_t pf = {}; + pf.block_num = num; + pf.feat_bits = feat_bits; + pf.seed = seed; + + blky_pathfinder_init(&pf); + + for (uint64_t t = 0; t < dur; ++t) { + blky_pathfinder_feed(&pf, probs.data()+t*num); + } + assert(pf.step_last); + + uint32_t max_idx = 0; + double max_prob = -1; + for (uint32_t i = 0; i < num; ++i) { + if (max_prob < pf.probs[i]) { + max_prob = pf.probs[i]; + max_idx = i; + } + } + + std::vector ret; + blky_pathfinder_step_t* step = pf.step_last; + uint32_t idx = max_idx; + for (;;) { + ret.push_back(idx); + if (!step) break; + idx = step->indices[idx]; + step = step->prev; + } + std::reverse(ret.begin(), ret.end()); + + blky_pathfinder_deinit(&pf); + return ret; +} + +std::vector GenerateFeatureProbs( + const std::vector& features, + std::tuple& block_num, + double false_positive, + double false_negative, + uint64_t seed, + bool normalize) { + const uint32_t num = std::get<0>(block_num) * std::get<1>(block_num); + if (num == 0) throw std::runtime_error {"number of blocks is zero"}; + + std::vector ret; + for (auto c : features) { + const auto begin = ret.size(); + + double sum = 0; + for (uint32_t bi = 0; bi < num; ++bi) { + seed = blky_numeric_xorshift64(seed); + const double prob1 = static_cast(seed%1000)/1000.; + + seed = blky_numeric_xorshift64(seed); + const double prob2 = static_cast(seed%1000)/1000.; + + seed = blky_numeric_xorshift64(seed); + double fprob; + if ((c == bi && prob1 >= false_negative) || prob2 < false_positive) { + fprob = static_cast(seed%200)/1000. + .8; + } else { + fprob = static_cast(seed%800)/1000.; + } + ret.push_back(fprob); + sum += fprob; + } + if (normalize) { + for (size_t i = begin; i < ret.size(); ++i) { + ret[i] /= sum; + } + } + } + return ret; +} + +} // namespace blky diff --git a/blocky/main.cc b/blocky/main.cc index ef9f31b..0bce957 100644 --- a/blocky/main.cc +++ b/blocky/main.cc @@ -1,10 +1,12 @@ #include #include +#include #include #include #include "common.hh" #include "bytes.hh" +#include "features.hh" #include @@ -25,21 +27,30 @@ int main(int argc, char** argv) { args::Group src_group { parser, "source specifier", args::Group::Validators::Xor, args::Options::Required}; args::Flag src_stdin {src_group, "src-stdin", "read from stdin", {"src-stdin", "stdin"}}; + args::Flag src_stdin_hex {src_group, "src-stdin-hex", "read hex text from stdin", {"src-stdin-hex", "stdin-hex"}}; args::Group dst_group { parser, "destination specifier", args::Group::Validators::Xor, args::Options::Required}; args::Flag dst_stdout {dst_group, "dst-stdout", "write to stdout", {"dst-stdout", "stdout"}}; + args::Flag dst_stdout_hex {dst_group, "dst-stdout-hex", "write to stdout as hex text", {"dst-stdout-hex", "stdout-hex"}}; args::Group param_group { - parser, "processing parameters", args::Group::Validators::DontCare + parser, "general parameters", args::Group::Validators::DontCare }; - args::ValueFlag> param_feat_num { + args::ValueFlag> param_block_num { param_group, "int>0,int>0", "number of features", {"feature-num"}, {16, 16} }; + args::ValueFlag param_block_first { + param_group, + "int>=0", + "an index of first block where feature will be embedded. used when encoding", + {"feature-first-index"}, + 0 + }; args::ValueFlag param_feat_bits { param_group, "int>0", @@ -47,26 +58,51 @@ int main(int argc, char** argv) { {"feature-bits"}, 1 }; - args::ValueFlag param_feat_first { - param_group, - "int>=0", - "an index of first feature. used when encoding", - {"feature-first-index"}, - 0 - }; - args::ValueFlag param_feat_seed { + args::ValueFlag param_seed { param_group, "int>0", - "seed value for feature hopping", - {"feature-hopping-seed"}, + "seed number for hopping randomization", + {"seed"}, + 123 + }; + + args::Group probgen_group { + parser, "params for feature probability generator", args::Group::Validators::DontCare + }; + args::ValueFlag probgen_false_positive { + probgen_group, + "0<=double<=1", + "false positive ratio in feature probability generation", + {"probgen-false-positive"}, + 0 + }; + args::ValueFlag probgen_false_negative { + probgen_group, + "0<=double<=1", + "false negative ratio in feature probability generation", + {"probgen-false-negative"}, + 0 + }; + args::ValueFlag probgen_seed { + probgen_group, + "int>0", + "random seed", + {"probgen-seed"}, 1 }; + args::Flag probgen_normalize { + probgen_group, + "probgen-normalize", + "normalize probabilities", + {"probgen-normalize"}, + }; try { parser.ParseCLI(argc, argv); - std::vector bytes; - std::vector features; + std::vector bytes; + std::vector features; + std::vector feature_probs; // read input switch (args::get(from)) { @@ -75,6 +111,13 @@ int main(int argc, char** argv) { std::string temp; std::cin >> temp; bytes = {temp.begin(), temp.end()}; + } else if (src_stdin_hex) { + for (;;) { + char buf[2]; + std::cin >> buf[0] >> buf[1]; + if (std::cin.eof()) break; + bytes.push_back(ToHex(buf[0]) << 4 | ToHex(buf[1])); + } } else { throw std::runtime_error {"invalid source format for bytes"}; } @@ -82,12 +125,17 @@ int main(int argc, char** argv) { case kFeatures: if (src_stdin) { - while (!std::cin.eof()) { - features.push_back({}); - std::cin >> features.back(); - } + features = ReadAll(std::cin); } else { - throw std::runtime_error {"invalid source format for bytes"}; + throw std::runtime_error {"invalid source format for features"}; + } + break; + + case kFeatureProbs: + if (src_stdin) { + feature_probs = ReadAll(std::cin); + } else { + throw std::runtime_error {"invalid source format for feature probs"}; } break; @@ -102,20 +150,34 @@ int main(int argc, char** argv) { if (args::get(to) == kBytes) break; features = BytesEncoder( bytes, - args::get(param_feat_num), + args::get(param_block_num), args::get(param_feat_bits), - args::get(param_feat_first), - args::get(param_feat_seed)); + args::get(param_block_first), + args::get(param_seed)); /* fallthrough */ case kFeatures: if (args::get(to) == kFeatures) break; + if (args::get(to) == kFeatureProbs) { + feature_probs = GenerateFeatureProbs( + features, + args::get(param_block_num), + args::get(probgen_false_positive), + args::get(probgen_false_negative), + args::get(probgen_seed), + probgen_normalize); + break; + } // TODO embed into video assert(false); + /* fallthrough */ case kVideo: if (args::get(to) == kVideo) break; assert(false); + + case kFeatureProbs: + throw std::runtime_error("couldn't start flow from the data"); } } else if (args::get(from) > args::get(to)) { @@ -123,13 +185,25 @@ int main(int argc, char** argv) { switch (args::get(from)) { case kVideo: if (args::get(to) == kVideo) break; - // TODO extract features // features = XX + // TODO extract feature probs // features = XX assert(false); + case kFeatureProbs: + if (args::get(to) == kFeatureProbs) break; + features = PathfindFeatures( + feature_probs, + args::get(param_block_num), + args::get(param_feat_bits), + args::get(param_seed)); + /* fallthrough */ + case kFeatures: if (args::get(to) == kFeatures) break; - // TODO bytes = DataDecoder(features); - assert(false); + bytes = BytesDecoder( + features, + args::get(param_block_num), + args::get(param_feat_bits), + args::get(param_seed)); /* fallthrough */ case kBytes: @@ -143,10 +217,16 @@ int main(int argc, char** argv) { case kBytes: if (dst_stdout) { std::cout << std::string {bytes.begin(), bytes.end()} << std::endl; + } else if (dst_stdout_hex) { + for (auto c : bytes) { + std::cout << std::hex << (int) c; + } + std::cout << std::endl; } else { throw std::runtime_error {"invalid destination format for bytes"}; } break; + case kFeatures: if (dst_stdout) { for (auto& f : features) std::cout << f << "\n"; @@ -154,6 +234,22 @@ int main(int argc, char** argv) { throw std::runtime_error {"invalid destination format for features"}; } break; + + case kFeatureProbs: + if (dst_stdout) { + const auto size = args::get(param_block_num); + const auto cols = std::get<0>(size) * std::get<1>(size); + for (size_t i = 0; i < feature_probs.size();) { + for (size_t j = 0; i < feature_probs.size() && j < cols; ++i, ++j) { + std::cout << feature_probs[i] << " "; + } + std::cout << "\n"; + } + } else { + throw std::runtime_error {"invalid destination format for feature probs"}; + } + break; + case kVideo: break; } diff --git a/liblocky/CMakeLists.txt b/liblocky/CMakeLists.txt index 7742529..e4baa47 100644 --- a/liblocky/CMakeLists.txt +++ b/liblocky/CMakeLists.txt @@ -11,6 +11,8 @@ target_sources(liblocky liblocky.h PRIVATE block.c + decoder.c + encoder.c extractor.c image.c pathfinder.c diff --git a/liblocky/decoder.c b/liblocky/decoder.c new file mode 100644 index 0000000..f872a7b --- /dev/null +++ b/liblocky/decoder.c @@ -0,0 +1,51 @@ +#include "liblocky.h" + +#include + + +bool blky_decoder_feed(blky_decoder_t* de, uint32_t block_index) { + assert(de->block_num > 0); + assert(de->feat_bits > 0); + assert(de->seed > 0); + + const uint64_t seed = blky_numeric_xorshift64(de->seed); + + if (de->count++) { + const uint32_t feat_max = 1 << de->feat_bits; + assert(feat_max < de->block_num); + + uint32_t feat = 0; + for (; feat < feat_max; ++feat) { + if (blky_numeric_hop(de->block_index, feat, seed)%de->block_num == block_index) { + break; + } + } + if (feat >= feat_max) return false; + + assert(de->scrap_bits+8 <= 32); + de->scrap |= feat << de->scrap_bits; + de->scrap_bits += de->feat_bits; + } + + de->seed = seed; + de->block_index = block_index; + return true; +} + +bool blky_decoder_pop(blky_decoder_t* de, uint8_t* b, bool force) { + if (force) { + if (de->scrap_bits > 0) return false; + } else { + if (de->scrap_bits < 8) return false; + } + + *b = (uint8_t) (de->scrap & 0xFF); + + de->scrap >>= 8; + if (de->scrap_bits >= 8) { + de->scrap_bits -= 8; + } else { + de->scrap_bits = 0; + } + return true; +} diff --git a/liblocky/encoder.c b/liblocky/encoder.c new file mode 100644 index 0000000..c74d4de --- /dev/null +++ b/liblocky/encoder.c @@ -0,0 +1,41 @@ +#include "liblocky.h" + +#include +#include + + +void blky_encoder_feed(blky_encoder_t* enc, uint8_t data) { + assert(enc); + assert(enc->block_num > 0); + assert(enc->feat_bits > 0); + assert(enc->feat_bits <= sizeof(enc->scrap)*8); + assert(enc->seed > 0); + + if (enc->count++ == 0) enc->scrap_bits = 1; + + assert(enc->scrap_bits+8 <= 32); + enc->scrap |= data << enc->scrap_bits; + enc->scrap_bits += 8; +} + +bool blky_encoder_pop(blky_encoder_t* enc, uint32_t* feat, bool force) { + assert(enc); + assert(feat); + + if (force) { + if (enc->scrap_bits > 0) return false; + } else { + if (enc->scrap_bits < enc->feat_bits) return false; + } + + const uint32_t feat_max = 1 << enc->feat_bits; + assert(feat_max < enc->block_num); + + enc->seed = blky_numeric_xorshift64(enc->seed); + *feat = blky_numeric_hop(enc->block_index, enc->scrap%feat_max, enc->seed)%enc->block_num; + + enc->block_index = *feat; + enc->scrap >>= enc->feat_bits; + enc->scrap_bits -= enc->feat_bits; + return true; +} diff --git a/liblocky/liblocky.h b/liblocky/liblocky.h index 6fdd881..3eae0d3 100644 --- a/liblocky/liblocky.h +++ b/liblocky/liblocky.h @@ -97,22 +97,19 @@ blky_extractor_feed( const uint8_t* img, uint32_t w, uint32_t h, const double verts[8]); -/* ---- pathfinder ---- */ +/* ---- Pathfinder ---- */ typedef struct blky_pathfinder_step_t { - struct blky_pathfinder_step_t* next; + struct blky_pathfinder_step_t* prev; uint32_t indices[1]; } blky_pathfinder_step_t; typedef struct blky_pathfinder_t { // must be filled before init() uint32_t block_num; - uint32_t step_branch; + uint32_t feat_bits; uint64_t seed; - uint8_t hopping_algo; -# define BLKY_PATHFINDER_HOPPING_ALGO_XORSHFIT 0 // internal state - blky_pathfinder_step_t* step_first; blky_pathfinder_step_t* step_last; uint64_t steps; @@ -136,6 +133,58 @@ blky_pathfinder_feed( const double* probs); +/* ---- Encoder ---- + * converts byte to feature */ + +typedef struct blky_encoder_t { + uint32_t block_num; + uint32_t block_index; + uint8_t feat_bits; + uint64_t seed; + + uint64_t count; + uint32_t scrap; + uint8_t scrap_bits; +} blky_encoder_t; + +void +blky_encoder_feed( + blky_encoder_t* enc, + uint8_t data); + +bool +blky_encoder_pop( + blky_encoder_t* enc, + uint32_t* feat, + bool force); + + +/* ---- Decoder ---- + * converts block indices to byte */ + +typedef struct blky_decoder_t { + uint32_t block_num; + uint8_t feat_bits; + uint64_t seed; + + uint64_t count; + uint32_t block_index; + uint32_t scrap; + uint8_t scrap_bits; +} blky_decoder_t; + +bool +blky_decoder_feed( + blky_decoder_t* de, + uint32_t block_index); + +bool +blky_decoder_pop( + blky_decoder_t* de, + uint8_t* b, + bool force); + + /* ---- Image utility ---- */ void blky_image_convert_to_normalized_coord( const double verts[8], double* x, double* y); @@ -145,27 +194,13 @@ uint64_t blky_image_offset( /* ---- numeric utility ---- */ -static inline uint64_t blky_numeric_xorrshift64_rev(uint64_t x, uint8_t n) { - for (int16_t i = 64-n; i >= 0; --i) { - x ^= (1 << i) & (x >> n); - } - return x; -} -static inline uint64_t blky_numeric_xorlshift64_rev(uint64_t x, uint8_t n) { - for (uint8_t i = n; i < 64; ++i) { - x ^= (1 << i) & (x << n); - } - return x; -} static inline uint64_t blky_numeric_xorshift64(uint64_t x) { x ^= x << 13; x ^= x >> 7; x ^= x << 17; return x; } -static inline uint64_t blky_numeric_xorshift64_rev(uint64_t x) { - x = blky_numeric_xorlshift64_rev(x, 13); - x = blky_numeric_xorrshift64_rev(x, 7); - x = blky_numeric_xorlshift64_rev(x, 17); - return x; +static inline uint32_t blky_numeric_hop(uint32_t prev, uint32_t offset, uint64_t seed) { + seed = (seed^blky_numeric_xorshift64(prev+seed)) + (offset << 4); + return (uint32_t) ((seed & 0xFFFFFFFF) ^ (seed >> 32)); } diff --git a/liblocky/pathfinder.c b/liblocky/pathfinder.c index 56a1af6..5678fc2 100644 --- a/liblocky/pathfinder.c +++ b/liblocky/pathfinder.c @@ -5,14 +5,13 @@ void blky_pathfinder_init(blky_pathfinder_t* pf) { - assert(pf->block_num > 0); - assert(pf->step_branch > 0); - assert(pf->seed > 0); + assert(pf->block_num > 0); + assert(pf->feat_bits > 0); + assert(pf->feat_bits < 32); + assert(pf->seed > 0); - assert(pf->block_num >= pf->step_branch); - assert(pf->hopping_algo == BLKY_PATHFINDER_HOPPING_ALGO_XORSHFIT); + assert(pf->block_num > (uint32_t) (1 << pf->feat_bits)); - pf->step_first = NULL; pf->step_last = NULL; pf->step_bytes = @@ -29,40 +28,47 @@ void blky_pathfinder_init(blky_pathfinder_t* pf) { void blky_pathfinder_deinit(blky_pathfinder_t* pf) { free(pf->probs); free(pf->probs_prev); + + blky_pathfinder_step_t* step = pf->step_last; + while (step) { + blky_pathfinder_step_t* temp = step->prev; + free(step); + step = temp; + } } void blky_pathfinder_feed(blky_pathfinder_t* pf, const double* probs) { + double* temp = pf->probs; + pf->probs = pf->probs_prev; + pf->probs_prev = temp; + blky_pathfinder_step_t* step = NULL; if (++pf->steps > 1) { step = calloc(pf->step_bytes, 1); assert(step); } + const uint32_t feat_max = 1 << pf->feat_bits; + assert(feat_max < pf->block_num); + + pf->seed = blky_numeric_xorshift64(pf->seed); for (uint32_t bi = 0; bi < pf->block_num; ++bi) { - const double prob = probs[bi]; - const uint64_t pbase = blky_numeric_xorshift64_rev(bi); - for (uint32_t si = 0; si < pf->step_branch; ++si) { - const uint64_t prev_seed = pbase - si*pf->block_num/pf->step_branch; - const uint32_t prev_bi = (uint32_t) (prev_seed % pf->block_num); - const double prev_prob = pf->probs_prev[prev_bi]; - const double sum = prev_prob + prob; - if (pf->probs[bi] < sum) { - pf->probs[bi] = sum; - if (step) step->indices[prev_bi] = bi; + const double prob = probs[bi]; + for (uint32_t pbi = 0; pbi < pf->block_num; ++pbi) { + for (uint32_t fi = 0; fi < feat_max; ++fi) { + if (blky_numeric_hop(pbi, fi, pf->seed)%pf->block_num != bi) continue; + + const double sum = pf->probs_prev[pbi] + prob; + if (pf->probs[bi] < sum) { + pf->probs[bi] = sum; + if (step) step->indices[bi] = pbi; + } } } } if (step) { - if (pf->step_last) { - pf->step_last->next = step; - pf->step_last = step; - } else { - pf->step_first = pf->step_last = step; - } + step->prev = pf->step_last; + pf->step_last = step; } - - double* temp = pf->probs; - pf->probs = pf->probs_prev; - pf->probs_prev = temp; }