blocky/conv/aprob_fprob.cc
2023-01-13 22:35:56 +09:00

112 lines
3.1 KiB
C++

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <fstream>
#include <functional>
#include <iostream>
#include <iterator>
#include <numeric>
#include <string>
#include <unordered_set>
#include <vector>
#include <args.hxx>
#include "common.hh"
// Command line interface of the converter, declared with the args.hxx
// library. The flags register themselves on `parser`, so `parser` has to be
// declared before them.
namespace param {
using namespace ::args;
// Root parser; the string is the program description shown in the help text.
ArgumentParser parser {
"converter: alter-probability matrix -> feature probability matrix"
};
// -h / --help: makes ParseCLI() throw args::Help, which main() turns into
// the usage text.
HelpFlag help {
parser, "help", "display this menu", {'h', "help"},
};
// --fmap <path>: file that Exec() opens and parses as the feature map.
ValueFlag<std::string> fmap {
parser, "path", "feature map file path", {"fmap"},
};
// --negative-sample <n>: upper bound on how many of the largest non-member
// probabilities Exec() averages into the negative factor. The trailing 16 is
// the value used when the flag is not given.
ValueFlag<size_t> negative_sample {
parser, "samples", "number of samples used to calculate negative factor", {"negative-sample"}, 16,
};
} // namespace param
// Converts the alter-probability matrix read from stdin into a feature
// probability matrix written to stdout.
//
// Input:
//   - stdin: a matrix of doubles (ReadMatrix); one output row is produced per
//     input row.
//   - --fmap: a rank-3 tensor of block ids (ReadTensor3). fmap[t][c] lists the
//     block ids (column indices into a matrix row) that belong to feature `c`;
//     matrix row `t` uses fmap[t % fmap.size()].
//
// For every row/feature pair the printed value is
//   positive * (1 - negative)
// where `positive` is the mean probability of the feature's own blocks (1 when
// the feature has no blocks) and `negative` is the sum of the largest
// probabilities among the other used blocks divided by the sample count. The
// sample count is --negative-sample, capped by the number of columns that are
// not part of the feature.
//
// Fails through Enforce() when the matrix or the feature map is empty, the
// feature map file cannot be opened, the feature map is ragged, or the feature
// map refers to a block id that is not a valid column of a matrix row.
static void Exec() {
  const auto aprobs = ReadMatrix<double>(std::cin);
  Enforce(aprobs.size() > 0 && aprobs[0].size() > 0, "empty matrix");

  std::ifstream fmap_st {args::get(param::fmap)};
  Enforce(!!fmap_st, "fmap path is invalid");

  const auto fmap = ReadTensor3<uint32_t>(fmap_st);
  Enforce(fmap.size() > 0 && fmap[0].size() > 0, "empty fmap");

  // Collect every block id that appears anywhere in the feature map.
  std::unordered_set<uint32_t> used_blocks_map;
  for (const auto& fmap_t : fmap) {
    Enforce(fmap_t.size() == fmap[0].size(), "fmap is broken");
    for (const auto& fmap_f : fmap_t) {
      std::copy(fmap_f.begin(), fmap_f.end(), std::inserter(used_blocks_map, used_blocks_map.end()));
    }
  }
  std::vector<uint32_t> used_blocks;
  used_blocks.reserve(used_blocks_map.size());
  std::copy(used_blocks_map.begin(), used_blocks_map.end(), std::back_inserter(used_blocks));
  std::cerr << "deb: " << used_blocks.size() << std::endl;

  // Largest block id in use; every matrix row is indexed with all used block
  // ids below, so each row must be wider than this.
  size_t max_block = 0;
  if (!used_blocks.empty()) {
    max_block = *std::max_element(used_blocks.begin(), used_blocks.end());
  }

  const size_t max_samples = args::get(param::negative_sample);

  // Scratch buffer holding the largest negative probabilities seen so far,
  // sorted in descending order. Reused across iterations to avoid reallocation.
  std::vector<double> negatives;
  for (size_t t = 0; t < aprobs.size(); ++t) {
    const auto& row = aprobs[t];
    Enforce(
        used_blocks.empty() || max_block < row.size(),
        "fmap refers to a block outside of the matrix");

    const auto& features = fmap[t % fmap.size()];
    for (const auto& blocks : features) {
      // Number of columns left once the feature's own blocks are removed.
      // Clamped at zero: the sizes are unsigned and a plain subtraction would
      // wrap around when the feature lists more blocks than the row has columns.
      const size_t others =
          row.size() > blocks.size() ? row.size() - blocks.size() : 0;
      const size_t sample_count = std::min(max_samples, others);

      negatives.clear();
      negatives.reserve(sample_count);

      double positive = 0;
      for (const auto b : used_blocks) {
        if (blocks.end() != std::find(blocks.begin(), blocks.end(), b)) {
          positive += row[b];
          continue;
        }
        // Keep only the `sample_count` largest negatives, in descending order.
        const auto itr = std::lower_bound(
            negatives.begin(), negatives.end(), row[b], std::greater<> {});
        if (itr == negatives.end() && negatives.size() >= sample_count) {
          continue;  // not larger than anything kept and the buffer is full
        }
        negatives.insert(itr, row[b]);
        if (negatives.size() > sample_count) {
          negatives.pop_back();
        }
      }

      if (blocks.size() > 0) {
        positive /= blocks.size();
      } else {
        // A feature without blocks has nothing to match; treat it as certain.
        positive = 1;
      }

      double negative = 0;
      if (sample_count > 0) {
        // Divided by the requested sample count, not by the number of
        // negatives actually collected.
        negative =
            std::accumulate(negatives.begin(), negatives.end(), 0.) /
            sample_count;
      }

      const auto prob = positive * (1-negative);
      std::cout << prob << ' ';
    }
    std::cout << '\n';
  }
}
int main(int argc, char** argv)
try {
param::parser.ParseCLI(argc, argv);
Exec();
return EXIT_SUCCESS;
} catch (const args::Help&) {
std::cout << param::parser << std::endl;
return EXIT_SUCCESS;
} catch (const std::exception& e) {
std::cerr << e.what() << std::endl;
return EXIT_FAILURE;
}