112 lines
3.1 KiB
C++
112 lines
3.1 KiB
C++
#include <algorithm>
|
|
#include <fstream>
|
|
#include <iostream>
|
|
#include <numeric>
|
|
#include <string>
|
|
|
|
#include <args.hxx>
|
|
|
|
#include "common.hh"
|
|
|
|
|
|
namespace param {
|
|
using namespace ::args;
|
|
|
|
ArgumentParser parser {
|
|
"converter: alter-probability matrix -> feature probability matrix"
|
|
};
|
|
HelpFlag help {
|
|
parser, "help", "display this menu", {'h', "help"},
|
|
};
|
|
|
|
ValueFlag<std::string> fmap {
|
|
parser, "path", "feature map file path", {"fmap"},
|
|
};
|
|
|
|
ValueFlag<size_t> negative_sample {
|
|
parser, "samples", "number of samples used to calculate negative factor", {"negative-sample"}, 16,
|
|
};
|
|
|
|
} // namespace param
|
|
|
|
|
|
static void Exec() {
|
|
const auto aprobs = ReadMatrix<double>(std::cin);
|
|
Enforce(aprobs.size() > 0 && aprobs[0].size() > 0, "empty matrix");
|
|
|
|
std::ifstream fmap_st {args::get(param::fmap)};
|
|
Enforce(!!fmap_st, "fmap path is invalid");
|
|
const auto fmap = ReadTensor3<uint32_t>(fmap_st);
|
|
Enforce(fmap.size() > 0 && fmap[0].size() > 0, "empty fmap");
|
|
|
|
std::unordered_set<uint32_t> used_blocks_map;
|
|
for (auto& fmap_t : fmap) {
|
|
Enforce(fmap_t.size() == fmap[0].size(), "fmap is broken");
|
|
for (auto& fmap_f : fmap_t) {
|
|
std::copy(fmap_f.begin(), fmap_f.end(), std::inserter(used_blocks_map, used_blocks_map.end()));
|
|
}
|
|
}
|
|
std::vector<uint32_t> used_blocks;
|
|
used_blocks.reserve(used_blocks_map.size());
|
|
std::copy(used_blocks_map.begin(), used_blocks_map.end(), std::back_inserter(used_blocks));
|
|
std::cerr << "deb: " << used_blocks.size() << std::endl;
|
|
|
|
std::vector<double> negatives;
|
|
for (size_t t = 0; t < aprobs.size(); ++t) {
|
|
const auto tidx = t % fmap.size();
|
|
for (size_t c = 0; c < fmap[tidx].size(); ++c) {
|
|
const auto& blocks = fmap[tidx][c];
|
|
|
|
const auto negative_sample = std::min(
|
|
args::get(param::negative_sample),
|
|
aprobs[t].size() - blocks.size());
|
|
negatives.reserve(negative_sample+1);
|
|
negatives.clear();
|
|
|
|
double positive = 0;
|
|
for (const auto b : used_blocks) {
|
|
if (blocks.end() != std::find(blocks.begin(), blocks.end(), b)) {
|
|
positive += aprobs[t][b];
|
|
} else {
|
|
auto itr = std::lower_bound(
|
|
negatives.begin(), negatives.end(), aprobs[t][b], std::greater {});
|
|
negatives.insert(itr, aprobs[t][b]);
|
|
if (negatives.size() > negative_sample) {
|
|
negatives.resize(negative_sample);
|
|
}
|
|
}
|
|
}
|
|
|
|
if (blocks.size() > 0) {
|
|
positive /= blocks.size();
|
|
} else {
|
|
positive = 1;
|
|
}
|
|
|
|
double negative = 0;
|
|
if (negative_sample > 0) {
|
|
negative =
|
|
std::accumulate(negatives.begin(), negatives.end(), 0.) /
|
|
negative_sample;
|
|
}
|
|
|
|
const auto prob = positive * (1-negative);
|
|
std::cout << prob << ' ';
|
|
}
|
|
std::cout << '\n';
|
|
}
|
|
}
|
|
|
|
int main(int argc, char** argv)
|
|
try {
|
|
param::parser.ParseCLI(argc, argv);
|
|
Exec();
|
|
return EXIT_SUCCESS;
|
|
} catch (const args::Help&) {
|
|
std::cout << param::parser << std::endl;
|
|
return EXIT_SUCCESS;
|
|
} catch (const std::exception& e) {
|
|
std::cerr << e.what() << std::endl;
|
|
return EXIT_FAILURE;
|
|
}
|