From ef8a3f6c3e20178ee520f1e6bedbc866e3c9b490 Mon Sep 17 00:00:00 2001 From: scratko Date: Sun, 3 Aug 2025 02:28:24 +0300 Subject: Initial commit: added source code, resources and README --- detectionalgorithm.cpp | 321 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 321 insertions(+) create mode 100755 detectionalgorithm.cpp (limited to 'detectionalgorithm.cpp') diff --git a/detectionalgorithm.cpp b/detectionalgorithm.cpp new file mode 100755 index 0000000..eda909e --- /dev/null +++ b/detectionalgorithm.cpp @@ -0,0 +1,321 @@ +#include "detectionalgorithm.h" +#include "utilities.h" +#include +#include +#include +#include +#include +#include +#include + +DetectionAlgorithm::DetectionAlgorithm() + : + QObject(), + chunk_size(400), + n_gram(5), + target_doc(), + human_chunks_count(0), + robot_chunks(NUMBER_OF_GROUPS), + total_robot_chunks(0), + freq_of_robot_ngramm(NUMBER_OF_GROUPS), + selection_alg(0) +{ + +} + +void DetectionAlgorithm::setOptions(int chunk, int n_g, const std::string& d, int select) +{ + chunk_size = chunk; + n_gram = n_g; + target_doc = d; + selection_alg = select; +} + +void DetectionAlgorithm::run_algorithm() +{ + + QTime t; + t.start(); + QApplication::processEvents(); + using std::string; + using std::vector; + std::ifstream file; + for (int i = 0; i < NUMBER_OF_GROUPS; ++i) { + QFile file(":resource/R" + QString::number(i+1) + ".txt"); + if (!file.open(QIODevice::ReadOnly)) { + qWarning("Cannot open file for reading"); + } + QTextStream in(&file); + QString q_doc; + while (!in.atEnd()) + q_doc = in.readAll(); + file.close(); + std::string rob_doc = q_doc.toLocal8Bit().constData(); + prepare(rob_doc); + robot_docs.push_back(rob_doc); + } + prepare(target_doc); + + for (int i = 0; i < NUMBER_OF_GROUPS; ++i) { + robot_chunks_count.push_back(robot_docs[i].size() / chunk_size); + string temp; + string::iterator it; + int j; + for (it = robot_docs[i].begin(), j = 1; it != robot_docs[i].end(); ++it, ++j) { + temp += *it; + if (!(j % chunk_size)) { + robot_chunks[i].push_back(temp); + temp.clear(); + } + } + temp.clear(); + } + + emit value(1); + + string::iterator it; + int j = 0; + string build_chunk; + human_chunks_count = target_doc.size() / chunk_size; + int a = target_doc.size(); + qDebug() << a; + for (it = target_doc.begin(), j = 1; it != target_doc.end(); ++it, ++j) { + build_chunk += *it; + if (!(j % chunk_size)) { + human_chunks.push_back(build_chunk); + build_chunk.clear(); + } + } + try { + if (human_chunks_count < 11) + throw std::logic_error("wrong size of text, using " + std::to_string(human_chunks_count)); + } catch (std::logic_error& e) { + qDebug() << e.what(); + emit terminate(); + } + + for (int i : robot_chunks_count) total_robot_chunks += i; + + for (int i = 0; i < total_robot_chunks-100; ++i) + real_lable.push_back(1); + for (int i = 0; i < human_chunks_count-10; ++i) + real_lable.push_back(0); + + string doc; + for (const std::string& st : robot_docs) doc += st; + doc += target_doc; + + emit value(2); + + n_gram_calc(doc, n_gram); + + std::ifstream dictionary_file("dictionary.txt"); + string temp; + while (std::getline(dictionary_file, temp)) { + dictionary.push_back(temp); + } + dictionary_file.close(); + + for (int i = 0; i < NUMBER_OF_GROUPS; ++i) { + std::vector::iterator it; + + for (it = robot_chunks[i].begin(); it != robot_chunks[i].end(); ++it) + freq_of_robot_ngramm[i].push_back(freq_in_chunk(*it, dictionary)); + } + + for (vector::iterator chunk_iter = human_chunks.begin(); chunk_iter != human_chunks.end(); ++chunk_iter) + freq_of_human_ngramm.push_back(freq_in_chunk(*chunk_iter, dictionary)); + + emit value(3); + + vector > dzv_matrix; + vector row; + + /*for the robot docs 1*/ + for (int i = 0; i < NUMBER_OF_GROUPS; ++i) { + for (int j = T; j < robot_chunks_count[i]; ++j) { + for (int k = 0; k < NUMBER_OF_GROUPS; ++k) + if (i != k) + for (int n = T; n < robot_chunks_count[k]; ++n) + row.push_back( + dzv_calc( + T, freq_of_robot_ngramm[i][j], + freq_of_robot_ngramm[k][n], + j, n, + freq_of_robot_ngramm[i], + freq_of_robot_ngramm[k] + )); + + /*for all the other docs*/ + for (int n = T; n < human_chunks_count; ++n) + row.push_back( + dzv_calc( + T, freq_of_robot_ngramm[i][j], + freq_of_human_ngramm[n], + j, n, + freq_of_robot_ngramm[i], + freq_of_human_ngramm + )); + dzv_matrix.push_back(row); + row.clear(); + + } + } + + /*for the human docs*/ + for (int j = T; j < human_chunks_count; ++j) { + /*for all the other docs*/ + for (int k = 0; k < NUMBER_OF_GROUPS; ++k) + for (int n = T; n < robot_chunks_count[k]; ++n) + row.push_back( + dzv_calc( + T, freq_of_human_ngramm[j], + freq_of_robot_ngramm[k][n], + j, n, + freq_of_human_ngramm, + freq_of_robot_ngramm[k] + )); + dzv_matrix.push_back(row); + row.clear(); + + } + +// ///*write matrix with const columns 1*/ + + std::ofstream write_matrix("matrix.txt"); + unsigned int colums = dzv_matrix[0].size(); + std::for_each(dzv_matrix.begin(), dzv_matrix.end(), + [&colums](vector& col) mutable { + if (col.size() > colums) colums = col.size(); + }); + for (vector v : dzv_matrix) { + for (unsigned int i = 0; i < v.size(); ++i) { + write_matrix << v[i]; + if (i != (v.size() - 1)) + write_matrix << ' '; + } + for (unsigned int j = 0; j < colums - v.size(); ++j) { + write_matrix << ' ' << 0; + if (j != (colums - v.size() - 1)) + write_matrix << ' '; + } + write_matrix << '\n'; + } + write_matrix.close(); + + /*reading from text 1*/ + + std::ifstream infile("matrix.txt"); + std::string line; + while (getline(infile, line)) { + std::istringstream is(line); + dzv.push_back( + std::vector(std::istream_iterator(is), + std::istream_iterator())); + } + infile.close(); + + emit value(4); + + std::vector prediction_array; + + /*using boost*/ + namespace bn = boost::python::numpy; + namespace bp = boost::python; + _putenv_s("PYTHONPATH", "."); + Py_Initialize(); + bn::initialize(); + try + { + bp::object my_python_class_module = bp::import("mymodule"); + + int n_rows = dzv.size(); + + bp::list total; + for (int i = 0; i < n_rows; ++i) { + bp::tuple shape = bp::make_tuple(dzv[i].size()); + bn::dtype dtype = bn::dtype::get_builtin(); + bn::ndarray py_mas = bn::zeros(shape, dtype); + //vector::iterator it = dzv[i].begin(); + for (size_t j = 0; j < dzv[i].size(); ++j) + py_mas[j] = dzv[i][j]; + total.append(py_mas); + } + bn::ndarray array = bn::from_object(total); + const char* algorithm = nullptr; + switch (selection_alg) { + case 0: + algorithm = "kmedoids"; + break; + case 1: + algorithm = "kmeans"; + break; + case 2: + algorithm = "agglClus"; + break; + default: + algorithm = "kmedoids"; + } + auto result = my_python_class_module.attr(algorithm)(array); + auto result_array = bp::extract(result); + const bn::ndarray& ret = result_array(); + int input_size = ret.shape(0); + int* input_ptr = reinterpret_cast(ret.get_data()); + for (int i = 0; i < input_size; ++i) + prediction_array.push_back(*(input_ptr + i)); + } + catch (const bp::error_already_set&) + { + PyErr_Print(); + qWarning("py error"); + } + + Py_Finalize(); + + /** + * using QCharts to show results + */ + QList pred_first_class, pred_second_class, real_first, real_second; + for (size_t i = 0; i < dzv.size(); ++i){ + if(prediction_array[i]) + pred_first_class << QPointF(dzv[i][0], dzv[i][1]); + else + pred_second_class << QPointF(dzv[i][0], dzv[i][1]); + (real_lable[i]) ? real_first << QPointF(dzv[i][0], dzv[i][1]) : + real_second << QPointF(dzv[i][0], dzv[i][1]); + } + + /** + * class membership calculation + */ + /** + * for robot docs + */ + std::vector::iterator end_robot_it = prediction_array.begin()+(total_robot_chunks-100); + int zero_robot_count = std::count(prediction_array.begin(), + end_robot_it, + 0); + int one_robot_count = std::count(prediction_array.begin(), + end_robot_it, + 1); + int rob_clus = 0, ret_clus = 0; + (one_robot_count > zero_robot_count) ? rob_clus = 1 : rob_clus = 0; + + /** + * for retrieved doc + */ + int zero_ret_count = std::count(end_robot_it+1, + prediction_array.end(), + 0); + int one_ret_count = std::count(end_robot_it+1, + prediction_array.end(), + 1); + (one_ret_count > zero_ret_count) ? ret_clus = 1 : ret_clus = 0; + + + qDebug() << t.elapsed(); + + emit value(5); + emit run_result(rob_clus, ret_clus); + emit run_chart(pred_first_class, pred_second_class, real_first, real_second); +} -- cgit v1.2.3