#include "detectionalgorithm.h" #include "utilities.h" #include #include #include #include #include #include #include DetectionAlgorithm::DetectionAlgorithm() : QObject(), chunk_size(400), n_gram(5), target_doc(), human_chunks_count(0), robot_chunks(NUMBER_OF_GROUPS), total_robot_chunks(0), freq_of_robot_ngramm(NUMBER_OF_GROUPS), selection_alg(0) { } void DetectionAlgorithm::setOptions(int chunk, int n_g, const std::string& d, int select) { chunk_size = chunk; n_gram = n_g; target_doc = d; selection_alg = select; } void DetectionAlgorithm::run_algorithm() { QTime t; t.start(); QApplication::processEvents(); using std::string; using std::vector; std::ifstream file; for (int i = 0; i < NUMBER_OF_GROUPS; ++i) { QFile file(":resource/R" + QString::number(i+1) + ".txt"); if (!file.open(QIODevice::ReadOnly)) { qWarning("Cannot open file for reading"); } QTextStream in(&file); QString q_doc; while (!in.atEnd()) q_doc = in.readAll(); file.close(); std::string rob_doc = q_doc.toLocal8Bit().constData(); prepare(rob_doc); robot_docs.push_back(rob_doc); } prepare(target_doc); for (int i = 0; i < NUMBER_OF_GROUPS; ++i) { robot_chunks_count.push_back(robot_docs[i].size() / chunk_size); string temp; string::iterator it; int j; for (it = robot_docs[i].begin(), j = 1; it != robot_docs[i].end(); ++it, ++j) { temp += *it; if (!(j % chunk_size)) { robot_chunks[i].push_back(temp); temp.clear(); } } temp.clear(); } emit value(1); string::iterator it; int j = 0; string build_chunk; human_chunks_count = target_doc.size() / chunk_size; int a = target_doc.size(); qDebug() << a; for (it = target_doc.begin(), j = 1; it != target_doc.end(); ++it, ++j) { build_chunk += *it; if (!(j % chunk_size)) { human_chunks.push_back(build_chunk); build_chunk.clear(); } } try { if (human_chunks_count < 11) throw std::logic_error("wrong size of text, using " + std::to_string(human_chunks_count)); } catch (std::logic_error& e) { qDebug() << e.what(); emit terminate(); } for (int i : robot_chunks_count) total_robot_chunks += i; for (int i = 0; i < total_robot_chunks-100; ++i) real_lable.push_back(1); for (int i = 0; i < human_chunks_count-10; ++i) real_lable.push_back(0); string doc; for (const std::string& st : robot_docs) doc += st; doc += target_doc; emit value(2); n_gram_calc(doc, n_gram); std::ifstream dictionary_file("dictionary.txt"); string temp; while (std::getline(dictionary_file, temp)) { dictionary.push_back(temp); } dictionary_file.close(); for (int i = 0; i < NUMBER_OF_GROUPS; ++i) { std::vector::iterator it; for (it = robot_chunks[i].begin(); it != robot_chunks[i].end(); ++it) freq_of_robot_ngramm[i].push_back(freq_in_chunk(*it, dictionary)); } for (vector::iterator chunk_iter = human_chunks.begin(); chunk_iter != human_chunks.end(); ++chunk_iter) freq_of_human_ngramm.push_back(freq_in_chunk(*chunk_iter, dictionary)); emit value(3); vector > dzv_matrix; vector row; /*for the robot docs 1*/ for (int i = 0; i < NUMBER_OF_GROUPS; ++i) { for (int j = T; j < robot_chunks_count[i]; ++j) { for (int k = 0; k < NUMBER_OF_GROUPS; ++k) if (i != k) for (int n = T; n < robot_chunks_count[k]; ++n) row.push_back( dzv_calc( T, freq_of_robot_ngramm[i][j], freq_of_robot_ngramm[k][n], j, n, freq_of_robot_ngramm[i], freq_of_robot_ngramm[k] )); /*for all the other docs*/ for (int n = T; n < human_chunks_count; ++n) row.push_back( dzv_calc( T, freq_of_robot_ngramm[i][j], freq_of_human_ngramm[n], j, n, freq_of_robot_ngramm[i], freq_of_human_ngramm )); dzv_matrix.push_back(row); row.clear(); } } /*for the human docs*/ for (int j = T; j < human_chunks_count; ++j) { /*for all the other docs*/ for (int k = 0; k < NUMBER_OF_GROUPS; ++k) for (int n = T; n < robot_chunks_count[k]; ++n) row.push_back( dzv_calc( T, freq_of_human_ngramm[j], freq_of_robot_ngramm[k][n], j, n, freq_of_human_ngramm, freq_of_robot_ngramm[k] )); dzv_matrix.push_back(row); row.clear(); } // ///*write matrix with const columns 1*/ std::ofstream write_matrix("matrix.txt"); unsigned int colums = dzv_matrix[0].size(); std::for_each(dzv_matrix.begin(), dzv_matrix.end(), [&colums](vector& col) mutable { if (col.size() > colums) colums = col.size(); }); for (vector v : dzv_matrix) { for (unsigned int i = 0; i < v.size(); ++i) { write_matrix << v[i]; if (i != (v.size() - 1)) write_matrix << ' '; } for (unsigned int j = 0; j < colums - v.size(); ++j) { write_matrix << ' ' << 0; if (j != (colums - v.size() - 1)) write_matrix << ' '; } write_matrix << '\n'; } write_matrix.close(); /*reading from text 1*/ std::ifstream infile("matrix.txt"); std::string line; while (getline(infile, line)) { std::istringstream is(line); dzv.push_back( std::vector(std::istream_iterator(is), std::istream_iterator())); } infile.close(); emit value(4); std::vector prediction_array; /*using boost*/ namespace bn = boost::python::numpy; namespace bp = boost::python; _putenv_s("PYTHONPATH", "."); Py_Initialize(); bn::initialize(); try { bp::object my_python_class_module = bp::import("mymodule"); int n_rows = dzv.size(); bp::list total; for (int i = 0; i < n_rows; ++i) { bp::tuple shape = bp::make_tuple(dzv[i].size()); bn::dtype dtype = bn::dtype::get_builtin(); bn::ndarray py_mas = bn::zeros(shape, dtype); //vector::iterator it = dzv[i].begin(); for (size_t j = 0; j < dzv[i].size(); ++j) py_mas[j] = dzv[i][j]; total.append(py_mas); } bn::ndarray array = bn::from_object(total); const char* algorithm = nullptr; switch (selection_alg) { case 0: algorithm = "kmedoids"; break; case 1: algorithm = "kmeans"; break; case 2: algorithm = "agglClus"; break; default: algorithm = "kmedoids"; } auto result = my_python_class_module.attr(algorithm)(array); auto result_array = bp::extract(result); const bn::ndarray& ret = result_array(); int input_size = ret.shape(0); int* input_ptr = reinterpret_cast(ret.get_data()); for (int i = 0; i < input_size; ++i) prediction_array.push_back(*(input_ptr + i)); } catch (const bp::error_already_set&) { PyErr_Print(); qWarning("py error"); } Py_Finalize(); /** * using QCharts to show results */ QList pred_first_class, pred_second_class, real_first, real_second; for (size_t i = 0; i < dzv.size(); ++i){ if(prediction_array[i]) pred_first_class << QPointF(dzv[i][0], dzv[i][1]); else pred_second_class << QPointF(dzv[i][0], dzv[i][1]); (real_lable[i]) ? real_first << QPointF(dzv[i][0], dzv[i][1]) : real_second << QPointF(dzv[i][0], dzv[i][1]); } /** * class membership calculation */ /** * for robot docs */ std::vector::iterator end_robot_it = prediction_array.begin()+(total_robot_chunks-100); int zero_robot_count = std::count(prediction_array.begin(), end_robot_it, 0); int one_robot_count = std::count(prediction_array.begin(), end_robot_it, 1); int rob_clus = 0, ret_clus = 0; (one_robot_count > zero_robot_count) ? rob_clus = 1 : rob_clus = 0; /** * for retrieved doc */ int zero_ret_count = std::count(end_robot_it+1, prediction_array.end(), 0); int one_ret_count = std::count(end_robot_it+1, prediction_array.end(), 1); (one_ret_count > zero_ret_count) ? ret_clus = 1 : ret_clus = 0; qDebug() << t.elapsed(); emit value(5); emit run_result(rob_clus, ret_clus); emit run_chart(pred_first_class, pred_second_class, real_first, real_second); }