diff options
Diffstat (limited to 'detectionalgorithm.cpp')
-rwxr-xr-x | detectionalgorithm.cpp | 321 |
1 files changed, 321 insertions, 0 deletions
diff --git a/detectionalgorithm.cpp b/detectionalgorithm.cpp new file mode 100755 index 0000000..eda909e --- /dev/null +++ b/detectionalgorithm.cpp @@ -0,0 +1,321 @@ +#include "detectionalgorithm.h"
+#include "utilities.h"
+#include <QScatterSeries>
+#include <QChart>
+#include <QChartView>
+#include <QtWidgets/QHBoxLayout>
+#include <QDebug>
+#include <stdexcept>
+#include <QTime>
+
+DetectionAlgorithm::DetectionAlgorithm()
+ :
+ QObject(),
+ chunk_size(400),
+ n_gram(5),
+ target_doc(),
+ human_chunks_count(0),
+ robot_chunks(NUMBER_OF_GROUPS),
+ total_robot_chunks(0),
+ freq_of_robot_ngramm(NUMBER_OF_GROUPS),
+ selection_alg(0)
+{
+
+}
+
+void DetectionAlgorithm::setOptions(int chunk, int n_g, const std::string& d, int select)
+{
+ chunk_size = chunk;
+ n_gram = n_g;
+ target_doc = d;
+ selection_alg = select;
+}
+
+void DetectionAlgorithm::run_algorithm()
+{
+
+ QTime t;
+ t.start();
+ QApplication::processEvents();
+ using std::string;
+ using std::vector;
+ std::ifstream file;
+ for (int i = 0; i < NUMBER_OF_GROUPS; ++i) {
+ QFile file(":resource/R" + QString::number(i+1) + ".txt");
+ if (!file.open(QIODevice::ReadOnly)) {
+ qWarning("Cannot open file for reading");
+ }
+ QTextStream in(&file);
+ QString q_doc;
+ while (!in.atEnd())
+ q_doc = in.readAll();
+ file.close();
+ std::string rob_doc = q_doc.toLocal8Bit().constData();
+ prepare(rob_doc);
+ robot_docs.push_back(rob_doc);
+ }
+ prepare(target_doc);
+
+ for (int i = 0; i < NUMBER_OF_GROUPS; ++i) {
+ robot_chunks_count.push_back(robot_docs[i].size() / chunk_size);
+ string temp;
+ string::iterator it;
+ int j;
+ for (it = robot_docs[i].begin(), j = 1; it != robot_docs[i].end(); ++it, ++j) {
+ temp += *it;
+ if (!(j % chunk_size)) {
+ robot_chunks[i].push_back(temp);
+ temp.clear();
+ }
+ }
+ temp.clear();
+ }
+
+ emit value(1);
+
+ string::iterator it;
+ int j = 0;
+ string build_chunk;
+ human_chunks_count = target_doc.size() / chunk_size;
+ int a = target_doc.size();
+ qDebug() << a;
+ for (it = target_doc.begin(), j = 1; it != target_doc.end(); ++it, ++j) {
+ build_chunk += *it;
+ if (!(j % chunk_size)) {
+ human_chunks.push_back(build_chunk);
+ build_chunk.clear();
+ }
+ }
+ try {
+ if (human_chunks_count < 11)
+ throw std::logic_error("wrong size of text, using " + std::to_string(human_chunks_count));
+ } catch (std::logic_error& e) {
+ qDebug() << e.what();
+ emit terminate();
+ }
+
+ for (int i : robot_chunks_count) total_robot_chunks += i;
+
+ for (int i = 0; i < total_robot_chunks-100; ++i)
+ real_lable.push_back(1);
+ for (int i = 0; i < human_chunks_count-10; ++i)
+ real_lable.push_back(0);
+
+ string doc;
+ for (const std::string& st : robot_docs) doc += st;
+ doc += target_doc;
+
+ emit value(2);
+
+ n_gram_calc(doc, n_gram);
+
+ std::ifstream dictionary_file("dictionary.txt");
+ string temp;
+ while (std::getline(dictionary_file, temp)) {
+ dictionary.push_back(temp);
+ }
+ dictionary_file.close();
+
+ for (int i = 0; i < NUMBER_OF_GROUPS; ++i) {
+ std::vector<std::string>::iterator it;
+
+ for (it = robot_chunks[i].begin(); it != robot_chunks[i].end(); ++it)
+ freq_of_robot_ngramm[i].push_back(freq_in_chunk(*it, dictionary));
+ }
+
+ for (vector<string>::iterator chunk_iter = human_chunks.begin(); chunk_iter != human_chunks.end(); ++chunk_iter)
+ freq_of_human_ngramm.push_back(freq_in_chunk(*chunk_iter, dictionary));
+
+ emit value(3);
+
+ vector <vector<double>> dzv_matrix;
+ vector <double> row;
+
+ /*for the robot docs 1*/
+ for (int i = 0; i < NUMBER_OF_GROUPS; ++i) {
+ for (int j = T; j < robot_chunks_count[i]; ++j) {
+ for (int k = 0; k < NUMBER_OF_GROUPS; ++k)
+ if (i != k)
+ for (int n = T; n < robot_chunks_count[k]; ++n)
+ row.push_back(
+ dzv_calc(
+ T, freq_of_robot_ngramm[i][j],
+ freq_of_robot_ngramm[k][n],
+ j, n,
+ freq_of_robot_ngramm[i],
+ freq_of_robot_ngramm[k]
+ ));
+
+ /*for all the other docs*/
+ for (int n = T; n < human_chunks_count; ++n)
+ row.push_back(
+ dzv_calc(
+ T, freq_of_robot_ngramm[i][j],
+ freq_of_human_ngramm[n],
+ j, n,
+ freq_of_robot_ngramm[i],
+ freq_of_human_ngramm
+ ));
+ dzv_matrix.push_back(row);
+ row.clear();
+
+ }
+ }
+
+ /*for the human docs*/
+ for (int j = T; j < human_chunks_count; ++j) {
+ /*for all the other docs*/
+ for (int k = 0; k < NUMBER_OF_GROUPS; ++k)
+ for (int n = T; n < robot_chunks_count[k]; ++n)
+ row.push_back(
+ dzv_calc(
+ T, freq_of_human_ngramm[j],
+ freq_of_robot_ngramm[k][n],
+ j, n,
+ freq_of_human_ngramm,
+ freq_of_robot_ngramm[k]
+ ));
+ dzv_matrix.push_back(row);
+ row.clear();
+
+ }
+
+// ///*write matrix with const columns 1*/
+
+ std::ofstream write_matrix("matrix.txt");
+ unsigned int colums = dzv_matrix[0].size();
+ std::for_each(dzv_matrix.begin(), dzv_matrix.end(),
+ [&colums](vector<double>& col) mutable {
+ if (col.size() > colums) colums = col.size();
+ });
+ for (vector<double> v : dzv_matrix) {
+ for (unsigned int i = 0; i < v.size(); ++i) {
+ write_matrix << v[i];
+ if (i != (v.size() - 1))
+ write_matrix << ' ';
+ }
+ for (unsigned int j = 0; j < colums - v.size(); ++j) {
+ write_matrix << ' ' << 0;
+ if (j != (colums - v.size() - 1))
+ write_matrix << ' ';
+ }
+ write_matrix << '\n';
+ }
+ write_matrix.close();
+
+ /*reading from text 1*/
+
+ std::ifstream infile("matrix.txt");
+ std::string line;
+ while (getline(infile, line)) {
+ std::istringstream is(line);
+ dzv.push_back(
+ std::vector<double>(std::istream_iterator<double>(is),
+ std::istream_iterator<double>()));
+ }
+ infile.close();
+
+ emit value(4);
+
+ std::vector<int> prediction_array;
+
+ /*using boost*/
+ namespace bn = boost::python::numpy;
+ namespace bp = boost::python;
+ _putenv_s("PYTHONPATH", ".");
+ Py_Initialize();
+ bn::initialize();
+ try
+ {
+ bp::object my_python_class_module = bp::import("mymodule");
+
+ int n_rows = dzv.size();
+
+ bp::list total;
+ for (int i = 0; i < n_rows; ++i) {
+ bp::tuple shape = bp::make_tuple(dzv[i].size());
+ bn::dtype dtype = bn::dtype::get_builtin<double>();
+ bn::ndarray py_mas = bn::zeros(shape, dtype);
+ //vector<double>::iterator it = dzv[i].begin();
+ for (size_t j = 0; j < dzv[i].size(); ++j)
+ py_mas[j] = dzv[i][j];
+ total.append(py_mas);
+ }
+ bn::ndarray array = bn::from_object(total);
+ const char* algorithm = nullptr;
+ switch (selection_alg) {
+ case 0:
+ algorithm = "kmedoids";
+ break;
+ case 1:
+ algorithm = "kmeans";
+ break;
+ case 2:
+ algorithm = "agglClus";
+ break;
+ default:
+ algorithm = "kmedoids";
+ }
+ auto result = my_python_class_module.attr(algorithm)(array);
+ auto result_array = bp::extract<bn::ndarray>(result);
+ const bn::ndarray& ret = result_array();
+ int input_size = ret.shape(0);
+ int* input_ptr = reinterpret_cast<int*>(ret.get_data());
+ for (int i = 0; i < input_size; ++i)
+ prediction_array.push_back(*(input_ptr + i));
+ }
+ catch (const bp::error_already_set&)
+ {
+ PyErr_Print();
+ qWarning("py error");
+ }
+
+ Py_Finalize();
+
+ /**
+ * using QCharts to show results
+ */
+ QList<QPointF> pred_first_class, pred_second_class, real_first, real_second;
+ for (size_t i = 0; i < dzv.size(); ++i){
+ if(prediction_array[i])
+ pred_first_class << QPointF(dzv[i][0], dzv[i][1]);
+ else
+ pred_second_class << QPointF(dzv[i][0], dzv[i][1]);
+ (real_lable[i]) ? real_first << QPointF(dzv[i][0], dzv[i][1]) :
+ real_second << QPointF(dzv[i][0], dzv[i][1]);
+ }
+
+ /**
+ * class membership calculation
+ */
+ /**
+ * for robot docs
+ */
+ std::vector<int>::iterator end_robot_it = prediction_array.begin()+(total_robot_chunks-100);
+ int zero_robot_count = std::count(prediction_array.begin(),
+ end_robot_it,
+ 0);
+ int one_robot_count = std::count(prediction_array.begin(),
+ end_robot_it,
+ 1);
+ int rob_clus = 0, ret_clus = 0;
+ (one_robot_count > zero_robot_count) ? rob_clus = 1 : rob_clus = 0;
+
+ /**
+ * for retrieved doc
+ */
+ int zero_ret_count = std::count(end_robot_it+1,
+ prediction_array.end(),
+ 0);
+ int one_ret_count = std::count(end_robot_it+1,
+ prediction_array.end(),
+ 1);
+ (one_ret_count > zero_ret_count) ? ret_clus = 1 : ret_clus = 0;
+
+
+ qDebug() << t.elapsed();
+
+ emit value(5);
+ emit run_result(rob_clus, ret_clus);
+ emit run_chart(pred_first_class, pred_second_class, real_first, real_second);
+}
|