Commit b61665a8 authored by Louis Jean
Browse files

WIP dataset

parent d3f677fe
......@@ -75,13 +75,12 @@ private:
//@{
/// Argument
Wood &wood;
// DataSet &data;
Dataset &data;
int num_feature;
// size_t numExample() const;
// size_t numExample[2];
DTOptions &options;
vector<vector<int>> example[2];
vector<int> relevant_features;
dynamic_bitset<> feature_set;
......@@ -317,12 +316,9 @@ private:
public:
ErrorPolicy<E_t> error_policy;
vector<instance> dataset[2];
vector<dynamic_bitset<>> reverse_dataset[2];
/*!@name Constructors*/
//@{
explicit BacktrackingAlgorithm(Wood &w, DTOptions &o);
explicit BacktrackingAlgorithm(Wood &w, Dataset& dataset, DTOptions &o);
void setData(const DataSet &data);
void setReverse();
void seed(const int s);
......
#include <vector>
#include <boost/dynamic_bitset.hpp>
using namespace boost;
using namespace std;
namespace primer {
/// A single example stored as a bitset
/// (NOTE(review): presumably one bit per feature -- confirm against the code that fills it).
using instance = dynamic_bitset<>;
/// Groups three containers describing a set of examples with binary features.
/// Every member is a pair of arrays indexed by 0/1
/// (NOTE(review): presumably the class label of the examples -- confirm).
class Dataset {
public:
/// For each example, the list of its positive features
/// (NOTE(review): presumably stored as feature indices -- confirm).
vector<vector<int>> example[2];
/// Dataset as a bitset: one `instance` (a dynamic_bitset<>) per example
vector<instance> dataset[2];
/// For each feature, a bitset telling which examples have it
vector<dynamic_bitset<>> reverse_dataset[2];
};
/// Dataset that additionally carries values of type T in `weights`.
/// NOTE(review): presumably one weight per example, with weights[y]
/// parallel to the inherited example[y] / dataset[y] -- confirm.
template <typename T>
class WeightedDataset : public Dataset {
public:
/// Two arrays of T, indexed by 0/1 like the inherited members
vector<T> weights[2];
};
}
......@@ -117,8 +117,9 @@ void WeightedError<E_t>::clear_examples(Algo &algo) {
template <template<typename> class ErrorPolicy, typename E_t>
BacktrackingAlgorithm<ErrorPolicy, E_t>::BacktrackingAlgorithm(Wood &w,
Dataset &dataset,
DTOptions &opt)
: wood(w), options(opt) {
: wood(w), dataset(dataset), options(opt) {
// start_time = cpu_time();
......@@ -181,23 +182,23 @@ void BacktrackingAlgorithm<ErrorPolicy, E_t>::setReverse() {
}
template <template<typename> class ErrorPolicy, typename E_t>
void BacktrackingAlgorithm<ErrorPolicy, E_t>::setData(const DataSet &data) {
num_feature = static_cast<int>(data.numFeature());
void BacktrackingAlgorithm<ErrorPolicy, E_t>::setData(const DataSet &dset) {
num_feature = static_cast<int>(dset.numFeature());
f_error.resize(num_feature, 1);
f_entropy.resize(num_feature, 1);
f_gini.resize(num_feature, 1);
for (int y{0}; y < 2; ++y) {
dataset[y].resize(data.example[y].count());
example[y].resize(data.example[y].count());
dataset[y].resize(dset.example[y].count());
example[y].resize(dset.example[y].count());
auto k{0};
for (auto i : data.example[y]) {
for (auto i : dset.example[y]) {
// cout << k << ":";
dataset[y][k].resize(num_feature);
dataset[y][k] = data[i];
dataset[y][k] = dset[i];
for (auto j{0}; j < num_feature; ++j)
if (data.hasFeature(i, j)) {
if (dset.hasFeature(i, j)) {
example[y][k].push_back(j);
// cout << " " << j;
}
......
#include "Dataset.hpp"
namespace primer {
}
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment