Emmanuel Hebrard / Blossom

Commit e0faac3a authored Jun 08, 2022 by ehebrard
update

parent 06a08b72

Changes 4
examples/src/blossom.cpp
...
@@ -47,15 +47,50 @@ int run_algorithm(DTOptions &opt) {
     read_non_binary(input, opt);
   }

   // vector<size_t> subset;

   WeightedDataset<E_t> *test_set = new WeightedDataset<E_t>();
   WeightedDataset<E_t> *training_set = new WeightedDataset<E_t>();

   if (opt.test_sample != 0) {

     std::vector<int>::iterator endx[2] = {input.examples[0].bbegin(),
                                           input.examples[1].bbegin()};

     input.drawSample(opt.test_sample, *training_set, *test_set, opt.seed);

     if (opt.sample_only) {
       for (auto y{0}; y < 2; ++y) {
         cout << y << " " << (endx[y] - input.examples[y].bbegin());
         for (auto x{input.examples[y].bbegin()}; x != endx[y]; ++x) {
           cout << " " << *x;
         }
         cout << endl;
         // cout << input.examples[y] << endl;
       }
       // return 0;
     }

     // cout << *training_set << endl;
     // cout << *test_set << endl;

   } else {
     training_set = &input;
   }

   // cout << training_set << endl;

   // cout << input.example_count() << endl;

   if (opt.verbosity >= DTOptions::NORMAL)
     cout << "d readtime=" << cpu_time() << endl;

   ////// PREPROCESING
-  if (opt.preprocessing)
-    input.preprocess(opt.verbosity >= DTOptions::NORMAL);
+  if (opt.preprocessing) {
+    training_set->preprocess(opt.verbosity >= DTOptions::NORMAL);
+  }

   ////// CREATING THE ALGORITHM
-  BacktrackingAlgorithm<ErrorPolicy, E_t> A(input, opt);
+  BacktrackingAlgorithm<ErrorPolicy, E_t> A(*training_set, opt);

   if (opt.verbosity >= DTOptions::NORMAL)
     cout << "d inputtime=" << cpu_time() << endl;
...

@@ -78,7 +113,7 @@ int run_algorithm(DTOptions &opt) {
       if (opt.minsize)
         A.set_size_objective();
       A.minimize_error();
     }
   }

   Tree<E_t> sol = A.getSolution();
...

@@ -86,7 +121,7 @@ int run_algorithm(DTOptions &opt) {
   E_t tree_error = 0;
   for (auto y{0}; y < 2; ++y) {
-    auto X{input[y]};
+    auto X{(*training_set)[y]};
     for (auto i : X)
       tree_error += (sol.predict(X[i]) != y) * X.weight(i);
   }
...

@@ -100,47 +135,76 @@ int run_algorithm(DTOptions &opt) {
   if (opt.pruning) {

-    cout << "p post-pruning (additional error up to " << opt.pruning << ")\n";
-
-    // cout << sol << endl;
-
-    size_t total[2] = {input.total(0), input.total(1)};
-
-    E_t limit{static_cast<E_t>(opt.pruning) - A.error() - input.numInconsistent()};
-    if (limit < 0)
-      limit = 0;
+    cout << "p post-pruning (additional error up to " << opt.pruning << ")\n";
+
+    size_t total[2] = {training_set->total(0), training_set->total(1)};
+
+    E_t limit{static_cast<E_t>(opt.pruning) - A.error() -
+              training_set->numInconsistent()};
+    if (limit < 0)
+      limit = 0;

     sol.prune(total, limit, false);

     E_t tree_error = 0;
     for (auto y{0}; y < 2; ++y) {
-      auto X{input[y]};
+      auto X{(*training_set)[y]};
       for (auto i : X)
         tree_error += (sol.predict(X[i]) != y) * X.weight(i);
     }

-    // double t{cpu_time() - start_time};
-
-    double accuracy{1.0 -
-                    static_cast<double>(tree_error + input.numInconsistent()) /
-                        static_cast<double>(input.input_example_count())};
-
-    // double t{cpu_time() - start_time};
-
-    cout << left << "d accuracy=" << setw(6) << setprecision(4)
-         << fixedwidthfloat(accuracy, 4) << " error=" << setw(4)
-         << tree_error + input.numInconsistent() << " depth=" << setw(3)
-         << sol.depth() << " size=" << setw(3) << sol.size()
-         // << " time=" << setprecision(max(4, static_cast<int>(log10(t))))
-         // << fixedwidthfloat(t, 3) << right
-         << endl;
+    double accuracy{
+        1.0 -
+        static_cast<double>(tree_error + training_set->numInconsistent()) /
+            static_cast<double>(training_set->input_example_count())};
+
+    cout << left << "d accuracy=" << setw(6) << setprecision(4)
+         << fixedwidthfloat(accuracy, 4) << " error=" << setw(4)
+         << tree_error + training_set->numInconsistent() << " depth=" << setw(3)
+         << sol.depth() << " size=" << setw(3) << sol.size()
+         // << " time=" << setprecision(max(4, static_cast<int>(log10(t))))
+         // << fixedwidthfloat(t, 3) << right
+         << endl;

     // cout << "after pruning: " << tree_error << endl;

     // cout << sol.size() << " " << sol.depth() << endl;
   }

   if (opt.tree_file != "") {
     ofstream treefile(opt.tree_file, ios_base::out);
     // treefile << A << endl;
     treefile << sol << endl;
   }

   if (opt.test_sample != 0) {
     E_t tree_error = 0;
     for (auto y{0}; y < 2; ++y) {
       auto X{(*test_set)[y]};
       for (auto i : X) {
         assert(X.weight(i) == 1);
         tree_error += (sol.predict(X[i]) != y) * X.weight(i);
       }
     }

     // assert(tree_error == A.error());

     cout << std::setprecision(std::numeric_limits<long double>::digits10 + 1)
          << std::setw(0) << "d test_error=" << tree_error
          << " test_accuracy=" << setprecision(7)
          << 1.0 - static_cast<double>(tree_error) /
                       static_cast<double>(test_set->example_count())
          << endl;
   }

   if (opt.print_sol) {
     cout << sol << endl;
   }

-  return 1;
+  return 0;
 }
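The held-out evaluation block above accumulates a weighted misclassification count over both classes and reports test accuracy as one minus that count divided by the number of test examples. The same computation in a self-contained sketch (Example, predict and the sample data are placeholders, not the repository's types):

#include <iostream>
#include <vector>

// Illustrative example record: feature vector, true class, multiplicity.
struct Example {
  std::vector<int> features;
  int label;
  long weight;
};

// Illustrative classifier; a real run would call sol.predict(...).
int predict(const std::vector<int> &features) {
  return features.empty() ? 0 : features[0];
}

int main() {
  std::vector<Example> test = {{{1, 0}, 1, 2}, {{0, 1}, 0, 1}, {{1, 1}, 0, 3}};

  long error = 0, total = 0;
  for (const auto &e : test) {
    error += (predict(e.features) != e.label) * e.weight; // weighted miss
    total += e.weight;
  }

  double accuracy =
      1.0 - static_cast<double>(error) / static_cast<double>(total);
  std::cout << "d test_error=" << error << " test_accuracy=" << accuracy
            << std::endl;
}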
...

src/include/CmdLine.hpp

...
@@ -18,6 +18,8 @@ public:
   // the actual options
   string cmdline; // for reference
   string instance_file;
   string tree_file;

   // string test;
   string debug;
   string output;
   string format;
...

@@ -35,7 +37,7 @@ public:
   bool verified;

-  double sample;
+  double test_sample;

   int width;
   double focus;
...

@@ -79,25 +81,27 @@ public:
   double pruning;

   bool sample_only;

   DTOptions(){};
   DTOptions(const DTOptions &opt)
       : cmdline(opt.cmdline), instance_file(opt.instance_file),
-        debug(opt.debug), output(opt.output), format(opt.format),
-        verbosity(opt.verbosity), seed(opt.seed), print_sol(opt.print_sol),
-        print_par(opt.print_par), print_ins(opt.print_ins),
-        print_sta(opt.print_sta), print_cmd(opt.print_cmd),
-        verified(opt.verified), sample(opt.sample), width(opt.width),
-        focus(opt.focus), max_depth(opt.max_depth),
-        restart_base(opt.restart_base), restart_factor(opt.restart_factor),
-        time(opt.time), search(opt.search), bounding(opt.bounding),
-        node_strategy(opt.node_strategy),
+        tree_file(opt.tree_file), debug(opt.debug), output(opt.output),
+        format(opt.format), verbosity(opt.verbosity), seed(opt.seed),
+        print_sol(opt.print_sol), print_par(opt.print_par),
+        print_ins(opt.print_ins), print_sta(opt.print_sta),
+        print_cmd(opt.print_cmd), verified(opt.verified),
+        test_sample(opt.test_sample), width(opt.width), focus(opt.focus),
+        max_depth(opt.max_depth), restart_base(opt.restart_base),
+        restart_factor(opt.restart_factor), time(opt.time), search(opt.search),
+        bounding(opt.bounding), node_strategy(opt.node_strategy),
         feature_strategy(opt.feature_strategy), split(opt.split),
         ada_it(opt.ada_it), ada_stop(opt.ada_stop), filter(opt.filter),
         reference_class(opt.reference_class), mindepth(opt.mindepth),
         minsize(opt.minsize), preprocessing(opt.preprocessing),
         progress(opt.progress), delimiter(opt.delimiter),
-        intarget(opt.intarget), outtarget(opt.outtarget), pruning(opt.pruning) {}
+        intarget(opt.intarget), outtarget(opt.outtarget), pruning(opt.pruning),
+        sample_only(opt.sample_only) {}

   ostream &display(ostream &os);
 };
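The copy constructor above enumerates every option member by hand, which is why this commit has to extend it whenever a field such as test_sample or sample_only is added. Since all members are plain copyable values, a defaulted copy constructor would copy them all automatically; a possible simplification, assuming no member ever needs custom copying:

  DTOptions(const DTOptions &opt) = default;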
...

src/include/WeightedDataset.hpp

...
@@ -26,14 +26,16 @@ public:
   void addExample(const vector<int> &x);
-  void addBitsetExample(instance &x, const bool y);
+  void rmExample(const bool y, const int idx);
+  void addBitsetExample(instance &x, const bool y, const E_t w = 1);

   // template <class Algo> void toInc(Algo &algo);
   // template <class Algo> void setup(Algo &algo) const;

   void preprocess(const bool verbose = false);

-  // randomly select ratio * count(c) examples from classes c in {0,1}
-  void sample(const double ratio, const long seed = 12345);
+  //
+  // randomly select ratio * count(c) examples from classes c in {0,1}
+  // void sample(const double ratio, const long seed = 12345);

   size_t input_count(const bool c) const { return data[c].size(); }
   size_t input_example_count() const { return input_count(0) + input_count(1); }
...

@@ -49,30 +51,13 @@ public:
   template <class selector>
   void printDatasetToFile(ostream &outfile, const string &delimiter,
                           const string &endline, selector not_redundant,
-                          const bool first = true,
-                          const bool header = false) const;
+                          const bool first = true, const bool header = false,
+                          const bool weighted = false) const;

   template <class selector>
   void printHeader(ostream &outfile, const string &delimiter,
                    const string &endline, const string &label,
                    selector not_redundant, const bool first = true) const;

   // void printDatasetToTextFile(ostream &outfile, const bool first = true)
   // const;
   // template <class selector>
   // void printDatasetToTextFile(ostream &outfile, selector s,
   //                             const bool first) const;
   // void printDatasetToCSVFile(ostream &outfile, const string &delimiter = ",",
   //                            const bool first = false) const;
   // template <class selector>
   // void printDatasetToCSVFile(ostream &outfile, const string &delimiter = ",",
   //                            const bool first = false) const;

   class List {
   public:
...

@@ -106,23 +91,39 @@ public:
   size_t numInconsistent() const { return suppression_count; }

   // // remove datapoint in indices and add them to subset
   // template <class Container>
   // void split(WeightedDataset<E_t> &subset, Container &choice);

   void drawSample(const double ratio, WeightedDataset<E_t> &training,
                   WeightedDataset<E_t> &test, const long seed = 12345);

 private:
   vector<instance> data[2];
   vector<E_t> weight[2];

 public:
   SparseSet examples[2];

 private:
   // vector<pair<bool, size_t>> exlog;

   E_t total_weight[2]{0, 0};
   size_t suppression_count{0};
 };

 template <typename E_t>
-void WeightedDataset<E_t>::addBitsetExample(instance &x, const bool y) {
+void WeightedDataset<E_t>::addBitsetExample(instance &x, const bool y,
+                                            const E_t w) {
+  // exlog.push_back({y, data[y].size()});
   data[y].push_back(x);
-  examples[y].reserve(data[y].capacity());
-  examples[y].add(data[y].size() - 1);
-  weight[y].push_back(1);
+  examples[y].reserve(data[y].capacity());
+  examples[y].add(data[y].size() - 1);
+  weight[y].push_back(w);
   ++total_weight[y];
 }
...

@@ -131,6 +132,89 @@ inline void WeightedDataset<E_t>::addExample(const vector<int> &example) {
   return addExample(example.begin(), example.end(), -1, 1);
 }

 // template <typename E_t>
 // template <class Container>
 // void WeightedDataset<E_t>::split(WeightedDataset<E_t> &subset,
 //                                  Container &tests) {
 //   // for (auto y{0}; y < 2; ++y) {
 //   //   for (auto i : indices[y]) {
 //   //     auto x{examples[y][i]};
 //   //     subset.addBitsetExample(data[y][x], y, weight[y][x]);
 //   //     total_weight -= weight[y][x];
 //   //     examples[y].remove_back(x);
 //   //   }
 //   // }
 //   for (auto t : tests) {
 //     auto y{exlog[t].first};
 //     auto x{exlog[t].second};
 //     subset.addBitsetExample(data[y][x], y, weight[y][x]);
 //     total_weight[y] -= weight[y][x];
 //     examples[y].remove_back(x);
 //   }
 // }

 template <typename E_t>
 void WeightedDataset<E_t>::rmExample(const bool y, const int x) {

   auto cur_pos{examples[y].index(x)};
   auto z{examples[y].back()};

   examples[y].remove_back(x);

   assert(examples[y][cur_pos] == z);

   // cout << "replace " << x << " by " << z << endl;
   // // cout << data[y][cur_pos] << endl;
   // // cout << data[y][z] << endl;
   //
   //
   // assert(data[y].size() > x);
   // assert(data[y].size() > z);

   // data[y][x] = data[y][z];
   // data[y].pop_back();
 }

 template <typename E_t>
 void WeightedDataset<E_t>::drawSample(const double ratio,
                                       WeightedDataset<E_t> &training,
                                       WeightedDataset<E_t> &test,
                                       const long seed) {

   mt19937 random_generator;
   random_generator.seed(seed);

   for (auto y{0}; y < 2; ++y) {

     size_t target{static_cast<size_t>(static_cast<double>(count(y)) *
                                       (1.0 - ratio))};

     // cout << target << " / " << count(y) << endl;
     // auto last{examples[y].bbegin()};

     while (count(y) > target) {
       auto i{random_generator() % count(y)};
       auto x{examples[y][i]};

       // cout << x << " -> test" << endl;
       // cout //<< " " << i
       //      << " " << x ;

       test.addBitsetExample(data[y][x], y, weight[y][x]);

       // total_weight[y] -= weight[y][x];
       // // examples[y].remove_back(x);
       //
       rmExample(y, x);

       // cout << (count(y) - target) << endl;
     }
     // cout << endl;

     for (auto x : examples[y]) {
       training.addBitsetExample(data[y][x], y, weight[y][x]);
     }

     // cout << examples[y] << endl;
   }
 }

 template <typename E_t>
 template <class rIter>
 inline void WeightedDataset<E_t>::addExample(rIter beg_row, rIter end_row,
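drawSample above repeatedly draws a uniformly random surviving example with random_generator() % count(y), ships it to the test set, and drops it from the training side with rmExample, which swaps the removed element with the back of the SparseSet. The same select-and-swap-with-back idea in a self-contained sketch over a plain std::vector (the names here are illustrative):

#include <cstddef>
#include <iostream>
#include <random>
#include <utility>
#include <vector>

int main() {
  std::vector<int> pool = {10, 11, 12, 13, 14, 15, 16, 17, 18, 19};
  std::vector<int> test;

  double ratio = 0.3; // fraction of the pool to move into `test`
  std::mt19937 rng(12345);

  // Shrink the pool to (1 - ratio) of its size, mirroring how drawSample
  // reduces count(y) down to its target.
  std::size_t target = static_cast<std::size_t>(
      static_cast<double>(pool.size()) * (1.0 - ratio));

  while (pool.size() > target) {
    std::size_t i = rng() % pool.size(); // uniform index among survivors
    test.push_back(pool[i]);             // send it to the test side
    std::swap(pool[i], pool.back());     // swap-with-back removal, O(1)
    pool.pop_back();
  }

  std::cout << "training=" << pool.size() << " test=" << test.size()
            << std::endl;
}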
...

@@ -140,6 +224,8 @@ inline void WeightedDataset<E_t>::addExample(rIter beg_row, rIter end_row,
   auto column{(width + target) % width};
   auto y{*(beg_row + column)};

   // exlog.push_back({y, data[y].size()});

   ++total_weight[y];

   if (data[y].size() == data[y].capacity()) {
...

@@ -155,8 +241,8 @@ inline void WeightedDataset<E_t>::addExample(rIter beg_row, rIter end_row,
   int f{0};
   for (auto x{beg_row}; x != end_row; ++x) {
-    // assert(*x == 0 or *x == 1);
-    if (*x != 0 and *x != 1)
-      throw 0;
+    if (*x != 0 and *x != 1)
+      throw 0;

     if (x - beg_row != column) {
       if (*x)
         data[y].back().set(f);
...

@@ -179,18 +265,20 @@ inline void WeightedDataset<E_t>::addExample(rIter beg_row, rIter end_row,
 //   algo.setErrorOffset(suppression_count);
 // }

-template <typename E_t>
-void WeightedDataset<E_t>::sample(const double ratio, const long seed) {
-
-  mt19937 random_generator;
-  random_generator.seed(seed);
-
-  for (auto y{0}; y < 2; ++y) {
-    size_t target{static_cast<size_t>(static_cast<double>(count(y)) * ratio)};
-    while (count(y) > target) {
-      auto i{random_generator() % count(y)};
-      examples[y].remove_back(examples[y][i]);
-    }
-  }
-}
+// template <typename E_t> void WeightedDataset<E_t>::sample(const double ratio,
+//                                                           const long seed) {
+//   mt19937 random_generator;
+//   random_generator.seed(seed);
+//
+//   for (auto y{0}; y < 2; ++y) {
+//     size_t target{static_cast<size_t>(static_cast<double>(count(y)) *
+//     ratio)};
+//     while (count(y) > target) {
+//       auto i{random_generator() % count(y)};
+//       examples[y].remove_back(examples[y][i]);
+//     }
+//   }
+// }

 template <typename E_t> void WeightedDataset<E_t>::preprocess(const bool verbose) {
...

@@ -269,10 +357,10 @@ template <typename E_t> void WeightedDataset<E_t>::preprocess(const bool verbose
       // assert(i[y] < weight[y].size());

-      if (x[y] != end[y])
-        wght[y] = weight[y][i[y]];
-      else
-        wght[y] = 0;
+      if (x[y] != end[y])
+        wght[y] = weight[y][i[y]];
+      else
+        wght[y] = 0;
     }
   }
 }
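For context, the preprocessing pass that this hunk touches merges duplicate rows into a single example per class whose weight is the number of occurrences, and cancels the weight that appears in both classes, since that overlap can never be classified correctly; the cancelled amount is what numInconsistent() reports. A simplified sketch of that idea using a map instead of the sorted-vector merge (illustrative only, not the repository's implementation):

#include <algorithm>
#include <iostream>
#include <map>
#include <vector>

int main() {
  // Toy binary dataset: rows of 0/1 features for class 0 and class 1.
  std::vector<std::vector<int>> cls[2] = {
      {{0, 1}, {0, 1}, {1, 1}},          // class 0
      {{0, 1}, {1, 0}, {1, 0}, {1, 0}}}; // class 1

  // Merge duplicates within each class into a weight per distinct row.
  std::map<std::vector<int>, long> weight[2];
  for (int y = 0; y < 2; ++y)
    for (const auto &row : cls[y])
      ++weight[y][row];

  // Rows present in both classes are inconsistent: cancel the overlapping
  // weight and count it as suppressed.
  long suppressed = 0;
  for (auto &entry : weight[0]) {
    auto it = weight[1].find(entry.first);
    if (it != weight[1].end()) {
      long overlap = std::min(entry.second, it->second);
      entry.second -= overlap;
      it->second -= overlap;
      suppressed += overlap;
    }
  }

  std::cout << "d suppressed=" << suppressed << std::endl; // prints 1 here
}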
...
@@ -347,6 +435,174 @@ template <typename E_t> void WeightedDataset<E_t>::preprocess(const bool verbose
// // cout << suppression_count << endl;
}
// template <typename E_t> void WeightedDataset<E_t>::preprocess(const bool
// verbose) {
//
// auto t{cpu_time()};
//
// suppression_count = 0;
// // unsigned long dup_count = 0; // for statistics
//
// for (int y = 0; y < 2; ++y) {
// std::sort(examples[y].begin(), examples[y].end(), [&](const int i, const
// int j) {return data[y][i] <= data[y][j]});
// }
//
// if (verbose)
// cout << "d sorttime=" << cpu_time() - t << endl;
//
// vector<instance>::iterator x[2] = {data[0].begin(), data[1].begin()};
// vector<instance>::iterator end[2] = {data[0].end(), data[1].end()};
//
// // int wght[2] = {1, 1};
//
// int i[2] = {0, 0};
// E_t wght[2] = {weight[0][i[0]], weight[1][i[1]]};
//
// // cout << endl << setw(3) << data[0].size() << " " << setw(3) <<
// // data[1].size() << endl;
//
// while (x[0] != end[0] and x[1] != end[1]) {
//
// // cout << endl << setw(3) << i[0] << " " << setw(3) << i[1] << endl;
//
// for (int y = 0; y < 2; ++y)
// while (x[y] != (end[y] - 1) and *(x[y]) == *(x[y] + 1)) {
// // cout << "remove (" << y << ") " << i[y] << endl;
// examples[y].remove_back(i[y]);
// ++x[y];
// ++i[y];
// wght[y] += weight[y][i[y]];
// }
//
// // cout << setw(3) << i[0] << " " << setw(3) << i[1] << endl;
//
// if (*x[0] < *x[1]) {
// weight[0][i[0]] = wght[0];
// ++x[0];
// ++i[0];
// wght[0] = weight[0][i[0]];
// } else if (*x[0] > *x[1]) {
// weight[1][i[1]] = wght[1];
// ++x[1];
// ++i[1];
// wght[1] = weight[1][i[1]];
// } else {
// if (wght[0] < wght[1]) {
// weight[1][i[1]] = wght[1] - wght[0];
//
// // cout << "remove0 " << i[0] << endl;
// examples[0].remove_back(i[0]);
// suppression_count += wght[0];
// } else if (wght[0] > wght[1]) {
// weight[0][i[0]] = wght[0] - wght[1];
//
// // cout << "remove1 " << i[1] << endl;
// examples[1].remove_back(i[1]);
// suppression_count += wght[1];
// } else {
// suppression_count += wght[1];
//
// // cout << "remove " << i[0] << " and " << i[1] << endl;
//
// examples[0].remove_back(i[0]);
// examples[1].remove_back(i[1]);
// }
// for (int y = 0; y < 2; ++y) {
// ++x[y];
// ++i[y];
//
// // assert(i[y] < weight[y].size());
//
// if (x[y] != end[y])
// wght[y] = weight[y][i[y]];
// else
// wght[y] = 0;
// }
// }
// }
//
// for (int y = 0; y < 2; ++y) {
//
// // cout << "wght[y]: " << wght[y] << endl;
//
// wght[y] = 0;
//
// for (; x[y] != end[y]; ++x[y]) {
// assert(x[1 - y] == end[1 - y]);
//
// wght[y] += weight[y][i[y]];
//
// if (x[y] == end[y] - 1 or *x[y] != *(x[y] + 1)) {
// weight[y][i[y]] = wght[y];
// wght[y] = 0;
// } else {
//
// // cout << "remove end " << i[y] << endl;
//
// examples[y].remove_back(i[y]);
// }
// ++i[y];
// }
// }
//
// auto dup_count{input_count(0) + input_count(1) - count(0) - count(1) -
// 2 * suppression_count};
// if (verbose)
// std::cout << "d duplicate=" << dup_count
// << " suppressed=" << suppression_count << " ratio="
// << float(dup_count + 2 * suppression_count) /
// input_example_count()
// << " count=" << input_example_count() << " negative=" <<
// count(0)
// << " positive=" << count(1) << " final_count=" <<
// example_count()
// << "\nd preprocesstime=" << cpu_time() - t << endl;
//
// for (auto i{0}; i < 2; ++i)
// total_weight[i] -= suppression_count;