Commit a149a9de authored by ehebrard

binarizer

parent a5550681
......@@ -39,7 +39,6 @@ void printToFile(WeightedDataset<E_t> &input, DTOptions &opt) {
std::function<bool(const int f)> relevant = [](const int f) { return true; };
BacktrackingAlgorithm<WeightedError, E_t> A(opt);
if (opt.filter) {
......@@ -99,10 +98,11 @@ int main(int argc, char *argv[]) {
// std::function<bool(const int f)> relevant = [](const int f) { return true; };
// input.printDatasetToFile(cout, string(" "), string(""), relevant,
// opt.outtarget != -1, false);
// // cout << input << endl;
if (opt.sample < 1)
input.sample(opt.sample);
// cout << input << endl;
////// PREPROCESING
if (opt.preprocessing)
......
......@@ -14,7 +14,7 @@ namespace csv
// template< >
template <typename header_declaration, typename data_declaration>
void read(const std::string &fn, header_declaration notify_header,
data_declaration notify_data, std::string delimeter = ",; |\t") {
data_declaration notify_data, std::string delimeter = ",;|\t") {
using std::cerr;
try {
std::ifstream ifs(fn);
......
......@@ -9,95 +9,95 @@
namespace blossom
{
template <typename E_t>
void read_non_binary(WeightedDataset<E_t> &base, DTOptions &opt) {
TypedDataSet input;
string ext{opt.instance_file.substr(opt.instance_file.find_last_of(".") + 1)};
auto target_column{-1};
if (opt.format != "guess")
target_column = opt.intarget;
if (opt.format == "csv" or (opt.format == "guess" and ext == "csv"))
csv::read(opt.instance_file,
[&](vector<string> &f) {
input.setFeatures(f.begin(), f.end(), target_column);
},
[&](vector<string> &data) {
input.addExample(data.begin(), data.end(), target_column);
});
else if (opt.format == "data" or
(opt.format == "guess" and ext == "data")) {
cout << "data file\n";
auto names = opt.instance_file.substr(
0, opt.instance_file.find_last_of("/") + 1) +
string("names");
cout << names << endl;
csv::read(names,
[&](vector<string> &f) {
input.setFeatures(f.begin(), f.end(), target_column);
},
[&](vector<string> &data) {});
cout << input << endl;
// names::read()
exit(1);
// exit(1);
} else {
if (opt.format == "dl8" or (opt.format == "guess" and ext == "dl8")) {
target_column = 0;
}
txt::read(opt.instance_file, [&](vector<string> &data) {
input.addExample(data.begin(), data.end(), target_column);
});
}
cout << input << endl;
// cout << input << endl;
input.binarize(base);
}
input.binarize(base);
template <typename E_t>
void read_binary(WeightedDataset<E_t> &input, DTOptions &opt) {
// WeightedDataset input;
// cout << base << endl;
}
string ext{opt.instance_file.substr(
opt.instance_file.find_last_of(".") + 1)};
template <typename E_t>
void read_binary(WeightedDataset<E_t> &input, DTOptions &opt) {
// WeightedDataset input;
auto target_column{-1};
string ext{opt.instance_file.substr(opt.instance_file.find_last_of(".") + 1)};
if (opt.format != "guess")
target_column = opt.intarget;
auto target_column{-1};
if (opt.format == "csv" or (opt.format == "guess" and ext == "csv")) {
csv::read_binary(opt.instance_file, [&](vector<int> &data) {
input.addExample(data.begin(), data.end(), target_column);
});
} else {
if (opt.format != "guess")
target_column = opt.intarget;
if (opt.format == "dl8" or (opt.format == "guess" and ext == "dl8")) {
target_column = 0;
}
if (opt.format == "csv" or (opt.format == "guess" and ext == "csv")) {
csv::read_binary(opt.instance_file, [&](vector<int> &data) {
input.addExample(data.begin(), data.end(), target_column);
});
} else {
txt::read_binary(opt.instance_file, [&](vector<int> &data) {
input.addExample(data.begin(), data.end(), target_column);
});
}
if (opt.format == "dl8" or (opt.format == "guess" and ext == "dl8")) {
target_column = 0;
}
txt::read_binary(opt.instance_file, [&](vector<int> &data) {
input.addExample(data.begin(), data.end(), target_column);
});
}
// input.toInc(A);
}
// input.toInc(A);
}
} // namespace blossom
#endif
......@@ -238,7 +238,12 @@ public:
template <typename T> class Order : public ClassicEncoding<T> {
private:
size_t num_examples;
public:
Order(const size_t n) : ClassicEncoding<T>(), num_examples(n) {}
virtual size_t size() const {
return ClassicEncoding<T>::value_set.size() - 1;
}
......@@ -254,7 +259,22 @@ public:
// for (auto v : ClassicEncoding<T>::value_set)
// cout << " " << v;
// cout << endl;
cout << ClassicEncoding<T>::value_set.size() << " " << (num_examples) ;
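// few distinct values relative to the number of examples: use one threshold
// per value; otherwise fall back to the coarser interval-based encoding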
if(ClassicEncoding<T>::value_set.size() < sqrt(num_examples)) {
cout << " full\n";
full_encoding();
} else {
cout << " reduced\n";
reduced_encoding();
}
}
void full_encoding() {
auto vb{ClassicEncoding<T>::value_set.begin()};
auto ve{ClassicEncoding<T>::value_set.end() - 1};
......@@ -264,7 +284,40 @@ public:
e.resize(ve - vb, true);
ClassicEncoding<T>::encoding_map[*i] = e;
}
}
}
void reduced_encoding() {
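// coarser order encoding: about log(#values) thresholds instead of one per
// value; each value is mapped to a unary bitset whose bit j is set iff the
// value is <= the j-th boundary value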
size_t num_intervals{static_cast<size_t>(log(static_cast<double>(ClassicEncoding<T>::value_set.size())))};
if(num_intervals < 1)
num_intervals = 1;
vector<size_t> boundary;
size_t i_size{ClassicEncoding<T>::value_set.size() / num_intervals};
size_t next_boundary{i_size};
while (next_boundary < ClassicEncoding<T>::value_set.size()) {
boundary.push_back(next_boundary);
next_boundary += i_size;
}
auto vb{ClassicEncoding<T>::value_set.begin()};
auto ve{ClassicEncoding<T>::value_set.end() - 1};
size_t b{0};
for (auto i{vb}; i <= ve; ++i) {
dynamic_bitset<> e;
while (b < boundary.size() and
ClassicEncoding<T>::value_set[boundary[b]] < *i)
++b;
e.resize(b, false);
e.resize(boundary.size(), true);
ClassicEncoding<T>::encoding_map[*i] = e;
}
}
virtual const string getType() const {return "order";}
......@@ -276,6 +329,83 @@ public:
}
};
template <typename T> class Interval : public ClassicEncoding<T> {
// private:
// size_t num_intervals;
public:
// Interval(const size_t n) : ClassicEncoding<T>(), num_intervals(n) {}
virtual size_t size() const {
return ClassicEncoding<T>::value_set.size() - 1;
}
// encode the values of the iterator
// template <typename RandomIt>
void encode(typename std::vector<T>::iterator beg,
typename std::vector<T>::iterator end) {
ClassicEncoding<T>::encode(beg, end);
// for (auto v{ClassicEncoding<T>::value_set.begin() + 1};
// v < ClassicEncoding<T>::value_set.end(); ++v) {
// assert(*(v - 1) < *v);
// }
size_t num_intervals{static_cast<size_t>(log(static_cast<double>(ClassicEncoding<T>::value_set.size())))};
if(num_intervals < 1)
num_intervals = 1;
vector<size_t> boundary;
size_t i_size{ClassicEncoding<T>::value_set.size() / num_intervals};
size_t next_boundary{i_size};
while (next_boundary < ClassicEncoding<T>::value_set.size()) {
boundary.push_back(next_boundary);
next_boundary += i_size;
}
// for(auto v : boundary) {
// cout << v << ": " << ClassicEncoding<T>::value_set[v] << endl;
// }
// cout << ClassicEncoding<T>::value_set.size() << endl;
// exit(1);
// cout << "unary order encode\n";
// for (auto v : ClassicEncoding<T>::value_set)
// cout << " " << v;
// cout << endl;
auto vb{ClassicEncoding<T>::value_set.begin()};
auto ve{ClassicEncoding<T>::value_set.end() - 1};
// auto b{boundary.begin()};
size_t b{0};
for (auto i{vb}; i <= ve; ++i) {
dynamic_bitset<> e;
while (b < boundary.size() and
ClassicEncoding<T>::value_set[boundary[b]] < *i)
++b;
e.resize(b, false);
e.resize(boundary.size(), true);
ClassicEncoding<T>::encoding_map[*i] = e;
}
}
virtual const string getType() const { return "order"; }
// returns (in string format) the test x[i]
const string getLabel(const int i, const int v) const {
std::stringstream ss;
ss << (v ? "<=" : ">") << ClassicEncoding<T>::value_set[i];
return ss.str();
}
};
template <typename T> class Direct : public ClassicEncoding<T> {
public:
......@@ -492,7 +622,7 @@ public:
// int_encoder.push_back(enc);
// }
else {
enc = new Order<int>();
enc = new Order<int>(int_value[feature_rank[f]].size());
enc->encode(int_buffer.begin(), int_buffer.end());
int_encoder.push_back(enc);
}
......@@ -505,18 +635,42 @@ public:
} else if (feature_type[f] == FLOAT) {
// cout << "compute set\n";
computeSet(float_value[feature_rank[f]], float_buffer);
Encoding<float> *enc = new Order<float>();
// cout << feature_label[f] << " " << float_buffer.size() << endl;
//
// for(auto x : float_buffer)
// cout << " " << x;
// cout << endl;
//
// exit(1);
// cout << "constructor\n";
Encoding<float> *enc = new Order<float>(float_value[feature_rank[f]].size());
// cout << "encode\n";
enc->encode(float_buffer.begin(), float_buffer.end());
// cout << "push enc\n";
float_encoder.push_back(enc);
// cout << "float (" << feature_rank[f] << "/" << float_encoder.size()
// << "):";
// for (auto v : float_buffer)
// cout << "\n" << v << " -> " << enc->getEncoding(v);
// cout << endl;
// cout << endl;
//
// exit(1);
} else if (feature_type[f] == SYMBOL) {
......@@ -579,7 +733,11 @@ public:
// bin.duplicate_format(binex, db);
// bin.add(db, label[i] != min_label);
bin.addBitsetExample(binex, label[i] != min_label);
// cout << binex << endl;
}
// cout << bin.example_count() << endl;
}
......
......@@ -56,6 +56,9 @@ public:
void printHeader(ostream &outfile, const string &delimiter,
const string &endline, const string &label,
selector not_redundant, const bool first = true) const;
// void printDatasetToTextFile(ostream &outfile, const bool first =
// true)
......@@ -116,6 +119,10 @@ private:
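// append an already-binarized example x with class y and unit weight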
template <typename E_t>
void WeightedDataset<E_t>::addBitsetExample(instance &x, const bool y) {
data[y].push_back(x);
examples[y].reserve(data[y].capacity());
examples[y].add(data[y].size()-1);
weight[y].push_back(1);
++total_weight[y];
}
......@@ -388,6 +395,14 @@ void WeightedDataset<E_t>::printDatasetToFile(
}
}
}
template <typename E_t>
std::ostream &operator<<(std::ostream &os, const WeightedDataset<E_t> &x) {
x.printDatasetToFile(os, string(" "), string(""),
[](const int f) { return true; }, 0,
true);
return os;
}
}
#endif // _BLOSSOM_WEIGHTEDDATASET_HPP
\ No newline at end of file
......@@ -349,7 +349,7 @@ Let $\afeat_i <_{\abranch} \afeat_j$ if and only if feature $\afeat_i$ is select
\begin{itemize}
\item \sequence\ represents the current decision tree: if $(\abranch,\afeat) \in \sequence$, then the current tree tests feature $\afeat$ at the extremity of branch $\abranch$. We say that the branch $\abranch$ is in the current tree, and that feature $\afeat$ is tested on branch $\abranch$.
\item If $(\abranch,\afeat) \in \sequence$, then every sub-tree of $\abranch$ starting with a feature test $\aofeat <_{\abranch} \afeat$ has already been explored and $\best[\abranch]$ contains the minimum of their errors. The set $\dom[\abranch]$ contains all \emph{untried} feature tests for branch $\abranch$ ($\dom[\abranch] = \{\aofeat \mid \aofeat \in \features ~\wedge~ \afeat <_{\abranch} \aofeat\}$).
\item If $(\abranch,\afeat) \in \sequence$, then every subtree of $\abranch$ starting with a feature test $\aofeat <_{\abranch} \afeat$ has already been explored and $\best[\abranch]$ contains the minimum of their errors. The set $\dom[\abranch]$ contains all \emph{untried} feature tests for branch $\abranch$ ($\dom[\abranch] = \{\aofeat \mid \aofeat \in \features ~\wedge~ \afeat <_{\abranch} \aofeat\}$).
\item If $(\abranch,\afeat) \in \sequence$ but one of its children $\grow{\abranch}{\afeat}$ or $\grow{\abranch}{\bar{\afeat}}$ (call it $\aobranch$) is not in the current tree, then either:
......@@ -370,7 +370,7 @@ expand the tree with the test $\afeat$ at branch $\abranch$. The two children $\
If there is no bud ($\bud = \emptyset$), then the current tree is complete: every branch $\abranch$ is either terminal or optimal. In that case we pop the last assignment $(\abranch,\afeat)$ from \sequence\
%, mark the feature $\afeat$ as tried for branch $\abranch$
and update the best error of its subtrees. If there is at least one untried feature for branch $\abranch$, we add $\abranch$ to $\bud$.
Otherwise, it is optimal since all features have been tried, and $\best[\abranch]$ contains the minimum error for any sub-tree of branch $\abranch$.
Otherwise, it is optimal since all features have been tried, and $\best[\abranch]$ contains the minimum error for any subtree of branch $\abranch$.
%and its error is the sum of the errors of its best subtrees.
This branch will never be expanded again, since it is not added to $\bud$.
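For illustration, here is a minimal C++ sketch of this backtracking step; the containers (\texttt{sequence}, \texttt{dom}, \texttt{best}, \texttt{bud}) and the \texttt{grow} helper are hypothetical stand-ins, not the data structures of the actual implementation.
\begin{verbatim}
#include <algorithm>
#include <map>
#include <set>
#include <utility>
#include <vector>

using Branch = std::vector<int>;  // signed literals: +f tests f, -f tests "not f"

std::vector<std::pair<Branch,int>> sequence;  // current tree: (branch, tested feature)
std::map<Branch, std::set<int>> dom;          // untried features for each branch
std::map<Branch, int> best;                   // best error found so far for each branch
std::set<Branch> bud;                         // branches still to be expanded

Branch grow(Branch b, int literal) { b.push_back(literal); return b; }

// Pop the last assignment (b, f), update best[b] with the error of the tree
// testing f at b, and re-open b only if some feature is still untried.
void backtrack() {
  auto [b, f] = sequence.back();
  sequence.pop_back();
  int error = best[grow(b, +f)] + best[grow(b, -f)];
  best[b] = std::min(best[b], error);
  if (!dom[b].empty())
    bud.insert(b);   // b may still be improved by another feature test
  // otherwise best[b] is optimal and b is never expanded again
}
\end{verbatim}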
......@@ -652,17 +652,13 @@ Notice that to simplify the pseudo-code, we use branches to index array-like dat
\KwData{$\negex,\posex, \maxd$}
\KwResult{The minimum error on $\negex,\posex$ for decision trees of depth $\maxd$}
$\sequence \gets []$\;
$\bud \gets \{\emptyset\}$\;
% $\error \gets \min(|\negex|,|\posex|)$\;
% $\dom \gets (\lambda : {2^{\features}} \mapsto \features)$\;
% $\best \gets (\lambda : {2^{\features}} \mapsto \infty)$\;
% $\dom \gets (\lambda : 2^{\features \cup \bar{\features}} \mapsto \features)$\;
% $\best \gets (\lambda : 2^{\features \cup \bar{\features}} \mapsto \infty)$\;
% \HiLi $\opt \gets (\lambda : 2^{\features \cup \bar{\features}} \mapsto 0)$\;
% $\bud \gets \emptyset$\;
$\bud \gets \newbud(\emptyset,\emptyset)$\;
$\dom[\emptyset] \gets \features$\;
$\best[\emptyset] \gets \min(\negex, \posex)$\;
\HiLi $\opt[\emptyset] \gets \texttt{false}$\;
% $\bud \gets \{\emptyset\}$\;
% $\dom[\emptyset] \gets \features$\;
% $\best[\emptyset] \gets \min(\negex, \posex)$\;
% \HiLi $\opt[\emptyset] \gets \texttt{false}$\;
\While{$|\sequence| + |\bud| > 0$}{
......@@ -678,13 +674,17 @@ Notice that to simplify the pseudo-code, we use branches to index array-like dat
\lnl{line:assignment} pick and remove $\afeat$ from $\dom[\abranch]$\;
% $\dom[\abranch] \gets \dom[\abranch] \setminus \{\afeat\}$\;
push $(\abranch,\afeat)$ on $\sequence$\;
split $\negex(\abranch)$ and $\posex(\abranch)$ w.r.t. $\afeat$\;
\lnl{line:branching}\ForEach{$v \in \{\afeat, \bar{\afeat}\}$}{
\lnl{line:newbud}$\bud \gets \bud \cup \{\grow{\abranch}{v}\}$\;
\lnl{line:domain}$\dom(\grow{\abranch}{v}) \gets \features \setminus \{\afeat \mid \afeat \in \abranch ~\vee~ \bar{\afeat} \in \abranch\}$\;
$\best(\grow{\abranch}{v}) \gets \min(\negex[\grow{\abranch}{v}], \posex[\grow{\abranch}{v}])$\;
\HiLi $\opt(\grow{\abranch}{v}) \gets \texttt{false}$\;
}
% split $\negex(\abranch)$ and $\posex(\abranch)$ w.r.t. $\afeat$\;
% \lnl{line:branching}\ForEach{$v \in \{\afeat, \bar{\afeat}\}$}{
% \lnl{line:newbud}$\bud \gets \bud \cup \{\grow{\abranch}{v}\}$\;
% \lnl{line:domain}$\dom(\grow{\abranch}{v}) \gets \features \setminus \{\afeat \mid \afeat \in \abranch ~\vee~ \bar{\afeat} \in \abranch\}$\;
% $\best(\grow{\abranch}{v}) \gets \min(\negex[\grow{\abranch}{v}], \posex[\grow{\abranch}{v}])$\;
% \HiLi $\opt(\grow{\abranch}{v}) \gets \texttt{false}$\;
% }
% }
\lnl{line:branching} \lForEach{$v \in \{\afeat, \bar{\afeat}\}$}{
$\bud \gets \newbud{\bud,\grow{\abranch}{v}}$ %\newbud{$\grow{\abranch}{v}$}
}
}
}
\lnl{line:else}\Else {
......@@ -702,9 +702,9 @@ Notice that to simplify the pseudo-code, we use branches to index array-like dat
}
% \lIf{$\opt[\abranch]$}{$\error \gets \error - \best[\abranch]$}
}
\HiLi \lnl{line:markoptimal} \lElse{
$\opt[\abranch] \gets \texttt{true}$
}
% \HiLi \lnl{line:markoptimal} \lElse{
% $\opt[\abranch] \gets \texttt{true}$
% }
% \lElse {
% $\error \gets \error + \best[\abranch]$%$\error[\abranch,\afeat]$
% }
......@@ -736,6 +736,16 @@ Notice that to simplify the pseudo-code, we use branches to index array-like dat
}
}
\Return $\best[\emptyset]$\;
% \setcounter{AlgoLine}{0}
\SetKwProg{myproc}{Procedure}{}{}
\myproc{\newbud{$\bud, \abranch$}}{
% \lnl{line:newbud}$\bud \gets \bud \cup \{\abranch\}$\;
\lnl{line:splitting}compute $\negex(\abranch)$ and $\posex(\abranch)$ \colorbox{yellow!50}{and $p(\afeat,\negex(\abranch))$ and $p(\afeat,\posex(\abranch)), \forall \afeat \in \features$}\;
\lnl{line:domain}$\dom(\abranch) \gets \features \setminus \{\afeat \mid \afeat \in \abranch ~\vee~ \bar{\afeat} \in \abranch\}$ \colorbox{yellow!50}{sorted by increasing Gini score}\;
$\best(\abranch) \gets \min(\negex[\abranch], \posex[\abranch])$\;
% \colorbox{yellow!50}{$\opt(\abranch) \gets \texttt{false}$}\;
\Return{$\bud \cup \{\abranch\}$}\;
}
\end{footnotesize}
\end{algorithm}
......@@ -1031,7 +1041,7 @@ The feature tests at Line~\ref{line:assignment} of Algorithm~\ref{alg:bud} are e
In the data sets we used, the Gini impurity was significantly better, and hence all reported experimental results use Gini impurity unless stated otherwise. For branches of length $\mdepth-1$, however, we use the error instead. Indeed, the optimal feature $\afeat$ for a branch $\abranch$ that cannot be extended further is the one minimizing
% $\error[\abranch,\afeat]$.
$\error[\grow{\abranch}{\afeat}] + \error[\grow{\abranch}{\bar{\afeat}}]$.
This means that we actually do not have to try other features for that node. This is implemented by the highlited code at Line~\ref{line:optimal}: since one cannot improve on the first feature for test at depth $\mdepth$, branches of length $\mdepth-1$ do not have to be put back into \bud, and can be backtracked upon.
This means that we actually do not have to try other features for that node. This is implemented by the highlighted code at Line~\ref{line:optimal}: since one cannot improve on the first feature for test at depth $\mdepth$, branches of length $\mdepth-1$ do not have to be put back into \bud, and can be backtracked upon.
% which means that we effectively restrict search to branches of length $\mdepth-1$.
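As a concrete illustration of the two scores, the sketch below computes the weighted Gini impurity and the leaf error of a single feature test from the four counts $|\negex(\abranch)|$, $|\posex(\abranch)|$, $p(\afeat,{\negex(\abranch)})$ and $p(\afeat,{\posex(\abranch)})$; the type and function names are ours, not those of the implementation.
\begin{verbatim}
#include <algorithm>
#include <cstdint>

// counts at branch b: negatives, positives, and those of each class where f is true
struct Split { std::int64_t neg, pos, f_neg, f_pos; };

// Weighted Gini impurity of splitting on f (lower is better).
double gini_score(const Split &s) {
  auto leaf = [](double n0, double n1) {        // 1 - p0^2 - p1^2
    double n = n0 + n1;
    return n > 0 ? 2.0 * n0 * n1 / (n * n) : 0.0;
  };
  double total = s.neg + s.pos;
  double with_f = s.f_neg + s.f_pos;
  return (with_f / total) * leaf(s.f_neg, s.f_pos) +
         ((total - with_f) / total) * leaf(s.neg - s.f_neg, s.pos - s.f_pos);
}

// Error of testing f when both children must be leaves (used at depth d-1):
// each leaf misclassifies its minority class.
std::int64_t error_score(const Split &s) {
  return std::min(s.f_neg, s.f_pos) +
         std::min(s.neg - s.f_neg, s.pos - s.f_pos);
}
\end{verbatim}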
......@@ -1039,12 +1049,14 @@ This means that we actually do not have to try other features for that node. Thi
%We order the possible features for branch $\abranch$ in non-decreasing order with respect to a score above and
%explore the features in that order in Line~\ref{line:assignment}.
Computing the frequencies $p(\afeat,{\negex(\abranch)})$ and $p(\afeat,{\posex(\abranch)})$ of every feature $\afeat$ can be done in $\Theta(\numfeat\numex)$ time where
$\numex = |\negex(\abranch)|+|\posex(\abranch)|$.\footnote{$p(\bar{\afeat},{\negex(\abranch)})$ and $p(\bar{\afeat},{\posex(\abranch)})$ can then be deduced in $\Theta(\numfeat)$ time} In other words this is more expensive than the splitting procedure by a factor $\numfeat$, but can be similarly amortized. However, since the depth of the branches is effectively reduced by one, the number of terminal branches is reduced by the same factor $\numfeat$, hence this incurs no asymptotic increase in complexity.
Furthermore, ordering the features (in $\dom[\abranch]$)
$\numex = |\negex(\abranch)|+|\posex(\abranch)|$.\footnote{$p(\bar{\afeat},{\negex(\abranch)}) = |\negex(\abranch)| - p({\afeat},{\negex(\abranch)})$ and $p(\bar{\afeat},{\posex(\abranch)}) = |\posex(\abranch)| - p({\afeat},{\posex(\abranch)})$ can then be queried in constant time} In other words this is more expensive than the splitting procedure by a factor $\numfeat$, but can be similarly amortized. However, since the depth of the branches is effectively reduced by one, the number of terminal branches is reduced by the same factor $\numfeat$, hence this incurs no asymptotic increase in complexity.
Furthermore, ordering the features (at Line~\ref{line:domain})
%Computing this order
costs $\Theta(\numfeat \log \numfeat)$ for each of the $2^{\mdepth-1}\numfeat^{\mdepth-1}$ branches added to $\bud$ at Line~\ref{line:branching}. Again, since the depth of the branches is effectively reduced by one, the resulting complexity
%(excluding the time for splitting the data set)
is $O((\numex + 2^{\mdepth} \log \numfeat) \numfeat^{\mdepth})$. This very slight increase is often inconsequential, as long as we have $\numex \geq 2^{\mdepth} \log \numfeat$.
is $O((\numex + 2^{\mdepth} \log \numfeat) \numfeat^{\mdepth})$. This very slight increase is often inconsequential, as
$\numex$ is typically the dominating term.
% long as we have $\numex \geq 2^{\mdepth} \log \numfeat$.
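The two operations this analysis refers to are sketched below: a single pass over the examples to compute $p(\afeat,E)$ for every feature, and a sort of the candidate features by score; the representation of the examples and the function names are illustrative only.
\begin{verbatim}
#include <algorithm>
#include <numeric>
#include <vector>

using Example = std::vector<bool>;   // one bit per feature (illustrative)

// Theta(F*E): p(f,E) = number of examples of E in which feature f is true.
// The complements p(not f, E) = E.size() - p[f] then cost O(1) each.
std::vector<long> frequencies(const std::vector<Example> &E, int num_features) {
  std::vector<long> p(num_features, 0);
  for (const auto &x : E)
    for (int f = 0; f < num_features; ++f)
      p[f] += x[f];
  return p;
}

// Theta(F log F): candidate features sorted by increasing score (e.g. Gini),
// used to fill dom[b] when a new branch is created.
std::vector<int> sorted_features(const std::vector<double> &score) {
  std::vector<int> order(score.size());
  std::iota(order.begin(), order.end(), 0);
  std::sort(order.begin(), order.end(),
            [&](int a, int b) { return score[a] < score[b]; });
  return order;
}
\end{verbatim}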
The feature ordering has a very significant impact on how quickly the algorithm can improve the accuracy of the classifier. Moreover, it also has an impact (though indirect and much less significant) on the computational time necessary to explore the whole search space and prove optimality, because of the lower bound technique detailed in the next section.
......@@ -1052,7 +1064,9 @@ The feature ordering has a very significant impact on how quickly the algorithm
\subsection{Lower Bound}
\label{sec:lb}
It is possible to fail early using a lower bound on the error given prior decisions in the same way as \murtree, following the idea introduced in \cite{dl8}. The idea is that once some subtrees along a branch $\abranch$ are optimal and the sum of their errors is larger than the current upper bound (the best solution found so far) then there is no need to continue exploring branch $\abranch$.
It is possible to fail early using a lower bound on the error given prior decisions in the same way as \dleight~\cite{dl8}.
%, following the idea introduced in \cite{dl8}.
The idea is that once some subtrees along a branch $\abranch$ are optimal and the sum of their errors is larger than the current upper bound (the best solution found so far), there is no need to continue exploring branch $\abranch$.
%Line~\ref{line:leaves} can be changed to ``\textbf{If} $\bud \neq \emptyset ~\& \not\exists \abranch \in \bud, \dominated{\abranch}$ \textbf{then}''. %Notice that when a branch is ``pruned'' in this way, its
......@@ -1064,23 +1078,30 @@ It is possible to fail early using a lower bound on the error given prior decisi
First, observe that $\best[\abranch]$ is an upper bound on the classification error for any subtree rooted at $\abranch$, since this value comes from an actual tree (of depth $\mdepth - |\abranch|$ for the data set $\langle \negex(\abranch),\posex(\abranch) \rangle$). It is possible to propagate this upper bound to parent nodes efficiently (in $O(|\abranch|)$ time). Here we assume that this is done every time the value $\best[\abranch]$ is actually updated, by recursively applying the same update procedure to the parent.
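A minimal sketch of this propagation is given below; moving up one ancestor per call gives the $O(|\abranch|)$ cost mentioned above (the branch representation and the \texttt{best} map are hypothetical stand-ins).
\begin{verbatim}
#include <map>
#include <vector>

using Branch = std::vector<int>;   // signed feature literals; the root is the empty branch
std::map<Branch, int> best;        // assumed initialised when each branch is created

Branch parent(Branch b)  { b.pop_back(); return b; }
Branch sibling(Branch b) { b.back() = -b.back(); return b; }

// Propagate an improved error bound for branch b to its ancestors, one per call.
void update_best(const Branch &b, int value) {
  if (value >= best[b]) return;          // no improvement: stop here
  best[b] = value;
  if (b.empty()) return;                 // reached the root
  // the parent tree testing b's last feature has error best[b] + best[sibling(b)]
  update_best(parent(b), best[b] + best[sibling(b)]);
}
\end{verbatim}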
Now, when the condition in Line~\ref{line:optimal} fails