Point Cloud Library (PCL)  1.10.0
decision_tree_trainer.h
1 /*
2  * Software License Agreement (BSD License)
3  *
4  * Point Cloud Library (PCL) - www.pointclouds.org
5  * Copyright (c) 2010-2011, Willow Garage, Inc.
6  *
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  *
13  * * Redistributions of source code must retain the above copyright
14  * notice, this list of conditions and the following disclaimer.
15  * * Redistributions in binary form must reproduce the above
16  * copyright notice, this list of conditions and the following
17  * disclaimer in the documentation and/or other materials provided
18  * with the distribution.
19  * * Neither the name of Willow Garage, Inc. nor the names of its
20  * contributors may be used to endorse or promote products derived
21  * from this software without specific prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
26  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
27  * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
28  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
29  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
30  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
31  * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
33  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34  * POSSIBILITY OF SUCH DAMAGE.
35  *
36  */
37 
38 #pragma once
39 
40 #include <pcl/common/common.h>
41 
42 #include <pcl/ml/dt/decision_tree.h>
43 #include <pcl/ml/dt/decision_tree_data_provider.h>
44 #include <pcl/ml/feature_handler.h>
45 #include <pcl/ml/stats_estimator.h>
46 
47 #include <vector>
48 
49 namespace pcl {
50 
51 /** Trainer for decision trees. */
52 template <class FeatureType,
53  class DataSet,
54  class LabelType,
55  class ExampleIndex,
56  class NodeType>
58 
59 public:
60  /** Constructor. */
62 
63  /** Destructor. */
64  virtual ~DecisionTreeTrainer();
65 
66  /** Sets the feature handler used to create and evaluate features.
67  *
68  * \param[in] feature_handler the feature handler
69  */
70  inline void
73  {
74  feature_handler_ = &feature_handler;
75  }
76 
77  /** Sets the object for estimating the statistics for tree nodes.
78  *
79  * \param[in] stats_estimator the statistics estimator
80  */
81  inline void
84  {
85  stats_estimator_ = &stats_estimator;
86  }
87 
88  /** Sets the maximum depth of the learned tree.
89  *
90  * \param[in] max_tree_depth maximum depth of the learned tree
91  */
92  inline void
93  setMaxTreeDepth(const std::size_t max_tree_depth)
94  {
95  max_tree_depth_ = max_tree_depth;
96  }
97 
98  /** Sets the number of features used to find optimal decision features.
99  *
100  * \param[in] num_of_features the number of features
101  */
102  inline void
103  setNumOfFeatures(const std::size_t num_of_features)
104  {
105  num_of_features_ = num_of_features;
106  }
107 
108  /** Sets the number of thresholds tested for finding the optimal decision
109  * threshold on the feature responses.
110  *
111  * \param[in] num_of_threshold the number of thresholds
112  */
113  inline void
114  setNumOfThresholds(const std::size_t num_of_threshold)
115  {
116  num_of_thresholds_ = num_of_threshold;
117  }
118 
119  /** Sets the input data set used for training.
120  *
121  * \param[in] data_set the data set used for training
122  */
123  inline void
124  setTrainingDataSet(DataSet& data_set)
125  {
126  data_set_ = data_set;
127  }
128 
129  /** Example indices that specify the data used for training.
130  *
131  * \param[in] examples the examples
132  */
133  inline void
134  setExamples(std::vector<ExampleIndex>& examples)
135  {
136  examples_ = examples;
137  }
138 
139  /** Sets the label data corresponding to the example data.
140  *
141  * \param[in] label_data the label data
142  */
143  inline void
144  setLabelData(std::vector<LabelType>& label_data)
145  {
146  label_data_ = label_data;
147  }
148 
149  /** Sets the minimum number of examples to continue growing a tree.
150  *
151  * \param[in] n number of examples
152  */
153  inline void
154  setMinExamplesForSplit(std::size_t n)
155  {
156  min_examples_for_split_ = n;
157  }
158 
159  /** Specify the thresholds to be used when evaluating features.
160  *
161  * \param[in] thres the threshold values
162  */
163  void
164  setThresholds(std::vector<float>& thres)
165  {
166  thresholds_ = thres;
167  }
168 
169  /** Specify the data provider.
170  *
171  * \param[in] dtdp the data provider that should implement getDatasetAndLabels()
172  * function
173  */
174  void
176  typename pcl::DecisionTreeTrainerDataProvider<FeatureType,
177  DataSet,
178  LabelType,
179  ExampleIndex,
180  NodeType>::Ptr& dtdp)
181  {
182  decision_tree_trainer_data_provider_ = dtdp;
183  }
184 
185  /** Specify if the features are randomly generated at each split node.
186  *
187  * \param[in] b do it or not
188  */
189  void
191  {
192  random_features_at_split_node_ = b;
193  }
194 
195  /** Trains a decision tree using the set training data and settings.
196  *
197  * \param[out] tree destination for the trained tree
198  */
199  void
200  train(DecisionTree<NodeType>& tree);
201 
202 protected:
203  /** Trains a decision tree node from the specified features, label data, and
204  * examples.
205  *
206  * \param[in] features the feature pool used for training
207  * \param[in] examples the examples used for training
208  * \param[in] label_data the label data corresponding to the examples
209  * \param[in] max_depth the maximum depth of the remaining tree
210  * \param[out] node the resulting node
211  */
212  void
213  trainDecisionTreeNode(std::vector<FeatureType>& features,
214  std::vector<ExampleIndex>& examples,
215  std::vector<LabelType>& label_data,
216  std::size_t max_depth,
217  NodeType& node);
218 
219  /** Creates uniformly distributed thresholds over the range of the supplied
220  * values.
221  *
222  * \param[in] num_of_thresholds the number of thresholds to create
223  * \param[in] values the values for estimating the expected value range
224  * \param[out] thresholds the resulting thresholds
225  */
226  static void
227  createThresholdsUniform(const std::size_t num_of_thresholds,
228  std::vector<float>& values,
229  std::vector<float>& thresholds);
230 
231 private:
232  /** Maximum depth of the learned tree. */
233  std::size_t max_tree_depth_;
234  /** Number of features used to find optimal decision features. */
235  std::size_t num_of_features_;
236  /** Number of thresholds. */
237  std::size_t num_of_thresholds_;
238 
239  /** FeatureHandler instance, responsible for creating and evaluating features. */
241  /** StatsEstimator instance, responsible for gathering stats about a node. */
243 
244  /** The training data set. */
245  DataSet data_set_;
246  /** The label data. */
247  std::vector<LabelType> label_data_;
248  /** The example data. */
249  std::vector<ExampleIndex> examples_;
250 
251  /** Minimum number of examples to split a node. */
252  std::size_t min_examples_for_split_;
253  /** Thresholds to be used instead of generating uniformly distributed thresholds. */
254  std::vector<float> thresholds_;
255  /** The data provider, which is called before training a specific tree. If the
256  * pointer is NULL, data_set_ is used instead. */
257  typename pcl::DecisionTreeTrainerDataProvider<FeatureType,
258  DataSet,
259  LabelType,
260  ExampleIndex,
261  NodeType>::Ptr
262  decision_tree_trainer_data_provider_;
263  /** If true, random features are generated at each split node; otherwise they
264  * are generated at the start of training the tree. */
265  bool random_features_at_split_node_;
266 };
267 
268 } // namespace pcl
269 
270 #include <pcl/ml/impl/dt/decision_tree_trainer.hpp>
pcl
This file defines compatibility wrappers for low level I/O functions.
Definition: convolution.h:45
pcl::DecisionTreeTrainer::setRandomFeaturesAtSplitNode
void setRandomFeaturesAtSplitNode(bool b)
Specify if the features are randomly generated at each split node.
Definition: decision_tree_trainer.h:190
pcl::DecisionTreeTrainer::setMinExamplesForSplit
void setMinExamplesForSplit(std::size_t n)
Sets the minimum number of examples to continue growing a tree.
Definition: decision_tree_trainer.h:154
common.h
pcl::DecisionTreeTrainer::setLabelData
void setLabelData(std::vector< LabelType > &label_data)
Sets the label data corresponding to the example data.
Definition: decision_tree_trainer.h:144
pcl::DecisionTreeTrainer::setDecisionTreeDataProvider
void setDecisionTreeDataProvider(typename pcl::DecisionTreeTrainerDataProvider< FeatureType, DataSet, LabelType, ExampleIndex, NodeType >::Ptr &dtdp)
Specify the data provider.
Definition: decision_tree_trainer.h:175
pcl::DecisionTree
Class representing a decision tree.
Definition: decision_tree.h:49
pcl::FeatureHandler
Utility class interface which is used for creating and evaluating features.
Definition: feature_handler.h:49
pcl::DecisionTreeTrainer
Trainer for decision trees.
Definition: decision_tree_trainer.h:57
pcl::DecisionTreeTrainerDataProvider
Definition: decision_tree_data_provider.h:50
pcl::DecisionTreeTrainer::setExamples
void setExamples(std::vector< ExampleIndex > &examples)
Example indices that specify the data used for training.
Definition: decision_tree_trainer.h:134
pcl::DecisionTreeTrainer::setNumOfThresholds
void setNumOfThresholds(const std::size_t num_of_threshold)
Sets the number of thresholds tested for finding the optimal decision threshold on the feature respon...
Definition: decision_tree_trainer.h:114
pcl::DecisionTreeTrainer::setFeatureHandler
void setFeatureHandler(pcl::FeatureHandler< FeatureType, DataSet, ExampleIndex > &feature_handler)
Sets the feature handler used to create and evaluate features.
Definition: decision_tree_trainer.h:71
pcl::DecisionTreeTrainer::setTrainingDataSet
void setTrainingDataSet(DataSet &data_set)
Sets the input data set used for training.
Definition: decision_tree_trainer.h:124
pcl::DecisionTreeTrainer::setNumOfFeatures
void setNumOfFeatures(const std::size_t num_of_features)
Sets the number of features used to find optimal decision features.
Definition: decision_tree_trainer.h:103
pcl::DecisionTreeTrainer::setMaxTreeDepth
void setMaxTreeDepth(const std::size_t max_tree_depth)
Sets the maximum depth of the learned tree.
Definition: decision_tree_trainer.h:93
pcl::DecisionTreeTrainer::setThresholds
void setThresholds(std::vector< float > &thres)
Specify the thresholds to be used when evaluating features.
Definition: decision_tree_trainer.h:164
pcl::StatsEstimator< LabelType, NodeType, DataSet, ExampleIndex >
pcl::DecisionTreeTrainer::setStatsEstimator
void setStatsEstimator(pcl::StatsEstimator< LabelType, NodeType, DataSet, ExampleIndex > &stats_estimator)
Sets the object for estimating the statistics for tree nodes.
Definition: decision_tree_trainer.h:82
PCL_EXPORTS
#define PCL_EXPORTS
Definition: pcl_macros.h:253