Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
87 changes: 59 additions & 28 deletions include/xgboost/c_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,8 @@ XGB_DLL int XGBSetGlobalConfig(char const *config);

/**
* @brief Get current global configuration (collection of parameters that apply globally).
* @param out_config pointer to received returned global configuration, represented as a JSON string.
* @param out_config pointer to received returned global configuration, represented as a JSON
* string.
* @return 0 when success, -1 when failure happens
*/
XGB_DLL int XGBGetGlobalConfig(char const **out_config);
Expand Down Expand Up @@ -149,12 +150,14 @@ XGB_DLL int XGDMatrixCreateFromFile(const char *fname, int silent, DMatrixHandle
* @brief load a data matrix
*
* @param config JSON encoded parameters for DMatrix construction. Accepted fields are:
* - uri: The URI of the input file. The URI parameter `format` is required when loading text data.
* - uri: The URI of the input file. The URI parameter `format` is required when loading text
* data.
* @verbatim embed:rst:leading-asterisk
* See :doc:`/tutorials/input_format` for more info.
* @endverbatim
* - silent (optional): Whether to print message during loading. Default to true.
* - data_split_mode (optional): Whether the file was split by row or column beforehand for distributed computing. Default to row.
* - data_split_mode (optional): Whether the file was split by row or column beforehand for
* distributed computing. Default to row.
* @param out a loaded data matrix
* @return 0 when success, -1 when failure happens
*/
Expand Down Expand Up @@ -243,7 +246,8 @@ XGB_DLL int XGDMatrixCreateFromCSR(char const *indptr, char const *indices, char
* @param config JSON encoded configuration. Required values are:
* - missing: Which value to represent missing value.
* - nthread (optional): Number of threads used for initializing DMatrix.
* - data_split_mode (optional): Whether the data was split by row or column beforehand. Default to row.
* - data_split_mode (optional): Whether the data was split by row or column beforehand. Default
* to row.
* @param out The created DMatrix
*
* @return 0 when success, -1 when failure happens
Expand Down Expand Up @@ -310,7 +314,8 @@ XGB_DLL int XGDMatrixCreateFromCudaColumnar(char const *data, char const *config
* @param config JSON encoded configuration. Required values are:
* - missing: Which value to represent missing value.
* - nthread (optional): Number of threads used for initializing DMatrix.
* - data_split_mode (optional): Whether the data was split by row or column beforehand. Default to row.
* - data_split_mode (optional): Whether the data was split by row or column beforehand. Default
* to row.
* @param out created dmatrix
* @return 0 when success, -1 when failure happens
*/
Expand Down Expand Up @@ -493,7 +498,8 @@ XGB_EXTERN_C typedef void DataIterResetCallback(DataIterHandle handle); // NOLI
* @param next Callback function yielding the next batch of data.
* @param config JSON encoded parameters for DMatrix construction. Accepted fields are:
* - missing: Which value to represent missing value
* - cache_prefix: The path of cache file, caller must initialize all the directories in this path.
* - cache_prefix: The path of cache file, caller must initialize all the directories in this
* path.
* - nthread (optional): Number of threads used for initializing DMatrix.
* @param[out] out The created external memory DMatrix
*
Expand Down Expand Up @@ -558,7 +564,8 @@ XGB_DLL int XGQuantileDMatrixCreateFromCallback(DataIterHandle iter, DMatrixHand
* @param next Callback function yielding the next batch of data.
* @param config JSON encoded parameters for DMatrix construction. Accepted fields are:
* - missing: Which value to represent missing value
* - cache_prefix: The path of cache file, caller must initialize all the directories in this path.
* - cache_prefix: The path of cache file, caller must initialize all the directories in this
* path.
* - nthread (optional): Number of threads used for initializing DMatrix.
* - max_bin (optional): Maximum number of bins for building histogram. Must be consistent with
* the corresponding booster training parameter.
Expand Down Expand Up @@ -1143,16 +1150,16 @@ XGB_DLL int XGBoosterEvalOneIter(BoosterHandle handle, int iter, DMatrixHandle d
* 1:output margin instead of transformed value
* 2:output leaf index of trees instead of leaf value, note leaf index is unique per tree
* 4:output feature contributions to individual predictions
* @param ntree_limit limit number of trees used for prediction, this is only valid for boosted trees
* when the parameter is set to 0, we will use all the trees
* @param ntree_limit limit number of trees used for prediction, this is only valid for boosted
* trees when the parameter is set to 0, we will use all the trees
* @param training Whether the prediction function is used as part of a training loop.
* Prediction can be run in 2 scenarios:
* 1. Given data matrix X, obtain prediction y_pred from the model.
* 2. Obtain the prediction for computing gradients. For example, DART booster performs dropout
* during training, and the prediction result will be different from the one obtained by normal
* inference step due to dropped trees.
* Set training=false for the first scenario. Set training=true for the second scenario.
* The second scenario applies when you are defining a custom objective function.
* during training, and the prediction result will be different from the one obtained by
* normal inference step due to dropped trees. Set training=false for the first scenario. Set
* training=true for the second scenario. The second scenario applies when you are defining a custom
* objective function.
* @param out_len used to store length of returning result
* @param out_result used to set a pointer to array
* @return 0 when success, -1 when failure happens
Expand Down Expand Up @@ -1183,21 +1190,17 @@ XGB_DLL int XGBoosterPredict(BoosterHandle handle, DMatrixHandle dmat, int optio
*
* Prediction can be run in 2 scenarios:
* 1. Given data matrix X, obtain prediction y_pred from the model.
* 2. Obtain the prediction for computing gradients. For example, DART booster performs dropout
* during training, and the prediction result will be different from the one obtained by normal
* inference step due to dropped trees.
* Set training=false for the first scenario. Set training=true for the second
* scenario. The second scenario applies when you are defining a custom objective
* function.
* "iteration_begin": int
* Beginning iteration of prediction.
 *   2. Obtain the prediction for computing gradients. For example, DART booster performs
 *      dropout during training, and the prediction result will be different from the one obtained
 *      by normal inference step due to dropped trees. Set training=false for the first scenario.
 *      Set training=true for the second scenario. The second scenario applies when you are
 *      defining a custom objective function.
 *   "iteration_begin": int
 *     Beginning iteration of prediction.
* "iteration_end": int
* End iteration of prediction. Set to 0 this will become the size of tree model (all the trees).
* "strict_shape": bool
* Whether should we reshape the output with stricter rules. If set to true,
* normal/margin/contrib/interaction predict will output consistent shape
* disregarding the use of multi-class model, and leaf prediction will output 4-dim
* array representing: (n_samples, n_iterations, n_classes, n_trees_in_forest)
 *     End iteration of prediction. Set to 0, this will become the size of the tree model (all
 *     the trees).
 *   "strict_shape": bool
 *     Whether should we reshape the output with stricter rules. If set to true,
 *     normal/margin/contrib/interaction predict will output consistent shape disregarding the
 *     use of multi-class model, and leaf prediction will output 4-dim array representing:
 *     (n_samples, n_iterations, n_classes, n_trees_in_forest)
*
* Example JSON input for running a normal prediction with strict output shape, 2 dim
* for softprob , 1 dim for others.
Expand All @@ -1217,7 +1220,8 @@ XGB_DLL int XGBoosterPredict(BoosterHandle handle, DMatrixHandle dmat, int optio
*
* @return 0 when success, -1 when failure happens
*
* @see XGBoosterPredictFromDense XGBoosterPredictFromCSR XGBoosterPredictFromCudaArray XGBoosterPredictFromCudaColumnar
* @see XGBoosterPredictFromDense XGBoosterPredictFromCSR XGBoosterPredictFromCudaArray
* XGBoosterPredictFromCudaColumnar
*/
XGB_DLL int XGBoosterPredictFromDMatrix(BoosterHandle handle, DMatrixHandle dmat,
char const *config, bst_ulong const **out_shape,
Expand Down Expand Up @@ -1649,6 +1653,33 @@ XGB_DLL int XGBoosterFeatureScore(BoosterHandle handle, const char *config,
bst_ulong *out_n_features, char const ***out_features,
bst_ulong *out_dim, bst_ulong const **out_shape,
float const **out_scores);

/**
 * @brief Get per-tree weights for leaf similarity computation.
 *
 * @param handle Handle to the booster whose tree weights are extracted.
 * @param config A JSON string with the following format:
 *
 *     {
 *       "weight_type": str,
 *       "iteration_begin": int,
 *       "iteration_end": int
 *     }
 *
 *   - weight_type: A JSON string with the following possible values:
 *     * 'uniform': assign equal weight to each tree.
 *     * 'gain': sum split gain for each tree.
 *     * 'cover': sum split cover for each tree.
 *   - iteration_begin: Beginning iteration used when extracting tree weights.
 *   - iteration_end: End iteration used when extracting tree weights. 0 means
 *     using all remaining iterations.
 *
 * @param out_len Length of output tree weight array.
 * @param out_weights Pointer to the output tree weight array.
 *
 * @return 0 when success, -1 when failure happens
 */
XGB_DLL int XGBoosterGetLeafSimilarityWeights(BoosterHandle handle, const char *config,
                                              bst_ulong *out_len, float const **out_weights);
/**@}*/ // End of Booster

/**
Expand Down
18 changes: 10 additions & 8 deletions include/xgboost/gbm.h
Original file line number Diff line number Diff line change
Expand Up @@ -116,8 +116,7 @@ class GradientBooster : public Model, public Configurable {
* \param layer_begin Beginning of boosted tree layer used for prediction.
* \param layer_end End of booster layer. 0 means do not limit trees.
*/
virtual void PredictLeaf(DMatrix *dmat,
HostDeviceVector<bst_float> *out_preds,
virtual void PredictLeaf(DMatrix* dmat, HostDeviceVector<bst_float>* out_preds,
unsigned layer_begin, unsigned layer_end) = 0;

/*!
Expand Down Expand Up @@ -147,10 +146,13 @@ class GradientBooster : public Model, public Configurable {
[[nodiscard]] virtual std::vector<std::string> DumpModel(const FeatureMap& fmap, bool with_stats,
std::string format) const = 0;

virtual void FeatureScore(std::string const& importance_type,
common::Span<int32_t const> trees,
virtual void FeatureScore(std::string const& importance_type, common::Span<int32_t const> trees,
std::vector<bst_feature_t>* features,
std::vector<float>* scores) const = 0;

virtual void LeafSimilarityWeights(std::string const& weight_type, bst_layer_t iteration_begin,
bst_layer_t iteration_end,
std::vector<float>* weights) const = 0;
/**
* @brief Getter for categories.
*/
Expand Down Expand Up @@ -190,10 +192,10 @@ struct GradientBoosterReg
* });
* \endcode
*/
#define XGBOOST_REGISTER_GBM(UniqueId, Name) \
static DMLC_ATTRIBUTE_UNUSED ::xgboost::GradientBoosterReg & \
__make_ ## GradientBoosterReg ## _ ## UniqueId ## __ = \
::dmlc::Registry< ::xgboost::GradientBoosterReg>::Get()->__REGISTER__(Name)
#define XGBOOST_REGISTER_GBM(UniqueId, Name) \
static DMLC_ATTRIBUTE_UNUSED ::xgboost::GradientBoosterReg& \
__make_##GradientBoosterReg##_##UniqueId##__ = \
::dmlc::Registry< ::xgboost::GradientBoosterReg>::Get()->__REGISTER__(Name)

} // namespace xgboost
#endif // XGBOOST_GBM_H_
19 changes: 11 additions & 8 deletions include/xgboost/learner.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,8 +96,7 @@ class Learner : public Model, public Configurable, public dmlc::Serializable {
* \param data_names name of each dataset
* \return a string corresponding to the evaluation result
*/
virtual std::string EvalOneIter(int iter,
const std::vector<std::shared_ptr<DMatrix>>& data_sets,
virtual std::string EvalOneIter(int iter, const std::vector<std::shared_ptr<DMatrix>>& data_sets,
const std::vector<std::string>& data_names) = 0;
/*!
* \brief get prediction given the model.
Expand All @@ -107,7 +106,8 @@ class Learner : public Model, public Configurable, public dmlc::Serializable {
* \param layer_begin Beginning of boosted tree layer used for prediction.
* \param layer_end End of booster layer. 0 means do not limit trees.
* \param training Whether the prediction result is used for training
* \param pred_leaf whether to only predict the leaf index of each tree in a boosted tree predictor
* \param pred_leaf whether to only predict the leaf index of each tree in a boosted tree
* predictor
* \param pred_contribs whether to only predict the feature contributions
* \param approx_contribs whether to approximate the feature contributions for speed
* \param pred_interactions whether to compute the feature pair contributions
Expand Down Expand Up @@ -140,6 +140,10 @@ class Learner : public Model, public Configurable, public dmlc::Serializable {
std::vector<bst_feature_t>* features,
std::vector<float>* scores) = 0;

virtual void CalcLeafSimilarityWeights(std::string const& weight_type,
bst_layer_t iteration_begin, bst_layer_t iteration_end,
std::vector<float>* weights) = 0;

/*
* \brief Get number of boosted rounds from gradient booster.
*/
Expand Down Expand Up @@ -206,7 +210,7 @@ class Learner : public Model, public Configurable, public dmlc::Serializable {
* \brief Set the feature names for current booster.
* \param fn Input feature names
*/
virtual void SetFeatureNames(std::vector<std::string> const& fn) = 0;
virtual void SetFeatureNames(std::vector<std::string> const& fn) = 0;
/*!
* \brief Get the feature names for current booster.
* \param fn Output feature names
Expand Down Expand Up @@ -245,8 +249,7 @@ class Learner : public Model, public Configurable, public dmlc::Serializable {
* \param format the format to dump the model in
* \return a vector of dump for boosters.
*/
virtual std::vector<std::string> DumpModel(const FeatureMap& fmap,
bool with_stats,
virtual std::vector<std::string> DumpModel(const FeatureMap& fmap, bool with_stats,
std::string format) = 0;

virtual XGBAPIThreadLocalEntry& GetThreadLocal() const = 0;
Expand All @@ -259,7 +262,7 @@ class Learner : public Model, public Configurable, public dmlc::Serializable {
* \param cache_data The matrix to cache the prediction.
* \return Created learner.
*/
static Learner* Create(const std::vector<std::shared_ptr<DMatrix> >& cache_data);
static Learner* Create(const std::vector<std::shared_ptr<DMatrix>>& cache_data);
/**
* \brief Return the context object of this Booster.
*/
Expand All @@ -276,7 +279,7 @@ class Learner : public Model, public Configurable, public dmlc::Serializable {
/*! \brief The gradient booster used by the model*/
std::unique_ptr<GradientBooster> gbm_;
/*! \brief The evaluation metrics used to evaluate the model. */
std::vector<std::unique_ptr<Metric> > metrics_;
std::vector<std::unique_ptr<Metric>> metrics_;
/*! \brief Training parameter. */
Context ctx_;
};
Expand Down
Loading