Gradient boosting algorithm that also supports categorical data. Calls catboost::catboost.train() from package 'catboost'.

Dictionary

This Learner can be instantiated via the dictionary mlr_learners or with the associated sugar function lrn():

mlr_learners$get("regr.catboost")
lrn("regr.catboost")

Meta Information

  • Task type: “regr”

  • Predict Types: “response”

  • Feature Types: “numeric”, “factor”, “ordered”

  • Required Packages: mlr3, mlr3extralearners, catboost

Parameters

| Id | Type | Default | Levels | Range |
|---|---|---|---|---|
| loss_function | character | RMSE | MAE, MAPE, Poisson, Quantile, RMSE, LogLinQuantile, Lq, Huber, Expectile, Tweedie | - |
| iterations | integer | 1000 | - | \([1, \infty)\) |
| learning_rate | numeric | 0.03 | - | \([0.001, 1]\) |
| random_seed | integer | 0 | - | \([0, \infty)\) |
| l2_leaf_reg | numeric | 3 | - | \([0, \infty)\) |
| bootstrap_type | character | - | Bayesian, Bernoulli, MVS, Poisson, No | - |
| bagging_temperature | numeric | 1 | - | \([0, \infty)\) |
| subsample | numeric | - | - | \([0, 1]\) |
| sampling_frequency | character | PerTreeLevel | PerTree, PerTreeLevel | - |
| sampling_unit | character | Object | Object, Group | - |
| mvs_reg | numeric | - | - | \([0, \infty)\) |
| random_strength | numeric | 1 | - | \([0, \infty)\) |
| depth | integer | 6 | - | \([1, 16]\) |
| grow_policy | character | SymmetricTree | SymmetricTree, Depthwise, Lossguide | - |
| min_data_in_leaf | integer | 1 | - | \([1, \infty)\) |
| max_leaves | integer | 31 | - | \([1, \infty)\) |
| has_time | logical | FALSE | TRUE, FALSE | - |
| rsm | numeric | 1 | - | \([0.001, 1]\) |
| nan_mode | character | Min | Min, Max | - |
| fold_permutation_block | integer | - | - | \([1, 256]\) |
| leaf_estimation_method | character | - | Newton, Gradient, Exact | - |
| leaf_estimation_iterations | integer | - | - | \([1, \infty)\) |
| leaf_estimation_backtracking | character | AnyImprovement | No, AnyImprovement, Armijo | - |
| fold_len_multiplier | numeric | 2 | - | \([1.001, \infty)\) |
| approx_on_full_history | logical | TRUE | TRUE, FALSE | - |
| boosting_type | character | - | Ordered, Plain | - |
| boost_from_average | logical | - | TRUE, FALSE | - |
| langevin | logical | FALSE | TRUE, FALSE | - |
| diffusion_temperature | numeric | 10000 | - | \([0, \infty)\) |
| score_function | character | Cosine | Cosine, L2, NewtonCosine, NewtonL2 | - |
| monotone_constraints | untyped | - | - | - |
| feature_weights | untyped | - | - | - |
| first_feature_use_penalties | untyped | - | - | - |
| penalties_coefficient | numeric | 1 | - | \([0, \infty)\) |
| per_object_feature_penalties | untyped | - | - | - |
| model_shrink_rate | numeric | - | - | \((-\infty, \infty)\) |
| model_shrink_mode | character | - | Constant, Decreasing | - |
| target_border | numeric | - | - | \((-\infty, \infty)\) |
| border_count | integer | - | - | \([1, 65535]\) |
| feature_border_type | character | GreedyLogSum | Median, Uniform, UniformAndQuantiles, MaxLogSum, MinEntropy, GreedyLogSum | - |
| per_float_feature_quantization | untyped | - | - | - |
| thread_count | integer | 1 | - | \([-1, \infty)\) |
| task_type | character | CPU | CPU, GPU | - |
| devices | untyped | - | - | - |
| logging_level | character | Silent | Silent, Verbose, Info, Debug | - |
| metric_period | integer | 1 | - | \([1, \infty)\) |
| train_dir | untyped | catboost_info | - | - |
| model_size_reg | numeric | 0.5 | - | \([0, 1]\) |
| allow_writing_files | logical | FALSE | TRUE, FALSE | - |
| save_snapshot | logical | FALSE | TRUE, FALSE | - |
| snapshot_file | untyped | - | - | - |
| snapshot_interval | integer | 600 | - | \([1, \infty)\) |
| simple_ctr | untyped | - | - | - |
| combinations_ctr | untyped | - | - | - |
| ctr_target_border_count | integer | - | - | \([1, 255]\) |
| counter_calc_method | character | Full | SkipTest, Full | - |
| max_ctr_complexity | integer | - | - | \([1, \infty)\) |
| ctr_leaf_count_limit | integer | - | - | \([1, \infty)\) |
| store_all_simple_ctr | logical | FALSE | TRUE, FALSE | - |
| final_ctr_computation_mode | character | Default | Default, Skip | - |
| verbose | logical | FALSE | TRUE, FALSE | - |
| ntree_start | integer | 0 | - | \([0, \infty)\) |
| ntree_end | integer | 0 | - | \([0, \infty)\) |
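
Any of these hyperparameters can be set when constructing the learner or changed afterwards through its parameter set. A minimal sketch (the values below are purely illustrative, not tuned recommendations):

library(mlr3)
library(mlr3extralearners)

# set a few hyperparameters at construction time (illustrative values)
learner = lrn("regr.catboost",
  iterations = 500,
  learning_rate = 0.05,
  depth = 8
)

# parameters can also be changed after construction
learner$param_set$values$l2_leaf_reg = 5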

Initial parameter values

  • logging_level:

    • Actual default: "Verbose"

    • Adjusted default: "Silent"

    • Reason for change: consistent with other mlr3 learners

  • thread_count:

    • Actual default: -1

    • Adjusted default: 1

    • Reason for change: consistent with other mlr3 learners

  • allow_writing_files:

    • Actual default: TRUE

    • Adjusted default: FALSE

    • Reason for change: consistent with other mlr3 learners

  • save_snapshot:

    • Actual default: TRUE

    • Adjusted default: FALSE

    • Reason for change: consistent with other mlr3 learners
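
If the upstream CatBoost behaviour is preferred, these adjusted values can simply be overridden at construction time. A sketch mirroring the actual defaults listed above:

learner = mlr3::lrn("regr.catboost",
  logging_level = "Verbose",    # catboost default: print training progress
  thread_count = -1,            # catboost default: use all available threads
  allow_writing_files = TRUE,   # catboost default: write training artifacts
  save_snapshot = TRUE          # catboost default: save training snapshots
)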

References

Dorogush, Anna Veronika, Ershov, Vasily, Gulin, Andrey (2018). “CatBoost: gradient boosting with categorical features support.” arXiv preprint arXiv:1810.11363.

See also

Author

sumny

Super classes

mlr3::Learner -> mlr3::LearnerRegr -> LearnerRegrCatboost

Methods

Inherited methods


Method new()

Create a LearnerRegrCatboost object.

Usage

LearnerRegrCatboost$new()

Method importance()

The importance scores are calculated with catboost.get_feature_importance(), using type = "FeatureImportance"; scores are returned for all features.

Usage

LearnerRegrCatboost$importance()

Returns

Named numeric().
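
A short usage sketch, assuming the catboost package is installed; the task and iteration count are only illustrative:

task = mlr3::tsk("mtcars")
learner = mlr3::lrn("regr.catboost", iterations = 50)
learner$train(task)

# named numeric vector of per-feature importance scores
learner$importance()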


Method clone()

The objects of this class are cloneable with this method.

Usage

LearnerRegrCatboost$clone(deep = FALSE)

Arguments

deep

Whether to make a deep clone.

Examples

learner = mlr3::lrn("regr.catboost")
print(learner)
#> <LearnerRegrCatboost:regr.catboost>: Gradient Boosting
#> * Model: -
#> * Parameters: loss_function=RMSE, logging_level=Silent, thread_count=1,
#>   allow_writing_files=FALSE, save_snapshot=FALSE
#> * Packages: mlr3, mlr3extralearners, catboost
#> * Predict Types:  [response]
#> * Feature Types: numeric, factor, ordered
#> * Properties: importance, missings, weights

# available parameters:
learner$param_set$ids()
#>  [1] "loss_function"                  "iterations"                    
#>  [3] "learning_rate"                  "random_seed"                   
#>  [5] "l2_leaf_reg"                    "bootstrap_type"                
#>  [7] "bagging_temperature"            "subsample"                     
#>  [9] "sampling_frequency"             "sampling_unit"                 
#> [11] "mvs_reg"                        "random_strength"               
#> [13] "depth"                          "grow_policy"                   
#> [15] "min_data_in_leaf"               "max_leaves"                    
#> [17] "has_time"                       "rsm"                           
#> [19] "nan_mode"                       "fold_permutation_block"        
#> [21] "leaf_estimation_method"         "leaf_estimation_iterations"    
#> [23] "leaf_estimation_backtracking"   "fold_len_multiplier"           
#> [25] "approx_on_full_history"         "boosting_type"                 
#> [27] "boost_from_average"             "langevin"                      
#> [29] "diffusion_temperature"          "score_function"                
#> [31] "monotone_constraints"           "feature_weights"               
#> [33] "first_feature_use_penalties"    "penalties_coefficient"         
#> [35] "per_object_feature_penalties"   "model_shrink_rate"             
#> [37] "model_shrink_mode"              "target_border"                 
#> [39] "border_count"                   "feature_border_type"           
#> [41] "per_float_feature_quantization" "thread_count"                  
#> [43] "task_type"                      "devices"                       
#> [45] "logging_level"                  "metric_period"                 
#> [47] "train_dir"                      "model_size_reg"                
#> [49] "allow_writing_files"            "save_snapshot"                 
#> [51] "snapshot_file"                  "snapshot_interval"             
#> [53] "simple_ctr"                     "combinations_ctr"              
#> [55] "ctr_target_border_count"        "counter_calc_method"           
#> [57] "max_ctr_complexity"             "ctr_leaf_count_limit"          
#> [59] "store_all_simple_ctr"           "final_ctr_computation_mode"    
#> [61] "verbose"                        "ntree_start"                   
#> [63] "ntree_end"
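
A train/predict sketch on a built-in task (assuming catboost is installed; output omitted since it depends on the random split):

# train on a holdout split and score the predictions
task = mlr3::tsk("mtcars")
split = mlr3::partition(task)

learner = mlr3::lrn("regr.catboost", iterations = 50)
learner$train(task, row_ids = split$train)

prediction = learner$predict(task, row_ids = split$test)
prediction$score(mlr3::msr("regr.rmse"))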