Skip to contents

Calls lightgbm::lgb.train from package lightgbm.

Dictionary

This Learner can be instantiated via the dictionary mlr_learners or with the associated sugar function lrn():

mlr_learners$get("regr.lightgbm")
lrn("regr.lightgbm")

Meta Information

  • Task type: “regr”

  • Predict Types: “response”

  • Feature Types: “numeric”, “integer”

  • Required Packages: mlr3extralearners, lightgbm

Parameters

| Id | Type | Default | Levels | Range |
|---|---|---|---|---|
| nrounds | integer | 5 | | \([1, \infty)\) |
| objective | character | regression | regression, regression_l1, huber, fair, poisson, quantile, mape, gamma, tweedie | \((-\infty, \infty)\) |
| metric | character | | , None, l1, l2, rmse, quantile, mape, huber, fair, poisson, ... | \((-\infty, \infty)\) |
| custom_eval | list | NULL | | \((-\infty, \infty)\) |
| verbose | integer | 1 | | \((-\infty, \infty)\) |
| record | logical | TRUE | TRUE, FALSE | \((-\infty, \infty)\) |
| eval_freq | integer | 1 | | \([1, \infty)\) |
| init_model | list | NULL | | \((-\infty, \infty)\) |
| early_stopping_rounds | integer | - | | \([1, \infty)\) |
| callbacks | list | - | | \((-\infty, \infty)\) |
| reset_data | logical | FALSE | TRUE, FALSE | \((-\infty, \infty)\) |
| categorical_feature | list | | | \((-\infty, \infty)\) |
| boosting | character | gbdt | gbdt, rf, dart, goss | \((-\infty, \infty)\) |
| linear_tree | logical | FALSE | TRUE, FALSE | \((-\infty, \infty)\) |
| num_iterations | integer | 100 | | \([0, \infty)\) |
| learning_rate | numeric | 0.1 | | \([0, \infty)\) |
| num_leaves | integer | 31 | | \([1, 131072]\) |
| tree_learner | character | serial | serial, feature, data, voting | \((-\infty, \infty)\) |
| num_threads | integer | 0 | | \([0, \infty)\) |
| device_type | character | cpu | cpu, gpu | \((-\infty, \infty)\) |
| seed | integer | - | | \((-\infty, \infty)\) |
| deterministic | logical | FALSE | TRUE, FALSE | \((-\infty, \infty)\) |
| force_col_wise | logical | FALSE | TRUE, FALSE | \((-\infty, \infty)\) |
| force_row_wise | logical | FALSE | TRUE, FALSE | \((-\infty, \infty)\) |
| histogram_pool_size | integer | -1 | | \((-\infty, \infty)\) |
| max_depth | integer | -1 | | \((-\infty, \infty)\) |
| min_data_in_leaf | integer | 20 | | \([0, \infty)\) |
| min_sum_hessian_in_leaf | numeric | 0.001 | | \([0, \infty)\) |
| bagging_fraction | numeric | 1 | | \([0, 1]\) |
| bagging_freq | integer | 0 | | \([0, \infty)\) |
| bagging_seed | integer | 3 | | \((-\infty, \infty)\) |
| feature_fraction | numeric | 1 | | \([0, 1]\) |
| feature_fraction_bynode | numeric | 1 | | \([0, 1]\) |
| feature_fraction_seed | integer | 2 | | \((-\infty, \infty)\) |
| extra_trees | logical | FALSE | TRUE, FALSE | \((-\infty, \infty)\) |
| extra_seed | integer | 6 | | \((-\infty, \infty)\) |
| first_metric_only | logical | FALSE | TRUE, FALSE | \((-\infty, \infty)\) |
| max_delta_step | numeric | 0 | | \((-\infty, \infty)\) |
| lambda_l1 | numeric | 0 | | \([0, \infty)\) |
| lambda_l2 | numeric | 0 | | \([0, \infty)\) |
| linear_lambda | numeric | 0 | | \([0, \infty)\) |
| min_gain_to_split | numeric | 0 | | \([0, \infty)\) |
| drop_rate | numeric | 0.1 | | \([0, 1]\) |
| max_drop | integer | 50 | | \((-\infty, \infty)\) |
| skip_drop | numeric | 0.5 | | \([0, 1]\) |
| xgboost_dart_mode | logical | FALSE | TRUE, FALSE | \((-\infty, \infty)\) |
| uniform_drop | logical | FALSE | TRUE, FALSE | \((-\infty, \infty)\) |
| drop_seed | integer | 4 | | \((-\infty, \infty)\) |
| top_rate | numeric | 0.2 | | \([0, 1]\) |
| other_rate | numeric | 0.1 | | \([0, 1]\) |
| min_data_per_group | integer | 100 | | \([1, \infty)\) |
| max_cat_threshold | integer | 32 | | \([1, \infty)\) |
| cat_l2 | numeric | 10 | | \([0, \infty)\) |
| cat_smooth | numeric | 10 | | \([0, \infty)\) |
| max_cat_to_onehot | integer | 4 | | \([1, \infty)\) |
| top_k | integer | 20 | | \([1, \infty)\) |
| monotone_constraints | list | NULL | | \((-\infty, \infty)\) |
| monotone_constraints_method | character | basic | basic, intermediate, advanced | \((-\infty, \infty)\) |
| monotone_penalty | numeric | 0 | | \([0, \infty)\) |
| feature_contri | list | NULL | | \((-\infty, \infty)\) |
| forcedsplits_filename | list | | | \((-\infty, \infty)\) |
| refit_decay_rate | numeric | 0.9 | | \([0, 1]\) |
| cegb_tradeoff | numeric | 1 | | \([0, \infty)\) |
| cegb_penalty_split | numeric | 0 | | \([0, \infty)\) |
| cegb_penalty_feature_lazy | list | - | | \((-\infty, \infty)\) |
| cegb_penalty_feature_coupled | list | - | | \((-\infty, \infty)\) |
| path_smooth | numeric | 0 | | \([0, \infty)\) |
| interaction_constraints | list | - | | \((-\infty, \infty)\) |
| input_model | list | | | \((-\infty, \infty)\) |
| output_model | list | LightGBM_model.txt | | \((-\infty, \infty)\) |
| saved_feature_importance_type | integer | 0 | | \([0, 1]\) |
| snapshot_freq | integer | -1 | | \((-\infty, \infty)\) |
| max_bin | integer | 255 | | \([2, \infty)\) |
| max_bin_by_feature | list | NULL | | \((-\infty, \infty)\) |
| min_data_in_bin | integer | 3 | | \([1, \infty)\) |
| bin_construct_sample_cnt | integer | 200000 | | \([1, \infty)\) |
| data_random_seed | integer | 1 | | \((-\infty, \infty)\) |
| is_enable_sparse | logical | TRUE | TRUE, FALSE | \((-\infty, \infty)\) |
| enable_bundle | logical | TRUE | TRUE, FALSE | \((-\infty, \infty)\) |
| use_missing | logical | TRUE | TRUE, FALSE | \((-\infty, \infty)\) |
| zero_as_missing | logical | FALSE | TRUE, FALSE | \((-\infty, \infty)\) |
| feature_pre_filter | logical | TRUE | TRUE, FALSE | \((-\infty, \infty)\) |
| pre_partition | logical | FALSE | TRUE, FALSE | \((-\infty, \infty)\) |
| two_round | logical | FALSE | TRUE, FALSE | \((-\infty, \infty)\) |
| header | logical | FALSE | TRUE, FALSE | \((-\infty, \infty)\) |
| group_column | list | | | \((-\infty, \infty)\) |
| forcedbins_filename | list | | | \((-\infty, \infty)\) |
| save_binary | logical | FALSE | TRUE, FALSE | \((-\infty, \infty)\) |
| boost_from_average | logical | TRUE | TRUE, FALSE | \((-\infty, \infty)\) |
| reg_sqrt | logical | FALSE | TRUE, FALSE | \((-\infty, \infty)\) |
| alpha | numeric | 0.9 | | \([0, \infty)\) |
| fair_c | numeric | 1 | | \([0, \infty)\) |
| poisson_max_delta_step | numeric | 0.7 | | \([0, \infty)\) |
| tweedie_variance_power | numeric | 1.5 | | \([1, 2]\) |
| metric_freq | integer | 1 | | \([1, \infty)\) |
| is_provide_training_metric | logical | FALSE | TRUE, FALSE | \((-\infty, \infty)\) |
| num_machines | integer | 1 | | \([1, \infty)\) |
| local_listen_port | integer | 12400 | | \([1, \infty)\) |
| time_out | integer | 120 | | \([1, \infty)\) |
| machine_list_filename | list | | | \((-\infty, \infty)\) |
| machines | list | | | \((-\infty, \infty)\) |
| gpu_platform_id | integer | -1 | | \((-\infty, \infty)\) |
| gpu_device_id | integer | -1 | | \((-\infty, \infty)\) |
| gpu_use_dp | logical | FALSE | TRUE, FALSE | \((-\infty, \infty)\) |
| num_gpu | integer | 1 | | \([1, \infty)\) |
| start_iteration | integer | 0 | | \((-\infty, \infty)\) |
| num_iteration | integer | -1 | | \((-\infty, \infty)\) |
| pred_early_stop | logical | FALSE | TRUE, FALSE | \((-\infty, \infty)\) |
| pred_early_stop_freq | integer | 10 | | \((-\infty, \infty)\) |
| pred_early_stop_margin | numeric | 10 | | \((-\infty, \infty)\) |
| output_result | list | LightGBM_predict_result.txt | | \((-\infty, \infty)\) |

Custom mlr3 defaults

  • num_threads:

    • Actual default: 0L

    • Adjusted default: 1L

    • Reason for change: Prevents accidental conflicts with the future package.

  • verbose:

    • Actual default: 1L

    • Adjusted default: -1L

    • Reason for change: Prevents accidental conflicts with the mlr3 messaging system.

For categorical features, either pre-process the data by encoding the columns or specify the categorical columns with the categorical_feature parameter. For this learner, do not prefix the categorical feature names with "name:".

See also

Author

kapsner

Super classes

mlr3::Learner -> mlr3::LearnerRegr -> LearnerRegrLightGBM

Methods

Inherited methods


Method new()

Creates a new instance of this R6 class.

Usage


Method importance()

The importance scores are extracted from lgb.importance.

Usage

LearnerRegrLightGBM$importance()

Returns

Named numeric().


Method clone()

The objects of this class are cloneable with this method.

Usage

LearnerRegrLightGBM$clone(deep = FALSE)

Arguments

deep

Whether to make a deep clone.

Examples

if (requireNamespace("lightgbm", quietly = TRUE)) {
  learner = mlr3::lrn("regr.lightgbm")
  print(learner)

  # available parameters:
  learner$param_set$ids()
}
#> <LearnerRegrLightGBM:regr.lightgbm>
#> * Model: -
#> * Parameters: num_threads=1, verbose=-1, objective=regression
#> * Packages: mlr3, mlr3extralearners, lightgbm
#> * Predict Type: response
#> * Feature types: numeric, integer
#> * Properties: importance, missings, weights
#>   [1] "nrounds"                       "objective"                    
#>   [3] "metric"                        "custom_eval"                  
#>   [5] "verbose"                       "record"                       
#>   [7] "eval_freq"                     "init_model"                   
#>   [9] "early_stopping_rounds"         "callbacks"                    
#>  [11] "reset_data"                    "categorical_feature"          
#>  [13] "boosting"                      "linear_tree"                  
#>  [15] "num_iterations"                "learning_rate"                
#>  [17] "num_leaves"                    "tree_learner"                 
#>  [19] "num_threads"                   "device_type"                  
#>  [21] "seed"                          "deterministic"                
#>  [23] "force_col_wise"                "force_row_wise"               
#>  [25] "histogram_pool_size"           "max_depth"                    
#>  [27] "min_data_in_leaf"              "min_sum_hessian_in_leaf"      
#>  [29] "bagging_fraction"              "bagging_freq"                 
#>  [31] "bagging_seed"                  "feature_fraction"             
#>  [33] "feature_fraction_bynode"       "feature_fraction_seed"        
#>  [35] "extra_trees"                   "extra_seed"                   
#>  [37] "first_metric_only"             "max_delta_step"               
#>  [39] "lambda_l1"                     "lambda_l2"                    
#>  [41] "linear_lambda"                 "min_gain_to_split"            
#>  [43] "drop_rate"                     "max_drop"                     
#>  [45] "skip_drop"                     "xgboost_dart_mode"            
#>  [47] "uniform_drop"                  "drop_seed"                    
#>  [49] "top_rate"                      "other_rate"                   
#>  [51] "min_data_per_group"            "max_cat_threshold"            
#>  [53] "cat_l2"                        "cat_smooth"                   
#>  [55] "max_cat_to_onehot"             "top_k"                        
#>  [57] "monotone_constraints"          "monotone_constraints_method"  
#>  [59] "monotone_penalty"              "feature_contri"               
#>  [61] "forcedsplits_filename"         "refit_decay_rate"             
#>  [63] "cegb_tradeoff"                 "cegb_penalty_split"           
#>  [65] "cegb_penalty_feature_lazy"     "cegb_penalty_feature_coupled" 
#>  [67] "path_smooth"                   "interaction_constraints"      
#>  [69] "input_model"                   "output_model"                 
#>  [71] "saved_feature_importance_type" "snapshot_freq"                
#>  [73] "max_bin"                       "max_bin_by_feature"           
#>  [75] "min_data_in_bin"               "bin_construct_sample_cnt"     
#>  [77] "data_random_seed"              "is_enable_sparse"             
#>  [79] "enable_bundle"                 "use_missing"                  
#>  [81] "zero_as_missing"               "feature_pre_filter"           
#>  [83] "pre_partition"                 "two_round"                    
#>  [85] "header"                        "group_column"                 
#>  [87] "forcedbins_filename"           "save_binary"                  
#>  [89] "boost_from_average"            "reg_sqrt"                     
#>  [91] "alpha"                         "fair_c"                       
#>  [93] "poisson_max_delta_step"        "tweedie_variance_power"       
#>  [95] "metric_freq"                   "is_provide_training_metric"   
#>  [97] "num_machines"                  "local_listen_port"            
#>  [99] "time_out"                      "machine_list_filename"        
#> [101] "machines"                      "gpu_platform_id"              
#> [103] "gpu_device_id"                 "gpu_use_dp"                   
#> [105] "num_gpu"                       "start_iteration"              
#> [107] "num_iteration"                 "pred_early_stop"              
#> [109] "pred_early_stop_freq"          "pred_early_stop_margin"       
#> [111] "output_result"