Skip to contents

Calls lightgbm::lgb.train from package lightgbm.

Details

For categorical features, either pre-process the data by encoding the columns or specify the categorical columns with the categorical_feature parameter. For this learner, do not prefix the categorical feature names with "name:".

Dictionary

This Learner can be instantiated via the dictionary mlr_learners or with the associated sugar function lrn():

mlr_learners$get("classif.lightgbm")
lrn("classif.lightgbm")

Meta Information

  • Task type: “classif”

  • Predict Types: “prob”, “response”

  • Feature Types: “numeric”, “integer”

  • Required Packages: mlr3, mlr3extralearners, lightgbm

Parameters

| Id | Type | Default | Levels | Range |
|---|---|---|---|---|
| nrounds | integer | 5 | | \([1, \infty)\) |
| objective | character | binary | binary, multiclass, multiclassova, cross_entropy, cross_entropy_lambda, lambdarank, rank_xendcg | \((-\infty, \infty)\) |
| metric | character | | , None, ndcg, map, auc, average_precision, binary_logloss, binary_error, auc_mu, multi_logloss, ... | \((-\infty, \infty)\) |
| custom_eval | list | NULL | | \((-\infty, \infty)\) |
| verbose | integer | 1 | | \((-\infty, \infty)\) |
| record | logical | TRUE | TRUE, FALSE | \((-\infty, \infty)\) |
| eval_freq | integer | 1 | | \([1, \infty)\) |
| init_model | list | NULL | | \((-\infty, \infty)\) |
| early_stopping_rounds | integer | - | | \([1, \infty)\) |
| callbacks | list | - | | \((-\infty, \infty)\) |
| reset_data | logical | FALSE | TRUE, FALSE | \((-\infty, \infty)\) |
| categorical_feature | list | | | \((-\infty, \infty)\) |
| boosting | character | gbdt | gbdt, rf, dart, goss | \((-\infty, \infty)\) |
| linear_tree | logical | FALSE | TRUE, FALSE | \((-\infty, \infty)\) |
| num_iterations | integer | 100 | | \([0, \infty)\) |
| learning_rate | numeric | 0.1 | | \([0, \infty)\) |
| num_leaves | integer | 31 | | \([1, 131072]\) |
| tree_learner | character | serial | serial, feature, data, voting | \((-\infty, \infty)\) |
| num_threads | integer | 0 | | \([0, \infty)\) |
| device_type | character | cpu | cpu, gpu | \((-\infty, \infty)\) |
| seed | integer | - | | \((-\infty, \infty)\) |
| deterministic | logical | FALSE | TRUE, FALSE | \((-\infty, \infty)\) |
| force_col_wise | logical | FALSE | TRUE, FALSE | \((-\infty, \infty)\) |
| force_row_wise | logical | FALSE | TRUE, FALSE | \((-\infty, \infty)\) |
| histogram_pool_size | integer | -1 | | \((-\infty, \infty)\) |
| max_depth | integer | -1 | | \((-\infty, \infty)\) |
| min_data_in_leaf | integer | 20 | | \([0, \infty)\) |
| min_sum_hessian_in_leaf | numeric | 0.001 | | \([0, \infty)\) |
| bagging_fraction | numeric | 1 | | \([0, 1]\) |
| pos_bagging_fraction | numeric | 1 | | \([0, 1]\) |
| neg_bagging_fraction | numeric | 1 | | \([0, 1]\) |
| bagging_freq | integer | 0 | | \([0, \infty)\) |
| bagging_seed | integer | 3 | | \((-\infty, \infty)\) |
| feature_fraction | numeric | 1 | | \([0, 1]\) |
| feature_fraction_bynode | numeric | 1 | | \([0, 1]\) |
| feature_fraction_seed | integer | 2 | | \((-\infty, \infty)\) |
| extra_trees | logical | FALSE | TRUE, FALSE | \((-\infty, \infty)\) |
| extra_seed | integer | 6 | | \((-\infty, \infty)\) |
| first_metric_only | logical | FALSE | TRUE, FALSE | \((-\infty, \infty)\) |
| max_delta_step | numeric | 0 | | \((-\infty, \infty)\) |
| lambda_l1 | numeric | 0 | | \([0, \infty)\) |
| lambda_l2 | numeric | 0 | | \([0, \infty)\) |
| linear_lambda | numeric | 0 | | \([0, \infty)\) |
| min_gain_to_split | numeric | 0 | | \([0, \infty)\) |
| drop_rate | numeric | 0.1 | | \([0, 1]\) |
| max_drop | integer | 50 | | \((-\infty, \infty)\) |
| skip_drop | numeric | 0.5 | | \([0, 1]\) |
| xgboost_dart_mode | logical | FALSE | TRUE, FALSE | \((-\infty, \infty)\) |
| uniform_drop | logical | FALSE | TRUE, FALSE | \((-\infty, \infty)\) |
| drop_seed | integer | 4 | | \((-\infty, \infty)\) |
| top_rate | numeric | 0.2 | | \([0, 1]\) |
| other_rate | numeric | 0.1 | | \([0, 1]\) |
| min_data_per_group | integer | 100 | | \([1, \infty)\) |
| max_cat_threshold | integer | 32 | | \([1, \infty)\) |
| cat_l2 | numeric | 10 | | \([0, \infty)\) |
| cat_smooth | numeric | 10 | | \([0, \infty)\) |
| max_cat_to_onehot | integer | 4 | | \([1, \infty)\) |
| top_k | integer | 20 | | \([1, \infty)\) |
| monotone_constraints | list | NULL | | \((-\infty, \infty)\) |
| monotone_constraints_method | character | basic | basic, intermediate, advanced | \((-\infty, \infty)\) |
| monotone_penalty | numeric | 0 | | \([0, \infty)\) |
| feature_contri | list | NULL | | \((-\infty, \infty)\) |
| forcedsplits_filename | list | | | \((-\infty, \infty)\) |
| refit_decay_rate | numeric | 0.9 | | \([0, 1]\) |
| cegb_tradeoff | numeric | 1 | | \([0, \infty)\) |
| cegb_penalty_split | numeric | 0 | | \([0, \infty)\) |
| cegb_penalty_feature_lazy | list | - | | \((-\infty, \infty)\) |
| cegb_penalty_feature_coupled | list | - | | \((-\infty, \infty)\) |
| path_smooth | numeric | 0 | | \([0, \infty)\) |
| interaction_constraints | list | - | | \((-\infty, \infty)\) |
| input_model | list | | | \((-\infty, \infty)\) |
| output_model | list | LightGBM_model.txt | | \((-\infty, \infty)\) |
| saved_feature_importance_type | integer | 0 | | \([0, 1]\) |
| snapshot_freq | integer | -1 | | \((-\infty, \infty)\) |
| max_bin | integer | 255 | | \([2, \infty)\) |
| max_bin_by_feature | list | NULL | | \((-\infty, \infty)\) |
| min_data_in_bin | integer | 3 | | \([1, \infty)\) |
| bin_construct_sample_cnt | integer | 200000 | | \([1, \infty)\) |
| data_random_seed | integer | 1 | | \((-\infty, \infty)\) |
| is_enable_sparse | logical | TRUE | TRUE, FALSE | \((-\infty, \infty)\) |
| enable_bundle | logical | TRUE | TRUE, FALSE | \((-\infty, \infty)\) |
| use_missing | logical | TRUE | TRUE, FALSE | \((-\infty, \infty)\) |
| zero_as_missing | logical | FALSE | TRUE, FALSE | \((-\infty, \infty)\) |
| feature_pre_filter | logical | TRUE | TRUE, FALSE | \((-\infty, \infty)\) |
| pre_partition | logical | FALSE | TRUE, FALSE | \((-\infty, \infty)\) |
| two_round | logical | FALSE | TRUE, FALSE | \((-\infty, \infty)\) |
| header | logical | FALSE | TRUE, FALSE | \((-\infty, \infty)\) |
| group_column | list | | | \((-\infty, \infty)\) |
| forcedbins_filename | list | | | \((-\infty, \infty)\) |
| save_binary | logical | FALSE | TRUE, FALSE | \((-\infty, \infty)\) |
| objective_seed | integer | 5 | | \((-\infty, \infty)\) |
| is_unbalance | logical | FALSE | TRUE, FALSE | \((-\infty, \infty)\) |
| scale_pos_weight | numeric | 1 | | \([0, \infty)\) |
| sigmoid | numeric | 1 | | \([0, \infty)\) |
| boost_from_average | logical | TRUE | TRUE, FALSE | \((-\infty, \infty)\) |
| lambdarank_truncation_level | integer | 30 | | \([1, \infty)\) |
| lambdarank_norm | logical | TRUE | TRUE, FALSE | \((-\infty, \infty)\) |
| label_gain | list | - | | \((-\infty, \infty)\) |
| metric_freq | integer | 1 | | \([1, \infty)\) |
| is_provide_training_metric | logical | FALSE | TRUE, FALSE | \((-\infty, \infty)\) |
| eval_at | list | 1, 2, 3, 4, 5 | | \((-\infty, \infty)\) |
| multi_error_top_k | integer | 1 | | \([1, \infty)\) |
| auc_mu_weights | list | NULL | | \((-\infty, \infty)\) |
| num_machines | integer | 1 | | \([1, \infty)\) |
| local_listen_port | integer | 12400 | | \([1, \infty)\) |
| time_out | integer | 120 | | \([1, \infty)\) |
| machine_list_filename | list | | | \((-\infty, \infty)\) |
| machines | list | | | \((-\infty, \infty)\) |
| gpu_platform_id | integer | -1 | | \((-\infty, \infty)\) |
| gpu_device_id | integer | -1 | | \((-\infty, \infty)\) |
| gpu_use_dp | logical | FALSE | TRUE, FALSE | \((-\infty, \infty)\) |
| num_gpu | integer | 1 | | \([1, \infty)\) |
| start_iteration | integer | 0 | | \((-\infty, \infty)\) |
| num_iteration | integer | -1 | | \((-\infty, \infty)\) |
| pred_early_stop | logical | FALSE | TRUE, FALSE | \((-\infty, \infty)\) |
| pred_early_stop_freq | integer | 10 | | \((-\infty, \infty)\) |
| pred_early_stop_margin | numeric | 10 | | \((-\infty, \infty)\) |
| output_result | list | LightGBM_predict_result.txt | | \((-\infty, \infty)\) |

Custom mlr3 defaults

  • num_threads:

    • Actual default: 0L

    • Adjusted default: 1L

    • Reason for change: Prevents accidental conflicts with the future package.

  • verbose:

    • Actual default: 1L

    • Adjusted default: -1L

    • Reason for change: Prevents accidental conflicts with the mlr3 messaging system.

See also

Author

kapsner

Super classes

mlr3::Learner -> mlr3::LearnerClassif -> LearnerClassifLightGBM

Methods

Inherited methods


Method new()

Creates a new instance of this R6 class.


Method importance()

The importance scores are extracted from lgb.importance.

Usage

LearnerClassifLightGBM$importance()

Returns

Named numeric().


Method clone()

The objects of this class are cloneable with this method.

Usage

LearnerClassifLightGBM$clone(deep = FALSE)

Arguments

deep

Whether to make a deep clone.

Examples

if (requireNamespace("lightgbm", quietly = TRUE)) {
  learner = mlr3::lrn("classif.lightgbm")
  print(learner)

  # available parameters:
  learner$param_set$ids()
}
#> <LearnerClassifLightGBM:classif.lightgbm>
#> * Model: -
#> * Parameters: num_threads=1, verbose=-1
#> * Packages: mlr3, mlr3extralearners, lightgbm
#> * Predict Type: prob
#> * Feature types: numeric, integer
#> * Properties: importance, missings, multiclass, twoclass, weights
#>   [1] "nrounds"                       "objective"                    
#>   [3] "metric"                        "custom_eval"                  
#>   [5] "verbose"                       "record"                       
#>   [7] "eval_freq"                     "init_model"                   
#>   [9] "early_stopping_rounds"         "callbacks"                    
#>  [11] "reset_data"                    "categorical_feature"          
#>  [13] "boosting"                      "linear_tree"                  
#>  [15] "num_iterations"                "learning_rate"                
#>  [17] "num_leaves"                    "tree_learner"                 
#>  [19] "num_threads"                   "device_type"                  
#>  [21] "seed"                          "deterministic"                
#>  [23] "force_col_wise"                "force_row_wise"               
#>  [25] "histogram_pool_size"           "max_depth"                    
#>  [27] "min_data_in_leaf"              "min_sum_hessian_in_leaf"      
#>  [29] "bagging_fraction"              "pos_bagging_fraction"         
#>  [31] "neg_bagging_fraction"          "bagging_freq"                 
#>  [33] "bagging_seed"                  "feature_fraction"             
#>  [35] "feature_fraction_bynode"       "feature_fraction_seed"        
#>  [37] "extra_trees"                   "extra_seed"                   
#>  [39] "first_metric_only"             "max_delta_step"               
#>  [41] "lambda_l1"                     "lambda_l2"                    
#>  [43] "linear_lambda"                 "min_gain_to_split"            
#>  [45] "drop_rate"                     "max_drop"                     
#>  [47] "skip_drop"                     "xgboost_dart_mode"            
#>  [49] "uniform_drop"                  "drop_seed"                    
#>  [51] "top_rate"                      "other_rate"                   
#>  [53] "min_data_per_group"            "max_cat_threshold"            
#>  [55] "cat_l2"                        "cat_smooth"                   
#>  [57] "max_cat_to_onehot"             "top_k"                        
#>  [59] "monotone_constraints"          "monotone_constraints_method"  
#>  [61] "monotone_penalty"              "feature_contri"               
#>  [63] "forcedsplits_filename"         "refit_decay_rate"             
#>  [65] "cegb_tradeoff"                 "cegb_penalty_split"           
#>  [67] "cegb_penalty_feature_lazy"     "cegb_penalty_feature_coupled" 
#>  [69] "path_smooth"                   "interaction_constraints"      
#>  [71] "input_model"                   "output_model"                 
#>  [73] "saved_feature_importance_type" "snapshot_freq"                
#>  [75] "max_bin"                       "max_bin_by_feature"           
#>  [77] "min_data_in_bin"               "bin_construct_sample_cnt"     
#>  [79] "data_random_seed"              "is_enable_sparse"             
#>  [81] "enable_bundle"                 "use_missing"                  
#>  [83] "zero_as_missing"               "feature_pre_filter"           
#>  [85] "pre_partition"                 "two_round"                    
#>  [87] "header"                        "group_column"                 
#>  [89] "forcedbins_filename"           "save_binary"                  
#>  [91] "objective_seed"                "is_unbalance"                 
#>  [93] "scale_pos_weight"              "sigmoid"                      
#>  [95] "boost_from_average"            "lambdarank_truncation_level"  
#>  [97] "lambdarank_norm"               "label_gain"                   
#>  [99] "metric_freq"                   "is_provide_training_metric"   
#> [101] "eval_at"                       "multi_error_top_k"            
#> [103] "auc_mu_weights"                "num_machines"                 
#> [105] "local_listen_port"             "time_out"                     
#> [107] "machine_list_filename"         "machines"                     
#> [109] "gpu_platform_id"               "gpu_device_id"                
#> [111] "gpu_use_dp"                    "num_gpu"                      
#> [113] "start_iteration"               "num_iteration"                
#> [115] "pred_early_stop"               "pred_early_stop_freq"         
#> [117] "pred_early_stop_margin"        "output_result"