RandomForest/manifest at develop · genepattern/RandomForest · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
#RandomForest
#Fri Feb 16 20:49:45 GMT 2024
JVMLevel=
LSID=urn\:lsid\:8080.gpserver.ip-172-31-26-71.ip-172-31-26-71.ec2.internal\:genepatternmodules\:730\:2
author=Omar Halawa;GenePattern Team @ Mesirov Lab - UCSD
categories=prediction
commandLine=python /RandomForest/rnd_forest.py <train.data.file> <train.class.file> <test.data.file> <test.class.file> <model.output> <model.output.filename> <model.input.file>  <prediction.results.filename>  <feature.importance.filename> --n_jobs <job.cpuCount> <bootstrap> <ccp_alpha> <class_weight> <criterion> <max_depth> <max_features> <max_leaf_nodes> <max_samples> <min_impurity_decrease> <min_samples_leaf> <min_samples_split> <min_weight_fraction_leaf> <n_estimators>  <random_state> <verbose> <debug>
cpuType=any
description=Performs random forest classification (cross-validation or test-train prediction), outputting a prediction results file (and feature importance for test-train mode). See the RandomForest.GPU module for potentially faster jobs.
documentationUrl=https\://github.com/genepattern/RandomForest/blob/main/docs/tutorial.md
fileFormat=.pred.odf
job.cpuCount=
job.docker.image=genepattern/randomforest\:v0.5
job.memory=
job.walltime=
language=any
name=RandomForest
os=any
p10_MODE=
p10_TYPE=Floating Point
p10_default_value=0.0
p10_description=Optional float for complexity parameter of minimum cost-complexity pruning.\n\nPossible input range is any float greater than or equal to 0.0.\nDefault value is 0.0.
p10_fileFormat=
p10_flag=--ccp_alpha
p10_name=ccp_alpha
p10_numValues=0..1
p10_optional=on
p10_prefix=--ccp_alpha
p10_prefix_when_specified=--ccp_alpha
p10_range=0+
p10_type=java.lang.Float
p10_value=
p11_MODE=
p11_TYPE=TEXT
p11_default_value=None
p11_description=Optional string for class weight specification.\n\nDefault value is None.
p11_fileFormat=
p11_flag=--class_weight
p11_name=class_weight
p11_numValues=0..1
p11_optional=on
p11_prefix=--class_weight
p11_prefix_when_specified=--class_weight
p11_type=java.lang.String
p11_value=None\=None;balanced\=balanced;balanced_subsample\=balanced_subsample
p12_MODE=
p12_TYPE=TEXT
p12_default_value=gini
p12_description=Optional string for node-splitting criterion.\n\nDefault value is "gini".
p12_fileFormat=
p12_flag=--criterion
p12_name=criterion
p12_numValues=0..1
p12_optional=on
p12_prefix=--criterion
p12_prefix_when_specified=--criterion
p12_type=java.lang.String
p12_value=gini\=gini;entropy\=entropy;log_loss\=log_loss
p13_MODE=
p13_TYPE=Integer
p13_default_value=None
p13_description=Optional int for maximum tree depth.\n\nPossible input range is ints greater than or equal to 1. Default value is None.
p13_fileFormat=
p13_flag=--max_depth
p13_name=max_depth
p13_numValues=0..1
p13_optional=on
p13_prefix=--max_depth
p13_prefix_when_specified=--max_depth
p13_range=1+
p13_type=java.lang.Integer
p13_value=
p14_MODE=
p14_TYPE=TEXT
p14_default_value=sqrt
p14_description=Optional string for number of features to consider when looking for best split. "None" will use all features.\n\nDefault value is "sqrt".\nNote\: "auto" has been removed in newer versions of Scikit (v1.3 and later).
p14_fileFormat=
p14_flag=--max_features
p14_name=max_features
p14_numValues=0..1
p14_optional=on
p14_prefix=--max_features
p14_prefix_when_specified=--max_features
p14_type=java.lang.String
p14_value=sqrt\=sqrt;log2\=log2;auto\=auto;None\=None
p15_MODE=
p15_TYPE=Integer
p15_default_value=None
p15_description=Optional int for maximum leaf nodes per tree.\n\nPossible input range is ints greater than or equal to 2. Default value is None.
p15_fileFormat=
p15_flag=--max_leaf_nodes
p15_name=max_leaf_nodes
p15_numValues=0..1
p15_optional=on
p15_prefix=--max_leaf_nodes
p15_prefix_when_specified=--max_leaf_nodes
p15_range=2+
p15_type=java.lang.Integer
p15_value=
p16_MODE=
p16_TYPE=Floating Point
p16_default_value=None
p16_description=Optional float for the fraction of training samples to draw for each tree.\n\nPossible input range is floats between 0.0 and 1.0 (inclusive of both).\nDefault value is None.\nNote\: If bootstrap is False, this can only be None.
p16_fileFormat=
p16_flag=--max_samples
p16_name=max_samples
p16_numValues=0..1
p16_optional=on
p16_prefix=--max_samples
p16_prefix_when_specified=--max_samples
p16_range=0..1
p16_type=java.lang.Float
p16_value=
p17_MODE=
p17_TYPE=Floating Point
p17_default_value=0.0
p17_description=Optional float for minimum impurity decrease needed per node split.\n\nPossible input range is floats greater than or equal to 0.0.\nDefault value is 0.0.
p17_fileFormat=
p17_flag=--min_impurity_decrease
p17_name=min_impurity_decrease
p17_numValues=0..1
p17_optional=on
p17_prefix=--min_impurity_decrease
p17_prefix_when_specified=--min_impurity_decrease
p17_range=0+
p17_type=java.lang.Float
p17_value=
p18_MODE=
p18_TYPE=Integer
p18_default_value=1
p18_description=Optional int for minimum number of samples required at leaf node.\n\nPossible input range is ints greater than or equal to 1.\nDefault value is 1.
p18_fileFormat=
p18_flag=--min_samples_leaf
p18_name=min_samples_leaf
p18_numValues=0..1
p18_optional=on
p18_prefix=--min_samples_leaf
p18_prefix_when_specified=--min_samples_leaf
p18_range=1+
p18_type=java.lang.Integer
p18_value=
p19_MODE=
p19_TYPE=Integer
p19_default_value=2
p19_description=Optional int for minimum sample number to split node.\n\nPossible input range is ints greater than or equal to 2.\nDefault value is 2.
p19_fileFormat=
p19_flag=--min_samples_split
p19_name=min_samples_split
p19_numValues=0..1
p19_optional=on
p19_prefix=--min_samples_split
p19_prefix_when_specified=--min_samples_split
p19_range=2+
p19_type=java.lang.Integer
p19_value=
p1_MODE=IN
p1_TYPE=FILE
p1_default_value=
p1_description=Training data (feature) file. Takes .gct file (follows GenePattern GCT format).
p1_fileFormat=.gct
p1_flag=--feature
p1_name=train.data.file
p1_numValues=0..1
p1_optional=on
p1_prefix=--feature
p1_prefix_when_specified=--feature
p1_type=java.io.File
p1_value=
p20_MODE=
p20_TYPE=Floating Point
p20_default_value=0.0
p20_description=Optional float for the minimum weighted fraction of the total sum of sample weights required to be at a leaf node.\n\nPossible input range is floats between 0.0 and 0.5 (inclusive of both).\nDefault value is 0.0.
p20_fileFormat=
p20_flag=--min_weight_fraction_leaf
p20_name=min_weight_fraction_leaf
p20_numValues=0..1
p20_optional=on
p20_prefix=--min_weight_fraction_leaf
p20_prefix_when_specified=--min_weight_fraction_leaf
p20_range=0..0.5
p20_type=java.lang.Float
p20_value=
p21_MODE=
p21_TYPE=Integer
p21_default_value=100
p21_description=Optional int for number of trees in forest.\n\nPossible input range is ints greater than or equal to 1.\nDefault value is 100.
p21_fileFormat=
p21_flag=--n_estimators
p21_name=n_estimators
p21_numValues=0..1
p21_optional=on
p21_prefix=--n_estimators
p21_prefix_when_specified=--n_estimators
p21_range=1+
p21_type=java.lang.Integer
p21_value=
p22_MODE=
p22_TYPE=Integer
p22_default_value=
p22_description=Optional int for seed of random number generator.\n\nPossible input range is non-negative ints (caps at 4294967295, 2^32 - 1). \nDefault value is None.\nNote\: Setting this to a specific integer for a specific dataset will always yield the same prediction results file; see random_state in the <a href\=https\://scikit-learn.org/stable/glossary.html\#term-random_state>Scikit documentation</a> for more details.
p22_fileFormat=
p22_flag=--random_state
p22_name=random_state
p22_numValues=0..1
p22_optional=on
p22_prefix=--random_state
p22_prefix_when_specified=--random_state
p22_range=0+
p22_type=java.lang.Integer
p22_value=
p23_MODE=
p23_TYPE=TEXT
p23_default_value=False
p23_description=Optional boolean for program debugging.\n\nDefault value is False.
p23_fileFormat=
p23_flag=--debug
p23_name=debug
p23_numValues=0..1
p23_optional=on
p23_prefix=--debug
p23_prefix_when_specified=--debug
p23_type=java.lang.String
p23_value=False\=False;True\=True
p24_MODE=
p24_TYPE=Integer
p24_default_value=0
p24_description=Optional int (0 \= no verbosity, 1 \= base verbosity) to increase classifier verbosity. For other values, see <a href\=https\://scikit-learn.org/stable/glossary.html\#term-verbose>Scikit documentation</a>.\n\nPossible input range is non-negative ints.\nDefault value is 0.\n
p24_fileFormat=
p24_flag=--verbose
p24_name=verbose
p24_numValues=0..1
p24_optional=on
p24_prefix=--verbose
p24_prefix_when_specified=--verbose
p24_range=0+
p24_type=java.lang.Integer
p24_value=
p25_MODE=IN
p25_TYPE=FILE
p25_default_value=
p25_description=Optional model file input (.pkl, similar to model.output file). This can serve as a substitute for "Training Data," and if both are provided, the model input file is used.
p25_fileFormat=.pkl
p25_flag=--model_input
p25_name=model.input.file
p25_numValues=0..1
p25_optional=on
p25_prefix=--model_input
p25_prefix_when_specified=--model_input
p25_type=java.io.File
p25_value=
p2_MODE=IN
p2_TYPE=FILE
p2_default_value=
p2_description=Training class (target) file. Takes .cls file (follows GenePattern CLS format).
p2_fileFormat=.cls
p2_flag=--target
p2_name=train.class.file
p2_numValues=0..1
p2_optional=on
p2_prefix=--target
p2_prefix_when_specified=--target
p2_type=java.io.File
p2_value=
p3_MODE=
p3_TYPE=TEXT
p3_default_value=False
p3_description=Optional parameter to export model trained on the dataset input in "Training Data" as a compressed pickle file (.pkl). Note\: This model will always be fitted using all samples of train.data.file regardless of whether LOOCV is carried out for prediction. Only works if training dataset (not model input file) is provided. Default value is False.
p3_fileFormat=
p3_flag=--model_output
p3_name=model.output
p3_numValues=0..1
p3_optional=on
p3_prefix=--model_output
p3_prefix_when_specified=--model_output
p3_type=java.lang.String
p3_value=False\=False;True\=True
p4_MODE=
p4_TYPE=TEXT
p4_default_value=<train.data.file_basename>.pkl
p4_description=Optional parameter to name the model output file if model.output is True (and training dataset is provided). Default uses training data basename.
p4_fileFormat=
p4_flag=--model_output_filename
p4_name=model.output.filename
p4_numValues=0..1
p4_optional=on
p4_prefix=--model_output_filename
p4_prefix_when_specified=--model_output_filename
p4_type=java.lang.String
p4_value=
p5_MODE=IN
p5_TYPE=FILE
p5_default_value=
p5_description=Testing data (feature) file. Takes .gct file (follows GenePattern GCT format).
p5_fileFormat=.gct
p5_flag=--test_feat
p5_name=test.data.file
p5_numValues=0..1
p5_optional=on
p5_prefix=--test_feat
p5_prefix_when_specified=--test_feat
p5_type=java.io.File
p5_value=
p6_MODE=IN
p6_TYPE=FILE
p6_default_value=
p6_description=Testing class (target) file. Takes .cls file (follows GenePattern CLS format).
p6_fileFormat=.cls
p6_flag=--test_tar
p6_name=test.class.file
p6_numValues=0..1
p6_optional=on
p6_prefix=--test_tar
p6_prefix_when_specified=--test_tar
p6_type=java.io.File
p6_value=
p7_MODE=
p7_TYPE=TEXT
p7_default_value=results.pred.odf
p7_description=Classifier prediction results filename (.pred.odf, follows GenePattern ODF format). Default is results.pred.odf.
p7_fileFormat=
p7_flag=--pred_odf
p7_name=prediction.results.filename
p7_numValues=0..1
p7_optional=on
p7_prefix=--pred_odf
p7_prefix_when_specified=--pred_odf
p7_type=java.lang.String
p7_value=
p8_MODE=
p8_TYPE=TEXT
p8_default_value=model.feat.odf
p8_description=Classifier feature importance filename; only outputted for test-train prediction (.feat.odf, follows GenePattern ODF format). Default is model.feat.odf. See <a href\=https\://github.com/genepattern/RandomForest/blob/develop/docs/tutorial.md>documentation</a> for more details.
p8_fileFormat=
p8_flag=--feat_odf
p8_name=feature.importance.filename
p8_numValues=0..1
p8_optional=on
p8_prefix=--feat_odf
p8_prefix_when_specified=--feat_odf
p8_type=java.lang.String
p8_value=
p9_MODE=
p9_TYPE=TEXT
p9_default_value=True
p9_description=Optional boolean to turn on classifier bootstrapping.\n\nDefault value is True.
p9_fileFormat=
p9_flag=--bootstrap
p9_name=bootstrap
p9_numValues=0..1
p9_optional=on
p9_prefix=--bootstrap
p9_prefix_when_specified=--bootstrap
p9_type=java.lang.String
p9_value=True\=True;False\=False
privacy=private
publicationDate=10/26/2023 15\:09
quality=preproduction
requiredPatchLSIDs=
requiredPatchURLs=
src.repo=https\://github.com/genepattern/RandomForest/
taskDoc=
taskType=prediction
userid=omarhalawabeta
version=Module filename bug fixes