sklearn-metadata.json 158 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
7177817791780178117821783178417851786178717881789179017911792179317941795179617971798179918001801180218031804180518061807180818091810181118121813181418151816181718181819182018211822182318241825182618271828182918301831183218331834183518361837183818391840184118421843184418451846184718481849185018511852185318541855185618571858185918601861186218631864186518661867186818691870187118721873187418751876187718781879188018811882188318841885188618871888188918901891189218931894189518961897189818991900190119021903190419051906190719081909191019111912191319141915191619171918191919201921192219231924192519261927192819291930193119321933193419351936193719381939194019411942194319441945194619471948194919501951195219531954195519561957195819591960196119621963196419651966196719681969197019711972197319741975197619771978197919801981198219831984198519861987198819891990199119921993199419951996199719981999200020012002200320042005200620072008200920102011201220132014201520162017201820192020202120222023202420252026202720282029203020312032203320342035203620372038203920402041204220432044204520462047204820492050205120522053205420552056205720582059206020612062206320642065206620672068206920702071207220732074207520762077207820792080208120822083208420852086208720882089209020912092209320942095209620972098209921002101210221032104210521062107210821092110211121122113211421152116211721182119212021212122212321242125212621272128212921302131213221332134213521362137213821392140214121422143214421452146214721482149215021512152215321542155215621572158215921602161216221632164216521662167216821692170217121722173217421752176217721782179218021812182218321842185218621872188218921902191219221932194219521962197219821992200220122022203220422052206220722082209221022112212221322142215221622172218221922202221
  1. [
  2. {
  3. "name": "lightgbm.basic.Booster",
  4. "attributes": [
  5. {
  6. "default": -1,
  7. "name": "best_iteration"
  8. },
  9. {
  10. "default": false,
  11. "name": "network"
  12. },
  13. {
  14. "default": null,
  15. "name": "train_set"
  16. },
  17. {
  18. "default": false,
  19. "name": "stride"
  20. },
  21. {
  22. "default": null,
  23. "name": "model_file"
  24. },
  25. {
  26. "default": null,
  27. "name": "params"
  28. },
  29. {
  30. "default": null,
  31. "name": "pandas_categorical"
  32. }
  33. ]
  34. },
  35. {
  36. "name": "lightgbm.sklearn.LGBMClassifier",
  37. "attributes": [
  38. {
  39. "default": "gbdt",
  40. "name": "boosting_type",
  41. "type": "string"
  42. },
  43. {
  44. "default": null,
  45. "name": "class_weight"
  46. },
  47. {
  48. "default": 1,
  49. "name": "colsample_bytree"
  50. },
  51. {
  52. "default": 0.05,
  53. "name": "learning_rate"
  54. },
  55. {
  56. "default": -1,
  57. "name": "max_depth"
  58. },
  59. {
  60. "default": 20,
  61. "name": "min_child_samples"
  62. },
  63. {
  64. "default": 0.001,
  65. "name": "min_child_weight"
  66. },
  67. {
  68. "default": 0,
  69. "name": "min_split_gain"
  70. },
  71. {
  72. "default": 100,
  73. "name": "n_estimators"
  74. },
  75. {
  76. "default": -1,
  77. "name": "n_jobs"
  78. },
  79. {
  80. "default": 31,
  81. "name": "num_leaves"
  82. },
  83. {
  84. "default": null,
  85. "name": "random_state"
  86. },
  87. {
  88. "default": 0,
  89. "name": "reg_alpha"
  90. },
  91. {
  92. "default": 0,
  93. "name": "reg_lambda"
  94. },
  95. {
  96. "default": true,
  97. "name": "silent",
  98. "type": "boolean"
  99. },
  100. {
  101. "default": 200000,
  102. "name": "subsample_for_bin"
  103. },
  104. {
  105. "default": 0,
  106. "name": "subsample_freq"
  107. },
  108. {
  109. "default": 1,
  110. "name": "subsample"
  111. }
  112. ]
  113. },
  114. {
  115. "name": "lightgbm.sklearn.LGBMRegressor",
  116. "attributes": [
  117. {
  118. "default": "gbdt",
  119. "name": "boosting_type",
  120. "type": "string"
  121. },
  122. {
  123. "default": null,
  124. "name": "class_weight"
  125. },
  126. {
  127. "default": 1,
  128. "name": "colsample_bytree"
  129. },
  130. {
  131. "default": 0.05,
  132. "name": "learning_rate"
  133. },
  134. {
  135. "default": -1,
  136. "name": "max_depth"
  137. },
  138. {
  139. "default": 20,
  140. "name": "min_child_samples"
  141. },
  142. {
  143. "default": 0.001,
  144. "name": "min_child_weight"
  145. },
  146. {
  147. "default": 0,
  148. "name": "min_split_gain"
  149. },
  150. {
  151. "default": 100,
  152. "name": "n_estimators"
  153. },
  154. {
  155. "default": -1,
  156. "name": "n_jobs"
  157. },
  158. {
  159. "default": 31,
  160. "name": "num_leaves"
  161. },
  162. {
  163. "default": null,
  164. "name": "random_state"
  165. },
  166. {
  167. "default": 0,
  168. "name": "reg_alpha"
  169. },
  170. {
  171. "default": 0,
  172. "name": "reg_lambda"
  173. },
  174. {
  175. "default": true,
  176. "name": "silent",
  177. "type": "boolean"
  178. },
  179. {
  180. "default": 200000,
  181. "name": "subsample_for_bin"
  182. },
  183. {
  184. "default": 0,
  185. "name": "subsample_freq"
  186. },
  187. {
  188. "default": 1,
  189. "name": "subsample"
  190. }
  191. ]
  192. },
  193. {
  194. "name": "sklearn.calibration.CalibratedClassifierCV",
  195. "description": "Probability calibration with isotonic regression or logistic regression.\n\nThis class uses cross-validation to both estimate the parameters of a\nclassifier and subsequently calibrate a classifier. With default\n`ensemble=True`, for each cv split it\nfits a copy of the base estimator to the training subset, and calibrates it\nusing the testing subset. For prediction, predicted probabilities are\naveraged across these individual calibrated classifiers. When\n`ensemble=False`, cross-validation is used to obtain unbiased predictions,\nvia :func:`~sklearn.model_selection.cross_val_predict`, which are then\nused for calibration. For prediction, the base estimator, trained using all\nthe data, is used. This is the prediction method implemented when\n`probability=True` for :class:`~sklearn.svm.SVC` and :class:`~sklearn.svm.NuSVC`\nestimators (see :ref:`User Guide <scores_probabilities>` for details).\n\nAlready fitted classifiers can be calibrated via the parameter\n`cv=\"prefit\"`. In this case, no cross-validation is used and all provided\ndata is used for calibration. The user has to take care manually that data\nfor model fitting and calibration are disjoint.\n\nThe calibration is based on the :term:`decision_function` method of the\n`estimator` if it exists, else on :term:`predict_proba`.\n\nRead more in the :ref:`User Guide <calibration>`.\n",
  196. "attributes": [
  197. {
  198. "default": null,
  199. "description": "This parameter is deprecated. Use `estimator` instead.\n\n.. deprecated:: 1.2\nThe parameter `base_estimator` is deprecated in 1.2 and will be\nremoved in 1.4. Use `estimator` instead.\n",
  200. "name": "base_estimator"
  201. },
  202. {
  203. "default": "sigmoid",
  204. "description": "The method to use for calibration. Can be 'sigmoid' which\ncorresponds to Platt's method (i.e. a logistic regression model) or\n'isotonic' which is a non-parametric approach. It is not advised to\nuse isotonic calibration with too few calibration samples\n``(<<1000)`` since it tends to overfit.\n",
  205. "name": "method"
  206. },
  207. {
  208. "default": null,
  209. "description": "Determines the cross-validation splitting strategy.\nPossible inputs for cv are:\n\n- None, to use the default 5-fold cross-validation,\n- integer, to specify the number of folds.\n- :term:`CV splitter`,\n- An iterable yielding (train, test) splits as arrays of indices.\n\nFor integer/None inputs, if ``y`` is binary or multiclass,\n:class:`~sklearn.model_selection.StratifiedKFold` is used. If ``y`` is\nneither binary nor multiclass, :class:`~sklearn.model_selection.KFold`\nis used.\n\nRefer to the :ref:`User Guide <cross_validation>` for the various\ncross-validation strategies that can be used here.\n\nIf \"prefit\" is passed, it is assumed that `estimator` has been\nfitted already and all data is used for calibration.\n\n.. versionchanged:: 0.22\n``cv`` default value if None changed from 3-fold to 5-fold.\n",
  210. "name": "cv",
  211. "optional": true,
  212. "type": "int32"
  213. },
  214. {
  215. "default": null,
  216. "description": "Number of jobs to run in parallel.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors.\n\nBase estimator clones are fitted in parallel across cross-validation\niterations. Therefore parallelism happens only when `cv != \"prefit\"`.\n\nSee :term:`Glossary <n_jobs>` for more details.\n\n.. versionadded:: 0.24\n",
  217. "name": "n_jobs",
  218. "type": "int32"
  219. },
  220. {
  221. "default": true,
  222. "description": "Determines how the calibrator is fitted when `cv` is not `'prefit'`.\nIgnored if `cv='prefit'`.\n\nIf `True`, the `estimator` is fitted using training data, and\ncalibrated using testing data, for each `cv` fold. The final estimator\nis an ensemble of `n_cv` fitted classifier and calibrator pairs, where\n`n_cv` is the number of cross-validation folds. The output is the\naverage predicted probabilities of all pairs.\n\nIf `False`, `cv` is used to compute unbiased predictions, via\n:func:`~sklearn.model_selection.cross_val_predict`, which are then\nused for calibration. At prediction time, the classifier used is the\n`estimator` trained on all the data.\nNote that this method is also internally implemented in\n:mod:`sklearn.svm` estimators with the `probability=True` parameter.\n\n.. versionadded:: 0.24\n",
  223. "name": "ensemble",
  224. "type": "boolean"
  225. },
  226. {
  227. "name": "estimator",
  228. "description": "The classifier whose output needs to be calibrated to provide more\naccurate `predict_proba` outputs. The default classifier is\na :class:`~sklearn.svm.LinearSVC`.\n\n.. versionadded:: 1.2\n",
  229. "default": null
  230. }
  231. ]
  232. },
  233. {
  234. "name": "sklearn.compose._column_transformer.ColumnTransformer",
  235. "description": "Applies transformers to columns of an array or pandas DataFrame.\n\nThis estimator allows different columns or column subsets of the input\nto be transformed separately and the features generated by each transformer\nwill be concatenated to form a single feature space.\nThis is useful for heterogeneous or columnar data, to combine several\nfeature extraction mechanisms or transformations into a single transformer.\n\nRead more in the :ref:`User Guide <column_transformer>`.\n\n.. versionadded:: 0.20\n",
  236. "attributes": [
  237. {
  238. "description": "List of (name, transformer, columns) tuples specifying the\ntransformer objects to be applied to subsets of the data.\n\nname : str\nLike in Pipeline and FeatureUnion, this allows the transformer and\nits parameters to be set using ``set_params`` and searched in grid\nsearch.\ntransformer : {'drop', 'passthrough'} or estimator\nEstimator must support :term:`fit` and :term:`transform`.\nSpecial-cased strings 'drop' and 'passthrough' are accepted as\nwell, to indicate to drop the columns or to pass them through\nuntransformed, respectively.\ncolumns : str, array-like of str, int, array-like of int, array-like of bool, slice or callable\nIndexes the data on its second axis. Integers are interpreted as\npositional columns, while strings can reference DataFrame columns\nby name. A scalar string or int should be used where\n``transformer`` expects X to be a 1d array-like (vector),\notherwise a 2d array will be passed to the transformer.\nA callable is passed the input data `X` and can return any of the\nabove. To select multiple columns by name or dtype, you can use\n:obj:`make_column_selector`.\n",
  239. "name": "transformers"
  240. },
  241. {
  242. "description": "By default, only the specified columns in `transformers` are\ntransformed and combined in the output, and the non-specified\ncolumns are dropped. (default of ``'drop'``).\nBy specifying ``remainder='passthrough'``, all remaining columns that\nwere not specified in `transformers`, but present in the data passed\nto `fit` will be automatically passed through. This subset of columns\nis concatenated with the output of the transformers. For dataframes,\nextra columns not seen during `fit` will be excluded from the output\nof `transform`.\nBy setting ``remainder`` to be an estimator, the remaining\nnon-specified columns will use the ``remainder`` estimator. The\nestimator must support :term:`fit` and :term:`transform`.\nNote that using this feature requires that the DataFrame columns\ninput at :term:`fit` and :term:`transform` have identical order.\n",
  243. "name": "remainder",
  244. "default": "drop"
  245. },
  246. {
  247. "default": 0.3,
  248. "description": "If the output of the different transformers contains sparse matrices,\nthese will be stacked as a sparse matrix if the overall density is\nlower than this value. Use ``sparse_threshold=0`` to always return\ndense. When the transformed output consists of all dense data, the\nstacked result will be dense, and this keyword will be ignored.\n",
  249. "name": "sparse_threshold",
  250. "type": "float32"
  251. },
  252. {
  253. "default": null,
  254. "description": "Number of jobs to run in parallel.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary <n_jobs>`\nfor more details.\n",
  255. "name": "n_jobs",
  256. "type": "int32"
  257. },
  258. {
  259. "default": null,
  260. "description": "Multiplicative weights for features per transformer. The output of the\ntransformer is multiplied by these weights. Keys are transformer names,\nvalues the weights.\n",
  261. "name": "transformer_weights"
  262. },
  263. {
  264. "default": false,
  265. "description": "If True, the time elapsed while fitting each transformer will be\nprinted as it is completed.\n",
  266. "name": "verbose",
  267. "type": "boolean"
  268. },
  269. {
  270. "name": "prefix_feature_names_out",
  271. "description": "If True, :meth:`get_feature_names_out` will prefix all feature names\nwith the name of the transformer that generated that feature.\nIf False, :meth:`get_feature_names_out` will not prefix any feature\nnames and will error if feature names are not unique.\n\n.. versionadded:: 1.0\n",
  272. "type": "boolean",
  273. "default": true
  274. },
  275. {
  276. "name": "verbose_feature_names_out",
  277. "description": "If True, :meth:`ColumnTransformer.get_feature_names_out` will prefix\nall feature names with the name of the transformer that generated that\nfeature.\nIf False, :meth:`ColumnTransformer.get_feature_names_out` will not\nprefix any feature names and will error if feature names are not\nunique.\n\n.. versionadded:: 1.0\n",
  278. "type": "boolean",
  279. "default": true
  280. },
  281. {
  282. "name": "force_int_remainder_cols",
  283. "default": true,
  284. "description": "Force the columns of the last entry of `transformers_`, which\ncorresponds to the \"remainder\" transformer, to always be stored as\nindices (int) rather than column names (str). See description of the\n`transformers_` attribute for details.\n\n.. note::\nIf you do not access the list of columns for the remainder columns\nin the `transformers_` fitted attribute, you do not need to set\nthis parameter.\n\n.. versionadded:: 1.5\n\n.. versionchanged:: 1.7\nThe default value for `force_int_remainder_cols` will change from\n`True` to `False` in version 1.7.\n"
  285. }
  286. ]
  287. },
  288. {
  289. "name": "sklearn.decomposition._pca.PCA",
  290. "description": "Principal component analysis (PCA).\n\nLinear dimensionality reduction using Singular Value Decomposition of the\ndata to project it to a lower dimensional space. The input data is centered\nbut not scaled for each feature before applying the SVD.\n\nIt uses the LAPACK implementation of the full SVD or a randomized truncated\nSVD by the method of Halko et al. 2009, depending on the shape of the input\ndata and the number of components to extract.\n\nWith sparse inputs, the ARPACK implementation of the truncated SVD can be\nused (i.e. through :func:`scipy.sparse.linalg.svds`). Alternatively, one\nmay consider :class:`TruncatedSVD` where the data are not centered.\n\nNotice that this class only supports sparse inputs for some solvers such as\n\"arpack\" and \"covariance_eigh\". See :class:`TruncatedSVD` for an\nalternative with sparse data.\n\nFor a usage example, see\n:ref:`sphx_glr_auto_examples_decomposition_plot_pca_iris.py`\n\nRead more in the :ref:`User Guide <PCA>`.\n",
  291. "attributes": [
  292. {
  293. "default": null,
  294. "description": "Number of components to keep.\nif n_components is not set all components are kept::\n\nn_components == min(n_samples, n_features)\n\nIf ``n_components == 'mle'`` and ``svd_solver == 'full'``, Minka's\nMLE is used to guess the dimension. Use of ``n_components == 'mle'``\nwill interpret ``svd_solver == 'auto'`` as ``svd_solver == 'full'``.\n\nIf ``0 < n_components < 1`` and ``svd_solver == 'full'``, select the\nnumber of components such that the amount of variance that needs to be\nexplained is greater than the percentage specified by n_components.\n\nIf ``svd_solver == 'arpack'``, the number of components must be\nstrictly less than the minimum of n_features and n_samples.\n\nHence, the None case results in::\n\nn_components == min(n_samples, n_features) - 1\n",
  295. "name": "n_components",
  296. "type": "int32"
  297. },
  298. {
  299. "default": true,
  300. "description": "If False, data passed to fit are overwritten and running\nfit(X).transform(X) will not yield the expected results,\nuse fit_transform(X) instead.\n",
  301. "name": "copy",
  302. "type": "boolean"
  303. },
  304. {
  305. "default": false,
  306. "description": "When True (False by default) the `components_` vectors are multiplied\nby the square root of n_samples and then divided by the singular values\nto ensure uncorrelated outputs with unit component-wise variances.\n\nWhitening will remove some information from the transformed signal\n(the relative variance scales of the components) but can sometimes\nimprove the predictive accuracy of the downstream estimators by\nmaking their data respect some hard-wired assumptions.\n",
  307. "name": "whiten",
  308. "optional": true,
  309. "type": "boolean"
  310. },
  311. {
  312. "default": "auto",
  313. "description": "\"auto\" :\nThe solver is selected by a default 'auto' policy based on `X.shape` and\n`n_components`: if the input data has fewer than 1000 features and\nmore than 10 times as many samples, then the \"covariance_eigh\"\nsolver is used. Otherwise, if the input data is larger than 500x500\nand the number of components to extract is lower than 80% of the\nsmallest dimension of the data, then the more efficient\n\"randomized\" method is selected. Otherwise the exact \"full\" SVD is\ncomputed and optionally truncated afterwards.\n\"full\" :\nRun exact full SVD calling the standard LAPACK solver via\n`scipy.linalg.svd` and select the components by postprocessing\n\"covariance_eigh\" :\nPrecompute the covariance matrix (on centered data), run a\nclassical eigenvalue decomposition on the covariance matrix\ntypically using LAPACK and select the components by postprocessing.\nThis solver is very efficient for n_samples >> n_features and small\nn_features. It is, however, not tractable otherwise for large\nn_features (large memory footprint required to materialize the\ncovariance matrix). Also note that compared to the \"full\" solver,\nthis solver effectively doubles the condition number and is\ntherefore less numerically stable (e.g. on input data with a large\nrange of singular values).\n\"arpack\" :\nRun SVD truncated to `n_components` calling ARPACK solver via\n`scipy.sparse.linalg.svds`. It requires strictly\n`0 < n_components < min(X.shape)`\n\"randomized\" :\nRun randomized SVD by the method of Halko et al.\n\n.. versionadded:: 0.18.0\n\n.. versionchanged:: 1.5\nAdded the 'covariance_eigh' solver.\n",
  314. "name": "svd_solver"
  315. },
  316. {
  317. "default": 0.0,
  318. "description": "Tolerance for singular values computed by svd_solver == 'arpack'.\nMust be of range [0.0, infinity).\n\n.. versionadded:: 0.18.0\n",
  319. "name": "tol",
  320. "optional": true,
  321. "type": "float32"
  322. },
  323. {
  324. "default": "auto",
  325. "description": "Number of iterations for the power method computed by\nsvd_solver == 'randomized'.\nMust be of range [0, infinity).\n\n.. versionadded:: 0.18.0\n",
  326. "name": "iterated_power"
  327. },
  328. {
  329. "default": null,
  330. "description": "Used when the 'arpack' or 'randomized' solvers are used. Pass an int\nfor reproducible results across multiple function calls.\nSee :term:`Glossary <random_state>`.\n\n.. versionadded:: 0.18.0\n",
  331. "name": "random_state",
  332. "type": "int32"
  333. },
  334. {
  335. "name": "n_oversamples",
  336. "description": "This parameter is only relevant when `svd_solver=\"randomized\"`.\nIt corresponds to the additional number of random vectors to sample the\nrange of `X` so as to ensure proper conditioning. See\n:func:`~sklearn.utils.extmath.randomized_svd` for more details.\n\n.. versionadded:: 1.1\n",
  337. "type": "int32",
  338. "default": 10
  339. },
  340. {
  341. "name": "power_iteration_normalizer",
  342. "description": "Power iteration normalizer for randomized SVD solver.\nNot used by ARPACK. See :func:`~sklearn.utils.extmath.randomized_svd`\nfor more details.\n\n.. versionadded:: 1.1\n",
  343. "default": "auto"
  344. }
  345. ]
  346. },
  347. {
  348. "name": "sklearn.decomposition._truncated_svd.TruncatedSVD",
  349. "description": "Dimensionality reduction using truncated SVD (aka LSA).\n\nThis transformer performs linear dimensionality reduction by means of\ntruncated singular value decomposition (SVD). Contrary to PCA, this\nestimator does not center the data before computing the singular value\ndecomposition. This means it can work with sparse matrices\nefficiently.\n\nIn particular, truncated SVD works on term count/tf-idf matrices as\nreturned by the vectorizers in :mod:`sklearn.feature_extraction.text`. In\nthat context, it is known as latent semantic analysis (LSA).\n\nThis estimator supports two algorithms: a fast randomized SVD solver, and\na \"naive\" algorithm that uses ARPACK as an eigensolver on `X * X.T` or\n`X.T * X`, whichever is more efficient.\n\nRead more in the :ref:`User Guide <LSA>`.\n",
  350. "attributes": [
  351. {
  352. "default": 2,
  353. "description": "Desired dimensionality of output data.\nIf algorithm='arpack', must be strictly less than the number of features.\nIf algorithm='randomized', must be less than or equal to the number of features.\nThe default value is useful for visualisation. For LSA, a value of\n100 is recommended.\n",
  354. "name": "n_components",
  355. "type": "int32"
  356. },
  357. {
  358. "default": "randomized",
  359. "description": "SVD solver to use. Either \"arpack\" for the ARPACK wrapper in SciPy\n(scipy.sparse.linalg.svds), or \"randomized\" for the randomized\nalgorithm due to Halko (2009).\n",
  360. "name": "algorithm",
  361. "type": "string"
  362. },
  363. {
  364. "default": 5,
  365. "description": "Number of iterations for randomized SVD solver. Not used by ARPACK. The\ndefault is larger than the default in\n:func:`~sklearn.utils.extmath.randomized_svd` to handle sparse\nmatrices that may have large slowly decaying spectrum.\n",
  366. "name": "n_iter",
  367. "optional": true,
  368. "type": "int32"
  369. },
  370. {
  371. "default": null,
  372. "description": "Used during randomized svd. Pass an int for reproducible results across\nmultiple function calls.\nSee :term:`Glossary <random_state>`.\n",
  373. "name": "random_state",
  374. "type": "int32"
  375. },
  376. {
  377. "default": 0.0,
  378. "description": "Tolerance for ARPACK. 0 means machine precision. Ignored by randomized\nSVD solver.\n",
  379. "name": "tol",
  380. "optional": true,
  381. "type": "float32"
  382. },
  383. {
  384. "name": "n_oversamples",
  385. "description": "Number of oversamples for randomized SVD solver. Not used by ARPACK.\nSee :func:`~sklearn.utils.extmath.randomized_svd` for a complete\ndescription.\n\n.. versionadded:: 1.1\n",
  386. "type": "int32",
  387. "default": 10
  388. },
  389. {
  390. "name": "power_iteration_normalizer",
  391. "description": "Power iteration normalizer for randomized SVD solver.\nNot used by ARPACK. See :func:`~sklearn.utils.extmath.randomized_svd`\nfor more details.\n\n.. versionadded:: 1.1\n",
  392. "default": "auto"
  393. }
  394. ]
  395. },
  396. {
  397. "name": "sklearn.decomposition.PCA",
  398. "description": "Principal component analysis (PCA).\n\nLinear dimensionality reduction using Singular Value Decomposition of the\ndata to project it to a lower dimensional space. The input data is centered\nbut not scaled for each feature before applying the SVD.\n\nIt uses the LAPACK implementation of the full SVD or a randomized truncated\nSVD by the method of Halko et al. 2009, depending on the shape of the input\ndata and the number of components to extract.\n\nWith sparse inputs, the ARPACK implementation of the truncated SVD can be\nused (i.e. through :func:`scipy.sparse.linalg.svds`). Alternatively, one\nmay consider :class:`TruncatedSVD` where the data are not centered.\n\nNotice that this class only supports sparse inputs for some solvers such as\n\"arpack\" and \"covariance_eigh\". See :class:`TruncatedSVD` for an\nalternative with sparse data.\n\nFor a usage example, see\n:ref:`sphx_glr_auto_examples_decomposition_plot_pca_iris.py`\n\nRead more in the :ref:`User Guide <PCA>`.\n",
  399. "attributes": [
  400. {
  401. "default": null,
  402. "description": "Number of components to keep.\nif n_components is not set all components are kept::\n\nn_components == min(n_samples, n_features)\n\nIf ``n_components == 'mle'`` and ``svd_solver == 'full'``, Minka's\nMLE is used to guess the dimension. Use of ``n_components == 'mle'``\nwill interpret ``svd_solver == 'auto'`` as ``svd_solver == 'full'``.\n\nIf ``0 < n_components < 1`` and ``svd_solver == 'full'``, select the\nnumber of components such that the amount of variance that needs to be\nexplained is greater than the percentage specified by n_components.\n\nIf ``svd_solver == 'arpack'``, the number of components must be\nstrictly less than the minimum of n_features and n_samples.\n\nHence, the None case results in::\n\nn_components == min(n_samples, n_features) - 1\n",
  403. "name": "n_components",
  404. "type": "int32"
  405. },
  406. {
  407. "default": true,
  408. "description": "If False, data passed to fit are overwritten and running\nfit(X).transform(X) will not yield the expected results,\nuse fit_transform(X) instead.\n",
  409. "name": "copy",
  410. "type": "boolean"
  411. },
  412. {
  413. "default": false,
  414. "description": "When True (False by default) the `components_` vectors are multiplied\nby the square root of n_samples and then divided by the singular values\nto ensure uncorrelated outputs with unit component-wise variances.\n\nWhitening will remove some information from the transformed signal\n(the relative variance scales of the components) but can sometimes\nimprove the predictive accuracy of the downstream estimators by\nmaking their data respect some hard-wired assumptions.\n",
  415. "name": "whiten",
  416. "optional": true,
  417. "type": "boolean"
  418. },
  419. {
  420. "default": "auto",
  421. "description": "\"auto\" :\nThe solver is selected by a default 'auto' policy based on `X.shape` and\n`n_components`: if the input data has fewer than 1000 features and\nmore than 10 times as many samples, then the \"covariance_eigh\"\nsolver is used. Otherwise, if the input data is larger than 500x500\nand the number of components to extract is lower than 80% of the\nsmallest dimension of the data, then the more efficient\n\"randomized\" method is selected. Otherwise the exact \"full\" SVD is\ncomputed and optionally truncated afterwards.\n\"full\" :\nRun exact full SVD calling the standard LAPACK solver via\n`scipy.linalg.svd` and select the components by postprocessing\n\"covariance_eigh\" :\nPrecompute the covariance matrix (on centered data), run a\nclassical eigenvalue decomposition on the covariance matrix\ntypically using LAPACK and select the components by postprocessing.\nThis solver is very efficient for n_samples >> n_features and small\nn_features. It is, however, not tractable otherwise for large\nn_features (large memory footprint required to materialize the\ncovariance matrix). Also note that compared to the \"full\" solver,\nthis solver effectively doubles the condition number and is\ntherefore less numerically stable (e.g. on input data with a large\nrange of singular values).\n\"arpack\" :\nRun SVD truncated to `n_components` calling ARPACK solver via\n`scipy.sparse.linalg.svds`. It requires strictly\n`0 < n_components < min(X.shape)`\n\"randomized\" :\nRun randomized SVD by the method of Halko et al.\n\n.. versionadded:: 0.18.0\n\n.. versionchanged:: 1.5\nAdded the 'covariance_eigh' solver.\n",
  422. "name": "svd_solver",
  423. "type": "string"
  424. },
  425. {
  426. "default": 0.0,
  427. "description": "Tolerance for singular values computed by svd_solver == 'arpack'.\nMust be of range [0.0, infinity).\n\n.. versionadded:: 0.18.0\n",
  428. "name": "tol",
  429. "optional": true,
  430. "type": "float32"
  431. },
  432. {
  433. "default": "auto",
  434. "description": "Number of iterations for the power method computed by\nsvd_solver == 'randomized'.\nMust be of range [0, infinity).\n\n.. versionadded:: 0.18.0\n",
  435. "name": "iterated_power"
  436. },
  437. {
  438. "default": null,
  439. "description": "Used when the 'arpack' or 'randomized' solvers are used. Pass an int\nfor reproducible results across multiple function calls.\nSee :term:`Glossary <random_state>`.\n\n.. versionadded:: 0.18.0\n",
  440. "name": "random_state",
  441. "optional": true,
  442. "type": "int32"
  443. },
  444. {
  445. "name": "n_oversamples",
  446. "description": "This parameter is only relevant when `svd_solver=\"randomized\"`.\nIt corresponds to the additional number of random vectors to sample the\nrange of `X` so as to ensure proper conditioning. See\n:func:`~sklearn.utils.extmath.randomized_svd` for more details.\n\n.. versionadded:: 1.1\n",
  447. "type": "int32",
  448. "default": 10
  449. },
  450. {
  451. "name": "power_iteration_normalizer",
  452. "description": "Power iteration normalizer for randomized SVD solver.\nNot used by ARPACK. See :func:`~sklearn.utils.extmath.randomized_svd`\nfor more details.\n\n.. versionadded:: 1.1\n",
  453. "default": "auto"
  454. }
  455. ]
  456. },
  457. {
  458. "name": "sklearn.discriminant_analysis.LinearDiscriminantAnalysis",
  459. "description": "Linear Discriminant Analysis.\n\nA classifier with a linear decision boundary, generated by fitting class\nconditional densities to the data and using Bayes' rule.\n\nThe model fits a Gaussian density to each class, assuming that all classes\nshare the same covariance matrix.\n\nThe fitted model can also be used to reduce the dimensionality of the input\nby projecting it to the most discriminative directions, using the\n`transform` method.\n\n.. versionadded:: 0.17\n\nFor a comparison between\n:class:`~sklearn.discriminant_analysis.LinearDiscriminantAnalysis`\nand :class:`~sklearn.discriminant_analysis.QuadraticDiscriminantAnalysis`, see\n:ref:`sphx_glr_auto_examples_classification_plot_lda_qda.py`.\n\nRead more in the :ref:`User Guide <lda_qda>`.\n",
  460. "attributes": [
  461. {
  462. "default": "svd",
  463. "description": "Solver to use, possible values:\n- 'svd': Singular value decomposition (default).\nDoes not compute the covariance matrix, therefore this solver is\nrecommended for data with a large number of features.\n- 'lsqr': Least squares solution.\nCan be combined with shrinkage or custom covariance estimator.\n- 'eigen': Eigenvalue decomposition.\nCan be combined with shrinkage or custom covariance estimator.\n\n.. versionchanged:: 1.2\n`solver=\"svd\"` now has experimental Array API support. See the\n:ref:`Array API User Guide <array_api>` for more details.\n",
  464. "name": "solver"
  465. },
  466. {
  467. "description": "Shrinkage parameter, possible values:\n- None: no shrinkage (default).\n- 'auto': automatic shrinkage using the Ledoit-Wolf lemma.\n- float between 0 and 1: fixed shrinkage parameter.\n\nThis should be left to None if `covariance_estimator` is used.\nNote that shrinkage works only with 'lsqr' and 'eigen' solvers.\n\nFor a usage example, see\n:ref:`sphx_glr_auto_examples_classification_plot_lda.py`.\n",
  468. "name": "shrinkage",
  469. "default": null
  470. },
  471. {
  472. "default": null,
  473. "description": "The class prior probabilities. By default, the class proportions are\ninferred from the training data.\n",
  474. "name": "priors"
  475. },
  476. {
  477. "default": null,
  478. "description": "Number of components (<= min(n_classes - 1, n_features)) for\ndimensionality reduction. If None, will be set to\nmin(n_classes - 1, n_features). This parameter only affects the\n`transform` method.\n\nFor a usage example, see\n:ref:`sphx_glr_auto_examples_decomposition_plot_pca_vs_lda.py`.\n",
  479. "name": "n_components",
  480. "type": "int32"
  481. },
  482. {
  483. "default": false,
  484. "description": "If True, explicitly compute the weighted within-class covariance\nmatrix when solver is 'svd'. The matrix is always computed\nand stored for the other solvers.\n\n.. versionadded:: 0.17\n",
  485. "name": "store_covariance",
  486. "type": "boolean"
  487. },
  488. {
  489. "default": 0.0001,
  490. "description": "Absolute threshold for a singular value of X to be considered\nsignificant, used to estimate the rank of X. Dimensions whose\nsingular values are non-significant are discarded. Only used if\nsolver is 'svd'.\n\n.. versionadded:: 0.17\n",
  491. "name": "tol",
  492. "type": "float32"
  493. },
  494. {
  495. "default": null,
  496. "description": "If not None, `covariance_estimator` is used to estimate\nthe covariance matrices instead of relying on the empirical\ncovariance estimator (with potential shrinkage).\nThe object should have a fit method and a ``covariance_`` attribute\nlike the estimators in :mod:`sklearn.covariance`.\nIf None, the shrinkage parameter drives the estimate.\n\nThis should be left to None if `shrinkage` is used.\nNote that `covariance_estimator` works only with 'lsqr' and 'eigen'\nsolvers.\n\n.. versionadded:: 0.24\n",
  497. "name": "covariance_estimator"
  498. }
  499. ]
  500. },
  501. {
  502. "name": "sklearn.ensemble.forest.ExtraTreesClassifier",
  503. "description": "\nAn extra-trees classifier.\n\nThis class implements a meta estimator that fits a number of\nrandomized decision trees (a.k.a. extra-trees) on various sub-samples\nof the dataset and uses averaging to improve the predictive accuracy\nand control over-fitting.\n\nRead more in the :ref:`User Guide <forest>`.\n",
  504. "attributes": [
  505. {
  506. "default": 100,
  507. "description": "The number of trees in the forest.\n\n.. versionchanged:: 0.22\nThe default value of ``n_estimators`` changed from 10 to 100\nin 0.22.\n",
  508. "name": "n_estimators",
  509. "type": "int32"
  510. },
  511. {
  512. "default": "\"gini\"",
  513. "description": "The function to measure the quality of a split. Supported criteria are\n\"gini\" for the Gini impurity and \"entropy\" for the information gain.\n",
  514. "name": "criterion"
  515. },
  516. {
  517. "default": null,
  518. "description": "The maximum depth of the tree. If None, then nodes are expanded until\nall leaves are pure or until all leaves contain less than\nmin_samples_split samples.\n",
  519. "name": "max_depth",
  520. "type": "int32"
  521. },
  522. {
  523. "default": "2",
  524. "description": "The minimum number of samples required to split an internal node:\n\n- If int, then consider `min_samples_split` as the minimum number.\n- If float, then `min_samples_split` is a fraction and\n`ceil(min_samples_split * n_samples)` are the minimum\nnumber of samples for each split.\n\n.. versionchanged:: 0.18\nAdded float values for fractions.\n",
  525. "name": "min_samples_split"
  526. },
  527. {
  528. "default": "1",
  529. "description": "The minimum number of samples required to be at a leaf node.\nA split point at any depth will only be considered if it leaves at\nleast ``min_samples_leaf`` training samples in each of the left and\nright branches. This may have the effect of smoothing the model,\nespecially in regression.\n\n- If int, then consider `min_samples_leaf` as the minimum number.\n- If float, then `min_samples_leaf` is a fraction and\n`ceil(min_samples_leaf * n_samples)` are the minimum\nnumber of samples for each node.\n\n.. versionchanged:: 0.18\nAdded float values for fractions.\n",
  530. "name": "min_samples_leaf"
  531. },
  532. {
  533. "default": 0,
  534. "description": "The minimum weighted fraction of the sum total of weights (of all\nthe input samples) required to be at a leaf node. Samples have\nequal weight when sample_weight is not provided.\n",
  535. "name": "min_weight_fraction_leaf",
  536. "type": "float32"
  537. },
  538. {
  539. "default": "\"auto\"",
  540. "description": "The number of features to consider when looking for the best split:\n\n- If int, then consider `max_features` features at each split.\n- If float, then `max_features` is a fraction and\n`int(max_features * n_features)` features are considered at each\nsplit.\n- If \"auto\", then `max_features=sqrt(n_features)`.\n- If \"sqrt\", then `max_features=sqrt(n_features)`.\n- If \"log2\", then `max_features=log2(n_features)`.\n- If None, then `max_features=n_features`.\n\nNote: the search for a split does not stop until at least one\nvalid partition of the node samples is found, even if it requires to\neffectively inspect more than ``max_features`` features.\n",
  541. "name": "max_features"
  542. },
  543. {
  544. "default": null,
  545. "description": "Grow trees with ``max_leaf_nodes`` in best-first fashion.\nBest nodes are defined as relative reduction in impurity.\nIf None then unlimited number of leaf nodes.\n",
  546. "name": "max_leaf_nodes",
  547. "type": "int32"
  548. },
  549. {
  550. "default": 0,
  551. "description": "A node will be split if this split induces a decrease of the impurity\ngreater than or equal to this value.\n\nThe weighted impurity decrease equation is the following::\n\nN_t / N * (impurity - N_t_R / N_t * right_impurity\n- N_t_L / N_t * left_impurity)\n\nwhere ``N`` is the total number of samples, ``N_t`` is the number of\nsamples at the current node, ``N_t_L`` is the number of samples in the\nleft child, and ``N_t_R`` is the number of samples in the right child.\n\n``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\nif ``sample_weight`` is passed.\n\n.. versionadded:: 0.19\n",
  552. "name": "min_impurity_decrease",
  553. "type": "float32"
  554. },
  555. {
  556. "default": null,
  557. "description": "Threshold for early stopping in tree growth. A node will split\nif its impurity is above the threshold, otherwise it is a leaf.\n\n.. deprecated:: 0.19\n``min_impurity_split`` has been deprecated in favor of\n``min_impurity_decrease`` in 0.19. The default value of\n``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it\nwill be removed in 0.25. Use ``min_impurity_decrease`` instead.\n",
  558. "name": "min_impurity_split",
  559. "type": "float32"
  560. },
  561. {
  562. "default": false,
  563. "description": "Whether bootstrap samples are used when building trees. If False, the\nwhole dataset is used to build each tree.\n",
  564. "name": "bootstrap",
  565. "type": "boolean"
  566. },
  567. {
  568. "default": false,
  569. "description": "Whether to use out-of-bag samples to estimate\nthe generalization accuracy.\n",
  570. "name": "oob_score",
  571. "type": "boolean"
  572. },
  573. {
  574. "default": null,
  575. "description": "The number of jobs to run in parallel. :meth:`fit`, :meth:`predict`,\n:meth:`decision_path` and :meth:`apply` are all parallelized over the\ntrees. ``None`` means 1 unless in a :obj:`joblib.parallel_backend`\ncontext. ``-1`` means using all processors. See :term:`Glossary\n<n_jobs>` for more details.\n",
  576. "name": "n_jobs",
  577. "type": "int32"
  578. },
  579. {
  580. "default": null,
  581. "description": "Controls 3 sources of randomness:\n\n- the bootstrapping of the samples used when building trees\n(if ``bootstrap=True``)\n- the sampling of the features to consider when looking for the best\nsplit at each node (if ``max_features < n_features``)\n- the draw of the splits for each of the `max_features`\n\nSee :term:`Glossary <random_state>` for details.\n",
  582. "name": "random_state",
  583. "type": "int32"
  584. },
  585. {
  586. "default": 0,
  587. "description": "Controls the verbosity when fitting and predicting.\n",
  588. "name": "verbose",
  589. "type": "int32"
  590. },
  591. {
  592. "default": false,
  593. "description": "When set to ``True``, reuse the solution of the previous call to fit\nand add more estimators to the ensemble, otherwise, just fit a whole\nnew forest. See :term:`the Glossary <warm_start>`.\n",
  594. "name": "warm_start",
  595. "type": "boolean"
  596. },
  597. {
  598. "default": null,
  599. "description": "Weights associated with classes in the form ``{class_label: weight}``.\nIf not given, all classes are supposed to have weight one. For\nmulti-output problems, a list of dicts can be provided in the same\norder as the columns of y.\n\nNote that for multioutput (including multilabel) weights should be\ndefined for each class of every column in its own dict. For example,\nfor four-class multilabel classification weights should be\n[{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of\n[{1:1}, {2:5}, {3:1}, {4:1}].\n\nThe \"balanced\" mode uses the values of y to automatically adjust\nweights inversely proportional to class frequencies in the input data\nas ``n_samples / (n_classes * np.bincount(y))``\n\nThe \"balanced_subsample\" mode is the same as \"balanced\" except that\nweights are computed based on the bootstrap sample for every tree\ngrown.\n\nFor multi-output, the weights of each column of y will be multiplied.\n\nNote that these weights will be multiplied with sample_weight (passed\nthrough the fit method) if sample_weight is specified.\n",
  600. "name": "class_weight"
  601. },
  602. {
  603. "default": "0.0",
  604. "description": "Complexity parameter used for Minimal Cost-Complexity Pruning. The\nsubtree with the largest cost complexity that is smaller than\n``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n:ref:`minimal_cost_complexity_pruning` for details.\n\n.. versionadded:: 0.22\n",
  605. "name": "ccp_alpha"
  606. },
  607. {
  608. "default": null,
  609. "description": "If bootstrap is True, the number of samples to draw from X\nto train each base estimator.\n\n- If None (default), then draw `X.shape[0]` samples.\n- If int, then draw `max_samples` samples.\n- If float, then draw `max_samples * X.shape[0]` samples. Thus,\n`max_samples` should be in the interval `(0, 1)`.\n\n.. versionadded:: 0.22\n",
  610. "name": "max_samples"
  611. }
  612. ]
  613. },
  614. {
  615. "name": "sklearn.ensemble.forest.RandomForestClassifier",
  616. "description": "\nA random forest classifier.\n\nA random forest is a meta estimator that fits a number of decision tree\nclassifiers on various sub-samples of the dataset and uses averaging to\nimprove the predictive accuracy and control over-fitting.\nThe sub-sample size is controlled with the `max_samples` parameter if\n`bootstrap=True` (default), otherwise the whole dataset is used to build\neach tree.\n\nRead more in the :ref:`User Guide <forest>`.\n",
  617. "attributes": [
  618. {
  619. "default": 100,
  620. "description": "The number of trees in the forest.\n\n.. versionchanged:: 0.22\nThe default value of ``n_estimators`` changed from 10 to 100\nin 0.22.\n",
  621. "name": "n_estimators",
  622. "type": "int32"
  623. },
  624. {
  625. "default": "\"gini\"",
  626. "description": "The function to measure the quality of a split. Supported criteria are\n\"gini\" for the Gini impurity and \"entropy\" for the information gain.\nNote: this parameter is tree-specific.\n",
  627. "name": "criterion"
  628. },
  629. {
  630. "default": null,
  631. "description": "The maximum depth of the tree. If None, then nodes are expanded until\nall leaves are pure or until all leaves contain less than\nmin_samples_split samples.\n",
  632. "name": "max_depth",
  633. "type": "int32"
  634. },
  635. {
  636. "default": "2",
  637. "description": "The minimum number of samples required to split an internal node:\n\n- If int, then consider `min_samples_split` as the minimum number.\n- If float, then `min_samples_split` is a fraction and\n`ceil(min_samples_split * n_samples)` are the minimum\nnumber of samples for each split.\n\n.. versionchanged:: 0.18\nAdded float values for fractions.\n",
  638. "name": "min_samples_split"
  639. },
  640. {
  641. "default": "1",
  642. "description": "The minimum number of samples required to be at a leaf node.\nA split point at any depth will only be considered if it leaves at\nleast ``min_samples_leaf`` training samples in each of the left and\nright branches. This may have the effect of smoothing the model,\nespecially in regression.\n\n- If int, then consider `min_samples_leaf` as the minimum number.\n- If float, then `min_samples_leaf` is a fraction and\n`ceil(min_samples_leaf * n_samples)` are the minimum\nnumber of samples for each node.\n\n.. versionchanged:: 0.18\nAdded float values for fractions.\n",
  643. "name": "min_samples_leaf"
  644. },
  645. {
  646. "default": 0,
  647. "description": "The minimum weighted fraction of the sum total of weights (of all\nthe input samples) required to be at a leaf node. Samples have\nequal weight when sample_weight is not provided.\n",
  648. "name": "min_weight_fraction_leaf",
  649. "type": "float32"
  650. },
  651. {
  652. "default": "\"auto\"",
  653. "description": "The number of features to consider when looking for the best split:\n\n- If int, then consider `max_features` features at each split.\n- If float, then `max_features` is a fraction and\n`int(max_features * n_features)` features are considered at each\nsplit.\n- If \"auto\", then `max_features=sqrt(n_features)`.\n- If \"sqrt\", then `max_features=sqrt(n_features)` (same as \"auto\").\n- If \"log2\", then `max_features=log2(n_features)`.\n- If None, then `max_features=n_features`.\n\nNote: the search for a split does not stop until at least one\nvalid partition of the node samples is found, even if it requires to\neffectively inspect more than ``max_features`` features.\n",
  654. "name": "max_features"
  655. },
  656. {
  657. "default": null,
  658. "description": "Grow trees with ``max_leaf_nodes`` in best-first fashion.\nBest nodes are defined as relative reduction in impurity.\nIf None then unlimited number of leaf nodes.\n",
  659. "name": "max_leaf_nodes",
  660. "type": "int32"
  661. },
  662. {
  663. "default": 0,
  664. "description": "A node will be split if this split induces a decrease of the impurity\ngreater than or equal to this value.\n\nThe weighted impurity decrease equation is the following::\n\nN_t / N * (impurity - N_t_R / N_t * right_impurity\n- N_t_L / N_t * left_impurity)\n\nwhere ``N`` is the total number of samples, ``N_t`` is the number of\nsamples at the current node, ``N_t_L`` is the number of samples in the\nleft child, and ``N_t_R`` is the number of samples in the right child.\n\n``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\nif ``sample_weight`` is passed.\n\n.. versionadded:: 0.19\n",
  665. "name": "min_impurity_decrease",
  666. "type": "float32"
  667. },
  668. {
  669. "default": null,
  670. "description": "Threshold for early stopping in tree growth. A node will split\nif its impurity is above the threshold, otherwise it is a leaf.\n\n.. deprecated:: 0.19\n``min_impurity_split`` has been deprecated in favor of\n``min_impurity_decrease`` in 0.19. The default value of\n``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it\nwill be removed in 0.25. Use ``min_impurity_decrease`` instead.\n\n",
  671. "name": "min_impurity_split",
  672. "type": "float32"
  673. },
  674. {
  675. "default": true,
  676. "description": "Whether bootstrap samples are used when building trees. If False, the\nwhole dataset is used to build each tree.\n",
  677. "name": "bootstrap",
  678. "type": "boolean"
  679. },
  680. {
  681. "default": false,
  682. "description": "Whether to use out-of-bag samples to estimate\nthe generalization accuracy.\n",
  683. "name": "oob_score",
  684. "type": "boolean"
  685. },
  686. {
  687. "default": null,
  688. "description": "The number of jobs to run in parallel. :meth:`fit`, :meth:`predict`,\n:meth:`decision_path` and :meth:`apply` are all parallelized over the\ntrees. ``None`` means 1 unless in a :obj:`joblib.parallel_backend`\ncontext. ``-1`` means using all processors. See :term:`Glossary\n<n_jobs>` for more details.\n",
  689. "name": "n_jobs",
  690. "type": "int32"
  691. },
  692. {
  693. "default": null,
  694. "description": "Controls both the randomness of the bootstrapping of the samples used\nwhen building trees (if ``bootstrap=True``) and the sampling of the\nfeatures to consider when looking for the best split at each node\n(if ``max_features < n_features``).\nSee :term:`Glossary <random_state>` for details.\n",
  695. "name": "random_state"
  696. },
  697. {
  698. "default": 0,
  699. "description": "Controls the verbosity when fitting and predicting.\n",
  700. "name": "verbose",
  701. "type": "int32"
  702. },
  703. {
  704. "default": false,
  705. "description": "When set to ``True``, reuse the solution of the previous call to fit\nand add more estimators to the ensemble, otherwise, just fit a whole\nnew forest. See :term:`the Glossary <warm_start>`.\n",
  706. "name": "warm_start",
  707. "type": "boolean"
  708. },
  709. {
  710. "default": null,
  711. "description": "Weights associated with classes in the form ``{class_label: weight}``.\nIf not given, all classes are supposed to have weight one. For\nmulti-output problems, a list of dicts can be provided in the same\norder as the columns of y.\n\nNote that for multioutput (including multilabel) weights should be\ndefined for each class of every column in its own dict. For example,\nfor four-class multilabel classification weights should be\n[{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of\n[{1:1}, {2:5}, {3:1}, {4:1}].\n\nThe \"balanced\" mode uses the values of y to automatically adjust\nweights inversely proportional to class frequencies in the input data\nas ``n_samples / (n_classes * np.bincount(y))``\n\nThe \"balanced_subsample\" mode is the same as \"balanced\" except that\nweights are computed based on the bootstrap sample for every tree\ngrown.\n\nFor multi-output, the weights of each column of y will be multiplied.\n\nNote that these weights will be multiplied with sample_weight (passed\nthrough the fit method) if sample_weight is specified.\n",
  712. "name": "class_weight"
  713. },
  714. {
  715. "default": "0.0",
  716. "description": "Complexity parameter used for Minimal Cost-Complexity Pruning. The\nsubtree with the largest cost complexity that is smaller than\n``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n:ref:`minimal_cost_complexity_pruning` for details.\n\n.. versionadded:: 0.22\n",
  717. "name": "ccp_alpha"
  718. },
  719. {
  720. "default": null,
  721. "description": "If bootstrap is True, the number of samples to draw from X\nto train each base estimator.\n\n- If None (default), then draw `X.shape[0]` samples.\n- If int, then draw `max_samples` samples.\n- If float, then draw `max_samples * X.shape[0]` samples. Thus,\n`max_samples` should be in the interval `(0, 1)`.\n\n.. versionadded:: 0.22\n",
  722. "name": "max_samples"
  723. }
  724. ]
  725. },
  726. {
  727. "name": "sklearn.ensemble.weight_boosting.AdaBoostClassifier",
  728. "description": "An AdaBoost classifier.\n\nAn AdaBoost [1] classifier is a meta-estimator that begins by fitting a\nclassifier on the original dataset and then fits additional copies of the\nclassifier on the same dataset but where the weights of incorrectly\nclassified instances are adjusted such that subsequent classifiers focus\nmore on difficult cases.\n\nThis class implements the algorithm known as AdaBoost-SAMME [2].\n\nRead more in the :ref:`User Guide <adaboost>`.\n\n.. versionadded:: 0.14\n",
  729. "attributes": [
  730. {
  731. "default": null,
  732. "description": "The base estimator from which the boosted ensemble is built.\nSupport for sample weighting is required, as well as proper\n``classes_`` and ``n_classes_`` attributes. If ``None``, then\nthe base estimator is ``DecisionTreeClassifier(max_depth=1)``.\n",
  733. "name": "base_estimator"
  734. },
  735. {
  736. "default": 50,
  737. "description": "The maximum number of estimators at which boosting is terminated.\nIn case of perfect fit, the learning procedure is stopped early.\n",
  738. "name": "n_estimators",
  739. "type": "int32"
  740. },
  741. {
  742. "default": 1,
  743. "description": "Learning rate shrinks the contribution of each classifier by\n``learning_rate``. There is a trade-off between ``learning_rate`` and\n``n_estimators``.\n",
  744. "name": "learning_rate",
  745. "type": "float32"
  746. },
  747. {
  748. "default": "SAMME.R",
  749. "description": "If 'SAMME.R' then use the SAMME.R real boosting algorithm.\n``base_estimator`` must support calculation of class probabilities.\nIf 'SAMME' then use the SAMME discrete boosting algorithm.\nThe SAMME.R algorithm typically converges faster than SAMME,\nachieving a lower test error with fewer boosting iterations.\n",
  750. "name": "algorithm"
  751. },
  752. {
  753. "default": null,
  754. "description": "Controls the random seed given at each `base_estimator` at each\nboosting iteration.\nThus, it is only used when `base_estimator` exposes a `random_state`.\nPass an int for reproducible output across multiple function calls.\nSee :term:`Glossary <random_state>`.\n",
  755. "name": "random_state"
  756. }
  757. ]
  758. },
  759. {
  760. "name": "sklearn.feature_extraction.text.CountVectorizer",
  761. "description": "Convert a collection of text documents to a matrix of token counts.\n\nThis implementation produces a sparse representation of the counts using\nscipy.sparse.csr_matrix.\n\nIf you do not provide an a-priori dictionary and you do not use an analyzer\nthat does some kind of feature selection then the number of features will\nbe equal to the vocabulary size found by analyzing the data.\n\nFor an efficiency comparison of the different feature extractors, see\n:ref:`sphx_glr_auto_examples_text_plot_hashing_vs_dict_vectorizer.py`.\n\nRead more in the :ref:`User Guide <text_feature_extraction>`.\n",
  762. "attributes": [
  763. {
  764. "default": "content",
  765. "description": "- If `'filename'`, the sequence passed as an argument to fit is\nexpected to be a list of filenames that need reading to fetch\nthe raw content to analyze.\n\n- If `'file'`, the sequence items must have a 'read' method (file-like\nobject) that is called to fetch the bytes in memory.\n\n- If `'content'`, the input is expected to be a sequence of items that\ncan be of type string or byte.\n",
  766. "name": "input",
  767. "type": "string"
  768. },
  769. {
  770. "default": "utf-8",
  771. "description": "If bytes or files are given to analyze, this encoding is used to\ndecode.\n",
  772. "name": "encoding",
  773. "type": "string"
  774. },
  775. {
  776. "default": "strict",
  777. "description": "Instruction on what to do if a byte sequence is given to analyze that\ncontains characters not of the given `encoding`. By default, it is\n'strict', meaning that a UnicodeDecodeError will be raised. Other\nvalues are 'ignore' and 'replace'.\n",
  778. "name": "decode_error"
  779. },
  780. {
  781. "default": null,
  782. "description": "Remove accents and perform other character normalization\nduring the preprocessing step.\n'ascii' is a fast method that only works on characters that have\na direct ASCII mapping.\n'unicode' is a slightly slower method that works on any characters.\nNone (default) means no character normalization is performed.\n\nBoth 'ascii' and 'unicode' use NFKD normalization from\n:func:`unicodedata.normalize`.\n",
  783. "name": "strip_accents"
  784. },
  785. {
  786. "default": true,
  787. "description": "Convert all characters to lowercase before tokenizing.\n",
  788. "name": "lowercase",
  789. "type": "boolean"
  790. },
  791. {
  792. "default": null,
  793. "description": "Override the preprocessing (strip_accents and lowercase) stage while\npreserving the tokenizing and n-grams generation steps.\nOnly applies if ``analyzer`` is not callable.\n",
  794. "name": "preprocessor"
  795. },
  796. {
  797. "default": null,
  798. "description": "Override the string tokenization step while preserving the\npreprocessing and n-grams generation steps.\nOnly applies if ``analyzer == 'word'``.\n",
  799. "name": "tokenizer"
  800. },
  801. {
  802. "default": null,
  803. "description": "If 'english', a built-in stop word list for English is used.\nThere are several known issues with 'english' and you should\nconsider an alternative (see :ref:`stop_words`).\n\nIf a list, that list is assumed to contain stop words, all of which\nwill be removed from the resulting tokens.\nOnly applies if ``analyzer == 'word'``.\n\nIf None, no stop words will be used. In this case, setting `max_df`\nto a higher value, such as in the range (0.7, 1.0), can automatically detect\nand filter stop words based on intra corpus document frequency of terms.\n",
  804. "name": "stop_words",
  805. "type": "string"
  806. },
  807. {
  808. "default": "r\"(?u)\\\\b\\\\w\\\\w+\\\\b\"",
  809. "description": "Regular expression denoting what constitutes a \"token\", only used\nif ``analyzer == 'word'``. The default regexp select tokens of 2\nor more alphanumeric characters (punctuation is completely ignored\nand always treated as a token separator).\n\nIf there is a capturing group in token_pattern then the\ncaptured group content, not the entire match, becomes the token.\nAt most one capturing group is permitted.\n",
  810. "name": "token_pattern",
  811. "type": "string"
  812. },
  813. {
  814. "default": "(1, 1)",
  815. "description": "The lower and upper boundary of the range of n-values for different\nword n-grams or char n-grams to be extracted. All values of n such\nthat min_n <= n <= max_n will be used. For example an\n``ngram_range`` of ``(1, 1)`` means only unigrams, ``(1, 2)`` means\nunigrams and bigrams, and ``(2, 2)`` means only bigrams.\nOnly applies if ``analyzer`` is not callable.\n",
  816. "name": "ngram_range"
  817. },
  818. {
  819. "default": "word",
  820. "description": "Whether the feature should be made of word n-gram or character\nn-grams.\nOption 'char_wb' creates character n-grams only from text inside\nword boundaries; n-grams at the edges of words are padded with space.\n\nIf a callable is passed it is used to extract the sequence of features\nout of the raw, unprocessed input.\n\n.. versionchanged:: 0.21\n\nSince v0.21, if ``input`` is ``filename`` or ``file``, the data is\nfirst read from the file and then passed to the given callable\nanalyzer.\n",
  821. "name": "analyzer",
  822. "type": "string"
  823. },
  824. {
  825. "default": "1.0",
  826. "description": "When building the vocabulary ignore terms that have a document\nfrequency strictly higher than the given threshold (corpus-specific\nstop words).\nIf float, the parameter represents a proportion of documents, integer\nabsolute counts.\nThis parameter is ignored if vocabulary is not None.\n",
  827. "name": "max_df"
  828. },
  829. {
  830. "default": "1",
  831. "description": "When building the vocabulary ignore terms that have a document\nfrequency strictly lower than the given threshold. This value is also\ncalled cut-off in the literature.\nIf float, the parameter represents a proportion of documents, integer\nabsolute counts.\nThis parameter is ignored if vocabulary is not None.\n",
  832. "name": "min_df"
  833. },
  834. {
  835. "default": null,
  836. "description": "If not None, build a vocabulary that only considers the top\n`max_features` ordered by term frequency across the corpus.\nOtherwise, all features are used.\n\nThis parameter is ignored if vocabulary is not None.\n",
  837. "name": "max_features",
  838. "type": "int32"
  839. },
  840. {
  841. "default": null,
  842. "description": "Either a Mapping (e.g., a dict) where keys are terms and values are\nindices in the feature matrix, or an iterable over terms. If not\ngiven, a vocabulary is determined from the input documents. Indices\nin the mapping should not be repeated and should not have any gap\nbetween 0 and the largest index.\n",
  843. "name": "vocabulary",
  844. "optional": true
  845. },
  846. {
  847. "default": false,
  848. "description": "If True, all non zero counts are set to 1. This is useful for discrete\nprobabilistic models that model binary events rather than integer\ncounts.\n",
  849. "name": "binary",
  850. "type": "boolean"
  851. },
  852. {
  853. "default": "np.int64",
  854. "description": "Type of the matrix returned by fit_transform() or transform().\n",
  855. "name": "dtype",
  856. "optional": true
  857. }
  858. ]
  859. },
  860. {
  861. "name": "sklearn.feature_extraction.text.TfidfVectorizer",
  862. "description": "Convert a collection of raw documents to a matrix of TF-IDF features.\n\nEquivalent to :class:`CountVectorizer` followed by\n:class:`TfidfTransformer`.\n\nFor an example of usage, see\n:ref:`sphx_glr_auto_examples_text_plot_document_classification_20newsgroups.py`.\n\nFor an efficiency comparison of the different feature extractors, see\n:ref:`sphx_glr_auto_examples_text_plot_hashing_vs_dict_vectorizer.py`.\n\nFor an example of document clustering and comparison with\n:class:`~sklearn.feature_extraction.text.HashingVectorizer`, see\n:ref:`sphx_glr_auto_examples_text_plot_document_clustering.py`.\n\nRead more in the :ref:`User Guide <text_feature_extraction>`.\n",
  863. "attributes": [
  864. {
  865. "default": "content",
  866. "description": "- If `'filename'`, the sequence passed as an argument to fit is\nexpected to be a list of filenames that need reading to fetch\nthe raw content to analyze.\n\n- If `'file'`, the sequence items must have a 'read' method (file-like\nobject) that is called to fetch the bytes in memory.\n\n- If `'content'`, the input is expected to be a sequence of items that\ncan be of type string or byte.\n",
  867. "name": "input",
  868. "type": "string"
  869. },
  870. {
  871. "default": "utf-8",
  872. "description": "If bytes or files are given to analyze, this encoding is used to\ndecode.\n",
  873. "name": "encoding",
  874. "type": "string"
  875. },
  876. {
  877. "default": "strict",
  878. "description": "Instruction on what to do if a byte sequence is given to analyze that\ncontains characters not of the given `encoding`. By default, it is\n'strict', meaning that a UnicodeDecodeError will be raised. Other\nvalues are 'ignore' and 'replace'.\n",
  879. "name": "decode_error"
  880. },
  881. {
  882. "default": null,
  883. "description": "Remove accents and perform other character normalization\nduring the preprocessing step.\n'ascii' is a fast method that only works on characters that have\na direct ASCII mapping.\n'unicode' is a slightly slower method that works on any characters.\nNone (default) means no character normalization is performed.\n\nBoth 'ascii' and 'unicode' use NFKD normalization from\n:func:`unicodedata.normalize`.\n",
  884. "name": "strip_accents"
  885. },
  886. {
  887. "default": true,
  888. "description": "Convert all characters to lowercase before tokenizing.\n",
  889. "name": "lowercase",
  890. "type": "boolean"
  891. },
  892. {
  893. "default": null,
  894. "description": "Override the preprocessing (string transformation) stage while\npreserving the tokenizing and n-grams generation steps.\nOnly applies if ``analyzer`` is not callable.\n",
  895. "name": "preprocessor"
  896. },
  897. {
  898. "default": null,
  899. "description": "Override the string tokenization step while preserving the\npreprocessing and n-grams generation steps.\nOnly applies if ``analyzer == 'word'``.\n",
  900. "name": "tokenizer"
  901. },
  902. {
  903. "description": "Whether the feature should be made of word or character n-grams.\nOption 'char_wb' creates character n-grams only from text inside\nword boundaries; n-grams at the edges of words are padded with space.\n\nIf a callable is passed it is used to extract the sequence of features\nout of the raw, unprocessed input.\n\n.. versionchanged:: 0.21\nSince v0.21, if ``input`` is ``'filename'`` or ``'file'``, the data\nis first read from the file and then passed to the given callable\nanalyzer.\n",
  904. "name": "analyzer",
  905. "default": "word"
  906. },
  907. {
  908. "default": null,
  909. "description": "If a string, it is passed to _check_stop_list and the appropriate stop\nlist is returned. 'english' is currently the only supported string\nvalue.\nThere are several known issues with 'english' and you should\nconsider an alternative (see :ref:`stop_words`).\n\nIf a list, that list is assumed to contain stop words, all of which\nwill be removed from the resulting tokens.\nOnly applies if ``analyzer == 'word'``.\n\nIf None, no stop words will be used. In this case, setting `max_df`\nto a higher value, such as in the range (0.7, 1.0), can automatically detect\nand filter stop words based on intra corpus document frequency of terms.\n",
  910. "name": "stop_words"
  911. },
  912. {
  913. "default": "r\"(?u)\\\\b\\\\w\\\\w+\\\\b\"",
  914. "description": "Regular expression denoting what constitutes a \"token\", only used\nif ``analyzer == 'word'``. The default regexp selects tokens of 2\nor more alphanumeric characters (punctuation is completely ignored\nand always treated as a token separator).\n\nIf there is a capturing group in token_pattern then the\ncaptured group content, not the entire match, becomes the token.\nAt most one capturing group is permitted.\n",
  915. "name": "token_pattern",
  916. "type": "string"
  917. },
  918. {
  919. "default": "(1, 1)",
  920. "description": "The lower and upper boundary of the range of n-values for different\nn-grams to be extracted. All values of n such that min_n <= n <= max_n\nwill be used. For example an ``ngram_range`` of ``(1, 1)`` means only\nunigrams, ``(1, 2)`` means unigrams and bigrams, and ``(2, 2)`` means\nonly bigrams.\nOnly applies if ``analyzer`` is not callable.\n",
  921. "name": "ngram_range"
  922. },
  923. {
  924. "default": "1.0",
  925. "description": "When building the vocabulary ignore terms that have a document\nfrequency strictly higher than the given threshold (corpus-specific\nstop words).\nIf float in range [0.0, 1.0], the parameter represents a proportion of\ndocuments, integer absolute counts.\nThis parameter is ignored if vocabulary is not None.\n",
  926. "name": "max_df"
  927. },
  928. {
  929. "default": "1",
  930. "description": "When building the vocabulary ignore terms that have a document\nfrequency strictly lower than the given threshold. This value is also\ncalled cut-off in the literature.\nIf float in range of [0.0, 1.0], the parameter represents a proportion\nof documents, integer absolute counts.\nThis parameter is ignored if vocabulary is not None.\n",
  931. "name": "min_df"
  932. },
  933. {
  934. "default": null,
  935. "description": "If not None, build a vocabulary that only considers the top\n`max_features` ordered by term frequency across the corpus.\nOtherwise, all features are used.\n\nThis parameter is ignored if vocabulary is not None.\n",
  936. "name": "max_features",
  937. "type": "int32"
  938. },
  939. {
  940. "default": null,
  941. "description": "Either a Mapping (e.g., a dict) where keys are terms and values are\nindices in the feature matrix, or an iterable over terms. If not\ngiven, a vocabulary is determined from the input documents.\n",
  942. "name": "vocabulary",
  943. "optional": true
  944. },
  945. {
  946. "default": false,
  947. "description": "If True, all non-zero term counts are set to 1. This does not mean\noutputs will have only 0/1 values, only that the tf term in tf-idf\nis binary. (Set `binary` to True, `use_idf` to False and\n`norm` to None to get 0/1 outputs).\n",
  948. "name": "binary",
  949. "type": "boolean"
  950. },
  951. {
  952. "default": "float64",
  953. "description": "Type of the matrix returned by fit_transform() or transform().\n",
  954. "name": "dtype",
  955. "optional": true
  956. },
  957. {
  958. "default": "l2",
  959. "description": "Each output row will have unit norm, either:\n\n- 'l2': Sum of squares of vector elements is 1. The cosine\nsimilarity between two vectors is their dot product when l2 norm has\nbeen applied.\n- 'l1': Sum of absolute values of vector elements is 1.\nSee :func:`~sklearn.preprocessing.normalize`.\n- None: No normalization.\n",
  960. "name": "norm"
  961. },
  962. {
  963. "default": true,
  964. "description": "Enable inverse-document-frequency reweighting. If False, idf(t) = 1.\n",
  965. "name": "use_idf",
  966. "type": "boolean"
  967. },
  968. {
  969. "default": true,
  970. "description": "Smooth idf weights by adding one to document frequencies, as if an\nextra document was seen containing every term in the collection\nexactly once. Prevents zero divisions.\n",
  971. "name": "smooth_idf",
  972. "type": "boolean"
  973. },
  974. {
  975. "default": false,
  976. "description": "Apply sublinear tf scaling, i.e. replace tf with 1 + log(tf).\n",
  977. "name": "sublinear_tf",
  978. "type": "boolean"
  979. }
  980. ]
  981. },
  982. {
  983. "name": "sklearn.feature_selection._univariate_selection.SelectKBest",
  984. "description": "Select features according to the k highest scores.\n\nRead more in the :ref:`User Guide <univariate_feature_selection>`.\n",
  985. "attributes": [
  986. {
  987. "default": "f_classif",
  988. "description": "Function taking two arrays X and y, and returning a pair of arrays\n(scores, pvalues) or a single array with scores.\nDefault is f_classif (see below \"See Also\"). The default function only\nworks with classification tasks.\n\n.. versionadded:: 0.18\n",
  989. "name": "score_func"
  990. },
  991. {
  992. "default": "10",
  993. "description": "Number of top features to select.\nThe \"all\" option bypasses selection, for use in a parameter search.\n",
  994. "name": "k",
  995. "optional": true
  996. }
  997. ]
  998. },
  999. {
  1000. "name": "sklearn.impute._base.SimpleImputer",
  1001. "description": "Univariate imputer for completing missing values with simple strategies.\n\nReplace missing values using a descriptive statistic (e.g. mean, median, or\nmost frequent) along each column, or using a constant value.\n\nRead more in the :ref:`User Guide <impute>`.\n\n.. versionadded:: 0.20\n`SimpleImputer` replaces the previous `sklearn.preprocessing.Imputer`\nestimator which is now removed.\n",
  1002. "attributes": [
  1003. {
  1004. "description": "The placeholder for the missing values. All occurrences of\n`missing_values` will be imputed. For pandas' dataframes with\nnullable integer dtypes with missing values, `missing_values`\ncan be set to either `np.nan` or `pd.NA`.\n",
  1005. "name": "missing_values",
  1006. "default": "np.nan"
  1007. },
  1008. {
  1009. "default": "mean",
  1010. "description": "The imputation strategy.\n\n- If \"mean\", then replace missing values using the mean along\neach column. Can only be used with numeric data.\n- If \"median\", then replace missing values using the median along\neach column. Can only be used with numeric data.\n- If \"most_frequent\", then replace missing values using the most frequent\nvalue along each column. Can be used with strings or numeric data.\nIf there is more than one such value, only the smallest is returned.\n- If \"constant\", then replace missing values with fill_value. Can be\nused with strings or numeric data.\n- If an instance of Callable, then replace missing values using the\nscalar statistic returned by running the callable over a dense 1d\narray containing non-missing values of each column.\n\n.. versionadded:: 0.20\nstrategy=\"constant\" for fixed value imputation.\n\n.. versionadded:: 1.5\nstrategy=callable for custom value imputation.\n",
  1011. "name": "strategy",
  1012. "type": "string"
  1013. },
  1014. {
  1015. "default": null,
  1016. "description": "When strategy == \"constant\", `fill_value` is used to replace all\noccurrences of missing_values. For string or object data types,\n`fill_value` must be a string.\nIf `None`, `fill_value` will be 0 when imputing numerical\ndata and \"missing_value\" for strings or object data types.\n",
  1017. "name": "fill_value"
  1018. },
  1019. {
  1020. "default": 0,
  1021. "description": "Controls the verbosity of the imputer.\n\n.. deprecated:: 1.1\nThe 'verbose' parameter was deprecated in version 1.1 and will be\nremoved in 1.3. A warning will always be raised upon the removal of\nempty columns in the future version.\n",
  1022. "name": "verbose",
  1023. "type": "int32"
  1024. },
  1025. {
  1026. "default": true,
  1027. "description": "If True, a copy of X will be created. If False, imputation will\nbe done in-place whenever possible. Note that, in the following cases,\na new copy will always be made, even if `copy=False`:\n\n- If `X` is not an array of floating values;\n- If `X` is encoded as a CSR matrix;\n- If `add_indicator=True`.\n",
  1028. "name": "copy",
  1029. "type": "boolean"
  1030. },
  1031. {
  1032. "default": false,
  1033. "description": "If True, a :class:`MissingIndicator` transform will stack onto output\nof the imputer's transform. This allows a predictive estimator\nto account for missingness despite imputation. If a feature has no\nmissing values at fit/train time, the feature won't appear on\nthe missing indicator even if there are missing values at\ntransform/test time.\n",
  1034. "name": "add_indicator",
  1035. "type": "boolean"
  1036. },
  1037. {
  1038. "name": "keep_empty_features",
  1039. "default": false,
  1040. "description": "If True, features that consist exclusively of missing values when\n`fit` is called are returned in results when `transform` is called.\nThe imputed value is always `0` except when `strategy=\"constant\"`\nin which case `fill_value` will be used instead.\n\n.. versionadded:: 1.2\n"
  1041. }
  1042. ]
  1043. },
  1044. {
  1045. "name": "sklearn.linear_model._logistic.LogisticRegression",
  1046. "description": "\nLogistic Regression (aka logit, MaxEnt) classifier.\n\nIn the multiclass case, the training algorithm uses the one-vs-rest (OvR)\nscheme if the 'multi_class' option is set to 'ovr', and uses the\ncross-entropy loss if the 'multi_class' option is set to 'multinomial'.\n(Currently the 'multinomial' option is supported only by the 'lbfgs',\n'sag', 'saga' and 'newton-cg' solvers.)\n\nThis class implements regularized logistic regression using the\n'liblinear' library, 'newton-cg', 'sag', 'saga' and 'lbfgs' solvers. **Note\nthat regularization is applied by default**. It can handle both dense\nand sparse input. Use C-ordered arrays or CSR matrices containing 64-bit\nfloats for optimal performance; any other input format will be converted\n(and copied).\n\nThe 'newton-cg', 'sag', and 'lbfgs' solvers support only L2 regularization\nwith primal formulation, or no regularization. The 'liblinear' solver\nsupports both L1 and L2 regularization, with a dual formulation only for\nthe L2 penalty. The Elastic-Net regularization is only supported by the\n'saga' solver.\n\nRead more in the :ref:`User Guide <logistic_regression>`.\n",
  1047. "attributes": [
  1048. {
  1049. "default": "l2",
  1050. "description": "Specify the norm of the penalty:\n\n- `None`: no penalty is added;\n- `'l2'`: add an L2 penalty term and it is the default choice;\n- `'l1'`: add an L1 penalty term;\n- `'elasticnet'`: both L1 and L2 penalty terms are added.\n\n.. warning::\nSome penalties may not work with some solvers. See the parameter\n`solver` below, to know the compatibility between the penalty and\nsolver.\n\n.. versionadded:: 0.19\nl1 penalty with SAGA solver (allowing 'multinomial' + L1)\n",
  1051. "name": "penalty"
  1052. },
  1053. {
  1054. "default": false,
  1055. "description": "Dual (constrained) or primal (regularized, see also\n:ref:`this equation <regularized-logistic-loss>`) formulation. Dual formulation\nis only implemented for l2 penalty with liblinear solver. Prefer dual=False when\nn_samples > n_features.\n",
  1056. "name": "dual",
  1057. "type": "boolean"
  1058. },
  1059. {
  1060. "default": 0.0001,
  1061. "description": "Tolerance for stopping criteria.\n",
  1062. "name": "tol",
  1063. "type": "float32"
  1064. },
  1065. {
  1066. "default": 1.0,
  1067. "description": "Inverse of regularization strength; must be a positive float.\nLike in support vector machines, smaller values specify stronger\nregularization.\n",
  1068. "name": "C",
  1069. "type": "float32"
  1070. },
  1071. {
  1072. "default": true,
  1073. "description": "Specifies if a constant (a.k.a. bias or intercept) should be\nadded to the decision function.\n",
  1074. "name": "fit_intercept",
  1075. "type": "boolean"
  1076. },
  1077. {
  1078. "default": 1.0,
  1079. "description": "Useful only when the solver 'liblinear' is used\nand self.fit_intercept is set to True. In this case, x becomes\n[x, self.intercept_scaling],\ni.e. a \"synthetic\" feature with constant value equal to\nintercept_scaling is appended to the instance vector.\nThe intercept becomes ``intercept_scaling * synthetic_feature_weight``.\n\nNote! the synthetic feature weight is subject to l1/l2 regularization\nas all other features.\nTo lessen the effect of regularization on synthetic feature weight\n(and therefore on the intercept) intercept_scaling has to be increased.\n",
  1080. "name": "intercept_scaling",
  1081. "type": "float32"
  1082. },
  1083. {
  1084. "default": null,
  1085. "description": "Weights associated with classes in the form ``{class_label: weight}``.\nIf not given, all classes are supposed to have weight one.\n\nThe \"balanced\" mode uses the values of y to automatically adjust\nweights inversely proportional to class frequencies in the input data\nas ``n_samples / (n_classes * np.bincount(y))``.\n\nNote that these weights will be multiplied with sample_weight (passed\nthrough the fit method) if sample_weight is specified.\n\n.. versionadded:: 0.17\n*class_weight='balanced'*\n",
  1086. "name": "class_weight"
  1087. },
  1088. {
  1089. "default": null,
  1090. "description": "Used when ``solver`` == 'sag', 'saga' or 'liblinear' to shuffle the\ndata. See :term:`Glossary <random_state>` for details.\n",
  1091. "name": "random_state",
  1092. "type": "int32"
  1093. },
  1094. {
  1095. "default": "lbfgs",
  1096. "description": "\nAlgorithm to use in the optimization problem. Default is 'lbfgs'.\nTo choose a solver, you might want to consider the following aspects:\n\n- For small datasets, 'liblinear' is a good choice, whereas 'sag'\nand 'saga' are faster for large ones;\n- For multiclass problems, only 'newton-cg', 'sag', 'saga' and\n'lbfgs' handle multinomial loss;\n- 'liblinear' and 'newton-cholesky' can only handle binary classification\nby default. To apply a one-versus-rest scheme for the multiclass setting\none can wrap it with the `OneVsRestClassifier`.\n- 'newton-cholesky' is a good choice for `n_samples` >> `n_features`,\nespecially with one-hot encoded categorical features with rare\ncategories. Be aware that the memory usage of this solver has a quadratic\ndependency on `n_features` because it explicitly computes the Hessian\nmatrix.\n\n.. warning::\nThe choice of the algorithm depends on the penalty chosen and on\n(multinomial) multiclass support:\n\n================= ============================== ======================\nsolver penalty multinomial multiclass\n================= ============================== ======================\n'lbfgs' 'l2', None yes\n'liblinear' 'l1', 'l2' no\n'newton-cg' 'l2', None yes\n'newton-cholesky' 'l2', None no\n'sag' 'l2', None yes\n'saga' 'elasticnet', 'l1', 'l2', None yes\n================= ============================== ======================\n\n.. note::\n'sag' and 'saga' fast convergence is only guaranteed on features\nwith approximately the same scale. You can preprocess the data with\na scaler from :mod:`sklearn.preprocessing`.\n\n.. seealso::\nRefer to the User Guide for more information regarding\n:class:`LogisticRegression` and more specifically the\n:ref:`Table <Logistic_regression>`\nsummarizing solver/penalty supports.\n\n.. versionadded:: 0.17\nStochastic Average Gradient descent solver.\n.. versionadded:: 0.19\nSAGA solver.\n.. versionchanged:: 0.22\nThe default solver changed from 'liblinear' to 'lbfgs' in 0.22.\n.. versionadded:: 1.2\nnewton-cholesky solver.\n",
  1097. "name": "solver"
  1098. },
  1099. {
  1100. "default": 100,
  1101. "description": "Maximum number of iterations taken for the solvers to converge.\n",
  1102. "name": "max_iter",
  1103. "type": "int32"
  1104. },
  1105. {
  1106. "default": "auto",
  1107. "description": "If the option chosen is 'ovr', then a binary problem is fit for each\nlabel. For 'multinomial' the loss minimised is the multinomial loss fit\nacross the entire probability distribution, *even when the data is\nbinary*. 'multinomial' is unavailable when solver='liblinear'.\n'auto' selects 'ovr' if the data is binary, or if solver='liblinear',\nand otherwise selects 'multinomial'.\n\n.. versionadded:: 0.18\nStochastic Average Gradient descent solver for 'multinomial' case.\n.. versionchanged:: 0.22\nDefault changed from 'ovr' to 'auto' in 0.22.\n.. deprecated:: 1.5\n``multi_class`` was deprecated in version 1.5 and will be removed in 1.7.\nFrom then on, the recommended 'multinomial' will always be used for\n`n_classes >= 3`.\nSolvers that do not support 'multinomial' will raise an error.\nUse `sklearn.multiclass.OneVsRestClassifier(LogisticRegression())` if you\nstill want to use OvR.\n",
  1108. "name": "multi_class"
  1109. },
  1110. {
  1111. "default": 0,
  1112. "description": "For the liblinear and lbfgs solvers set verbose to any positive\nnumber for verbosity.\n",
  1113. "name": "verbose",
  1114. "type": "int32"
  1115. },
  1116. {
  1117. "default": false,
  1118. "description": "When set to True, reuse the solution of the previous call to fit as\ninitialization, otherwise, just erase the previous solution.\nUseless for liblinear solver. See :term:`the Glossary <warm_start>`.\n\n.. versionadded:: 0.17\n*warm_start* to support *lbfgs*, *newton-cg*, *sag*, *saga* solvers.\n",
  1119. "name": "warm_start",
  1120. "type": "boolean"
  1121. },
  1122. {
  1123. "default": null,
  1124. "description": "Number of CPU cores used when parallelizing over classes if\nmulti_class='ovr'. This parameter is ignored when the ``solver`` is\nset to 'liblinear' regardless of whether 'multi_class' is specified or\nnot. ``None`` means 1 unless in a :obj:`joblib.parallel_backend`\ncontext. ``-1`` means using all processors.\nSee :term:`Glossary <n_jobs>` for more details.\n",
  1125. "name": "n_jobs",
  1126. "type": "int32"
  1127. },
  1128. {
  1129. "default": null,
  1130. "description": "The Elastic-Net mixing parameter, with ``0 <= l1_ratio <= 1``. Only\nused if ``penalty='elasticnet'``. Setting ``l1_ratio=0`` is equivalent\nto using ``penalty='l2'``, while setting ``l1_ratio=1`` is equivalent\nto using ``penalty='l1'``. For ``0 < l1_ratio < 1``, the penalty is a\ncombination of L1 and L2.\n",
  1131. "name": "l1_ratio",
  1132. "type": "float32"
  1133. }
  1134. ]
  1135. },
  1136. {
  1137. "name": "sklearn.linear_model.LassoLars",
  1138. "description": "Lasso model fit with Least Angle Regression a.k.a. Lars.\n\nIt is a Linear Model trained with an L1 prior as regularizer.\n\nThe optimization objective for Lasso is::\n\n(1 / (2 * n_samples)) * ||y - Xw||^2_2 + alpha * ||w||_1\n\nRead more in the :ref:`User Guide <least_angle_regression>`.\n",
  1139. "attributes": [
  1140. {
  1141. "default": 1.0,
  1142. "description": "Constant that multiplies the penalty term. Defaults to 1.0.\n``alpha = 0`` is equivalent to an ordinary least square, solved\nby :class:`LinearRegression`. For numerical reasons, using\n``alpha = 0`` with the LassoLars object is not advised and you\nshould prefer the LinearRegression object.\n",
  1143. "name": "alpha",
  1144. "type": "float32"
  1145. },
  1146. {
  1147. "default": true,
  1148. "description": "Whether to calculate the intercept for this model. If set\nto false, no intercept will be used in calculations\n(i.e. data is expected to be centered).\n",
  1149. "name": "fit_intercept",
  1150. "type": "boolean"
  1151. },
  1152. {
  1153. "default": "False",
  1154. "description": "Sets the verbosity amount.\n",
  1155. "name": "verbose",
  1156. "optional": true
  1157. },
  1158. {
  1159. "default": false,
  1160. "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. versionchanged:: 1.2\ndefault changed from True to False in 1.2.\n\n.. deprecated:: 1.2\n``normalize`` was deprecated in version 1.2 and will be removed in 1.4.\n",
  1161. "name": "normalize",
  1162. "optional": true,
  1163. "type": "boolean"
  1164. },
  1165. {
  1166. "default": "auto",
  1167. "description": "Whether to use a precomputed Gram matrix to speed up\ncalculations. If set to ``'auto'`` let us decide. The Gram\nmatrix can also be passed as argument.\n",
  1168. "name": "precompute",
  1169. "type": "boolean"
  1170. },
  1171. {
  1172. "default": 500,
  1173. "description": "Maximum number of iterations to perform.\n",
  1174. "name": "max_iter",
  1175. "optional": true,
  1176. "type": "int32"
  1177. },
  1178. {
  1179. "description": "The machine-precision regularization in the computation of the\nCholesky diagonal factors. Increase this for very ill-conditioned\nsystems. Unlike the ``tol`` parameter in some iterative\noptimization-based algorithms, this parameter does not control\nthe tolerance of the optimization.\n",
  1180. "name": "eps",
  1181. "optional": true,
  1182. "type": "float32",
  1183. "default": null
  1184. },
  1185. {
  1186. "default": true,
  1187. "description": "If True, X will be copied; else, it may be overwritten.\n",
  1188. "name": "copy_X",
  1189. "optional": true,
  1190. "type": "boolean"
  1191. },
  1192. {
  1193. "default": true,
  1194. "description": "If ``True`` the full path is stored in the ``coef_path_`` attribute.\nIf you compute the solution for a large problem or many targets,\nsetting ``fit_path`` to ``False`` will lead to a speedup, especially\nwith a small alpha.\n",
  1195. "name": "fit_path",
  1196. "type": "boolean"
  1197. },
  1198. {
  1199. "default": false,
  1200. "description": "Restrict coefficients to be >= 0. Be aware that you might want to\nremove fit_intercept which is set True by default.\nUnder the positive restriction the model coefficients will not converge\nto the ordinary-least-squares solution for small values of alpha.\nOnly coefficients up to the smallest alpha value (``alphas_[alphas_ >\n0.].min()`` when fit_path=True) reached by the stepwise Lars-Lasso\nalgorithm are typically in congruence with the solution of the\ncoordinate descent Lasso estimator.\n",
  1201. "name": "positive",
  1202. "type": "boolean"
  1203. },
  1204. {
  1205. "default": null,
  1206. "description": "Upper bound on a uniform noise parameter to be added to the\n`y` values, to satisfy the model's assumption of\none-at-a-time computations. Might help with stability.\n\n.. versionadded:: 0.23\n",
  1207. "name": "jitter",
  1208. "type": "float32"
  1209. },
  1210. {
  1211. "default": null,
  1212. "description": "Determines random number generation for jittering. Pass an int\nfor reproducible output across multiple function calls.\nSee :term:`Glossary <random_state>`. Ignored if `jitter` is None.\n\n.. versionadded:: 0.23\n",
  1213. "name": "random_state",
  1214. "type": "int32"
  1215. }
  1216. ]
  1217. },
  1218. {
  1219. "name": "sklearn.linear_model.LinearRegression",
  1220. "description": "\nOrdinary least squares Linear Regression.\n\nLinearRegression fits a linear model with coefficients w = (w1, ..., wp)\nto minimize the residual sum of squares between the observed targets in\nthe dataset, and the targets predicted by the linear approximation.\n",
  1221. "attributes": [
  1222. {
  1223. "default": true,
  1224. "description": "Whether to calculate the intercept for this model. If set\nto False, no intercept will be used in calculations\n(i.e. data is expected to be centered).\n",
  1225. "name": "fit_intercept",
  1226. "optional": true,
  1227. "type": "boolean"
  1228. },
  1229. {
  1230. "default": false,
  1231. "description": "This parameter is ignored when ``fit_intercept`` is set to False.\nIf True, the regressors X will be normalized before regression by\nsubtracting the mean and dividing by the l2-norm.\nIf you wish to standardize, please use\n:class:`~sklearn.preprocessing.StandardScaler` before calling ``fit``\non an estimator with ``normalize=False``.\n\n.. deprecated:: 1.0\n`normalize` was deprecated in version 1.0 and will be\nremoved in 1.2.\n",
  1232. "name": "normalize",
  1233. "optional": true,
  1234. "type": "boolean"
  1235. },
  1236. {
  1237. "default": true,
  1238. "description": "If True, X will be copied; else, it may be overwritten.\n",
  1239. "name": "copy_X",
  1240. "optional": true,
  1241. "type": "boolean"
  1242. },
  1243. {
  1244. "default": null,
  1245. "description": "The number of jobs to use for the computation. This will only provide\nspeedup in case of sufficiently large problems, that is if firstly\n`n_targets > 1` and secondly `X` is sparse or if `positive` is set\nto `True`. ``None`` means 1 unless in a\n:obj:`joblib.parallel_backend` context. ``-1`` means using all\nprocessors. See :term:`Glossary <n_jobs>` for more details.\n",
  1246. "name": "n_jobs",
  1247. "optional": true,
  1248. "type": "int32"
  1249. },
  1250. {
  1251. "default": false,
  1252. "description": "When set to ``True``, forces the coefficients to be positive. This\noption is only supported for dense arrays.\n\n.. versionadded:: 0.24\n",
  1253. "name": "positive",
  1254. "type": "boolean"
  1255. }
  1256. ]
  1257. },
  1258. {
  1259. "name": "sklearn.linear_model.LogisticRegression",
  1260. "description": "\nLogistic Regression (aka logit, MaxEnt) classifier.\n\nIn the multiclass case, the training algorithm uses the one-vs-rest (OvR)\nscheme if the 'multi_class' option is set to 'ovr', and uses the\ncross-entropy loss if the 'multi_class' option is set to 'multinomial'.\n(Currently the 'multinomial' option is supported only by the 'lbfgs',\n'sag', 'saga' and 'newton-cg' solvers.)\n\nThis class implements regularized logistic regression using the\n'liblinear' library, 'newton-cg', 'sag', 'saga' and 'lbfgs' solvers. **Note\nthat regularization is applied by default**. It can handle both dense\nand sparse input. Use C-ordered arrays or CSR matrices containing 64-bit\nfloats for optimal performance; any other input format will be converted\n(and copied).\n\nThe 'newton-cg', 'sag', and 'lbfgs' solvers support only L2 regularization\nwith primal formulation, or no regularization. The 'liblinear' solver\nsupports both L1 and L2 regularization, with a dual formulation only for\nthe L2 penalty. The Elastic-Net regularization is only supported by the\n'saga' solver.\n\nRead more in the :ref:`User Guide <logistic_regression>`.\n",
  1261. "attributes": [
  1262. {
  1263. "default": "l2",
  1264. "description": "Specify the norm of the penalty:\n\n- `None`: no penalty is added;\n- `'l2'`: add a L2 penalty term and it is the default choice;\n- `'l1'`: add a L1 penalty term;\n- `'elasticnet'`: both L1 and L2 penalty terms are added.\n\n.. warning::\nSome penalties may not work with some solvers. See the parameter\n`solver` below, to know the compatibility between the penalty and\nsolver.\n\n.. versionadded:: 0.19\nl1 penalty with SAGA solver (allowing 'multinomial' + L1)\n",
  1265. "name": "penalty",
  1266. "optional": true
  1267. },
  1268. {
  1269. "default": false,
  1270. "description": "Dual (constrained) or primal (regularized, see also\n:ref:`this equation <regularized-logistic-loss>`) formulation. Dual formulation\nis only implemented for l2 penalty with liblinear solver. Prefer dual=False when\nn_samples > n_features.\n",
  1271. "name": "dual",
  1272. "optional": true,
  1273. "type": "boolean"
  1274. },
  1275. {
  1276. "default": 0.0001,
  1277. "description": "Tolerance for stopping criteria.\n",
  1278. "name": "tol",
  1279. "optional": true,
  1280. "type": "float32"
  1281. },
  1282. {
  1283. "default": 1.0,
  1284. "description": "Inverse of regularization strength; must be a positive float.\nLike in support vector machines, smaller values specify stronger\nregularization.\n",
  1285. "name": "C",
  1286. "optional": true,
  1287. "type": "float32"
  1288. },
  1289. {
  1290. "default": true,
  1291. "description": "Specifies if a constant (a.k.a. bias or intercept) should be\nadded to the decision function.\n",
  1292. "name": "fit_intercept",
  1293. "optional": true,
  1294. "type": "boolean"
  1295. },
  1296. {
  1297. "default": 1.0,
  1298. "description": "Useful only when the solver 'liblinear' is used\nand self.fit_intercept is set to True. In this case, x becomes\n[x, self.intercept_scaling],\ni.e. a \"synthetic\" feature with constant value equal to\nintercept_scaling is appended to the instance vector.\nThe intercept becomes ``intercept_scaling * synthetic_feature_weight``.\n\nNote! the synthetic feature weight is subject to l1/l2 regularization\nas all other features.\nTo lessen the effect of regularization on synthetic feature weight\n(and therefore on the intercept) intercept_scaling has to be increased.\n",
  1299. "name": "intercept_scaling",
  1300. "optional": true,
  1301. "type": "float32"
  1302. },
  1303. {
  1304. "default": null,
  1305. "description": "Weights associated with classes in the form ``{class_label: weight}``.\nIf not given, all classes are supposed to have weight one.\n\nThe \"balanced\" mode uses the values of y to automatically adjust\nweights inversely proportional to class frequencies in the input data\nas ``n_samples / (n_classes * np.bincount(y))``.\n\nNote that these weights will be multiplied with sample_weight (passed\nthrough the fit method) if sample_weight is specified.\n\n.. versionadded:: 0.17\n*class_weight='balanced'*\n",
  1306. "name": "class_weight",
  1307. "optional": true
  1308. },
  1309. {
  1310. "default": null,
  1311. "description": "Used when ``solver`` == 'sag', 'saga' or 'liblinear' to shuffle the\ndata. See :term:`Glossary <random_state>` for details.\n",
  1312. "name": "random_state",
  1313. "optional": true,
  1314. "type": "int32"
  1315. },
  1316. {
  1317. "default": "lbfgs",
  1318. "description": "\nAlgorithm to use in the optimization problem. Default is 'lbfgs'.\nTo choose a solver, you might want to consider the following aspects:\n\n- For small datasets, 'liblinear' is a good choice, whereas 'sag'\nand 'saga' are faster for large ones;\n- For multiclass problems, only 'newton-cg', 'sag', 'saga' and\n'lbfgs' handle multinomial loss;\n- 'liblinear' and 'newton-cholesky' can only handle binary classification\nby default. To apply a one-versus-rest scheme for the multiclass setting\none can wrap it with the `OneVsRestClassifier`.\n- 'newton-cholesky' is a good choice for `n_samples` >> `n_features`,\nespecially with one-hot encoded categorical features with rare\ncategories. Be aware that the memory usage of this solver has a quadratic\ndependency on `n_features` because it explicitly computes the Hessian\nmatrix.\n\n.. warning::\nThe choice of the algorithm depends on the penalty chosen and on\n(multinomial) multiclass support:\n\n================= ============================== ======================\nsolver penalty multinomial multiclass\n================= ============================== ======================\n'lbfgs' 'l2', None yes\n'liblinear' 'l1', 'l2' no\n'newton-cg' 'l2', None yes\n'newton-cholesky' 'l2', None no\n'sag' 'l2', None yes\n'saga' 'elasticnet', 'l1', 'l2', None yes\n================= ============================== ======================\n\n.. note::\n'sag' and 'saga' fast convergence is only guaranteed on features\nwith approximately the same scale. You can preprocess the data with\na scaler from :mod:`sklearn.preprocessing`.\n\n.. seealso::\nRefer to the User Guide for more information regarding\n:class:`LogisticRegression` and more specifically the\n:ref:`Table <Logistic_regression>`\nsummarizing solver/penalty supports.\n\n.. versionadded:: 0.17\nStochastic Average Gradient descent solver.\n.. versionadded:: 0.19\nSAGA solver.\n.. versionchanged:: 0.22\nThe default solver changed from 'liblinear' to 'lbfgs' in 0.22.\n.. versionadded:: 1.2\nnewton-cholesky solver.\n",
  1319. "name": "solver",
  1320. "optional": true
  1321. },
  1322. {
  1323. "default": 100,
  1324. "description": "Maximum number of iterations taken for the solvers to converge.\n",
  1325. "name": "max_iter",
  1326. "optional": true,
  1327. "type": "int32"
  1328. },
  1329. {
  1330. "default": "auto",
  1331. "description": "If the option chosen is 'ovr', then a binary problem is fit for each\nlabel. For 'multinomial' the loss minimised is the multinomial loss fit\nacross the entire probability distribution, *even when the data is\nbinary*. 'multinomial' is unavailable when solver='liblinear'.\n'auto' selects 'ovr' if the data is binary, or if solver='liblinear',\nand otherwise selects 'multinomial'.\n\n.. versionadded:: 0.18\nStochastic Average Gradient descent solver for 'multinomial' case.\n.. versionchanged:: 0.22\nDefault changed from 'ovr' to 'auto' in 0.22.\n.. deprecated:: 1.5\n``multi_class`` was deprecated in version 1.5 and will be removed in 1.7.\nFrom then on, the recommended 'multinomial' will always be used for\n`n_classes >= 3`.\nSolvers that do not support 'multinomial' will raise an error.\nUse `sklearn.multiclass.OneVsRestClassifier(LogisticRegression())` if you\nstill want to use OvR.\n",
  1332. "name": "multi_class",
  1333. "optional": true
  1334. },
  1335. {
  1336. "default": 0,
  1337. "description": "For the liblinear and lbfgs solvers set verbose to any positive\nnumber for verbosity.\n",
  1338. "name": "verbose",
  1339. "optional": true,
  1340. "type": "int32"
  1341. },
  1342. {
  1343. "default": false,
  1344. "description": "When set to True, reuse the solution of the previous call to fit as\ninitialization, otherwise, just erase the previous solution.\nUseless for liblinear solver. See :term:`the Glossary <warm_start>`.\n\n.. versionadded:: 0.17\n*warm_start* to support *lbfgs*, *newton-cg*, *sag*, *saga* solvers.\n",
  1345. "name": "warm_start",
  1346. "optional": true,
  1347. "type": "boolean"
  1348. },
  1349. {
  1350. "default": null,
  1351. "description": "Number of CPU cores used when parallelizing over classes if\nmulti_class='ovr'. This parameter is ignored when the ``solver`` is\nset to 'liblinear' regardless of whether 'multi_class' is specified or\nnot. ``None`` means 1 unless in a :obj:`joblib.parallel_backend`\ncontext. ``-1`` means using all processors.\nSee :term:`Glossary <n_jobs>` for more details.\n",
  1352. "name": "n_jobs",
  1353. "optional": true,
  1354. "type": "int32"
  1355. },
  1356. {
  1357. "default": null,
  1358. "description": "The Elastic-Net mixing parameter, with ``0 <= l1_ratio <= 1``. Only\nused if ``penalty='elasticnet'``. Setting ``l1_ratio=0`` is equivalent\nto using ``penalty='l2'``, while setting ``l1_ratio=1`` is equivalent\nto using ``penalty='l1'``. For ``0 < l1_ratio <1``, the penalty is a\ncombination of L1 and L2.\n",
  1359. "name": "l1_ratio",
  1360. "optional": true,
  1361. "type": "float32"
  1362. }
  1363. ]
  1364. },
  1365. {
  1366. "name": "sklearn.model_selection._search.GridSearchCV",
  1367. "description": "Exhaustive search over specified parameter values for an estimator.\n\nImportant members are fit, predict.\n\nGridSearchCV implements a \"fit\" and a \"score\" method.\nIt also implements \"score_samples\", \"predict\", \"predict_proba\",\n\"decision_function\", \"transform\" and \"inverse_transform\" if they are\nimplemented in the estimator used.\n\nThe parameters of the estimator used to apply these methods are optimized\nby cross-validated grid-search over a parameter grid.\n\nRead more in the :ref:`User Guide <grid_search>`.\n",
  1368. "attributes": [
  1369. {
  1370. "description": "This is assumed to implement the scikit-learn estimator interface.\nEither estimator needs to provide a ``score`` function,\nor ``scoring`` must be passed.\n",
  1371. "name": "estimator"
  1372. },
  1373. {
  1374. "description": "Dictionary with parameters names (`str`) as keys and lists of\nparameter settings to try as values, or a list of such\ndictionaries, in which case the grids spanned by each dictionary\nin the list are explored. This enables searching over any sequence\nof parameter settings.\n",
  1375. "name": "param_grid"
  1376. },
  1377. {
  1378. "default": null,
  1379. "description": "Strategy to evaluate the performance of the cross-validated model on\nthe test set.\n\nIf `scoring` represents a single score, one can use:\n\n- a single string (see :ref:`scoring_parameter`);\n- a callable (see :ref:`scoring`) that returns a single value.\n\nIf `scoring` represents multiple scores, one can use:\n\n- a list or tuple of unique strings;\n- a callable returning a dictionary where the keys are the metric\nnames and the values are the metric scores;\n- a dictionary with metric names as keys and callables as values.\n\nSee :ref:`multimetric_grid_search` for an example.\n",
  1380. "name": "scoring"
  1381. },
  1382. {
  1383. "default": null,
  1384. "description": "Number of jobs to run in parallel.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary <n_jobs>`\nfor more details.\n\n.. versionchanged:: v0.20\n`n_jobs` default changed from 1 to None\n",
  1385. "name": "n_jobs",
  1386. "type": "int32"
  1387. },
  1388. {
  1389. "description": "Controls the number of jobs that get dispatched during parallel\nexecution. Reducing this number can be useful to avoid an\nexplosion of memory consumption when more jobs get dispatched\nthan CPUs can process. This parameter can be:\n\n- None, in which case all the jobs are immediately\ncreated and spawned. Use this for lightweight and\nfast-running jobs, to avoid delays due to on-demand\nspawning of the jobs\n\n- An int, giving the exact number of total jobs that are\nspawned\n\n- A str, giving an expression as a function of n_jobs,\nas in '2*n_jobs'\n",
  1390. "name": "pre_dispatch",
  1391. "default": "2*n_jobs"
  1392. },
  1393. {
  1394. "default": false,
  1395. "description": "If True, return the average score across folds, weighted by the number\nof samples in each test set. In this case, the data is assumed to be\nidentically distributed across the folds, and the loss minimized is\nthe total loss per sample, and not the mean loss across the folds.\n\n.. deprecated:: 0.22\nParameter ``iid`` is deprecated in 0.22 and will be removed in 0.24\n",
  1396. "name": "iid",
  1397. "type": "boolean"
  1398. },
  1399. {
  1400. "default": null,
  1401. "description": "Determines the cross-validation splitting strategy.\nPossible inputs for cv are:\n\n- None, to use the default 5-fold cross validation,\n- integer, to specify the number of folds in a `(Stratified)KFold`,\n- :term:`CV splitter`,\n- An iterable yielding (train, test) splits as arrays of indices.\n\nFor integer/None inputs, if the estimator is a classifier and ``y`` is\neither binary or multiclass, :class:`StratifiedKFold` is used. In all\nother cases, :class:`KFold` is used. These splitters are instantiated\nwith `shuffle=False` so the splits will be the same across calls.\n\nRefer to the :ref:`User Guide <cross_validation>` for the various\ncross-validation strategies that can be used here.\n\n.. versionchanged:: 0.22\n``cv`` default value if None changed from 3-fold to 5-fold.\n",
  1402. "name": "cv",
  1403. "type": "int32"
  1404. },
  1405. {
  1406. "default": "True",
  1407. "description": "Refit an estimator using the best found parameters on the whole\ndataset.\n\nFor multiple metric evaluation, this needs to be a `str` denoting the\nscorer that would be used to find the best parameters for refitting\nthe estimator at the end.\n\nWhere there are considerations other than maximum score in\nchoosing a best estimator, ``refit`` can be set to a function which\nreturns the selected ``best_index_`` given ``cv_results_``. In that\ncase, the ``best_estimator_`` and ``best_params_`` will be set\naccording to the returned ``best_index_`` while the ``best_score_``\nattribute will not be available.\n\nThe refitted estimator is made available at the ``best_estimator_``\nattribute and permits using ``predict`` directly on this\n``GridSearchCV`` instance.\n\nAlso for multiple metric evaluation, the attributes ``best_index_``,\n``best_score_`` and ``best_params_`` will only be available if\n``refit`` is set and all of them will be determined w.r.t this specific\nscorer.\n\nSee ``scoring`` parameter to know more about multiple metric\nevaluation.\n\nSee :ref:`sphx_glr_auto_examples_model_selection_plot_grid_search_digits.py`\nto see how to design a custom selection strategy using a callable\nvia `refit`.\n\n.. versionchanged:: 0.20\nSupport for callable added.\n",
  1408. "name": "refit",
  1409. "type": "boolean"
  1410. },
  1411. {
  1412. "description": "Controls the verbosity: the higher, the more messages.\n\n- >1 : the computation time for each fold and parameter candidate is\ndisplayed;\n- >2 : the score is also displayed;\n- >3 : the fold and candidate parameter indexes are also displayed\ntogether with the starting time of the computation.\n",
  1413. "name": "verbose",
  1414. "type": "int32"
  1415. },
  1416. {
  1417. "description": "Value to assign to the score if an error occurs in estimator fitting.\nIf set to 'raise', the error is raised. If a numeric value is given,\nFitFailedWarning is raised. This parameter does not affect the refit\nstep, which will always raise the error.\n",
  1418. "name": "error_score",
  1419. "default": "np.nan"
  1420. },
  1421. {
  1422. "default": false,
  1423. "description": "If ``False``, the ``cv_results_`` attribute will not include training\nscores.\nComputing training scores is used to get insights on how different\nparameter settings impact the overfitting/underfitting trade-off.\nHowever computing the scores on the training set can be computationally\nexpensive and is not strictly required to select the parameters that\nyield the best generalization performance.\n\n.. versionadded:: 0.19\n\n.. versionchanged:: 0.21\nDefault value was changed from ``True`` to ``False``\n",
  1424. "name": "return_train_score",
  1425. "type": "boolean"
  1426. }
  1427. ]
  1428. },
  1429. {
  1430. "name": "sklearn.naive_bayes.BernoulliNB",
  1431. "description": "Naive Bayes classifier for multivariate Bernoulli models.\n\nLike MultinomialNB, this classifier is suitable for discrete data. The\ndifference is that while MultinomialNB works with occurrence counts,\nBernoulliNB is designed for binary/boolean features.\n\nRead more in the :ref:`User Guide <bernoulli_naive_bayes>`.\n",
  1432. "attributes": [
  1433. {
  1434. "default": "1.0",
  1435. "description": "Additive (Laplace/Lidstone) smoothing parameter\n(set alpha=0 and force_alpha=True, for no smoothing).\n",
  1436. "name": "alpha",
  1437. "optional": true,
  1438. "type": "float32"
  1439. },
  1440. {
  1441. "default": "0.0",
  1442. "description": "Threshold for binarizing (mapping to booleans) of sample features.\nIf None, input is presumed to already consist of binary vectors.\n",
  1443. "name": "binarize",
  1444. "optional": true
  1445. },
  1446. {
  1447. "default": true,
  1448. "description": "Whether to learn class prior probabilities or not.\nIf false, a uniform prior will be used.\n",
  1449. "name": "fit_prior",
  1450. "optional": true,
  1451. "type": "boolean"
  1452. },
  1453. {
  1454. "default": null,
  1455. "description": "Prior probabilities of the classes. If specified, the priors are not\nadjusted according to the data.\n",
  1456. "name": "class_prior",
  1457. "optional": true
  1458. },
  1459. {
  1460. "name": "force_alpha",
  1461. "description": "If False and alpha is less than 1e-10, it will set alpha to\n1e-10. If True, alpha will remain unchanged. This may cause\nnumerical errors if alpha is too close to 0.\n\n.. versionadded:: 1.2\n.. versionchanged:: 1.4\nThe default value of `force_alpha` changed to `True`.\n",
  1462. "type": "boolean",
  1463. "default": true
  1464. }
  1465. ]
  1466. },
  1467. {
  1468. "name": "sklearn.naive_bayes.ComplementNB",
  1469. "description": "The Complement Naive Bayes classifier described in Rennie et al. (2003).\n\nThe Complement Naive Bayes classifier was designed to correct the \"severe\nassumptions\" made by the standard Multinomial Naive Bayes classifier. It is\nparticularly suited for imbalanced data sets.\n\nRead more in the :ref:`User Guide <complement_naive_bayes>`.\n\n.. versionadded:: 0.20\n",
  1470. "attributes": [
  1471. {
  1472. "default": "1.0",
  1473. "description": "Additive (Laplace/Lidstone) smoothing parameter\n(set alpha=0 and force_alpha=True, for no smoothing).\n",
  1474. "name": "alpha",
  1475. "optional": true,
  1476. "type": "float32"
  1477. },
  1478. {
  1479. "default": true,
  1480. "description": "Only used in edge case with a single class in the training set.\n",
  1481. "name": "fit_prior",
  1482. "optional": true,
  1483. "type": "boolean"
  1484. },
  1485. {
  1486. "default": null,
  1487. "description": "Prior probabilities of the classes. Not used.\n",
  1488. "name": "class_prior",
  1489. "optional": true
  1490. },
  1491. {
  1492. "default": false,
  1493. "description": "Whether or not a second normalization of the weights is performed. The\ndefault behavior mirrors the implementations found in Mahout and Weka,\nwhich do not follow the full algorithm described in Table 9 of the\npaper.\n",
  1494. "name": "norm",
  1495. "optional": true,
  1496. "type": "boolean"
  1497. },
  1498. {
  1499. "name": "force_alpha",
  1500. "description": "If False and alpha is less than 1e-10, it will set alpha to\n1e-10. If True, alpha will remain unchanged. This may cause\nnumerical errors if alpha is too close to 0.\n\n.. versionadded:: 1.2\n.. versionchanged:: 1.4\nThe default value of `force_alpha` changed to `True`.\n",
  1501. "type": "boolean",
  1502. "default": true
  1503. }
  1504. ]
  1505. },
  1506. {
  1507. "name": "sklearn.naive_bayes.MultinomialNB",
  1508. "description": "\nNaive Bayes classifier for multinomial models.\n\nThe multinomial Naive Bayes classifier is suitable for classification with\ndiscrete features (e.g., word counts for text classification). The\nmultinomial distribution normally requires integer feature counts. However,\nin practice, fractional counts such as tf-idf may also work.\n\nRead more in the :ref:`User Guide <multinomial_naive_bayes>`.\n",
  1509. "attributes": [
  1510. {
  1511. "default": "1.0",
  1512. "description": "Additive (Laplace/Lidstone) smoothing parameter\n(set alpha=0 and force_alpha=True, for no smoothing).\n",
  1513. "name": "alpha",
  1514. "optional": true,
  1515. "type": "float32"
  1516. },
  1517. {
  1518. "default": true,
  1519. "description": "Whether to learn class prior probabilities or not.\nIf false, a uniform prior will be used.\n",
  1520. "name": "fit_prior",
  1521. "optional": true,
  1522. "type": "boolean"
  1523. },
  1524. {
  1525. "default": null,
  1526. "description": "Prior probabilities of the classes. If specified, the priors are not\nadjusted according to the data.\n",
  1527. "name": "class_prior",
  1528. "optional": true
  1529. },
  1530. {
  1531. "name": "force_alpha",
  1532. "description": "If False and alpha is less than 1e-10, it will set alpha to\n1e-10. If True, alpha will remain unchanged. This may cause\nnumerical errors if alpha is too close to 0.\n\n.. versionadded:: 1.2\n.. versionchanged:: 1.4\nThe default value of `force_alpha` changed to `True`.\n",
  1533. "type": "boolean",
  1534. "default": true
  1535. }
  1536. ]
  1537. },
  1538. {
  1539. "name": "sklearn.neighbors.KNeighborsClassifier",
  1540. "description": "Classifier implementing the k-nearest neighbors vote.\n\nRead more in the :ref:`User Guide <classification>`.\n",
  1541. "attributes": [
  1542. {
  1543. "default": 5,
  1544. "description": "Number of neighbors to use by default for :meth:`kneighbors` queries.\n",
  1545. "name": "n_neighbors",
  1546. "optional": true,
  1547. "type": "int32"
  1548. },
  1549. {
  1550. "default": "uniform",
  1551. "description": "Weight function used in prediction. Possible values:\n\n- 'uniform' : uniform weights. All points in each neighborhood\nare weighted equally.\n- 'distance' : weight points by the inverse of their distance.\nIn this case, closer neighbors of a query point will have a\ngreater influence than neighbors which are further away.\n- [callable] : a user-defined function which accepts an\narray of distances, and returns an array of the same shape\ncontaining the weights.\n\nRefer to the example entitled\n:ref:`sphx_glr_auto_examples_neighbors_plot_classification.py`\nshowing the impact of the `weights` parameter on the decision\nboundary.\n",
  1552. "name": "weights",
  1553. "optional": true
  1554. },
  1555. {
  1556. "default": "auto",
  1557. "description": "Algorithm used to compute the nearest neighbors:\n\n- 'ball_tree' will use :class:`BallTree`\n- 'kd_tree' will use :class:`KDTree`\n- 'brute' will use a brute-force search.\n- 'auto' will attempt to decide the most appropriate algorithm\nbased on the values passed to :meth:`fit` method.\n\nNote: fitting on sparse input will override the setting of\nthis parameter, using brute force.\n",
  1558. "name": "algorithm",
  1559. "optional": true
  1560. },
  1561. {
  1562. "default": 30,
  1563. "description": "Leaf size passed to BallTree or KDTree. This can affect the\nspeed of the construction and query, as well as the memory\nrequired to store the tree. The optimal value depends on the\nnature of the problem.\n",
  1564. "name": "leaf_size",
  1565. "optional": true,
  1566. "type": "int32"
  1567. },
  1568. {
  1569. "default": 2.0,
  1570. "description": "Power parameter for the Minkowski metric. When p = 1, this is equivalent\nto using manhattan_distance (l1), and euclidean_distance (l2) for p = 2.\nFor arbitrary p, minkowski_distance (l_p) is used. This parameter is expected\nto be positive.\n",
  1571. "name": "p",
  1572. "optional": true,
  1573. "type": "int32"
  1574. },
  1575. {
  1576. "default": "minkowski",
  1577. "description": "Metric to use for distance computation. Default is \"minkowski\", which\nresults in the standard Euclidean distance when p = 2. See the\ndocumentation of `scipy.spatial.distance\n<https://docs.scipy.org/doc/scipy/reference/spatial.distance.html>`_ and\nthe metrics listed in\n:class:`~sklearn.metrics.pairwise.distance_metrics` for valid metric\nvalues.\n\nIf metric is \"precomputed\", X is assumed to be a distance matrix and\nmust be square during fit. X may be a :term:`sparse graph`, in which\ncase only \"nonzero\" elements may be considered neighbors.\n\nIf metric is a callable function, it takes two arrays representing 1D\nvectors as inputs and must return one value indicating the distance\nbetween those vectors. This works for Scipy's metrics, but is less\nefficient than passing the metric name as a string.\n",
  1578. "name": "metric"
  1579. },
  1580. {
  1581. "default": null,
  1582. "description": "Additional keyword arguments for the metric function.\n",
  1583. "name": "metric_params",
  1584. "optional": true
  1585. },
  1586. {
  1587. "default": null,
  1588. "description": "The number of parallel jobs to run for neighbors search.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary <n_jobs>`\nfor more details.\nDoesn't affect :meth:`fit` method.\n",
  1589. "name": "n_jobs",
  1590. "optional": true,
  1591. "type": "int32"
  1592. }
  1593. ]
  1594. },
  1595. {
  1596. "name": "sklearn.neighbors.KNeighborsRegressor",
  1597. "description": "Regression based on k-nearest neighbors.\n\nThe target is predicted by local interpolation of the targets\nassociated with the nearest neighbors in the training set.\n\nRead more in the :ref:`User Guide <regression>`.\n\n.. versionadded:: 0.9\n",
  1598. "attributes": [
  1599. {
  1600. "default": 5,
  1601. "description": "Number of neighbors to use by default for :meth:`kneighbors` queries.\n",
  1602. "name": "n_neighbors",
  1603. "optional": true,
  1604. "type": "int32"
  1605. },
  1606. {
  1607. "description": "Weight function used in prediction. Possible values:\n\n- 'uniform' : uniform weights. All points in each neighborhood\nare weighted equally.\n- 'distance' : weight points by the inverse of their distance.\nIn this case, closer neighbors of a query point will have a\ngreater influence than neighbors which are further away.\n- [callable] : a user-defined function which accepts an\narray of distances, and returns an array of the same shape\ncontaining the weights.\n\nUniform weights are used by default.\n",
  1608. "name": "weights",
  1609. "default": "uniform"
  1610. },
  1611. {
  1612. "default": "auto",
  1613. "description": "Algorithm used to compute the nearest neighbors:\n\n- 'ball_tree' will use :class:`BallTree`\n- 'kd_tree' will use :class:`KDTree`\n- 'brute' will use a brute-force search.\n- 'auto' will attempt to decide the most appropriate algorithm\nbased on the values passed to :meth:`fit` method.\n\nNote: fitting on sparse input will override the setting of\nthis parameter, using brute force.\n",
  1614. "name": "algorithm",
  1615. "optional": true
  1616. },
  1617. {
  1618. "default": 30,
  1619. "description": "Leaf size passed to BallTree or KDTree. This can affect the\nspeed of the construction and query, as well as the memory\nrequired to store the tree. The optimal value depends on the\nnature of the problem.\n",
  1620. "name": "leaf_size",
  1621. "optional": true,
  1622. "type": "int32"
  1623. },
  1624. {
  1625. "default": 2.0,
  1626. "description": "Power parameter for the Minkowski metric. When p = 1, this is\nequivalent to using manhattan_distance (l1), and euclidean_distance\n(l2) for p = 2. For arbitrary p, minkowski_distance (l_p) is used.\n",
  1627. "name": "p",
  1628. "optional": true,
  1629. "type": "int32"
  1630. },
  1631. {
  1632. "default": "minkowski",
  1633. "description": "Metric to use for distance computation. Default is \"minkowski\", which\nresults in the standard Euclidean distance when p = 2. See the\ndocumentation of `scipy.spatial.distance\n<https://docs.scipy.org/doc/scipy/reference/spatial.distance.html>`_ and\nthe metrics listed in\n:class:`~sklearn.metrics.pairwise.distance_metrics` for valid metric\nvalues.\n\nIf metric is \"precomputed\", X is assumed to be a distance matrix and\nmust be square during fit. X may be a :term:`sparse graph`, in which\ncase only \"nonzero\" elements may be considered neighbors.\n\nIf metric is a callable function, it takes two arrays representing 1D\nvectors as inputs and must return one value indicating the distance\nbetween those vectors. This works for Scipy's metrics, but is less\nefficient than passing the metric name as a string.\n\nIf metric is a DistanceMetric object, it will be passed directly to\nthe underlying computation routines.\n",
  1634. "name": "metric"
  1635. },
  1636. {
  1637. "default": null,
  1638. "description": "Additional keyword arguments for the metric function.\n",
  1639. "name": "metric_params",
  1640. "optional": true
  1641. },
  1642. {
  1643. "default": null,
  1644. "description": "The number of parallel jobs to run for neighbors search.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary <n_jobs>`\nfor more details.\nDoesn't affect :meth:`fit` method.\n",
  1645. "name": "n_jobs",
  1646. "optional": true,
  1647. "type": "int32"
  1648. }
  1649. ]
  1650. },
  1651. {
  1652. "name": "sklearn.neural_network.multilayer_perceptron.MLPRegressor",
  1653. "description": "Multi-layer Perceptron regressor.\n\nThis model optimizes the squared-loss using LBFGS or stochastic gradient\ndescent.\n\n.. versionadded:: 0.18\n",
  1654. "attributes": [
  1655. {
  1656. "default": "(100,)",
  1657. "description": "The ith element represents the number of neurons in the ith\nhidden layer.\n",
  1658. "name": "hidden_layer_sizes"
  1659. },
  1660. {
  1661. "default": "relu",
  1662. "description": "Activation function for the hidden layer.\n\n- 'identity', no-op activation, useful to implement linear bottleneck,\nreturns f(x) = x\n\n- 'logistic', the logistic sigmoid function,\nreturns f(x) = 1 / (1 + exp(-x)).\n\n- 'tanh', the hyperbolic tan function,\nreturns f(x) = tanh(x).\n\n- 'relu', the rectified linear unit function,\nreturns f(x) = max(0, x)\n",
  1663. "name": "activation"
  1664. },
  1665. {
  1666. "default": "adam",
  1667. "description": "The solver for weight optimization.\n\n- 'lbfgs' is an optimizer in the family of quasi-Newton methods.\n\n- 'sgd' refers to stochastic gradient descent.\n\n- 'adam' refers to a stochastic gradient-based optimizer proposed by\nKingma, Diederik, and Jimmy Ba\n\nNote: The default solver 'adam' works pretty well on relatively\nlarge datasets (with thousands of training samples or more) in terms of\nboth training time and validation score.\nFor small datasets, however, 'lbfgs' can converge faster and perform\nbetter.\n",
  1668. "name": "solver"
  1669. },
  1670. {
  1671. "default": 0.0001,
  1672. "description": "L2 penalty (regularization term) parameter.\n",
  1673. "name": "alpha",
  1674. "type": "float32"
  1675. },
  1676. {
  1677. "default": "auto",
  1678. "description": "Size of minibatches for stochastic optimizers.\nIf the solver is 'lbfgs', the regressor will not use minibatch.\nWhen set to \"auto\", `batch_size=min(200, n_samples)`.\n",
  1679. "name": "batch_size",
  1680. "type": "int32"
  1681. },
  1682. {
  1683. "default": "constant",
  1684. "description": "Learning rate schedule for weight updates.\n\n- 'constant' is a constant learning rate given by\n'learning_rate_init'.\n\n- 'invscaling' gradually decreases the learning rate ``learning_rate_``\nat each time step 't' using an inverse scaling exponent of 'power_t'.\neffective_learning_rate = learning_rate_init / pow(t, power_t)\n\n- 'adaptive' keeps the learning rate constant to\n'learning_rate_init' as long as training loss keeps decreasing.\nEach time two consecutive epochs fail to decrease training loss by at\nleast tol, or fail to increase validation score by at least tol if\n'early_stopping' is on, the current learning rate is divided by 5.\n\nOnly used when solver='sgd'.\n",
  1685. "name": "learning_rate"
  1686. },
  1687. {
  1688. "default": "0.001",
  1689. "description": "The initial learning rate used. It controls the step-size\nin updating the weights. Only used when solver='sgd' or 'adam'.\n",
  1690. "name": "learning_rate_init"
  1691. },
  1692. {
  1693. "default": "0.5",
  1694. "description": "The exponent for inverse scaling learning rate.\nIt is used in updating effective learning rate when the learning_rate\nis set to 'invscaling'. Only used when solver='sgd'.\n",
  1695. "name": "power_t"
  1696. },
  1697. {
  1698. "default": 200,
  1699. "description": "Maximum number of iterations. The solver iterates until convergence\n(determined by 'tol') or this number of iterations. For stochastic\nsolvers ('sgd', 'adam'), note that this determines the number of epochs\n(how many times each data point will be used), not the number of\ngradient steps.\n",
  1700. "name": "max_iter",
  1701. "type": "int32"
  1702. },
  1703. {
  1704. "default": true,
  1705. "description": "Whether to shuffle samples in each iteration. Only used when\nsolver='sgd' or 'adam'.\n",
  1706. "name": "shuffle",
  1707. "type": "boolean"
  1708. },
  1709. {
  1710. "default": null,
  1711. "description": "Determines random number generation for weights and bias\ninitialization, train-test split if early stopping is used, and batch\nsampling when solver='sgd' or 'adam'.\nPass an int for reproducible results across multiple function calls.\nSee :term:`Glossary <random_state>`.\n",
  1712. "name": "random_state",
  1713. "type": "int32"
  1714. },
  1715. {
  1716. "default": 0.0001,
  1717. "description": "Tolerance for the optimization. When the loss or score is not improving\nby at least ``tol`` for ``n_iter_no_change`` consecutive iterations,\nunless ``learning_rate`` is set to 'adaptive', convergence is\nconsidered to be reached and training stops.\n",
  1718. "name": "tol",
  1719. "type": "float32"
  1720. },
  1721. {
  1722. "default": false,
  1723. "description": "Whether to print progress messages to stdout.\n",
  1724. "name": "verbose",
  1725. "type": "boolean"
  1726. },
  1727. {
  1728. "default": false,
  1729. "description": "When set to True, reuse the solution of the previous\ncall to fit as initialization, otherwise, just erase the\nprevious solution. See :term:`the Glossary <warm_start>`.\n",
  1730. "name": "warm_start",
  1731. "type": "boolean"
  1732. },
  1733. {
  1734. "default": 0.9,
  1735. "description": "Momentum for gradient descent update. Should be between 0 and 1. Only\nused when solver='sgd'.\n",
  1736. "name": "momentum",
  1737. "type": "float32"
  1738. },
  1739. {
  1740. "default": true,
  1741. "description": "Whether to use Nesterov's momentum. Only used when solver='sgd' and\nmomentum > 0.\n",
  1742. "name": "nesterovs_momentum",
  1743. "type": "boolean"
  1744. },
  1745. {
  1746. "default": false,
  1747. "description": "Whether to use early stopping to terminate training when validation\nscore is not improving. If set to true, it will automatically set\naside 10% of training data as validation and terminate training when\nvalidation score is not improving by at least ``tol`` for\n``n_iter_no_change`` consecutive epochs.\nOnly effective when solver='sgd' or 'adam'\n",
  1748. "name": "early_stopping",
  1749. "type": "boolean"
  1750. },
  1751. {
  1752. "default": 0.1,
  1753. "description": "The proportion of training data to set aside as validation set for\nearly stopping. Must be between 0 and 1.\nOnly used if early_stopping is True\n",
  1754. "name": "validation_fraction",
  1755. "type": "float32"
  1756. },
  1757. {
  1758. "default": 0.9,
  1759. "description": "Exponential decay rate for estimates of first moment vector in adam,\nshould be in [0, 1). Only used when solver='adam'\n",
  1760. "name": "beta_1",
  1761. "type": "float32"
  1762. },
  1763. {
  1764. "default": 0.999,
  1765. "description": "Exponential decay rate for estimates of second moment vector in adam,\nshould be in [0, 1). Only used when solver='adam'\n",
  1766. "name": "beta_2",
  1767. "type": "float32"
  1768. },
  1769. {
  1770. "default": 1e-08,
  1771. "description": "Value for numerical stability in adam. Only used when solver='adam'\n",
  1772. "name": "epsilon",
  1773. "type": "float32"
  1774. },
  1775. {
  1776. "default": 10,
  1777. "description": "Maximum number of epochs to not meet ``tol`` improvement.\nOnly effective when solver='sgd' or 'adam'\n\n.. versionadded:: 0.20\n",
  1778. "name": "n_iter_no_change",
  1779. "type": "int32"
  1780. },
  1781. {
  1782. "default": 15000,
  1783. "description": "Only used when solver='lbfgs'. Maximum number of function calls.\nThe solver iterates until convergence (determined by 'tol'), number\nof iterations reaches max_iter, or this number of function calls.\nNote that number of function calls will be greater than or equal to\nthe number of iterations for the MLPRegressor.\n\n.. versionadded:: 0.22\n",
  1784. "name": "max_fun",
  1785. "type": "int32"
  1786. }
  1787. ]
  1788. },
  1789. {
  1790. "name": "sklearn.pipeline.FeatureUnion",
  1791. "description": "Concatenates results of multiple transformer objects.\n\nThis estimator applies a list of transformer objects in parallel to the\ninput data, then concatenates the results. This is useful to combine\nseveral feature extraction mechanisms into a single transformer.\n\nParameters of the transformers may be set using its name and the parameter\nname separated by a '__'. A transformer may be replaced entirely by\nsetting the parameter with its name to another transformer, removed by\nsetting to 'drop' or disabled by setting to 'passthrough' (features are\npassed without transformation).\n\nRead more in the :ref:`User Guide <feature_union>`.\n\n.. versionadded:: 0.13\n",
  1792. "attributes": [
  1793. {
  1794. "description": "List of transformer objects to be applied to the data. The first\nhalf of each tuple is the name of the transformer. The transformer can\nbe 'drop' for it to be ignored or can be 'passthrough' for features to\nbe passed unchanged.\n\n.. versionadded:: 1.1\nAdded the option `\"passthrough\"`.\n\n.. versionchanged:: 0.22\nDeprecated `None` as a transformer in favor of 'drop'.\n",
  1795. "name": "transformer_list"
  1796. },
  1797. {
  1798. "default": null,
  1799. "description": "Number of jobs to run in parallel.\n``None`` means 1 unless in a :obj:`joblib.parallel_backend` context.\n``-1`` means using all processors. See :term:`Glossary <n_jobs>`\nfor more details.\n\n.. versionchanged:: v0.20\n`n_jobs` default changed from 1 to None\n",
  1800. "name": "n_jobs",
  1801. "type": "int32"
  1802. },
  1803. {
  1804. "default": null,
  1805. "description": "Multiplicative weights for features per transformer.\nKeys are transformer names, values the weights.\nRaises ValueError if key not present in ``transformer_list``.\n",
  1806. "name": "transformer_weights"
  1807. },
  1808. {
  1809. "default": false,
  1810. "description": "If True, the time elapsed while fitting each transformer will be\nprinted as it is completed.\n",
  1811. "name": "verbose",
  1812. "type": "boolean"
  1813. },
  1814. {
  1815. "name": "verbose_feature_names_out",
  1816. "default": true,
  1817. "description": "If True, :meth:`get_feature_names_out` will prefix all feature names\nwith the name of the transformer that generated that feature.\nIf False, :meth:`get_feature_names_out` will not prefix any feature\nnames and will error if feature names are not unique.\n\n.. versionadded:: 1.5\n"
  1818. }
  1819. ]
  1820. },
  1821. {
  1822. "name": "sklearn.preprocessing._data.StandardScaler",
  1823. "description": "Standardize features by removing the mean and scaling to unit variance.\n\nThe standard score of a sample `x` is calculated as:\n\nz = (x - u) / s\n\nwhere `u` is the mean of the training samples or zero if `with_mean=False`,\nand `s` is the standard deviation of the training samples or one if\n`with_std=False`.\n\nCentering and scaling happen independently on each feature by computing\nthe relevant statistics on the samples in the training set. Mean and\nstandard deviation are then stored to be used on later data using\n:meth:`transform`.\n\nStandardization of a dataset is a common requirement for many\nmachine learning estimators: they might behave badly if the\nindividual features do not more or less look like standard normally\ndistributed data (e.g. Gaussian with 0 mean and unit variance).\n\nFor instance many elements used in the objective function of\na learning algorithm (such as the RBF kernel of Support Vector\nMachines or the L1 and L2 regularizers of linear models) assume that\nall features are centered around 0 and have variance in the same\norder. If a feature has a variance that is orders of magnitude larger\nthan others, it might dominate the objective function and make the\nestimator unable to learn from other features correctly as expected.\n\n`StandardScaler` is sensitive to outliers, and the features may scale\ndifferently from each other in the presence of outliers. For an example\nvisualization, refer to :ref:`Compare StandardScaler with other scalers\n<plot_all_scaling_standard_scaler_section>`.\n\nThis scaler can also be applied to sparse CSR or CSC matrices by passing\n`with_mean=False` to avoid breaking the sparsity structure of the data.\n\nRead more in the :ref:`User Guide <preprocessing_scaler>`.\n",
  1824. "attributes": [
  1825. {
  1826. "default": true,
  1827. "description": "If False, try to avoid a copy and do inplace scaling instead.\nThis is not guaranteed to always work inplace; e.g. if the data is\nnot a NumPy array or scipy.sparse CSR matrix, a copy may still be\nreturned.\n",
  1828. "name": "copy",
  1829. "optional": true,
  1830. "type": "boolean"
  1831. },
  1832. {
  1833. "default": true,
  1834. "description": "If True, center the data before scaling.\nThis does not work (and will raise an exception) when attempted on\nsparse matrices, because centering them entails building a dense\nmatrix which in common use cases is likely to be too large to fit in\nmemory.\n",
  1835. "name": "with_mean",
  1836. "type": "boolean"
  1837. },
  1838. {
  1839. "default": true,
  1840. "description": "If True, scale the data to unit variance (or equivalently,\nunit standard deviation).\n",
  1841. "name": "with_std",
  1842. "type": "boolean"
  1843. }
  1844. ]
  1845. },
  1846. {
  1847. "name": "sklearn.preprocessing._encoders.OneHotEncoder",
  1848. "description": "Encode categorical features as a one-hot numeric array.\n\nThe input to this transformer should be an array-like of integers or\nstrings, denoting the values taken on by categorical (discrete) features.\nThe features are encoded using a one-hot (aka 'one-of-K' or 'dummy')\nencoding scheme. This creates a binary column for each category and\nreturns a sparse matrix or dense array (depending on the ``sparse_output``\nparameter).\n\nBy default, the encoder derives the categories based on the unique values\nin each feature. Alternatively, you can also specify the `categories`\nmanually.\n\nThis encoding is needed for feeding categorical data to many scikit-learn\nestimators, notably linear models and SVMs with the standard kernels.\n\nNote: a one-hot encoding of y labels should use a LabelBinarizer\ninstead.\n\nRead more in the :ref:`User Guide <preprocessing_categorical_features>`.\nFor a comparison of different encoders, refer to:\n:ref:`sphx_glr_auto_examples_preprocessing_plot_target_encoder.py`.\n",
  1849. "attributes": [
  1850. {
  1851. "description": "Categories (unique values) per feature:\n\n- 'auto' : Determine categories automatically from the training data.\n- list : ``categories[i]`` holds the categories expected in the ith\ncolumn. The passed categories should not mix strings and numeric\nvalues within a single feature, and should be sorted in case of\nnumeric values.\n\nThe used categories can be found in the ``categories_`` attribute.\n\n.. versionadded:: 0.20\n",
  1852. "name": "categories",
  1853. "default": "auto"
  1854. },
  1855. {
  1856. "description": "Specifies a methodology to use to drop one of the categories per\nfeature. This is useful in situations where perfectly collinear\nfeatures cause problems, such as when feeding the resulting data\ninto an unregularized linear regression model.\n\nHowever, dropping one category breaks the symmetry of the original\nrepresentation and can therefore induce a bias in downstream models,\nfor instance for penalized linear classification or regression models.\n\n- None : retain all features (the default).\n- 'first' : drop the first category in each feature. If only one\ncategory is present, the feature will be dropped entirely.\n- 'if_binary' : drop the first category in each feature with two\ncategories. Features with 1 or more than 2 categories are\nleft intact.\n- array : ``drop[i]`` is the category in feature ``X[:, i]`` that\nshould be dropped.\n\nWhen `max_categories` or `min_frequency` is configured to group\ninfrequent categories, the dropping behavior is handled after the\ngrouping.\n\n.. versionadded:: 0.21\nThe parameter `drop` was added in 0.21.\n\n.. versionchanged:: 0.23\nThe option `drop='if_binary'` was added in 0.23.\n\n.. versionchanged:: 1.1\nSupport for dropping infrequent categories.\n",
  1857. "name": "drop",
  1858. "default": null
  1859. },
  1860. {
  1861. "default": true,
  1862. "description": "Will return sparse matrix if set True else will return an array.\n\n.. deprecated:: 1.2\n`sparse` is deprecated in 1.2 and will be removed in 1.4. Use\n`sparse_output` instead.\n",
  1863. "name": "sparse",
  1864. "type": "boolean"
  1865. },
  1866. {
  1867. "default": "np.float64",
  1868. "description": "Desired dtype of output.\n",
  1869. "name": "dtype"
  1870. },
  1871. {
  1872. "default": "error",
  1873. "description": "Specifies the way unknown categories are handled during :meth:`transform`.\n\n- 'error' : Raise an error if an unknown category is present during transform.\n- 'ignore' : When an unknown category is encountered during\ntransform, the resulting one-hot encoded columns for this feature\nwill be all zeros. In the inverse transform, an unknown category\nwill be denoted as None.\n- 'infrequent_if_exist' : When an unknown category is encountered\nduring transform, the resulting one-hot encoded columns for this\nfeature will map to the infrequent category if it exists. The\ninfrequent category will be mapped to the last position in the\nencoding. During inverse transform, an unknown category will be\nmapped to the category denoted `'infrequent'` if it exists. If the\n`'infrequent'` category does not exist, then :meth:`transform` and\n:meth:`inverse_transform` will handle an unknown category as with\n`handle_unknown='ignore'`. Infrequent categories exist based on\n`min_frequency` and `max_categories`. Read more in the\n:ref:`User Guide <encoder_infrequent_categories>`.\n\n.. versionchanged:: 1.1\n`'infrequent_if_exist'` was added to automatically handle unknown\ncategories and infrequent categories.\n",
  1874. "name": "handle_unknown"
  1875. },
  1876. {
  1877. "name": "min_frequency",
  1878. "description": "Specifies the minimum frequency below which a category will be\nconsidered infrequent.\n\n- If `int`, categories with a smaller cardinality will be considered\ninfrequent.\n\n- If `float`, categories with a smaller cardinality than\n`min_frequency * n_samples` will be considered infrequent.\n\n.. versionadded:: 1.1\nRead more in the :ref:`User Guide <encoder_infrequent_categories>`.\n",
  1879. "default": null
  1880. },
  1881. {
  1882. "name": "max_categories",
  1883. "description": "Specifies an upper limit to the number of output features for each input\nfeature when considering infrequent categories. If there are infrequent\ncategories, `max_categories` includes the category representing the\ninfrequent categories along with the frequent categories. If `None`,\nthere is no limit to the number of output features.\n\n.. versionadded:: 1.1\nRead more in the :ref:`User Guide <encoder_infrequent_categories>`.\n",
  1884. "type": "int32",
  1885. "default": null
  1886. },
  1887. {
  1888. "name": "sparse_output",
  1889. "default": true,
  1890. "description": "When ``True``, it returns a :class:`scipy.sparse.csr_matrix`,\ni.e. a sparse matrix in \"Compressed Sparse Row\" (CSR) format.\n\n.. versionadded:: 1.2\n`sparse` was renamed to `sparse_output`\n"
  1891. },
  1892. {
  1893. "name": "feature_name_combiner",
  1894. "default": "\"concat\"",
  1895. "description": "Callable with signature `def callable(input_feature, category)` that returns a\nstring. This is used to create feature names to be returned by\n:meth:`get_feature_names_out`.\n\n`\"concat\"` concatenates encoded feature name and category with\n`feature + \"_\" + str(category)`. E.g. feature X with values 1, 6, 7 creates\nfeature names `X_1, X_6, X_7`.\n\n.. versionadded:: 1.3\n"
  1896. }
  1897. ]
  1898. },
  1899. {
  1900. "name": "sklearn.preprocessing.Binarizer",
  1901. "description": "Binarize data (set feature values to 0 or 1) according to a threshold.\n\nValues greater than the threshold map to 1, while values less than\nor equal to the threshold map to 0. With the default threshold of 0,\nonly positive values map to 1.\n\nBinarization is a common operation on text count data where the\nanalyst can decide to only consider the presence or absence of a\nfeature rather than a quantified number of occurrences for instance.\n\nIt can also be used as a pre-processing step for estimators that\nconsider boolean random variables (e.g. modelled using the Bernoulli\ndistribution in a Bayesian setting).\n\nRead more in the :ref:`User Guide <preprocessing_binarization>`.\n",
  1902. "attributes": [
  1903. {
  1904. "default": true,
  1905. "description": "Set to False to perform inplace binarization and avoid a copy (if\nthe input is already a numpy array or a scipy.sparse CSR matrix).\n",
  1906. "name": "copy",
  1907. "optional": true,
  1908. "type": "boolean"
  1909. },
  1910. {
  1911. "default": 0.0,
  1912. "description": "Feature values below or equal to this are replaced by 0, above it by 1.\nThreshold may not be less than 0 for operations on sparse matrices.\n",
  1913. "name": "threshold",
  1914. "optional": true,
  1915. "type": "float32"
  1916. }
  1917. ]
  1918. },
  1919. {
  1920. "name": "sklearn.preprocessing.LabelEncoder",
  1921. "description": "Encode target labels with value between 0 and n_classes-1.\n\nThis transformer should be used to encode target values, *i.e.* `y`, and\nnot the input `X`.\n\nRead more in the :ref:`User Guide <preprocessing_targets>`.\n\n.. versionadded:: 0.12\n"
  1922. },
  1923. {
  1924. "name": "sklearn.preprocessing.MultiLabelBinarizer",
  1925. "description": "Transform between iterable of iterables and a multilabel format.\n\nAlthough a list of sets or tuples is a very intuitive format for multilabel\ndata, it is unwieldy to process. This transformer converts between this\nintuitive format and the supported multilabel format: a (samples x classes)\nbinary matrix indicating the presence of a class label.\n",
  1926. "attributes": [
  1927. {
  1928. "default": null,
  1929. "description": "Indicates an ordering for the class labels.\nAll entries should be unique (cannot contain duplicate classes).\n",
  1930. "name": "classes",
  1931. "optional": true
  1932. },
  1933. {
  1934. "default": false,
  1935. "description": "Set to True if output binary array is desired in CSR sparse format.\n",
  1936. "name": "sparse_output",
  1937. "type": "boolean"
  1938. }
  1939. ]
  1940. },
  1941. {
  1942. "name": "sklearn.svm.classes.SVC",
  1943. "description": "C-Support Vector Classification.\n\nThe implementation is based on libsvm. The fit time scales at least\nquadratically with the number of samples and may be impractical\nbeyond tens of thousands of samples. For large datasets\nconsider using :class:`sklearn.svm.LinearSVC` or\n:class:`sklearn.linear_model.SGDClassifier` instead, possibly after a\n:class:`sklearn.kernel_approximation.Nystroem` transformer.\n\nThe multiclass support is handled according to a one-vs-one scheme.\n\nFor details on the precise mathematical formulation of the provided\nkernel functions and how `gamma`, `coef0` and `degree` affect each\nother, see the corresponding section in the narrative documentation:\n:ref:`svm_kernels`.\n\nRead more in the :ref:`User Guide <svm_classification>`.\n",
  1944. "attributes": [
  1945. {
  1946. "default": 1,
  1947. "description": "Regularization parameter. The strength of the regularization is\ninversely proportional to C. Must be strictly positive. The penalty\nis a squared l2 penalty.\n",
  1948. "name": "C",
  1949. "type": "float32"
  1950. },
  1951. {
  1952. "default": "rbf",
  1953. "description": "Specifies the kernel type to be used in the algorithm.\nIt must be one of 'linear', 'poly', 'rbf', 'sigmoid', 'precomputed' or\na callable.\nIf none is given, 'rbf' will be used. If a callable is given it is\nused to pre-compute the kernel matrix from data matrices; that matrix\nshould be an array of shape ``(n_samples, n_samples)``.\n",
  1954. "name": "kernel"
  1955. },
  1956. {
  1957. "default": 3,
  1958. "description": "Degree of the polynomial kernel function ('poly').\nIgnored by all other kernels.\n",
  1959. "name": "degree",
  1960. "type": "int32"
  1961. },
  1962. {
  1963. "description": "Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.\n\n- if ``gamma='scale'`` (default) is passed then it uses\n1 / (n_features * X.var()) as value of gamma,\n- if 'auto', uses 1 / n_features.\n\n.. versionchanged:: 0.22\nThe default value of ``gamma`` changed from 'auto' to 'scale'.\n",
  1964. "name": "gamma"
  1965. },
  1966. {
  1967. "default": 0,
  1968. "description": "Independent term in kernel function.\nIt is only significant in 'poly' and 'sigmoid'.\n",
  1969. "name": "coef0",
  1970. "type": "float32"
  1971. },
  1972. {
  1973. "default": true,
  1974. "description": "Whether to use the shrinking heuristic.\nSee the :ref:`User Guide <shrinking_svm>`.\n",
  1975. "name": "shrinking",
  1976. "type": "boolean"
  1977. },
  1978. {
  1979. "default": false,
  1980. "description": "Whether to enable probability estimates. This must be enabled prior\nto calling `fit`, will slow down that method as it internally uses\n5-fold cross-validation, and `predict_proba` may be inconsistent with\n`predict`. Read more in the :ref:`User Guide <scores_probabilities>`.\n",
  1981. "name": "probability",
  1982. "type": "boolean"
  1983. },
  1984. {
  1985. "default": 0.001,
  1986. "description": "Tolerance for stopping criterion.\n",
  1987. "name": "tol",
  1988. "type": "float32"
  1989. },
  1990. {
  1991. "default": 200,
  1992. "description": "Specify the size of the kernel cache (in MB).\n",
  1993. "name": "cache_size",
  1994. "type": "float32"
  1995. },
  1996. {
  1997. "default": null,
  1998. "description": "Set the parameter C of class i to class_weight[i]*C for\nSVC. If not given, all classes are supposed to have\nweight one.\nThe \"balanced\" mode uses the values of y to automatically adjust\nweights inversely proportional to class frequencies in the input data\nas ``n_samples / (n_classes * np.bincount(y))``\n",
  1999. "name": "class_weight"
  2000. },
  2001. {
  2002. "default": false,
  2003. "description": "Enable verbose output. Note that this setting takes advantage of a\nper-process runtime setting in libsvm that, if enabled, may not work\nproperly in a multithreaded context.\n",
  2004. "name": "verbose",
  2005. "type": "boolean"
  2006. },
  2007. {
  2008. "default": -1,
  2009. "description": "Hard limit on iterations within solver, or -1 for no limit.\n",
  2010. "name": "max_iter",
  2011. "type": "int32"
  2012. },
  2013. {
  2014. "default": "ovr",
  2015. "description": "Whether to return a one-vs-rest ('ovr') decision function of shape\n(n_samples, n_classes) as all other classifiers, or the original\none-vs-one ('ovo') decision function of libsvm which has shape\n(n_samples, n_classes * (n_classes - 1) / 2). However, one-vs-one\n('ovo') is always used as multi-class strategy. The parameter is\nignored for binary classification.\n\n.. versionchanged:: 0.19\ndecision_function_shape is 'ovr' by default.\n\n.. versionadded:: 0.17\n*decision_function_shape='ovr'* is recommended.\n\n.. versionchanged:: 0.17\nDeprecated *decision_function_shape='ovo' and None*.\n",
  2016. "name": "decision_function_shape"
  2017. },
  2018. {
  2019. "default": false,
  2020. "description": "If true, ``decision_function_shape='ovr'``, and number of classes > 2,\n:term:`predict` will break ties according to the confidence values of\n:term:`decision_function`; otherwise the first class among the tied\nclasses is returned. Please note that breaking ties comes at a\nrelatively high computational cost compared to a simple predict.\n\n.. versionadded:: 0.22\n",
  2021. "name": "break_ties",
  2022. "type": "boolean"
  2023. },
  2024. {
  2025. "default": null,
  2026. "description": "Controls the pseudo random number generation for shuffling the data for\nprobability estimates. Ignored when `probability` is False.\nPass an int for reproducible output across multiple function calls.\nSee :term:`Glossary <random_state>`.\n",
  2027. "name": "random_state"
  2028. }
  2029. ]
  2030. },
  2031. {
  2032. "name": "sklearn.svm.SVC",
  2033. "description": "C-Support Vector Classification.\n\nThe implementation is based on libsvm. The fit time scales at least\nquadratically with the number of samples and may be impractical\nbeyond tens of thousands of samples. For large datasets\nconsider using :class:`~sklearn.svm.LinearSVC` or\n:class:`~sklearn.linear_model.SGDClassifier` instead, possibly after a\n:class:`~sklearn.kernel_approximation.Nystroem` transformer or\nother :ref:`kernel_approximation`.\n\nThe multiclass support is handled according to a one-vs-one scheme.\n\nFor details on the precise mathematical formulation of the provided\nkernel functions and how `gamma`, `coef0` and `degree` affect each\nother, see the corresponding section in the narrative documentation:\n:ref:`svm_kernels`.\n\nTo learn how to tune SVC's hyperparameters, see the following example:\n:ref:`sphx_glr_auto_examples_model_selection_plot_nested_cross_validation_iris.py`\n\nRead more in the :ref:`User Guide <svm_classification>`.\n",
  2034. "attributes": [
  2035. {
  2036. "default": 1.0,
  2037. "description": "Regularization parameter. The strength of the regularization is\ninversely proportional to C. Must be strictly positive. The penalty\nis a squared l2 penalty. For an intuitive visualization of the effects\nof scaling the regularization parameter C, see\n:ref:`sphx_glr_auto_examples_svm_plot_svm_scale_c.py`.\n",
  2038. "name": "C",
  2039. "optional": true,
  2040. "type": "float32"
  2041. },
  2042. {
  2043. "default": "rbf",
  2044. "description": "Specifies the kernel type to be used in the algorithm. If\nnone is given, 'rbf' will be used. If a callable is given it is used to\npre-compute the kernel matrix from data matrices; that matrix should be\nan array of shape ``(n_samples, n_samples)``. For an intuitive\nvisualization of different kernel types see\n:ref:`sphx_glr_auto_examples_svm_plot_svm_kernels.py`.\n",
  2045. "name": "kernel",
  2046. "optional": true,
  2047. "type": "string"
  2048. },
  2049. {
  2050. "default": 3,
  2051. "description": "Degree of the polynomial kernel function ('poly').\nMust be non-negative. Ignored by all other kernels.\n",
  2052. "name": "degree",
  2053. "optional": true,
  2054. "type": "int32"
  2055. },
  2056. {
  2057. "default": "scale",
  2058. "description": "Kernel coefficient for 'rbf', 'poly' and 'sigmoid'.\n\n- if ``gamma='scale'`` (default) is passed then it uses\n1 / (n_features * X.var()) as value of gamma,\n- if 'auto', uses 1 / n_features\n- if float, must be non-negative.\n\n.. versionchanged:: 0.22\nThe default value of ``gamma`` changed from 'auto' to 'scale'.\n",
  2059. "name": "gamma",
  2060. "optional": true,
  2061. "type": "float32"
  2062. },
  2063. {
  2064. "default": 0.0,
  2065. "description": "Independent term in kernel function.\nIt is only significant in 'poly' and 'sigmoid'.\n",
  2066. "name": "coef0",
  2067. "optional": true,
  2068. "type": "float32"
  2069. },
  2070. {
  2071. "default": false,
  2072. "description": "Whether to enable probability estimates. This must be enabled prior\nto calling `fit`, will slow down that method as it internally uses\n5-fold cross-validation, and `predict_proba` may be inconsistent with\n`predict`. Read more in the :ref:`User Guide <scores_probabilities>`.\n",
  2073. "name": "probability",
  2074. "optional": true,
  2075. "type": "boolean"
  2076. },
  2077. {
  2078. "default": true,
  2079. "description": "Whether to use the shrinking heuristic.\nSee the :ref:`User Guide <shrinking_svm>`.\n",
  2080. "name": "shrinking",
  2081. "optional": true,
  2082. "type": "boolean"
  2083. },
  2084. {
  2085. "default": 0.001,
  2086. "description": "Tolerance for stopping criterion.\n",
  2087. "name": "tol",
  2088. "optional": true,
  2089. "type": "float32"
  2090. },
  2091. {
  2092. "default": 200.0,
  2093. "description": "Specify the size of the kernel cache (in MB).\n",
  2094. "name": "cache_size",
  2095. "optional": true,
  2096. "type": "float32"
  2097. },
  2098. {
  2099. "default": null,
  2100. "description": "Set the parameter C of class i to class_weight[i]*C for\nSVC. If not given, all classes are supposed to have\nweight one.\nThe \"balanced\" mode uses the values of y to automatically adjust\nweights inversely proportional to class frequencies in the input data\nas ``n_samples / (n_classes * np.bincount(y))``.\n",
  2101. "name": "class_weight",
  2102. "optional": true
  2103. },
  2104. {
  2105. "default": false,
  2106. "description": "Enable verbose output. Note that this setting takes advantage of a\nper-process runtime setting in libsvm that, if enabled, may not work\nproperly in a multithreaded context.\n",
  2107. "name": "verbose",
  2108. "type": "boolean"
  2109. },
  2110. {
  2111. "default": -1,
  2112. "description": "Hard limit on iterations within solver, or -1 for no limit.\n",
  2113. "name": "max_iter",
  2114. "optional": true,
  2115. "type": "int32"
  2116. },
  2117. {
  2118. "default": "ovr",
  2119. "description": "Whether to return a one-vs-rest ('ovr') decision function of shape\n(n_samples, n_classes) as all other classifiers, or the original\none-vs-one ('ovo') decision function of libsvm which has shape\n(n_samples, n_classes * (n_classes - 1) / 2). However, note that\ninternally, one-vs-one ('ovo') is always used as a multi-class strategy\nto train models; an ovr matrix is only constructed from the ovo matrix.\nThe parameter is ignored for binary classification.\n\n.. versionchanged:: 0.19\ndecision_function_shape is 'ovr' by default.\n\n.. versionadded:: 0.17\n*decision_function_shape='ovr'* is recommended.\n\n.. versionchanged:: 0.17\nDeprecated *decision_function_shape='ovo' and None*.\n",
  2120. "name": "decision_function_shape"
  2121. },
  2122. {
  2123. "default": null,
  2124. "description": "Controls the pseudo random number generation for shuffling the data for\nprobability estimates. Ignored when `probability` is False.\nPass an int for reproducible output across multiple function calls.\nSee :term:`Glossary <random_state>`.\n",
  2125. "name": "random_state",
  2126. "optional": true,
  2127. "type": "int32"
  2128. },
  2129. {
  2130. "default": false,
  2131. "description": "If true, ``decision_function_shape='ovr'``, and number of classes > 2,\n:term:`predict` will break ties according to the confidence values of\n:term:`decision_function`; otherwise the first class among the tied\nclasses is returned. Please note that breaking ties comes at a\nrelatively high computational cost compared to a simple predict.\n\n.. versionadded:: 0.22\n",
  2132. "name": "break_ties",
  2133. "optional": true,
  2134. "type": "boolean"
  2135. }
  2136. ]
  2137. },
  2138. {
  2139. "name": "sklearn.tree.tree.DecisionTreeClassifier",
  2140. "description": "A decision tree classifier.\n\nRead more in the :ref:`User Guide <tree>`.\n",
  2141. "attributes": [
  2142. {
  2143. "default": "\"gini\"",
  2144. "description": "The function to measure the quality of a split. Supported criteria are\n\"gini\" for the Gini impurity and \"entropy\" for the information gain.\n",
  2145. "name": "criterion"
  2146. },
  2147. {
  2148. "default": "\"best\"",
  2149. "description": "The strategy used to choose the split at each node. Supported\nstrategies are \"best\" to choose the best split and \"random\" to choose\nthe best random split.\n",
  2150. "name": "splitter"
  2151. },
  2152. {
  2153. "default": null,
  2154. "description": "The maximum depth of the tree. If None, then nodes are expanded until\nall leaves are pure or until all leaves contain less than\nmin_samples_split samples.\n",
  2155. "name": "max_depth",
  2156. "type": "int32"
  2157. },
  2158. {
  2159. "default": "2",
  2160. "description": "The minimum number of samples required to split an internal node:\n\n- If int, then consider `min_samples_split` as the minimum number.\n- If float, then `min_samples_split` is a fraction and\n`ceil(min_samples_split * n_samples)` are the minimum\nnumber of samples for each split.\n\n.. versionchanged:: 0.18\nAdded float values for fractions.\n",
  2161. "name": "min_samples_split"
  2162. },
  2163. {
  2164. "default": "1",
  2165. "description": "The minimum number of samples required to be at a leaf node.\nA split point at any depth will only be considered if it leaves at\nleast ``min_samples_leaf`` training samples in each of the left and\nright branches. This may have the effect of smoothing the model,\nespecially in regression.\n\n- If int, then consider `min_samples_leaf` as the minimum number.\n- If float, then `min_samples_leaf` is a fraction and\n`ceil(min_samples_leaf * n_samples)` are the minimum\nnumber of samples for each node.\n\n.. versionchanged:: 0.18\nAdded float values for fractions.\n",
  2166. "name": "min_samples_leaf"
  2167. },
  2168. {
  2169. "default": 0,
  2170. "description": "The minimum weighted fraction of the sum total of weights (of all\nthe input samples) required to be at a leaf node. Samples have\nequal weight when sample_weight is not provided.\n",
  2171. "name": "min_weight_fraction_leaf",
  2172. "type": "float32"
  2173. },
  2174. {
  2175. "default": null,
  2176. "description": "The number of features to consider when looking for the best split:\n\n- If int, then consider `max_features` features at each split.\n- If float, then `max_features` is a fraction and\n`int(max_features * n_features)` features are considered at each\nsplit.\n- If \"auto\", then `max_features=sqrt(n_features)`.\n- If \"sqrt\", then `max_features=sqrt(n_features)`.\n- If \"log2\", then `max_features=log2(n_features)`.\n- If None, then `max_features=n_features`.\n\nNote: the search for a split does not stop until at least one\nvalid partition of the node samples is found, even if it requires to\neffectively inspect more than ``max_features`` features.\n",
  2177. "name": "max_features",
  2178. "type": "int32"
  2179. },
  2180. {
  2181. "default": null,
  2182. "description": "Controls the randomness of the estimator. The features are always\nrandomly permuted at each split, even if ``splitter`` is set to\n``\"best\"``. When ``max_features < n_features``, the algorithm will\nselect ``max_features`` at random at each split before finding the best\nsplit among them. But the best found split may vary across different\nruns, even if ``max_features=n_features``. That is the case, if the\nimprovement of the criterion is identical for several splits and one\nsplit has to be selected at random. To obtain a deterministic behaviour\nduring fitting, ``random_state`` has to be fixed to an integer.\nSee :term:`Glossary <random_state>` for details.\n",
  2183. "name": "random_state",
  2184. "type": "int32"
  2185. },
  2186. {
  2187. "default": null,
  2188. "description": "Grow a tree with ``max_leaf_nodes`` in best-first fashion.\nBest nodes are defined as relative reduction in impurity.\nIf None then unlimited number of leaf nodes.\n",
  2189. "name": "max_leaf_nodes",
  2190. "type": "int32"
  2191. },
  2192. {
  2193. "default": 0,
  2194. "description": "A node will be split if this split induces a decrease of the impurity\ngreater than or equal to this value.\n\nThe weighted impurity decrease equation is the following::\n\nN_t / N * (impurity - N_t_R / N_t * right_impurity\n- N_t_L / N_t * left_impurity)\n\nwhere ``N`` is the total number of samples, ``N_t`` is the number of\nsamples at the current node, ``N_t_L`` is the number of samples in the\nleft child, and ``N_t_R`` is the number of samples in the right child.\n\n``N``, ``N_t``, ``N_t_R`` and ``N_t_L`` all refer to the weighted sum,\nif ``sample_weight`` is passed.\n\n.. versionadded:: 0.19\n",
  2195. "name": "min_impurity_decrease",
  2196. "type": "float32"
  2197. },
  2198. {
  2199. "default": 0,
  2200. "description": "Threshold for early stopping in tree growth. A node will split\nif its impurity is above the threshold, otherwise it is a leaf.\n\n.. deprecated:: 0.19\n``min_impurity_split`` has been deprecated in favor of\n``min_impurity_decrease`` in 0.19. The default value of\n``min_impurity_split`` has changed from 1e-7 to 0 in 0.23 and it\nwill be removed in 0.25. Use ``min_impurity_decrease`` instead.\n",
  2201. "name": "min_impurity_split",
  2202. "type": "float32"
  2203. },
  2204. {
  2205. "default": null,
  2206. "description": "Weights associated with classes in the form ``{class_label: weight}``.\nIf None, all classes are supposed to have weight one. For\nmulti-output problems, a list of dicts can be provided in the same\norder as the columns of y.\n\nNote that for multioutput (including multilabel) weights should be\ndefined for each class of every column in its own dict. For example,\nfor four-class multilabel classification weights should be\n[{0: 1, 1: 1}, {0: 1, 1: 5}, {0: 1, 1: 1}, {0: 1, 1: 1}] instead of\n[{1:1}, {2:5}, {3:1}, {4:1}].\n\nThe \"balanced\" mode uses the values of y to automatically adjust\nweights inversely proportional to class frequencies in the input data\nas ``n_samples / (n_classes * np.bincount(y))``\n\nFor multi-output, the weights of each column of y will be multiplied.\n\nNote that these weights will be multiplied with sample_weight (passed\nthrough the fit method) if sample_weight is specified.\n",
  2207. "name": "class_weight"
  2208. },
  2209. {
  2210. "default": "deprecated",
  2211. "description": "This parameter is deprecated and will be removed in v0.24.\n\n.. deprecated:: 0.22\n",
  2212. "name": "presort"
  2213. },
  2214. {
  2215. "default": "0.0",
  2216. "description": "Complexity parameter used for Minimal Cost-Complexity Pruning. The\nsubtree with the largest cost complexity that is smaller than\n``ccp_alpha`` will be chosen. By default, no pruning is performed. See\n:ref:`minimal_cost_complexity_pruning` for details.\n\n.. versionadded:: 0.22\n",
  2217. "name": "ccp_alpha"
  2218. }
  2219. ]
  2220. }
  2221. ]