Learning Apache Spark with Python
1. Preface
2. Why Spark with Python?
3. Configure Running Platform
4. An Introduction to Apache Spark
5. Programming with RDDs
6. Statistics and Linear Algebra Preliminaries
7. Data Exploration
8. Data Manipulation: Features
9. Regression
10. Regularization
11. Classification
12. Clustering
13. RFM Analysis
14. Text Mining
15. Social Network Analysis
16. ALS: Stock Portfolio Recommendations
17. Monte Carlo Simulation
18. Markov Chain Monte Carlo
19. Neural Network
20. Automation for Cloudera Distribution Hadoop
21. Wrap PySpark Package
22. PySpark Data Audit Library
23. Zeppelin to Jupyter notebook
24. My Cheat Sheet
25. JDBC Connection
26. Databricks Tips
27. PySpark API
28. Main Reference
Learning Apache Spark with Python
Docs » Index
Index
A | B | C | D | E | F | G | H | I | K | L | M | N | O | P | R | S | T | U | V | W
A
accuracy() (pyspark.ml.classification.LogisticRegressionSummary property)
addGrid() (pyspark.ml.tuning.ParamGridBuilder method)
AFTSurvivalRegression (class in pyspark.ml.regression)
AFTSurvivalRegressionModel (class in pyspark.ml.regression)
aic() (pyspark.ml.regression.GeneralizedLinearRegressionSummary property)
ALS (class in pyspark.ml.recommendation)
ALSModel (class in pyspark.ml.recommendation)
areaUnderROC() (pyspark.ml.classification.BinaryLogisticRegressionSummary property)
assignClusters() (pyspark.ml.clustering.PowerIterationClustering method)
avgMetrics (pyspark.ml.tuning.CrossValidatorModel attribute)
B
baseOn() (pyspark.ml.tuning.ParamGridBuilder method)
bestModel (pyspark.ml.tuning.CrossValidatorModel attribute)
(pyspark.ml.tuning.TrainValidationSplitModel attribute)
BinaryClassificationEvaluator (class in pyspark.ml.evaluation)
BinaryLogisticRegressionSummary (class in pyspark.ml.classification)
BinaryLogisticRegressionTrainingSummary (class in pyspark.ml.classification)
BisectingKMeans (class in pyspark.ml.clustering)
BisectingKMeansModel (class in pyspark.ml.clustering)
BisectingKMeansSummary (class in pyspark.ml.clustering)
boundaries() (pyspark.ml.regression.IsotonicRegressionModel property)
build() (pyspark.ml.tuning.ParamGridBuilder method)
C
ChiSquareTest (class in pyspark.ml.stat)
clusterCenters() (pyspark.ml.clustering.BisectingKMeansModel method)
(pyspark.ml.clustering.KMeansModel method)
ClusteringEvaluator (class in pyspark.ml.evaluation)
coefficientMatrix() (pyspark.ml.classification.LogisticRegressionModel property)
coefficients() (pyspark.ml.classification.LinearSVCModel property)
(pyspark.ml.classification.LogisticRegressionModel property)
(pyspark.ml.regression.AFTSurvivalRegressionModel property)
(pyspark.ml.regression.GeneralizedLinearRegressionModel property)
(pyspark.ml.regression.LinearRegressionModel property)
coefficientStandardErrors() (pyspark.ml.regression.GeneralizedLinearRegressionTrainingSummary property)
(pyspark.ml.regression.LinearRegressionSummary property)
computeCost() (pyspark.ml.clustering.BisectingKMeansModel method)
(pyspark.ml.clustering.KMeansModel method)
Configure Spark on Mac and Ubuntu
copy() (pyspark.ml.classification.OneVsRest method)
(pyspark.ml.classification.OneVsRestModel method)
(pyspark.ml.pipeline.Pipeline method)
(pyspark.ml.pipeline.PipelineModel method)
(pyspark.ml.tuning.CrossValidator method)
(pyspark.ml.tuning.CrossValidatorModel method)
(pyspark.ml.tuning.TrainValidationSplit method)
(pyspark.ml.tuning.TrainValidationSplitModel method)
corr() (pyspark.ml.stat.Correlation static method)
Correlation (class in pyspark.ml.stat)
count() (pyspark.ml.stat.Summarizer static method)
CrossValidator (class in pyspark.ml.tuning)
CrossValidatorModel (class in pyspark.ml.tuning)
D
DecisionTreeClassificationModel (class in pyspark.ml.classification)
DecisionTreeClassifier (class in pyspark.ml.classification)
DecisionTreeRegressionModel (class in pyspark.ml.regression)
DecisionTreeRegressor (class in pyspark.ml.regression)
degreesOfFreedom() (pyspark.ml.regression.GeneralizedLinearRegressionSummary property)
(pyspark.ml.regression.LinearRegressionSummary property)
describeTopics() (pyspark.ml.clustering.LDAModel method)
deviance() (pyspark.ml.regression.GeneralizedLinearRegressionSummary property)
devianceResiduals() (pyspark.ml.regression.LinearRegressionSummary property)
dispersion() (pyspark.ml.regression.GeneralizedLinearRegressionSummary property)
DistributedLDAModel (class in pyspark.ml.clustering)
E
estimatedDocConcentration() (pyspark.ml.clustering.LDAModel method)
evaluate() (pyspark.ml.classification.LogisticRegressionModel method)
(pyspark.ml.evaluation.Evaluator method)
(pyspark.ml.regression.GeneralizedLinearRegressionModel method)
(pyspark.ml.regression.LinearRegressionModel method)
evaluateEachIteration() (pyspark.ml.classification.GBTClassificationModel method)
(pyspark.ml.regression.GBTRegressionModel method)
Evaluator (class in pyspark.ml.evaluation)
explainedVariance() (pyspark.ml.regression.LinearRegressionSummary property)
F
falsePositiveRateByLabel() (pyspark.ml.classification.LogisticRegressionSummary property)
featureImportances() (pyspark.ml.classification.DecisionTreeClassificationModel property)
(pyspark.ml.classification.GBTClassificationModel property)
(pyspark.ml.classification.RandomForestClassificationModel property)
(pyspark.ml.regression.DecisionTreeRegressionModel property)
(pyspark.ml.regression.GBTRegressionModel property)
(pyspark.ml.regression.RandomForestRegressionModel property)
featuresCol() (pyspark.ml.classification.LogisticRegressionSummary property)
(pyspark.ml.regression.LinearRegressionSummary property)
fMeasureByLabel() (pyspark.ml.classification.LogisticRegressionSummary method)
fMeasureByThreshold() (pyspark.ml.classification.BinaryLogisticRegressionSummary property)
G
GaussianMixture (class in pyspark.ml.clustering)
GaussianMixtureModel (class in pyspark.ml.clustering)
GaussianMixtureSummary (class in pyspark.ml.clustering)
gaussiansDF() (pyspark.ml.clustering.GaussianMixtureModel property)
GBTClassificationModel (class in pyspark.ml.classification)
GBTClassifier (class in pyspark.ml.classification)
GBTRegressionModel (class in pyspark.ml.regression)
GBTRegressor (class in pyspark.ml.regression)
GeneralizedLinearRegression (class in pyspark.ml.regression)
GeneralizedLinearRegressionModel (class in pyspark.ml.regression)
GeneralizedLinearRegressionSummary (class in pyspark.ml.regression)
GeneralizedLinearRegressionTrainingSummary (class in pyspark.ml.regression)
getAlpha() (pyspark.ml.recommendation.ALS method)
getBlockSize() (pyspark.ml.classification.MultilayerPerceptronClassifier method)
getCensorCol() (pyspark.ml.regression.AFTSurvivalRegression method)
getCheckpointFiles() (pyspark.ml.clustering.DistributedLDAModel method)
getColdStartStrategy() (pyspark.ml.recommendation.ALS method)
getDistanceMeasure() (pyspark.ml.clustering.BisectingKMeans method)
(pyspark.ml.clustering.KMeans method)
(pyspark.ml.evaluation.ClusteringEvaluator method)
getDocConcentration() (pyspark.ml.clustering.LDA method)
getDstCol() (pyspark.ml.clustering.PowerIterationClustering method)
getEpsilon() (pyspark.ml.regression.LinearRegression method)
getFamily() (pyspark.ml.classification.LogisticRegression method)
(pyspark.ml.regression.GeneralizedLinearRegression method)
getFeatureIndex() (pyspark.ml.regression.IsotonicRegression method)
getFinalStorageLevel() (pyspark.ml.recommendation.ALS method)
getImplicitPrefs() (pyspark.ml.recommendation.ALS method)
getInitialWeights() (pyspark.ml.classification.MultilayerPerceptronClassifier method)
getInitMode() (pyspark.ml.clustering.KMeans method)
(pyspark.ml.clustering.PowerIterationClustering method)
getInitSteps() (pyspark.ml.clustering.KMeans method)
getIntermediateStorageLevel() (pyspark.ml.recommendation.ALS method)
getIsotonic() (pyspark.ml.regression.IsotonicRegression method)
getItemCol() (pyspark.ml.recommendation.ALS method)
getK() (pyspark.ml.clustering.BisectingKMeans method)
(pyspark.ml.clustering.GaussianMixture method)
(pyspark.ml.clustering.KMeans method)
(pyspark.ml.clustering.LDA method)
(pyspark.ml.clustering.PowerIterationClustering method)
getKeepLastCheckpoint() (pyspark.ml.clustering.LDA method)
getLayers() (pyspark.ml.classification.MultilayerPerceptronClassifier method)
getLearningDecay() (pyspark.ml.clustering.LDA method)
getLearningOffset() (pyspark.ml.clustering.LDA method)
getLink() (pyspark.ml.regression.GeneralizedLinearRegression method)
getLinkPower() (pyspark.ml.regression.GeneralizedLinearRegression method)
getLinkPredictionCol() (pyspark.ml.regression.GeneralizedLinearRegression method)
getLossType() (pyspark.ml.classification.GBTClassifier method)
(pyspark.ml.regression.GBTRegressor method)
getLowerBoundsOnCoefficients() (pyspark.ml.classification.LogisticRegression method)
getLowerBoundsOnIntercepts() (pyspark.ml.classification.LogisticRegression method)
getMetricName() (pyspark.ml.evaluation.BinaryClassificationEvaluator method)
(pyspark.ml.evaluation.ClusteringEvaluator method)
(pyspark.ml.evaluation.MulticlassClassificationEvaluator method)
(pyspark.ml.evaluation.RegressionEvaluator method)
getMinDivisibleClusterSize() (pyspark.ml.clustering.BisectingKMeans method)
getModelType() (pyspark.ml.classification.NaiveBayes method)
getNonnegative() (pyspark.ml.recommendation.ALS method)
getNumFolds() (pyspark.ml.tuning.CrossValidator method)
getNumItemBlocks() (pyspark.ml.recommendation.ALS method)
getNumUserBlocks() (pyspark.ml.recommendation.ALS method)
getOffsetCol() (pyspark.ml.regression.GeneralizedLinearRegression method)
getOptimizeDocConcentration() (pyspark.ml.clustering.LDA method)
getOptimizer() (pyspark.ml.clustering.LDA method)
getQuantileProbabilities() (pyspark.ml.regression.AFTSurvivalRegression method)
getQuantilesCol() (pyspark.ml.regression.AFTSurvivalRegression method)
getRank() (pyspark.ml.recommendation.ALS method)
getRatingCol() (pyspark.ml.recommendation.ALS method)
getSmoothing() (pyspark.ml.classification.NaiveBayes method)
getSrcCol() (pyspark.ml.clustering.PowerIterationClustering method)
getStagePath() (pyspark.ml.pipeline.PipelineSharedReadWrite static method)
getStages() (pyspark.ml.pipeline.Pipeline method)
getStepSize() (pyspark.ml.classification.MultilayerPerceptronClassifier method)
getSubsamplingRate() (pyspark.ml.clustering.LDA method)
getThreshold() (pyspark.ml.classification.LogisticRegression method)
getThresholds() (pyspark.ml.classification.LogisticRegression method)
getTopicConcentration() (pyspark.ml.clustering.LDA method)
getTopicDistributionCol() (pyspark.ml.clustering.LDA method)
getTrainRatio() (pyspark.ml.tuning.TrainValidationSplit method)
getUpperBoundsOnCoefficients() (pyspark.ml.classification.LogisticRegression method)
getUpperBoundsOnIntercepts() (pyspark.ml.classification.LogisticRegression method)
getUserCol() (pyspark.ml.recommendation.ALS method)
getVariancePower() (pyspark.ml.regression.GeneralizedLinearRegression method)
H
hasSummary() (pyspark.ml.classification.LogisticRegressionModel property)
(pyspark.ml.clustering.BisectingKMeansModel property)
(pyspark.ml.clustering.GaussianMixtureModel property)
(pyspark.ml.clustering.KMeansModel property)
(pyspark.ml.regression.GeneralizedLinearRegressionModel property)
(pyspark.ml.regression.LinearRegressionModel property)
I
intercept() (pyspark.ml.classification.LinearSVCModel property)
(pyspark.ml.classification.LogisticRegressionModel property)
(pyspark.ml.regression.AFTSurvivalRegressionModel property)
(pyspark.ml.regression.GeneralizedLinearRegressionModel property)
(pyspark.ml.regression.LinearRegressionModel property)
interceptVector() (pyspark.ml.classification.LogisticRegressionModel property)
isDistributed() (pyspark.ml.clustering.LDAModel method)
isLargerBetter() (pyspark.ml.evaluation.Evaluator method)
IsotonicRegression (class in pyspark.ml.regression)
IsotonicRegressionModel (class in pyspark.ml.regression)
itemFactors() (pyspark.ml.recommendation.ALSModel property)
K
KMeans (class in pyspark.ml.clustering)
KMeansModel (class in pyspark.ml.clustering)
KolmogorovSmirnovTest (class in pyspark.ml.stat)
L
labelCol() (pyspark.ml.classification.LogisticRegressionSummary property)
(pyspark.ml.regression.LinearRegressionSummary property)
labels() (pyspark.ml.classification.LogisticRegressionSummary property)
layers() (pyspark.ml.classification.MultilayerPerceptronClassificationModel property)
LDA (class in pyspark.ml.clustering)
LDAModel (class in pyspark.ml.clustering)
LinearRegression (class in pyspark.ml.regression)
LinearRegressionModel (class in pyspark.ml.regression)
LinearRegressionSummary (class in pyspark.ml.regression)
LinearRegressionTrainingSummary (class in pyspark.ml.regression)
LinearSVC (class in pyspark.ml.classification)
LinearSVCModel (class in pyspark.ml.classification)
load() (pyspark.ml.pipeline.PipelineModelReader method)
(pyspark.ml.pipeline.PipelineReader method)
(pyspark.ml.pipeline.PipelineSharedReadWrite static method)
LocalLDAModel (class in pyspark.ml.clustering)
LogisticRegression (class in pyspark.ml.classification)
LogisticRegressionModel (class in pyspark.ml.classification)
LogisticRegressionSummary (class in pyspark.ml.classification)
LogisticRegressionTrainingSummary (class in pyspark.ml.classification)
logLikelihood() (pyspark.ml.clustering.GaussianMixtureSummary property)
(pyspark.ml.clustering.LDAModel method)
logPerplexity() (pyspark.ml.clustering.LDAModel method)
logPrior() (pyspark.ml.clustering.DistributedLDAModel method)
M
max() (pyspark.ml.stat.Summarizer static method)
mean() (pyspark.ml.stat.Summarizer static method)
meanAbsoluteError() (pyspark.ml.regression.LinearRegressionSummary property)
meanSquaredError() (pyspark.ml.regression.LinearRegressionSummary property)
metrics() (pyspark.ml.stat.Summarizer static method)
min() (pyspark.ml.stat.Summarizer static method)
module
pyspark.ml.classification
pyspark.ml.clustering
pyspark.ml.evaluation
pyspark.ml.pipeline
pyspark.ml.recommendation
pyspark.ml.regression
pyspark.ml.stat
pyspark.ml.tuning
MulticlassClassificationEvaluator (class in pyspark.ml.evaluation)
MultilayerPerceptronClassificationModel (class in pyspark.ml.classification)
MultilayerPerceptronClassifier (class in pyspark.ml.classification)
N
NaiveBayes (class in pyspark.ml.classification)
NaiveBayesModel (class in pyspark.ml.classification)
normL1() (pyspark.ml.stat.Summarizer static method)
normL2() (pyspark.ml.stat.Summarizer static method)
nullDeviance() (pyspark.ml.regression.GeneralizedLinearRegressionSummary property)
numInstances() (pyspark.ml.regression.GeneralizedLinearRegressionSummary property)
(pyspark.ml.regression.LinearRegressionSummary property)
numIterations() (pyspark.ml.regression.GeneralizedLinearRegressionTrainingSummary property)
numNonZeros() (pyspark.ml.stat.Summarizer static method)
O
objectiveHistory() (pyspark.ml.classification.LogisticRegressionTrainingSummary property)
(pyspark.ml.regression.LinearRegressionTrainingSummary property)
OneVsRest (class in pyspark.ml.classification)
OneVsRestModel (class in pyspark.ml.classification)
P
ParamGridBuilder (class in pyspark.ml.tuning)
pi() (pyspark.ml.classification.NaiveBayesModel property)
Pipeline (class in pyspark.ml.pipeline)
PipelineModel (class in pyspark.ml.pipeline)
PipelineModelReader (class in pyspark.ml.pipeline)
PipelineModelWriter (class in pyspark.ml.pipeline)
PipelineReader (class in pyspark.ml.pipeline)
PipelineSharedReadWrite (class in pyspark.ml.pipeline)
PipelineWriter (class in pyspark.ml.pipeline)
PowerIterationClustering (class in pyspark.ml.clustering)
pr() (pyspark.ml.classification.BinaryLogisticRegressionSummary property)
precisionByLabel() (pyspark.ml.classification.LogisticRegressionSummary property)
precisionByThreshold() (pyspark.ml.classification.BinaryLogisticRegressionSummary property)
predict() (pyspark.ml.regression.AFTSurvivalRegressionModel method)
predictionCol() (pyspark.ml.classification.LogisticRegressionSummary property)
(pyspark.ml.regression.GeneralizedLinearRegressionSummary property)
(pyspark.ml.regression.LinearRegressionSummary property)
predictions() (pyspark.ml.classification.LogisticRegressionSummary property)
(pyspark.ml.regression.GeneralizedLinearRegressionSummary property)
(pyspark.ml.regression.IsotonicRegressionModel property)
(pyspark.ml.regression.LinearRegressionSummary property)
predictQuantiles() (pyspark.ml.regression.AFTSurvivalRegressionModel method)
probability() (pyspark.ml.clustering.GaussianMixtureSummary property)
probabilityCol() (pyspark.ml.classification.LogisticRegressionSummary property)
(pyspark.ml.clustering.GaussianMixtureSummary property)
pValues() (pyspark.ml.regression.GeneralizedLinearRegressionTrainingSummary property)
(pyspark.ml.regression.LinearRegressionSummary property)
pyspark.ml.classification
module
pyspark.ml.clustering
module
pyspark.ml.evaluation
module
pyspark.ml.pipeline
module
pyspark.ml.recommendation
module
pyspark.ml.regression
module
pyspark.ml.stat
module
pyspark.ml.tuning
module
R
r2() (pyspark.ml.regression.LinearRegressionSummary property)
r2adj() (pyspark.ml.regression.LinearRegressionSummary property)
RandomForestClassificationModel (class in pyspark.ml.classification)
RandomForestClassifier (class in pyspark.ml.classification)
RandomForestRegressionModel (class in pyspark.ml.regression)
RandomForestRegressor (class in pyspark.ml.regression)
rank() (pyspark.ml.recommendation.ALSModel property)
(pyspark.ml.regression.GeneralizedLinearRegressionSummary property)
read() (pyspark.ml.pipeline.Pipeline class method)
(pyspark.ml.pipeline.PipelineModel class method)
(pyspark.ml.tuning.CrossValidator class method)
(pyspark.ml.tuning.CrossValidatorModel class method)
(pyspark.ml.tuning.TrainValidationSplit class method)
(pyspark.ml.tuning.TrainValidationSplitModel class method)
recallByLabel() (pyspark.ml.classification.LogisticRegressionSummary property)
recallByThreshold() (pyspark.ml.classification.BinaryLogisticRegressionSummary property)
recommendForAllItems() (pyspark.ml.recommendation.ALSModel method)
recommendForAllUsers() (pyspark.ml.recommendation.ALSModel method)
recommendForItemSubset() (pyspark.ml.recommendation.ALSModel method)
recommendForUserSubset() (pyspark.ml.recommendation.ALSModel method)
RegressionEvaluator (class in pyspark.ml.evaluation)
residualDegreeOfFreedom() (pyspark.ml.regression.GeneralizedLinearRegressionSummary property)
residualDegreeOfFreedomNull() (pyspark.ml.regression.GeneralizedLinearRegressionSummary property)
residuals() (pyspark.ml.regression.GeneralizedLinearRegressionSummary method)
(pyspark.ml.regression.LinearRegressionSummary property)
roc() (pyspark.ml.classification.BinaryLogisticRegressionSummary property)
rootMeanSquaredError() (pyspark.ml.regression.LinearRegressionSummary property)
Run on Databricks Community Cloud
S
saveImpl() (pyspark.ml.pipeline.PipelineModelWriter method)
(pyspark.ml.pipeline.PipelineSharedReadWrite static method)
(pyspark.ml.pipeline.PipelineWriter method)
scale() (pyspark.ml.regression.AFTSurvivalRegressionModel property)
(pyspark.ml.regression.LinearRegressionModel property)
Set up Spark on Cloud
setAlpha() (pyspark.ml.recommendation.ALS method)
setBlockSize() (pyspark.ml.classification.MultilayerPerceptronClassifier method)
setCensorCol() (pyspark.ml.regression.AFTSurvivalRegression method)
setColdStartStrategy() (pyspark.ml.recommendation.ALS method)
setDistanceMeasure() (pyspark.ml.clustering.BisectingKMeans method)
(pyspark.ml.clustering.KMeans method)
(pyspark.ml.evaluation.ClusteringEvaluator method)
setDocConcentration() (pyspark.ml.clustering.LDA method)
setDstCol() (pyspark.ml.clustering.PowerIterationClustering method)
setEpsilon() (pyspark.ml.regression.LinearRegression method)
setFamily() (pyspark.ml.classification.LogisticRegression method)
(pyspark.ml.regression.GeneralizedLinearRegression method)
setFeatureIndex() (pyspark.ml.regression.IsotonicRegression method)
setFeatureSubsetStrategy() (pyspark.ml.classification.GBTClassifier method)
(pyspark.ml.classification.RandomForestClassifier method)
(pyspark.ml.regression.GBTRegressor method)
(pyspark.ml.regression.RandomForestRegressor method)
setFinalStorageLevel() (pyspark.ml.recommendation.ALS method)
setImplicitPrefs() (pyspark.ml.recommendation.ALS method)
setInitialWeights() (pyspark.ml.classification.MultilayerPerceptronClassifier method)
setInitMode() (pyspark.ml.clustering.KMeans method)
(pyspark.ml.clustering.PowerIterationClustering method)
setInitSteps() (pyspark.ml.clustering.KMeans method)
setIntermediateStorageLevel() (pyspark.ml.recommendation.ALS method)
setIsotonic() (pyspark.ml.regression.IsotonicRegression method)
setItemCol() (pyspark.ml.recommendation.ALS method)
setK() (pyspark.ml.clustering.BisectingKMeans method)
(pyspark.ml.clustering.GaussianMixture method)
(pyspark.ml.clustering.KMeans method)
(pyspark.ml.clustering.LDA method)
(pyspark.ml.clustering.PowerIterationClustering method)
setKeepLastCheckpoint() (pyspark.ml.clustering.LDA method)
setLayers() (pyspark.ml.classification.MultilayerPerceptronClassifier method)
setLearningDecay() (pyspark.ml.clustering.LDA method)
setLearningOffset() (pyspark.ml.clustering.LDA method)
setLink() (pyspark.ml.regression.GeneralizedLinearRegression method)
setLinkPower() (pyspark.ml.regression.GeneralizedLinearRegression method)
setLinkPredictionCol() (pyspark.ml.regression.GeneralizedLinearRegression method)
setLossType() (pyspark.ml.classification.GBTClassifier method)
(pyspark.ml.regression.GBTRegressor method)
setLowerBoundsOnCoefficients() (pyspark.ml.classification.LogisticRegression method)
setLowerBoundsOnIntercepts() (pyspark.ml.classification.LogisticRegression method)
setMetricName() (pyspark.ml.evaluation.BinaryClassificationEvaluator method)
(pyspark.ml.evaluation.ClusteringEvaluator method)
(pyspark.ml.evaluation.MulticlassClassificationEvaluator method)
(pyspark.ml.evaluation.RegressionEvaluator method)
setMinDivisibleClusterSize() (pyspark.ml.clustering.BisectingKMeans method)
setModelType() (pyspark.ml.classification.NaiveBayes method)
setNonnegative() (pyspark.ml.recommendation.ALS method)
setNumBlocks() (pyspark.ml.recommendation.ALS method)
setNumFolds() (pyspark.ml.tuning.CrossValidator method)
setNumItemBlocks() (pyspark.ml.recommendation.ALS method)
setNumUserBlocks() (pyspark.ml.recommendation.ALS method)
setOffsetCol() (pyspark.ml.regression.GeneralizedLinearRegression method)
setOptimizeDocConcentration() (pyspark.ml.clustering.LDA method)
setOptimizer() (pyspark.ml.clustering.LDA method)
setParams() (pyspark.ml.classification.DecisionTreeClassifier method)
(pyspark.ml.classification.GBTClassifier method)
(pyspark.ml.classification.LinearSVC method)
(pyspark.ml.classification.LogisticRegression method)
(pyspark.ml.classification.MultilayerPerceptronClassifier method)
(pyspark.ml.classification.NaiveBayes method)
(pyspark.ml.classification.OneVsRest method)
(pyspark.ml.classification.RandomForestClassifier method)
(pyspark.ml.clustering.BisectingKMeans method)
(pyspark.ml.clustering.GaussianMixture method)
(pyspark.ml.clustering.KMeans method)
(pyspark.ml.clustering.LDA method)
(pyspark.ml.clustering.PowerIterationClustering method)
(pyspark.ml.evaluation.BinaryClassificationEvaluator method)
(pyspark.ml.evaluation.ClusteringEvaluator method)
(pyspark.ml.evaluation.MulticlassClassificationEvaluator method)
(pyspark.ml.evaluation.RegressionEvaluator method)
(pyspark.ml.pipeline.Pipeline method)
(pyspark.ml.recommendation.ALS method)
(pyspark.ml.regression.AFTSurvivalRegression method)
(pyspark.ml.regression.DecisionTreeRegressor method)
(pyspark.ml.regression.GBTRegressor method)
(pyspark.ml.regression.GeneralizedLinearRegression method)
(pyspark.ml.regression.IsotonicRegression method)
(pyspark.ml.regression.LinearRegression method)
(pyspark.ml.regression.RandomForestRegressor method)
(pyspark.ml.tuning.CrossValidator method)
(pyspark.ml.tuning.TrainValidationSplit method)
setQuantileProbabilities() (pyspark.ml.regression.AFTSurvivalRegression method)
setQuantilesCol() (pyspark.ml.regression.AFTSurvivalRegression method)
setRank() (pyspark.ml.recommendation.ALS method)
setRatingCol() (pyspark.ml.recommendation.ALS method)
setSmoothing() (pyspark.ml.classification.NaiveBayes method)
setSrcCol() (pyspark.ml.clustering.PowerIterationClustering method)
setStages() (pyspark.ml.pipeline.Pipeline method)
setStepSize() (pyspark.ml.classification.MultilayerPerceptronClassifier method)
setSubsamplingRate() (pyspark.ml.clustering.LDA method)
setThreshold() (pyspark.ml.classification.LogisticRegression method)
setThresholds() (pyspark.ml.classification.LogisticRegression method)
setTopicConcentration() (pyspark.ml.clustering.LDA method)
setTopicDistributionCol() (pyspark.ml.clustering.LDA method)
setTrainRatio() (pyspark.ml.tuning.TrainValidationSplit method)
setUpperBoundsOnCoefficients() (pyspark.ml.classification.LogisticRegression method)
setUpperBoundsOnIntercepts() (pyspark.ml.classification.LogisticRegression method)
setUserCol() (pyspark.ml.recommendation.ALS method)
setVariancePower() (pyspark.ml.regression.GeneralizedLinearRegression method)
solver() (pyspark.ml.regression.GeneralizedLinearRegressionTrainingSummary property)
subModels (pyspark.ml.tuning.CrossValidatorModel attribute)
(pyspark.ml.tuning.TrainValidationSplitModel attribute)
Summarizer (class in pyspark.ml.stat)
summary() (pyspark.ml.classification.LogisticRegressionModel property)
(pyspark.ml.clustering.BisectingKMeansModel property)
(pyspark.ml.clustering.GaussianMixtureModel property)
(pyspark.ml.clustering.KMeansModel property)
(pyspark.ml.regression.GeneralizedLinearRegressionModel property)
(pyspark.ml.regression.LinearRegressionModel property)
(pyspark.ml.stat.SummaryBuilder method)
SummaryBuilder (class in pyspark.ml.stat)
T
test() (pyspark.ml.stat.ChiSquareTest static method)
(pyspark.ml.stat.KolmogorovSmirnovTest static method)
theta() (pyspark.ml.classification.NaiveBayesModel property)
toLocal() (pyspark.ml.clustering.DistributedLDAModel method)
topicsMatrix() (pyspark.ml.clustering.LDAModel method)
totalIterations() (pyspark.ml.classification.LogisticRegressionTrainingSummary property)
(pyspark.ml.regression.LinearRegressionTrainingSummary property)
trainingLogLikelihood() (pyspark.ml.clustering.DistributedLDAModel method)
TrainValidationSplit (class in pyspark.ml.tuning)
TrainValidationSplitModel (class in pyspark.ml.tuning)
trees() (pyspark.ml.classification.GBTClassificationModel property)
(pyspark.ml.classification.RandomForestClassificationModel property)
(pyspark.ml.regression.GBTRegressionModel property)
(pyspark.ml.regression.RandomForestRegressionModel property)
truePositiveRateByLabel() (pyspark.ml.classification.LogisticRegressionSummary property)
tValues() (pyspark.ml.regression.GeneralizedLinearRegressionTrainingSummary property)
(pyspark.ml.regression.LinearRegressionSummary property)
U
userFactors() (pyspark.ml.recommendation.ALSModel property)
V
validateStages() (pyspark.ml.pipeline.PipelineSharedReadWrite static method)
validationMetrics (pyspark.ml.tuning.TrainValidationSplitModel attribute)
variance() (pyspark.ml.stat.Summarizer static method)
vocabSize() (pyspark.ml.clustering.LDAModel method)
W
weightedFalsePositiveRate() (pyspark.ml.classification.LogisticRegressionSummary property)
weightedFMeasure() (pyspark.ml.classification.LogisticRegressionSummary method)
weightedPrecision() (pyspark.ml.classification.LogisticRegressionSummary property)
weightedRecall() (pyspark.ml.classification.LogisticRegressionSummary property)
weightedTruePositiveRate() (pyspark.ml.classification.LogisticRegressionSummary property)
weights() (pyspark.ml.classification.MultilayerPerceptronClassificationModel property)
(pyspark.ml.clustering.GaussianMixtureModel property)
write() (pyspark.ml.pipeline.Pipeline method)
(pyspark.ml.pipeline.PipelineModel method)
(pyspark.ml.tuning.CrossValidator method)
(pyspark.ml.tuning.CrossValidatorModel method)
(pyspark.ml.tuning.TrainValidationSplit method)
(pyspark.ml.tuning.TrainValidationSplitModel method)