author     Sean Owen <sowen@cloudera.com>        2015-07-31 13:45:28 -0700
committer  Xiangrui Meng <meng@databricks.com>   2015-07-31 13:45:28 -0700
commit     873ab0f9692d8ea6220abdb8d9200041068372a8 (patch)
tree       e1116f9c5a53c796943ad189be61aceca5f31653 /docs
parent     815c8245f47e61226a04e2e02f508457b5e9e536 (diff)
[SPARK-9490] [DOCS] [MLLIB] MLlib evaluation metrics guide example python code uses deprecated print statement
Use print(x) not print x for Python 3 in eval examples.

CC sethah mengxr -- just wanted to close this out before 1.5.

Author: Sean Owen <sowen@cloudera.com>

Closes #7822 from srowen/SPARK-9490 and squashes the following commits:

01abeba [Sean Owen] Change "print x" to "print(x)" in the rest of the docs too
bd7f7fb [Sean Owen] Use print(x) not print x for Python 3 in eval examples
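Every hunk below applies the same one-line change: the Python 2 print statement becomes a call to the print() function, which is the only form Python 3 accepts. A minimal sketch of the pattern (illustrative only; the value is a placeholder, not taken from the Spark docs):

    from __future__ import print_function  # optional on Python 3; on Python 2 it disables the print statement

    # Placeholder value standing in for something like metrics.precision()
    precision = 0.75

    # Python 2 only -- a SyntaxError on Python 3:
    #     print "Precision = %s" % precision

    # Works on both Python 2 and Python 3:
    print("Precision = %s" % precision)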
Diffstat (limited to 'docs')
-rw-r--r--  docs/ml-guide.md                      2
-rw-r--r--  docs/mllib-evaluation-metrics.md     66
-rw-r--r--  docs/mllib-feature-extraction.md      2
-rw-r--r--  docs/mllib-statistics.md             20
-rw-r--r--  docs/quick-start.md                   2
-rw-r--r--  docs/sql-programming-guide.md         6
-rw-r--r--  docs/streaming-programming-guide.md   2
7 files changed, 50 insertions, 50 deletions
diff --git a/docs/ml-guide.md b/docs/ml-guide.md
index 8c46adf256..b6ca50e98d 100644
--- a/docs/ml-guide.md
+++ b/docs/ml-guide.md
@@ -561,7 +561,7 @@ test = sc.parallelize([(4L, "spark i j k"),
prediction = model.transform(test)
selected = prediction.select("id", "text", "prediction")
for row in selected.collect():
- print row
+ print(row)
sc.stop()
{% endhighlight %}
diff --git a/docs/mllib-evaluation-metrics.md b/docs/mllib-evaluation-metrics.md
index 4ca0bb06b2..7066d5c974 100644
--- a/docs/mllib-evaluation-metrics.md
+++ b/docs/mllib-evaluation-metrics.md
@@ -302,10 +302,10 @@ predictionAndLabels = test.map(lambda lp: (float(model.predict(lp.features)), lp
metrics = BinaryClassificationMetrics(predictionAndLabels)
# Area under precision-recall curve
-print "Area under PR = %s" % metrics.areaUnderPR
+print("Area under PR = %s" % metrics.areaUnderPR)
# Area under ROC curve
-print "Area under ROC = %s" % metrics.areaUnderROC
+print("Area under ROC = %s" % metrics.areaUnderROC)
{% endhighlight %}
@@ -606,24 +606,24 @@ metrics = MulticlassMetrics(predictionAndLabels)
precision = metrics.precision()
recall = metrics.recall()
f1Score = metrics.fMeasure()
-print "Summary Stats"
-print "Precision = %s" % precision
-print "Recall = %s" % recall
-print "F1 Score = %s" % f1Score
+print("Summary Stats")
+print("Precision = %s" % precision)
+print("Recall = %s" % recall)
+print("F1 Score = %s" % f1Score)
# Statistics by class
labels = data.map(lambda lp: lp.label).distinct().collect()
for label in sorted(labels):
- print "Class %s precision = %s" % (label, metrics.precision(label))
- print "Class %s recall = %s" % (label, metrics.recall(label))
- print "Class %s F1 Measure = %s" % (label, metrics.fMeasure(label, beta=1.0))
+ print("Class %s precision = %s" % (label, metrics.precision(label)))
+ print("Class %s recall = %s" % (label, metrics.recall(label)))
+ print("Class %s F1 Measure = %s" % (label, metrics.fMeasure(label, beta=1.0)))
# Weighted stats
-print "Weighted recall = %s" % metrics.weightedRecall
-print "Weighted precision = %s" % metrics.weightedPrecision
-print "Weighted F(1) Score = %s" % metrics.weightedFMeasure()
-print "Weighted F(0.5) Score = %s" % metrics.weightedFMeasure(beta=0.5)
-print "Weighted false positive rate = %s" % metrics.weightedFalsePositiveRate
+print("Weighted recall = %s" % metrics.weightedRecall)
+print("Weighted precision = %s" % metrics.weightedPrecision)
+print("Weighted F(1) Score = %s" % metrics.weightedFMeasure())
+print("Weighted F(0.5) Score = %s" % metrics.weightedFMeasure(beta=0.5))
+print("Weighted false positive rate = %s" % metrics.weightedFalsePositiveRate)
{% endhighlight %}
</div>
@@ -881,28 +881,28 @@ scoreAndLabels = sc.parallelize([
metrics = MultilabelMetrics(scoreAndLabels)
# Summary stats
-print "Recall = %s" % metrics.recall()
-print "Precision = %s" % metrics.precision()
-print "F1 measure = %s" % metrics.f1Measure()
-print "Accuracy = %s" % metrics.accuracy
+print("Recall = %s" % metrics.recall())
+print("Precision = %s" % metrics.precision())
+print("F1 measure = %s" % metrics.f1Measure())
+print("Accuracy = %s" % metrics.accuracy)
# Individual label stats
labels = scoreAndLabels.flatMap(lambda x: x[1]).distinct().collect()
for label in labels:
- print "Class %s precision = %s" % (label, metrics.precision(label))
- print "Class %s recall = %s" % (label, metrics.recall(label))
- print "Class %s F1 Measure = %s" % (label, metrics.f1Measure(label))
+ print("Class %s precision = %s" % (label, metrics.precision(label)))
+ print("Class %s recall = %s" % (label, metrics.recall(label)))
+ print("Class %s F1 Measure = %s" % (label, metrics.f1Measure(label)))
# Micro stats
-print "Micro precision = %s" % metrics.microPrecision
-print "Micro recall = %s" % metrics.microRecall
-print "Micro F1 measure = %s" % metrics.microF1Measure
+print("Micro precision = %s" % metrics.microPrecision)
+print("Micro recall = %s" % metrics.microRecall)
+print("Micro F1 measure = %s" % metrics.microF1Measure)
# Hamming loss
-print "Hamming loss = %s" % metrics.hammingLoss
+print("Hamming loss = %s" % metrics.hammingLoss)
# Subset accuracy
-print "Subset accuracy = %s" % metrics.subsetAccuracy
+print("Subset accuracy = %s" % metrics.subsetAccuracy)
{% endhighlight %}
@@ -1283,10 +1283,10 @@ scoreAndLabels = predictions.join(ratingsTuple).map(lambda tup: tup[1])
metrics = RegressionMetrics(scoreAndLabels)
# Root mean squared error
-print "RMSE = %s" % metrics.rootMeanSquaredError
+print("RMSE = %s" % metrics.rootMeanSquaredError)
# R-squared
-print "R-squared = %s" % metrics.r2
+print("R-squared = %s" % metrics.r2)
{% endhighlight %}
@@ -1479,17 +1479,17 @@ valuesAndPreds = parsedData.map(lambda p: (float(model.predict(p.features)), p.l
metrics = RegressionMetrics(valuesAndPreds)
# Squared Error
-print "MSE = %s" % metrics.meanSquaredError
-print "RMSE = %s" % metrics.rootMeanSquaredError
+print("MSE = %s" % metrics.meanSquaredError)
+print("RMSE = %s" % metrics.rootMeanSquaredError)
# R-squared
-print "R-squared = %s" % metrics.r2
+print("R-squared = %s" % metrics.r2)
# Mean absolute error
-print "MAE = %s" % metrics.meanAbsoluteError
+print("MAE = %s" % metrics.meanAbsoluteError)
# Explained variance
-print "Explained variance = %s" % metrics.explainedVariance
+print("Explained variance = %s" % metrics.explainedVariance)
{% endhighlight %}
diff --git a/docs/mllib-feature-extraction.md b/docs/mllib-feature-extraction.md
index a69e41e2a1..de86aba2ae 100644
--- a/docs/mllib-feature-extraction.md
+++ b/docs/mllib-feature-extraction.md
@@ -221,7 +221,7 @@ model = word2vec.fit(inp)
synonyms = model.findSynonyms('china', 40)
for word, cosine_distance in synonyms:
- print "{}: {}".format(word, cosine_distance)
+ print("{}: {}".format(word, cosine_distance))
{% endhighlight %}
</div>
</div>
diff --git a/docs/mllib-statistics.md b/docs/mllib-statistics.md
index de5d6485f9..be04d0b4b5 100644
--- a/docs/mllib-statistics.md
+++ b/docs/mllib-statistics.md
@@ -95,9 +95,9 @@ mat = ... # an RDD of Vectors
# Compute column summary statistics.
summary = Statistics.colStats(mat)
-print summary.mean()
-print summary.variance()
-print summary.numNonzeros()
+print(summary.mean())
+print(summary.variance())
+print(summary.numNonzeros())
{% endhighlight %}
</div>
@@ -183,12 +183,12 @@ seriesY = ... # must have the same number of partitions and cardinality as serie
# Compute the correlation using Pearson's method. Enter "spearman" for Spearman's method. If a
# method is not specified, Pearson's method will be used by default.
-print Statistics.corr(seriesX, seriesY, method="pearson")
+print(Statistics.corr(seriesX, seriesY, method="pearson"))
data = ... # an RDD of Vectors
# calculate the correlation matrix using Pearson's method. Use "spearman" for Spearman's method.
# If a method is not specified, Pearson's method will be used by default.
-print Statistics.corr(data, method="pearson")
+print(Statistics.corr(data, method="pearson"))
{% endhighlight %}
</div>
@@ -398,14 +398,14 @@ vec = Vectors.dense(...) # a vector composed of the frequencies of events
# compute the goodness of fit. If a second vector to test against is not supplied as a parameter,
# the test runs against a uniform distribution.
goodnessOfFitTestResult = Statistics.chiSqTest(vec)
-print goodnessOfFitTestResult # summary of the test including the p-value, degrees of freedom,
- # test statistic, the method used, and the null hypothesis.
+print(goodnessOfFitTestResult) # summary of the test including the p-value, degrees of freedom,
+ # test statistic, the method used, and the null hypothesis.
mat = Matrices.dense(...) # a contingency matrix
# conduct Pearson's independence test on the input contingency matrix
independenceTestResult = Statistics.chiSqTest(mat)
-print independenceTestResult # summary of the test including the p-value, degrees of freedom...
+print(independenceTestResult) # summary of the test including the p-value, degrees of freedom...
obs = sc.parallelize(...) # LabeledPoint(feature, label) .
@@ -415,8 +415,8 @@ obs = sc.parallelize(...) # LabeledPoint(feature, label) .
featureTestResults = Statistics.chiSqTest(obs)
for i, result in enumerate(featureTestResults):
- print "Column $d:" % (i + 1)
- print result
+ print("Column $d:" % (i + 1))
+ print(result)
{% endhighlight %}
</div>
diff --git a/docs/quick-start.md b/docs/quick-start.md
index bb39e4111f..ce2cc9d216 100644
--- a/docs/quick-start.md
+++ b/docs/quick-start.md
@@ -406,7 +406,7 @@ logData = sc.textFile(logFile).cache()
numAs = logData.filter(lambda s: 'a' in s).count()
numBs = logData.filter(lambda s: 'b' in s).count()
-print "Lines with a: %i, lines with b: %i" % (numAs, numBs)
+print("Lines with a: %i, lines with b: %i" % (numAs, numBs))
{% endhighlight %}
diff --git a/docs/sql-programming-guide.md b/docs/sql-programming-guide.md
index 95945eb7fc..d31baa080c 100644
--- a/docs/sql-programming-guide.md
+++ b/docs/sql-programming-guide.md
@@ -570,7 +570,7 @@ teenagers = sqlContext.sql("SELECT name FROM people WHERE age >= 13 AND age <= 1
# The results of SQL queries are RDDs and support all the normal RDD operations.
teenNames = teenagers.map(lambda p: "Name: " + p.name)
for teenName in teenNames.collect():
- print teenName
+ print(teenName)
{% endhighlight %}
</div>
@@ -752,7 +752,7 @@ results = sqlContext.sql("SELECT name FROM people")
# The results of SQL queries are RDDs and support all the normal RDD operations.
names = results.map(lambda p: "Name: " + p.name)
for name in names.collect():
- print name
+ print(name)
{% endhighlight %}
</div>
@@ -1006,7 +1006,7 @@ parquetFile.registerTempTable("parquetFile");
teenagers = sqlContext.sql("SELECT name FROM parquetFile WHERE age >= 13 AND age <= 19")
teenNames = teenagers.map(lambda p: "Name: " + p.name)
for teenName in teenNames.collect():
- print teenName
+ print(teenName)
{% endhighlight %}
</div>
diff --git a/docs/streaming-programming-guide.md b/docs/streaming-programming-guide.md
index 2f3013b533..4663b3f14c 100644
--- a/docs/streaming-programming-guide.md
+++ b/docs/streaming-programming-guide.md
@@ -1525,7 +1525,7 @@ def getSqlContextInstance(sparkContext):
words = ... # DStream of strings
def process(time, rdd):
- print "========= %s =========" % str(time)
+ print("========= %s =========" % str(time))
try:
# Get the singleton instance of SQLContext
sqlContext = getSqlContextInstance(rdd.context)