From aea727f68d5fe5e81fc04ece97ad94c6f12c7270 Mon Sep 17 00:00:00 2001
From: Matei Zaharia
Date: Wed, 26 Jun 2013 21:14:38 -0700
Subject: Simplify Python docs a little to do substring search

---
 docs/python-programming-guide.md | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

(limited to 'docs/python-programming-guide.md')

diff --git a/docs/python-programming-guide.md b/docs/python-programming-guide.md
index 3a7a8db4a6..7f1e7cf93d 100644
--- a/docs/python-programming-guide.md
+++ b/docs/python-programming-guide.md
@@ -27,14 +27,14 @@ Short functions can be passed to RDD methods using Python's [`lambda`](http://ww
 
 {% highlight python %}
 logData = sc.textFile(logFile).cache()
-errors = logData.filter(lambda s: 'ERROR' in s.split())
+errors = logData.filter(lambda line: "ERROR" in line)
 {% endhighlight %}
 
 You can also pass functions that are defined using the `def` keyword; this is useful for more complicated functions that cannot be expressed using `lambda`:
 
 {% highlight python %}
 def is_error(line):
-    return 'ERROR' in line.split()
+    return "ERROR" in line
 errors = logData.filter(is_error)
 {% endhighlight %}
 
@@ -43,8 +43,7 @@ Functions can access objects in enclosing scopes, although modifications to thos
 {% highlight python %}
 error_keywords = ["Exception", "Error"]
 def is_error(line):
-    words = line.split()
-    return any(keyword in words for keyword in error_keywords)
+    return any(keyword in line for keyword in error_keywords)
 errors = logData.filter(is_error)
 {% endhighlight %}
 
-- 
cgit v1.2.3