aboutsummaryrefslogtreecommitdiff
path: root/python/pyspark/sql/types.py
diff options
context:
space:
mode:
author: Davies Liu <davies@databricks.com> 2015-02-11 12:13:16 -0800
committer: Reynold Xin <rxin@databricks.com> 2015-02-11 12:13:16 -0800
commit: b694eb9c2fefeaa33891d3e61f9bea369bc09984 (patch)
tree: 0618924c6564e41ab27676415e79467216d4832f /python/pyspark/sql/types.py
parent: 1ac099e3e00ddb01af8e6e3a84c70f8363f04b5c (diff)
download: spark-b694eb9c2fefeaa33891d3e61f9bea369bc09984.tar.gz
spark-b694eb9c2fefeaa33891d3e61f9bea369bc09984.tar.bz2
spark-b694eb9c2fefeaa33891d3e61f9bea369bc09984.zip
[SPARK-5677] [SPARK-5734] [SQL] [PySpark] Python DataFrame API remaining tasks
1. DataFrame.renameColumn
2. DataFrame.show() and _repr_
3. Use simpleString() rather than jsonValue in DataFrame.dtypes
4. createDataFrame from local Python data, including pandas.DataFrame

Author: Davies Liu <davies@databricks.com>

Closes #4528 from davies/df3 and squashes the following commits:

014acea [Davies Liu] fix typo
6ba526e [Davies Liu] fix tests
46f5f95 [Davies Liu] address comments
6cbc154 [Davies Liu] dataframe.show() and improve dtypes
6f94f25 [Davies Liu] create DataFrame from local Python data
Diffstat (limited to 'python/pyspark/sql/types.py')
-rw-r--r-- python/pyspark/sql/types.py 32
1 files changed, 32 insertions, 0 deletions
diff --git a/python/pyspark/sql/types.py b/python/pyspark/sql/types.py
index 41afefe48e..40bd7e54a9 100644
--- a/python/pyspark/sql/types.py
+++ b/python/pyspark/sql/types.py
@@ -52,6 +52,9 @@ class DataType(object):
def typeName(cls):
return cls.__name__[:-4].lower()
+ def simpleString(self):
+ return self.typeName()
+
def jsonValue(self):
return self.typeName()
@@ -145,6 +148,12 @@ class DecimalType(DataType):
self.scale = scale
self.hasPrecisionInfo = precision is not None
+ def simpleString(self):
+ if self.hasPrecisionInfo:
+ return "decimal(%d,%d)" % (self.precision, self.scale)
+ else:
+ return "decimal(10,0)"
+
def jsonValue(self):
if self.hasPrecisionInfo:
return "decimal(%d,%d)" % (self.precision, self.scale)
@@ -180,6 +189,8 @@ class ByteType(PrimitiveType):
The data type representing int values with 1 signed byte.
"""
+ def simpleString(self):
+ return 'tinyint'
class IntegerType(PrimitiveType):
@@ -188,6 +199,8 @@ class IntegerType(PrimitiveType):
The data type representing int values.
"""
+ def simpleString(self):
+ return 'int'
class LongType(PrimitiveType):
@@ -198,6 +211,8 @@ class LongType(PrimitiveType):
beyond the range of [-9223372036854775808, 9223372036854775807],
please use DecimalType.
"""
+ def simpleString(self):
+ return 'bigint'
class ShortType(PrimitiveType):
@@ -206,6 +221,8 @@ class ShortType(PrimitiveType):
The data type representing int values with 2 signed bytes.
"""
+ def simpleString(self):
+ return 'smallint'
class ArrayType(DataType):
@@ -233,6 +250,9 @@ class ArrayType(DataType):
self.elementType = elementType
self.containsNull = containsNull
+ def simpleString(self):
+ return 'array<%s>' % self.elementType.simpleString()
+
def __repr__(self):
return "ArrayType(%s,%s)" % (self.elementType,
str(self.containsNull).lower())
@@ -283,6 +303,9 @@ class MapType(DataType):
self.valueType = valueType
self.valueContainsNull = valueContainsNull
+ def simpleString(self):
+ return 'map<%s,%s>' % (self.keyType.simpleString(), self.valueType.simpleString())
+
def __repr__(self):
return "MapType(%s,%s,%s)" % (self.keyType, self.valueType,
str(self.valueContainsNull).lower())
@@ -337,6 +360,9 @@ class StructField(DataType):
self.nullable = nullable
self.metadata = metadata or {}
+ def simpleString(self):
+ return '%s:%s' % (self.name, self.dataType.simpleString())
+
def __repr__(self):
return "StructField(%s,%s,%s)" % (self.name, self.dataType,
str(self.nullable).lower())
@@ -379,6 +405,9 @@ class StructType(DataType):
"""
self.fields = fields
+ def simpleString(self):
+ return 'struct<%s>' % (','.join(f.simpleString() for f in self.fields))
+
def __repr__(self):
return ("StructType(List(%s))" %
",".join(str(field) for field in self.fields))
@@ -435,6 +464,9 @@ class UserDefinedType(DataType):
"""
raise NotImplementedError("UDT must implement deserialize().")
+ def simpleString(self):
+ return 'null'
+
def json(self):
return json.dumps(self.jsonValue(), separators=(',', ':'), sort_keys=True)