aboutsummaryrefslogtreecommitdiff
path: root/python
diff options
context:
space:
mode:
author Xiangrui Meng <meng@databricks.com> 2014-11-01 14:37:00 -0700
committer Michael Armbrust <michael@databricks.com> 2014-11-01 14:37:00 -0700
commit 1d4f3552037cb667971bea2e5078d8b3ce6c2eae (patch)
tree b4318e8bddec8a5fceaf41ce5a5fd1c3fdab2f41 /python
parent 59e626c701227634336110e1bc23afd94c535ede (diff)
download spark-1d4f3552037cb667971bea2e5078d8b3ce6c2eae.tar.gz
spark-1d4f3552037cb667971bea2e5078d8b3ce6c2eae.tar.bz2
spark-1d4f3552037cb667971bea2e5078d8b3ce6c2eae.zip
[SPARK-3569][SQL] Add metadata field to StructField
Add `metadata: Metadata` to `StructField` to store extra information about columns. `Metadata` is a simple wrapper over `Map[String, Any]` with value types restricted to Boolean, Long, Double, String, Metadata, and arrays of those types. SerDe is via JSON. Metadata is preserved through simple operations like `SELECT`.

marmbrus liancheng

Author: Xiangrui Meng <meng@databricks.com>
Author: Michael Armbrust <michael@databricks.com>

Closes #2701 from mengxr/structfield-metadata and squashes the following commits:

dedda56 [Xiangrui Meng] merge remote
5ef930a [Xiangrui Meng] Merge remote-tracking branch 'apache/master' into structfield-metadata
c35203f [Xiangrui Meng] Merge pull request #1 from marmbrus/pr/2701
886b85c [Michael Armbrust] Expose Metadata and MetadataBuilder through the public scala and java packages.
589f314 [Xiangrui Meng] Merge remote-tracking branch 'apache/master' into structfield-metadata
1e2abcf [Xiangrui Meng] change default value of metadata to None in python
611d3c2 [Xiangrui Meng] move metadata from Expr to NamedExpr
ddfcfad [Xiangrui Meng] Merge remote-tracking branch 'apache/master' into structfield-metadata
a438440 [Xiangrui Meng] Merge remote-tracking branch 'apache/master' into structfield-metadata
4266f4d [Xiangrui Meng] add StructField.toString back for backward compatibility
3f49aab [Xiangrui Meng] remove StructField.toString
24a9f80 [Xiangrui Meng] Merge remote-tracking branch 'apache/master' into structfield-metadata
473a7c5 [Xiangrui Meng] merge master
c9d7301 [Xiangrui Meng] organize imports
1fcbf13 [Xiangrui Meng] change metadata type in StructField for Scala/Java
60cc131 [Xiangrui Meng] add doc and header
60614c7 [Xiangrui Meng] add metadata
e42c452 [Xiangrui Meng] merge master
93518fb [Xiangrui Meng] support metadata in python
905bb89 [Xiangrui Meng] java conversions
618e349 [Xiangrui Meng] make tests work in scala
61b8e0f [Xiangrui Meng] merge master
7e5a322 [Xiangrui Meng] do not output metadata in StructField.toString
c41a664 [Xiangrui Meng] merge master
d8af0ed [Xiangrui Meng] move tests to SQLQuerySuite
67fdebb [Xiangrui Meng] add test on join
d65072e [Xiangrui Meng] remove Map.empty
367d237 [Xiangrui Meng] add test
c194d5e [Xiangrui Meng] add metadata field to StructField and Attribute
Diffstat (limited to 'python')
-rw-r--r-- python/pyspark/sql.py 15
1 files changed, 11 insertions, 4 deletions
diff --git a/python/pyspark/sql.py b/python/pyspark/sql.py
index f0bd3cbd98..93bfc25bca 100644
--- a/python/pyspark/sql.py
+++ b/python/pyspark/sql.py
@@ -313,12 +313,15 @@ class StructField(DataType):
"""
- def __init__(self, name, dataType, nullable):
+ def __init__(self, name, dataType, nullable, metadata=None):
"""Creates a StructField
:param name: the name of this field.
:param dataType: the data type of this field.
:param nullable: indicates whether values of this field
can be null.
+ :param metadata: metadata of this field, which is a map from string
+ to simple type that can be serialized to JSON
+ automatically
>>> (StructField("f1", StringType, True)
... == StructField("f1", StringType, True))
@@ -330,6 +333,7 @@ class StructField(DataType):
self.name = name
self.dataType = dataType
self.nullable = nullable
+ self.metadata = metadata or {}
def __repr__(self):
return "StructField(%s,%s,%s)" % (self.name, self.dataType,
@@ -338,13 +342,15 @@ class StructField(DataType):
def jsonValue(self):
return {"name": self.name,
"type": self.dataType.jsonValue(),
- "nullable": self.nullable}
+ "nullable": self.nullable,
+ "metadata": self.metadata}
@classmethod
def fromJson(cls, json):
return StructField(json["name"],
_parse_datatype_json_value(json["type"]),
- json["nullable"])
+ json["nullable"],
+ json["metadata"])
class StructType(DataType):
@@ -423,7 +429,8 @@ def _parse_datatype_json_string(json_string):
... StructField("simpleArray", simple_arraytype, True),
... StructField("simpleMap", simple_maptype, True),
... StructField("simpleStruct", simple_structtype, True),
- ... StructField("boolean", BooleanType(), False)])
+ ... StructField("boolean", BooleanType(), False),
+ ... StructField("withMeta", DoubleType(), False, {"name": "age"})])
>>> check_datatype(complex_structtype)
True
>>> # Complex ArrayType.