author     Sean Owen <sowen@cloudera.com>      2016-09-16 13:43:05 -0700
committer  Reynold Xin <rxin@databricks.com>   2016-09-16 13:43:05 -0700
commit     b9323fc9381a09af510f542fd5c86473e029caf6
tree       b7e7c6d0c8656e1a4e4765d03f7c9f09f2ac3e34
parent     dca771bec6edb1cd8fc75861d364e0ba9dccf7c3
# [SPARK-17561][DOCS] DataFrameWriter documentation formatting problems
## What changes were proposed in this pull request?
Fix `<ul>` / `<li>` nesting problems in the SQL Scaladoc.
## How was this patch tested?
Scaladoc build and manual verification of generated HTML.
Author: Sean Owen <sowen@cloudera.com>
Closes #15117 from srowen/SPARK-17561.
3 files changed, 53 insertions, 29 deletions
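The root cause is an HTML nesting rule: a sub-list must sit *inside* its parent `<li>`, so comments that closed the item with `</li>` before opening the nested `<ul>` produced invalid markup, and several option lists had no enclosing `<ul>` at all. A minimal sketch of the corrected Scaladoc pattern, using an illustrative stub method rather than the real Spark source:

```scala
object ScaladocNestingSketch {
  /**
   * You can set the following option(s):
   * <ul>
   * <li>`mode` (default `PERMISSIVE`): how to deal with corrupt records during parsing.
   * <ul>
   * <li>`PERMISSIVE` : null out the other fields of a corrupted record</li>
   * <li>`DROPMALFORMED` : skip corrupted records entirely</li>
   * <li>`FAILFAST` : throw on the first corrupted record</li>
   * </ul>
   * </li> <!-- the parent item closes only after the nested </ul> -->
   * </ul>
   */
  def json(path: String): Unit = ??? // illustrative stub, not the real API
}
```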
```diff
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
index 93bf74d06b..d29d90ce40 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameReader.scala
@@ -269,14 +269,15 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging {
  * <li>`allowBackslashEscapingAnyCharacter` (default `false`): allows accepting quoting of all
  * character using backslash quoting mechanism</li>
  * <li>`mode` (default `PERMISSIVE`): allows a mode for dealing with corrupt records
- * during parsing.</li>
- * <ul>
- * <li> - `PERMISSIVE` : sets other fields to `null` when it meets a corrupted record, and puts
- * the malformed string into a new field configured by `columnNameOfCorruptRecord`. When
- * a schema is set by user, it sets `null` for extra fields.</li>
- * <li> - `DROPMALFORMED` : ignores the whole corrupted records.</li>
- * <li> - `FAILFAST` : throws an exception when it meets corrupted records.</li>
- * </ul>
+ * during parsing.
+ * <ul>
+ * <li>`PERMISSIVE` : sets other fields to `null` when it meets a corrupted record, and puts
+ * the malformed string into a new field configured by `columnNameOfCorruptRecord`. When
+ * a schema is set by user, it sets `null` for extra fields.</li>
+ * <li>`DROPMALFORMED` : ignores the whole corrupted records.</li>
+ * <li>`FAILFAST` : throws an exception when it meets corrupted records.</li>
+ * </ul>
+ * </li>
  * <li>`columnNameOfCorruptRecord` (default is the value specified in
  * `spark.sql.columnNameOfCorruptRecord`): allows renaming the new field having malformed string
  * created by `PERMISSIVE` mode. This overrides `spark.sql.columnNameOfCorruptRecord`.</li>
@@ -395,13 +396,14 @@ class DataFrameReader private[sql](sparkSession: SparkSession) extends Logging {
  * <li>`maxMalformedLogPerPartition` (default `10`): sets the maximum number of malformed rows
  * Spark will log for each partition. Malformed records beyond this number will be ignored.</li>
  * <li>`mode` (default `PERMISSIVE`): allows a mode for dealing with corrupt records
- * during parsing.</li>
- * <ul>
- * <li> - `PERMISSIVE` : sets other fields to `null` when it meets a corrupted record. When
- * a schema is set by user, it sets `null` for extra fields.</li>
- * <li> - `DROPMALFORMED` : ignores the whole corrupted records.</li>
- * <li> - `FAILFAST` : throws an exception when it meets corrupted records.</li>
- * </ul>
+ * during parsing.
+ * <ul>
+ * <li>`PERMISSIVE` : sets other fields to `null` when it meets a corrupted record. When
+ * a schema is set by user, it sets `null` for extra fields.</li>
+ * <li>`DROPMALFORMED` : ignores the whole corrupted records.</li>
+ * <li>`FAILFAST` : throws an exception when it meets corrupted records.</li>
+ * </ul>
+ * </li>
  * </ul>
  * @since 2.0.0
  */
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
index c05c7a6551..e137f076a0 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/DataFrameWriter.scala
@@ -397,7 +397,9 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) {
  * your external database systems.
  *
  * You can set the following JDBC-specific option(s) for storing JDBC:
+ * <ul>
  * <li>`truncate` (default `false`): use `TRUNCATE TABLE` instead of `DROP TABLE`.</li>
+ * </ul>
  *
  * In case of failures, users should turn off `truncate` option to use `DROP TABLE` again. Also,
  * due to the different behavior of `TRUNCATE TABLE` among DBMS, it's not always safe to use this.
@@ -486,6 +488,7 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) {
  * }}}
  *
  * You can set the following JSON-specific option(s) for writing JSON files:
+ * <ul>
  * <li>`compression` (default `null`): compression codec to use when saving to file. This can be
  * one of the known case-insensitive shorten names (`none`, `bzip2`, `gzip`, `lz4`,
  * `snappy` and `deflate`). </li>
@@ -495,6 +498,7 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) {
  * <li>`timestampFormat` (default `yyyy-MM-dd'T'HH:mm:ss.SSSZZ`): sets the string that
  * indicates a timestamp format. Custom date formats follow the formats at
  * `java.text.SimpleDateFormat`. This applies to timestamp type.</li>
+ * </ul>
  *
  * @since 1.4.0
  */
@@ -510,10 +514,12 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) {
  * }}}
  *
  * You can set the following Parquet-specific option(s) for writing Parquet files:
+ * <ul>
  * <li>`compression` (default is the value specified in `spark.sql.parquet.compression.codec`):
  * compression codec to use when saving to file. This can be one of the known case-insensitive
  * shorten names(none, `snappy`, `gzip`, and `lzo`). This will override
  * `spark.sql.parquet.compression.codec`.</li>
+ * </ul>
  *
  * @since 1.4.0
  */
@@ -529,9 +535,11 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) {
  * }}}
  *
  * You can set the following ORC-specific option(s) for writing ORC files:
+ * <ul>
  * <li>`compression` (default `snappy`): compression codec to use when saving to file. This can be
  * one of the known case-insensitive shorten names(`none`, `snappy`, `zlib`, and `lzo`).
  * This will override `orc.compress`.</li>
+ * </ul>
  *
  * @since 1.5.0
  * @note Currently, this method can only be used after enabling Hive support
@@ -553,9 +561,11 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) {
  * }}}
  *
  * You can set the following option(s) for writing text files:
+ * <ul>
  * <li>`compression` (default `null`): compression codec to use when saving to file. This can be
  * one of the known case-insensitive shorten names (`none`, `bzip2`, `gzip`, `lz4`,
  * `snappy` and `deflate`). </li>
+ * </ul>
  *
  * @since 1.6.0
  */
@@ -571,6 +581,7 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) {
  * }}}
  *
  * You can set the following CSV-specific option(s) for writing CSV files:
+ * <ul>
  * <li>`sep` (default `,`): sets the single character as a separator for each
  * field and value.</li>
  * <li>`quote` (default `"`): sets the single character used for escaping quoted values where
@@ -593,6 +604,7 @@ final class DataFrameWriter[T] private[sql](ds: Dataset[T]) {
  * <li>`timestampFormat` (default `yyyy-MM-dd'T'HH:mm:ss.SSSZZ`): sets the string that
  * indicates a timestamp format. Custom date formats follow the formats at
  * `java.text.SimpleDateFormat`. This applies to timestamp type.</li>
+ * </ul>
  *
  * @since 2.0.0
  */
diff --git a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala
index 3ad1125229..c25f71af73 100644
--- a/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala
+++ b/sql/core/src/main/scala/org/apache/spark/sql/streaming/DataStreamReader.scala
@@ -161,6 +161,7 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo
  * schema in advance, use the version that specifies the schema to avoid the extra scan.
  *
  * You can set the following JSON-specific options to deal with non-standard JSON files:
+ * <ul>
  * <li>`maxFilesPerTrigger` (default: no max limit): sets the maximum number of new files to be
  * considered in every trigger.</li>
  * <li>`primitivesAsString` (default `false`): infers all primitive values as a string type</li>
@@ -175,14 +176,15 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo
  * <li>`allowBackslashEscapingAnyCharacter` (default `false`): allows accepting quoting of all
  * character using backslash quoting mechanism</li>
  * <li>`mode` (default `PERMISSIVE`): allows a mode for dealing with corrupt records
- * during parsing.</li>
- * <ul>
- * <li>`PERMISSIVE` : sets other fields to `null` when it meets a corrupted record, and puts the
- * malformed string into a new field configured by `columnNameOfCorruptRecord`. When
- * a schema is set by user, it sets `null` for extra fields.</li>
- * <li>`DROPMALFORMED` : ignores the whole corrupted records.</li>
- * <li>`FAILFAST` : throws an exception when it meets corrupted records.</li>
- * </ul>
+ * during parsing.
+ * <ul>
+ * <li>`PERMISSIVE` : sets other fields to `null` when it meets a corrupted record, and puts
+ * the malformed string into a new field configured by `columnNameOfCorruptRecord`. When
+ * a schema is set by user, it sets `null` for extra fields.</li>
+ * <li>`DROPMALFORMED` : ignores the whole corrupted records.</li>
+ * <li>`FAILFAST` : throws an exception when it meets corrupted records.</li>
+ * </ul>
+ * </li>
  * <li>`columnNameOfCorruptRecord` (default is the value specified in
  * `spark.sql.columnNameOfCorruptRecord`): allows renaming the new field having malformed string
  * created by `PERMISSIVE` mode. This overrides `spark.sql.columnNameOfCorruptRecord`.</li>
@@ -192,6 +194,7 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo
  * <li>`timestampFormat` (default `yyyy-MM-dd'T'HH:mm:ss.SSSZZ`): sets the string that
  * indicates a timestamp format. Custom date formats follow the formats at
  * `java.text.SimpleDateFormat`. This applies to timestamp type.</li>
+ * </ul>
  *
  * @since 2.0.0
  */
@@ -207,6 +210,7 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo
  * specify the schema explicitly using [[schema]].
  *
  * You can set the following CSV-specific options to deal with CSV files:
+ * <ul>
  * <li>`maxFilesPerTrigger` (default: no max limit): sets the maximum number of new files to be
  * considered in every trigger.</li>
  * <li>`sep` (default `,`): sets the single character as a separator for each
@@ -245,12 +249,14 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo
  * <li>`maxCharsPerColumn` (default `1000000`): defines the maximum number of characters allowed
  * for any given value being read.</li>
  * <li>`mode` (default `PERMISSIVE`): allows a mode for dealing with corrupt records
- * during parsing.</li>
- * <ul>
- * <li>`PERMISSIVE` : sets other fields to `null` when it meets a corrupted record. When
- * a schema is set by user, it sets `null` for extra fields.</li>
- * <li>`DROPMALFORMED` : ignores the whole corrupted records.</li>
- * <li>`FAILFAST` : throws an exception when it meets corrupted records.</li>
+ * during parsing.
+ * <ul>
+ * <li>`PERMISSIVE` : sets other fields to `null` when it meets a corrupted record. When
+ * a schema is set by user, it sets `null` for extra fields.</li>
+ * <li>`DROPMALFORMED` : ignores the whole corrupted records.</li>
+ * <li>`FAILFAST` : throws an exception when it meets corrupted records.</li>
+ * </ul>
+ * </li>
  * </ul>
  *
  * @since 2.0.0
@@ -263,12 +269,14 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo
  * Loads a Parquet file stream, returning the result as a [[DataFrame]].
  *
  * You can set the following Parquet-specific option(s) for reading Parquet files:
+ * <ul>
  * <li>`maxFilesPerTrigger` (default: no max limit): sets the maximum number of new files to be
  * considered in every trigger.</li>
  * <li>`mergeSchema` (default is the value specified in `spark.sql.parquet.mergeSchema`): sets
  * whether we should merge schemas collected from all
  * Parquet part-files. This will override
  * `spark.sql.parquet.mergeSchema`.</li>
+ * </ul>
  *
  * @since 2.0.0
  */
@@ -292,8 +300,10 @@ final class DataStreamReader private[sql](sparkSession: SparkSession) extends Lo
  * }}}
  *
  * You can set the following text-specific options to deal with text files:
+ * <ul>
  * <li>`maxFilesPerTrigger` (default: no max limit): sets the maximum number of new files to be
  * considered in every trigger.</li>
+ * </ul>
  *
  * @since 2.0.0
  */
```
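The hunks above only reformat documentation of existing reader/writer options, which are all plain key/value settings on the builder APIs. A short usage sketch of the same options against the Spark 2.0-era API; the paths, JDBC URL, and table name are illustrative, not taken from the patch:

```scala
import java.util.Properties
import org.apache.spark.sql.SparkSession

object OptionUsageSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName("option-usage").master("local[*]").getOrCreate()

    // Corrupt-record handling: PERMISSIVE (default), DROPMALFORMED, or FAILFAST.
    val df = spark.read
      .option("mode", "PERMISSIVE")
      .option("columnNameOfCorruptRecord", "_corrupt_record") // where PERMISSIVE stores bad rows
      .json("/tmp/people.json") // illustrative path

    // Per-format compression codec for writes.
    df.write.option("compression", "gzip").json("/tmp/people-out")

    // JDBC `truncate`: reuse the table via TRUNCATE TABLE instead of DROP TABLE on overwrite.
    df.write
      .mode("overwrite")
      .option("truncate", "true")
      .jdbc("jdbc:postgresql://localhost/db", "people", new Properties()) // illustrative URL/table

    // Streaming reader: cap how many new files each trigger picks up.
    val stream = spark.readStream
      .schema(df.schema)
      .option("maxFilesPerTrigger", "1")
      .json("/tmp/people-stream")
    println(stream.isStreaming) // true: this DataFrame is backed by a streaming source

    spark.stop()
  }
}
```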