Skip to main content
Ctrl
+
K
Site Navigation
Overview
Getting Started
User Guides
API Reference
Development
Migration Guides
4.0.0-preview1
GitHub
PyPI
Site Navigation
Overview
Getting Started
User Guides
API Reference
Development
Migration Guides
4.0.0-preview1
GitHub
PyPI
Section Navigation
Spark SQL
Core Classes
pyspark.sql.SparkSession
pyspark.sql.Catalog
pyspark.sql.DataFrame
pyspark.sql.Column
pyspark.sql.Observation
pyspark.sql.Row
pyspark.sql.GroupedData
pyspark.sql.PandasCogroupedOps
pyspark.sql.DataFrameNaFunctions
pyspark.sql.DataFrameStatFunctions
pyspark.sql.Window
pyspark.sql.DataFrameReader
pyspark.sql.DataFrameWriter
pyspark.sql.DataFrameWriterV2
pyspark.sql.UDFRegistration
pyspark.sql.UDTFRegistration
pyspark.sql.udf.UserDefinedFunction
pyspark.sql.udtf.UserDefinedTableFunction
pyspark.sql.datasource.DataSource
pyspark.sql.datasource.DataSourceReader
pyspark.sql.datasource.DataSourceStreamReader
pyspark.sql.datasource.DataSourceWriter
pyspark.sql.datasource.DataSourceRegistration
pyspark.sql.datasource.InputPartition
pyspark.sql.datasource.WriterCommitMessage
pyspark.sql.VariantVal
Spark Session
pyspark.sql.SparkSession.active
pyspark.sql.SparkSession.builder.appName
pyspark.sql.SparkSession.builder.config
pyspark.sql.SparkSession.builder.enableHiveSupport
pyspark.sql.SparkSession.builder.getOrCreate
pyspark.sql.SparkSession.builder.master
pyspark.sql.SparkSession.builder.remote
pyspark.sql.SparkSession.catalog
pyspark.sql.SparkSession.conf
pyspark.sql.SparkSession.createDataFrame
pyspark.sql.SparkSession.dataSource
pyspark.sql.SparkSession.getActiveSession
pyspark.sql.SparkSession.newSession
pyspark.sql.SparkSession.profile
pyspark.sql.SparkSession.range
pyspark.sql.SparkSession.read
pyspark.sql.SparkSession.readStream
pyspark.sql.SparkSession.sparkContext
pyspark.sql.SparkSession.sql
pyspark.sql.SparkSession.stop
pyspark.sql.SparkSession.streams
pyspark.sql.SparkSession.table
pyspark.sql.SparkSession.udf
pyspark.sql.SparkSession.udtf
pyspark.sql.SparkSession.version
pyspark.sql.is_remote
pyspark.sql.SparkSession.builder.create
pyspark.sql.SparkSession.addArtifact
pyspark.sql.SparkSession.addArtifacts
pyspark.sql.SparkSession.addTag
pyspark.sql.SparkSession.clearProgressHandlers
pyspark.sql.SparkSession.clearTags
pyspark.sql.SparkSession.client
pyspark.sql.SparkSession.copyFromLocalToFs
pyspark.sql.SparkSession.getTags
pyspark.sql.SparkSession.interruptAll
pyspark.sql.SparkSession.interruptOperation
pyspark.sql.SparkSession.interruptTag
pyspark.sql.SparkSession.registerProgressHandler
pyspark.sql.SparkSession.removeProgressHandler
pyspark.sql.SparkSession.removeTag
Configuration
pyspark.sql.conf.RuntimeConfig
Input/Output
pyspark.sql.DataFrameReader.csv
pyspark.sql.DataFrameReader.format
pyspark.sql.DataFrameReader.jdbc
pyspark.sql.DataFrameReader.json
pyspark.sql.DataFrameReader.load
pyspark.sql.DataFrameReader.option
pyspark.sql.DataFrameReader.options
pyspark.sql.DataFrameReader.orc
pyspark.sql.DataFrameReader.parquet
pyspark.sql.DataFrameReader.schema
pyspark.sql.DataFrameReader.table
pyspark.sql.DataFrameReader.text
pyspark.sql.DataFrameWriter.bucketBy
pyspark.sql.DataFrameWriter.csv
pyspark.sql.DataFrameWriter.format
pyspark.sql.DataFrameWriter.insertInto
pyspark.sql.DataFrameWriter.jdbc
pyspark.sql.DataFrameWriter.json
pyspark.sql.DataFrameWriter.mode
pyspark.sql.DataFrameWriter.option
pyspark.sql.DataFrameWriter.options
pyspark.sql.DataFrameWriter.orc
pyspark.sql.DataFrameWriter.parquet
pyspark.sql.DataFrameWriter.partitionBy
pyspark.sql.DataFrameWriter.save
pyspark.sql.DataFrameWriter.saveAsTable
pyspark.sql.DataFrameWriter.sortBy
pyspark.sql.DataFrameWriter.text
pyspark.sql.DataFrameWriterV2.using
pyspark.sql.DataFrameWriterV2.option
pyspark.sql.DataFrameWriterV2.options
pyspark.sql.DataFrameWriterV2.tableProperty
pyspark.sql.DataFrameWriterV2.partitionedBy
pyspark.sql.DataFrameWriterV2.create
pyspark.sql.DataFrameWriterV2.replace
pyspark.sql.DataFrameWriterV2.createOrReplace
pyspark.sql.DataFrameWriterV2.append
pyspark.sql.DataFrameWriterV2.overwrite
pyspark.sql.DataFrameWriterV2.overwritePartitions
DataFrame
pyspark.sql.DataFrame.__getattr__
pyspark.sql.DataFrame.__getitem__
pyspark.sql.DataFrame.agg
pyspark.sql.DataFrame.alias
pyspark.sql.DataFrame.approxQuantile
pyspark.sql.DataFrame.cache
pyspark.sql.DataFrame.checkpoint
pyspark.sql.DataFrame.coalesce
pyspark.sql.DataFrame.colRegex
pyspark.sql.DataFrame.collect
pyspark.sql.DataFrame.columns
pyspark.sql.DataFrame.corr
pyspark.sql.DataFrame.count
pyspark.sql.DataFrame.cov
pyspark.sql.DataFrame.createGlobalTempView
pyspark.sql.DataFrame.createOrReplaceGlobalTempView
pyspark.sql.DataFrame.createOrReplaceTempView
pyspark.sql.DataFrame.createTempView
pyspark.sql.DataFrame.crossJoin
pyspark.sql.DataFrame.crosstab
pyspark.sql.DataFrame.cube
pyspark.sql.DataFrame.describe
pyspark.sql.DataFrame.distinct
pyspark.sql.DataFrame.drop
pyspark.sql.DataFrame.dropDuplicates
pyspark.sql.DataFrame.dropDuplicatesWithinWatermark
pyspark.sql.DataFrame.drop_duplicates
pyspark.sql.DataFrame.dropna
pyspark.sql.DataFrame.dtypes
pyspark.sql.DataFrame.exceptAll
pyspark.sql.DataFrame.explain
pyspark.sql.DataFrame.fillna
pyspark.sql.DataFrame.filter
pyspark.sql.DataFrame.first
pyspark.sql.DataFrame.foreach
pyspark.sql.DataFrame.foreachPartition
pyspark.sql.DataFrame.freqItems
pyspark.sql.DataFrame.groupBy
pyspark.sql.DataFrame.groupingSets
pyspark.sql.DataFrame.head
pyspark.sql.DataFrame.hint
pyspark.sql.DataFrame.inputFiles
pyspark.sql.DataFrame.intersect
pyspark.sql.DataFrame.intersectAll
pyspark.sql.DataFrame.isEmpty
pyspark.sql.DataFrame.isLocal
pyspark.sql.DataFrame.isStreaming
pyspark.sql.DataFrame.join
pyspark.sql.DataFrame.limit
pyspark.sql.DataFrame.localCheckpoint
pyspark.sql.DataFrame.mapInPandas
pyspark.sql.DataFrame.mapInArrow
pyspark.sql.DataFrame.melt
pyspark.sql.DataFrame.na
pyspark.sql.DataFrame.observe
pyspark.sql.DataFrame.offset
pyspark.sql.DataFrame.orderBy
pyspark.sql.DataFrame.persist
pyspark.sql.DataFrame.printSchema
pyspark.sql.DataFrame.randomSplit
pyspark.sql.DataFrame.rdd
pyspark.sql.DataFrame.registerTempTable
pyspark.sql.DataFrame.repartition
pyspark.sql.DataFrame.repartitionByRange
pyspark.sql.DataFrame.replace
pyspark.sql.DataFrame.rollup
pyspark.sql.DataFrame.sameSemantics
pyspark.sql.DataFrame.sample
pyspark.sql.DataFrame.sampleBy
pyspark.sql.DataFrame.schema
pyspark.sql.DataFrame.select
pyspark.sql.DataFrame.selectExpr
pyspark.sql.DataFrame.semanticHash
pyspark.sql.DataFrame.show
pyspark.sql.DataFrame.sort
pyspark.sql.DataFrame.sortWithinPartitions
pyspark.sql.DataFrame.sparkSession
pyspark.sql.DataFrame.stat
pyspark.sql.DataFrame.storageLevel
pyspark.sql.DataFrame.subtract
pyspark.sql.DataFrame.summary
pyspark.sql.DataFrame.tail
pyspark.sql.DataFrame.take
pyspark.sql.DataFrame.to
pyspark.sql.DataFrame.toArrow
pyspark.sql.DataFrame.toDF
pyspark.sql.DataFrame.toJSON
pyspark.sql.DataFrame.toLocalIterator
pyspark.sql.DataFrame.toPandas
pyspark.sql.DataFrame.transform
pyspark.sql.DataFrame.union
pyspark.sql.DataFrame.unionAll
pyspark.sql.DataFrame.unionByName
pyspark.sql.DataFrame.unpersist
pyspark.sql.DataFrame.unpivot
pyspark.sql.DataFrame.where
pyspark.sql.DataFrame.withColumn
pyspark.sql.DataFrame.withColumns
pyspark.sql.DataFrame.withColumnRenamed
pyspark.sql.DataFrame.withColumnsRenamed
pyspark.sql.DataFrame.withMetadata
pyspark.sql.DataFrame.withWatermark
pyspark.sql.DataFrame.write
pyspark.sql.DataFrame.writeStream
pyspark.sql.DataFrame.writeTo
pyspark.sql.DataFrame.pandas_api
pyspark.sql.DataFrameNaFunctions.drop
pyspark.sql.DataFrameNaFunctions.fill
pyspark.sql.DataFrameNaFunctions.replace
pyspark.sql.DataFrameStatFunctions.approxQuantile
pyspark.sql.DataFrameStatFunctions.corr
pyspark.sql.DataFrameStatFunctions.cov
pyspark.sql.DataFrameStatFunctions.crosstab
pyspark.sql.DataFrameStatFunctions.freqItems
pyspark.sql.DataFrameStatFunctions.sampleBy
Column
pyspark.sql.Column.__getattr__
pyspark.sql.Column.__getitem__
pyspark.sql.Column.alias
pyspark.sql.Column.asc
pyspark.sql.Column.asc_nulls_first
pyspark.sql.Column.asc_nulls_last
pyspark.sql.Column.astype
pyspark.sql.Column.between
pyspark.sql.Column.bitwiseAND
pyspark.sql.Column.bitwiseOR
pyspark.sql.Column.bitwiseXOR
pyspark.sql.Column.cast
pyspark.sql.Column.contains
pyspark.sql.Column.desc
pyspark.sql.Column.desc_nulls_first
pyspark.sql.Column.desc_nulls_last
pyspark.sql.Column.dropFields
pyspark.sql.Column.endswith
pyspark.sql.Column.eqNullSafe
pyspark.sql.Column.getField
pyspark.sql.Column.getItem
pyspark.sql.Column.ilike
pyspark.sql.Column.isNaN
pyspark.sql.Column.isNotNull
pyspark.sql.Column.isNull
pyspark.sql.Column.isin
pyspark.sql.Column.like
pyspark.sql.Column.name
pyspark.sql.Column.otherwise
pyspark.sql.Column.over
pyspark.sql.Column.rlike
pyspark.sql.Column.startswith
pyspark.sql.Column.substr
pyspark.sql.Column.try_cast
pyspark.sql.Column.when
pyspark.sql.Column.withField
Data Types
ArrayType
BinaryType
BooleanType
ByteType
DataType
DateType
DecimalType
DoubleType
FloatType
IntegerType
LongType
MapType
NullType
ShortType
StringType
CharType
VarcharType
StructField
StructType
VariantType
TimestampType
TimestampNTZType
DayTimeIntervalType
YearMonthIntervalType
CalendarIntervalType
Row
pyspark.sql.Row.asDict
Functions
pyspark.sql.functions.broadcast
pyspark.sql.functions.call_function
pyspark.sql.functions.col
pyspark.sql.functions.column
pyspark.sql.functions.lit
pyspark.sql.functions.expr
pyspark.sql.functions.coalesce
pyspark.sql.functions.ifnull
pyspark.sql.functions.nanvl
pyspark.sql.functions.nullif
pyspark.sql.functions.nvl
pyspark.sql.functions.nvl2
pyspark.sql.functions.when
pyspark.sql.functions.equal_null
pyspark.sql.functions.ilike
pyspark.sql.functions.isnan
pyspark.sql.functions.isnotnull
pyspark.sql.functions.isnull
pyspark.sql.functions.like
pyspark.sql.functions.regexp
pyspark.sql.functions.regexp_like
pyspark.sql.functions.rlike
pyspark.sql.functions.asc
pyspark.sql.functions.asc_nulls_first
pyspark.sql.functions.asc_nulls_last
pyspark.sql.functions.desc
pyspark.sql.functions.desc_nulls_first
pyspark.sql.functions.desc_nulls_last
pyspark.sql.functions.abs
pyspark.sql.functions.acos
pyspark.sql.functions.acosh
pyspark.sql.functions.asin
pyspark.sql.functions.asinh
pyspark.sql.functions.atan
pyspark.sql.functions.atan2
pyspark.sql.functions.atanh
pyspark.sql.functions.bin
pyspark.sql.functions.bround
pyspark.sql.functions.cbrt
pyspark.sql.functions.ceil
pyspark.sql.functions.ceiling
pyspark.sql.functions.conv
pyspark.sql.functions.cos
pyspark.sql.functions.cosh
pyspark.sql.functions.cot
pyspark.sql.functions.csc
pyspark.sql.functions.degrees
pyspark.sql.functions.e
pyspark.sql.functions.exp
pyspark.sql.functions.expm1
pyspark.sql.functions.factorial
pyspark.sql.functions.floor
pyspark.sql.functions.greatest
pyspark.sql.functions.hex
pyspark.sql.functions.hypot
pyspark.sql.functions.least
pyspark.sql.functions.ln
pyspark.sql.functions.log
pyspark.sql.functions.log10
pyspark.sql.functions.log1p
pyspark.sql.functions.log2
pyspark.sql.functions.negate
pyspark.sql.functions.negative
pyspark.sql.functions.pi
pyspark.sql.functions.pmod
pyspark.sql.functions.positive
pyspark.sql.functions.pow
pyspark.sql.functions.power
pyspark.sql.functions.radians
pyspark.sql.functions.rand
pyspark.sql.functions.randn
pyspark.sql.functions.rint
pyspark.sql.functions.round
pyspark.sql.functions.sec
pyspark.sql.functions.sign
pyspark.sql.functions.signum
pyspark.sql.functions.sin
pyspark.sql.functions.sinh
pyspark.sql.functions.sqrt
pyspark.sql.functions.tan
pyspark.sql.functions.tanh
pyspark.sql.functions.try_add
pyspark.sql.functions.try_divide
pyspark.sql.functions.try_multiply
pyspark.sql.functions.try_remainder
pyspark.sql.functions.try_subtract
pyspark.sql.functions.unhex
pyspark.sql.functions.width_bucket
pyspark.sql.functions.ascii
pyspark.sql.functions.base64
pyspark.sql.functions.bit_length
pyspark.sql.functions.btrim
pyspark.sql.functions.char
pyspark.sql.functions.char_length
pyspark.sql.functions.character_length
pyspark.sql.functions.collate
pyspark.sql.functions.collation
pyspark.sql.functions.concat_ws
pyspark.sql.functions.contains
pyspark.sql.functions.decode
pyspark.sql.functions.elt
pyspark.sql.functions.encode
pyspark.sql.functions.endswith
pyspark.sql.functions.find_in_set
pyspark.sql.functions.format_number
pyspark.sql.functions.format_string
pyspark.sql.functions.initcap
pyspark.sql.functions.instr
pyspark.sql.functions.lcase
pyspark.sql.functions.left
pyspark.sql.functions.length
pyspark.sql.functions.levenshtein
pyspark.sql.functions.locate
pyspark.sql.functions.lower
pyspark.sql.functions.lpad
pyspark.sql.functions.ltrim
pyspark.sql.functions.mask
pyspark.sql.functions.octet_length
pyspark.sql.functions.overlay
pyspark.sql.functions.position
pyspark.sql.functions.printf
pyspark.sql.functions.regexp_count
pyspark.sql.functions.regexp_extract
pyspark.sql.functions.regexp_extract_all
pyspark.sql.functions.regexp_instr
pyspark.sql.functions.regexp_replace
pyspark.sql.functions.regexp_substr
pyspark.sql.functions.repeat
pyspark.sql.functions.replace
pyspark.sql.functions.right
pyspark.sql.functions.rpad
pyspark.sql.functions.rtrim
pyspark.sql.functions.sentences
pyspark.sql.functions.soundex
pyspark.sql.functions.split
pyspark.sql.functions.split_part
pyspark.sql.functions.startswith
pyspark.sql.functions.substr
pyspark.sql.functions.substring
pyspark.sql.functions.substring_index
pyspark.sql.functions.to_binary
pyspark.sql.functions.to_char
pyspark.sql.functions.to_number
pyspark.sql.functions.to_varchar
pyspark.sql.functions.translate
pyspark.sql.functions.trim
pyspark.sql.functions.try_to_binary
pyspark.sql.functions.try_to_number
pyspark.sql.functions.ucase
pyspark.sql.functions.unbase64
pyspark.sql.functions.upper
pyspark.sql.functions.bit_count
pyspark.sql.functions.bit_get
pyspark.sql.functions.bitwise_not
pyspark.sql.functions.getbit
pyspark.sql.functions.shiftleft
pyspark.sql.functions.shiftright
pyspark.sql.functions.shiftrightunsigned
pyspark.sql.functions.add_months
pyspark.sql.functions.convert_timezone
pyspark.sql.functions.curdate
pyspark.sql.functions.current_date
pyspark.sql.functions.current_timestamp
pyspark.sql.functions.current_timezone
pyspark.sql.functions.date_add
pyspark.sql.functions.date_diff
pyspark.sql.functions.date_format
pyspark.sql.functions.date_from_unix_date
pyspark.sql.functions.date_part
pyspark.sql.functions.date_sub
pyspark.sql.functions.date_trunc
pyspark.sql.functions.dateadd
pyspark.sql.functions.datediff
pyspark.sql.functions.datepart
pyspark.sql.functions.day
pyspark.sql.functions.dayname
pyspark.sql.functions.dayofmonth
pyspark.sql.functions.dayofweek
pyspark.sql.functions.dayofyear
pyspark.sql.functions.extract
pyspark.sql.functions.from_unixtime
pyspark.sql.functions.from_utc_timestamp
pyspark.sql.functions.hour
pyspark.sql.functions.last_day
pyspark.sql.functions.localtimestamp
pyspark.sql.functions.make_date
pyspark.sql.functions.make_dt_interval
pyspark.sql.functions.make_interval
pyspark.sql.functions.make_timestamp
pyspark.sql.functions.make_timestamp_ltz
pyspark.sql.functions.make_timestamp_ntz
pyspark.sql.functions.make_ym_interval
pyspark.sql.functions.minute
pyspark.sql.functions.month
pyspark.sql.functions.monthname
pyspark.sql.functions.months_between
pyspark.sql.functions.next_day
pyspark.sql.functions.now
pyspark.sql.functions.quarter
pyspark.sql.functions.second
pyspark.sql.functions.session_window
pyspark.sql.functions.timestamp_add
pyspark.sql.functions.timestamp_diff
pyspark.sql.functions.timestamp_micros
pyspark.sql.functions.timestamp_millis
pyspark.sql.functions.timestamp_seconds
pyspark.sql.functions.to_date
pyspark.sql.functions.to_timestamp
pyspark.sql.functions.to_timestamp_ltz
pyspark.sql.functions.to_timestamp_ntz
pyspark.sql.functions.to_unix_timestamp
pyspark.sql.functions.to_utc_timestamp
pyspark.sql.functions.trunc
pyspark.sql.functions.try_to_timestamp
pyspark.sql.functions.unix_date
pyspark.sql.functions.unix_micros
pyspark.sql.functions.unix_millis
pyspark.sql.functions.unix_seconds
pyspark.sql.functions.unix_timestamp
pyspark.sql.functions.weekday
pyspark.sql.functions.weekofyear
pyspark.sql.functions.window
pyspark.sql.functions.window_time
pyspark.sql.functions.year
pyspark.sql.functions.crc32
pyspark.sql.functions.hash
pyspark.sql.functions.md5
pyspark.sql.functions.sha
pyspark.sql.functions.sha1
pyspark.sql.functions.sha2
pyspark.sql.functions.xxhash64
pyspark.sql.functions.aggregate
pyspark.sql.functions.array_sort
pyspark.sql.functions.cardinality
pyspark.sql.functions.concat
pyspark.sql.functions.element_at
pyspark.sql.functions.exists
pyspark.sql.functions.filter
pyspark.sql.functions.forall
pyspark.sql.functions.map_filter
pyspark.sql.functions.map_zip_with
pyspark.sql.functions.reduce
pyspark.sql.functions.reverse
pyspark.sql.functions.size
pyspark.sql.functions.transform
pyspark.sql.functions.transform_keys
pyspark.sql.functions.transform_values
pyspark.sql.functions.try_element_at
pyspark.sql.functions.zip_with
pyspark.sql.functions.array
pyspark.sql.functions.array_append
pyspark.sql.functions.array_compact
pyspark.sql.functions.array_contains
pyspark.sql.functions.array_distinct
pyspark.sql.functions.array_except
pyspark.sql.functions.array_insert
pyspark.sql.functions.array_intersect
pyspark.sql.functions.array_join
pyspark.sql.functions.array_max
pyspark.sql.functions.array_min
pyspark.sql.functions.array_position
pyspark.sql.functions.array_prepend
pyspark.sql.functions.array_remove
pyspark.sql.functions.array_repeat
pyspark.sql.functions.array_size
pyspark.sql.functions.array_union
pyspark.sql.functions.arrays_overlap
pyspark.sql.functions.arrays_zip
pyspark.sql.functions.flatten
pyspark.sql.functions.get
pyspark.sql.functions.sequence
pyspark.sql.functions.shuffle
pyspark.sql.functions.slice
pyspark.sql.functions.sort_array
pyspark.sql.functions.named_struct
pyspark.sql.functions.struct
pyspark.sql.functions.create_map
pyspark.sql.functions.map_concat
pyspark.sql.functions.map_contains_key
pyspark.sql.functions.map_entries
pyspark.sql.functions.map_from_arrays
pyspark.sql.functions.map_from_entries
pyspark.sql.functions.map_keys
pyspark.sql.functions.map_values
pyspark.sql.functions.str_to_map
pyspark.sql.functions.any_value
pyspark.sql.functions.approx_count_distinct
pyspark.sql.functions.approx_percentile
pyspark.sql.functions.array_agg
pyspark.sql.functions.avg
pyspark.sql.functions.bit_and
pyspark.sql.functions.bit_or
pyspark.sql.functions.bit_xor
pyspark.sql.functions.bitmap_construct_agg
pyspark.sql.functions.bitmap_or_agg
pyspark.sql.functions.bool_and
pyspark.sql.functions.bool_or
pyspark.sql.functions.collect_list
pyspark.sql.functions.collect_set
pyspark.sql.functions.corr
pyspark.sql.functions.count
pyspark.sql.functions.count_distinct
pyspark.sql.functions.count_if
pyspark.sql.functions.count_min_sketch
pyspark.sql.functions.covar_pop
pyspark.sql.functions.covar_samp
pyspark.sql.functions.every
pyspark.sql.functions.first
pyspark.sql.functions.first_value
pyspark.sql.functions.grouping
pyspark.sql.functions.grouping_id
pyspark.sql.functions.histogram_numeric
pyspark.sql.functions.hll_sketch_agg
pyspark.sql.functions.hll_union_agg
pyspark.sql.functions.kurtosis
pyspark.sql.functions.last
pyspark.sql.functions.last_value
pyspark.sql.functions.max
pyspark.sql.functions.max_by
pyspark.sql.functions.mean
pyspark.sql.functions.median
pyspark.sql.functions.min
pyspark.sql.functions.min_by
pyspark.sql.functions.mode
pyspark.sql.functions.percentile
pyspark.sql.functions.percentile_approx
pyspark.sql.functions.product
pyspark.sql.functions.regr_avgx
pyspark.sql.functions.regr_avgy
pyspark.sql.functions.regr_count
pyspark.sql.functions.regr_intercept
pyspark.sql.functions.regr_r2
pyspark.sql.functions.regr_slope
pyspark.sql.functions.regr_sxx
pyspark.sql.functions.regr_sxy
pyspark.sql.functions.regr_syy
pyspark.sql.functions.skewness
pyspark.sql.functions.some
pyspark.sql.functions.std
pyspark.sql.functions.stddev
pyspark.sql.functions.stddev_pop
pyspark.sql.functions.stddev_samp
pyspark.sql.functions.sum
pyspark.sql.functions.sum_distinct
pyspark.sql.functions.try_avg
pyspark.sql.functions.try_sum
pyspark.sql.functions.var_pop
pyspark.sql.functions.var_samp
pyspark.sql.functions.variance
pyspark.sql.functions.cume_dist
pyspark.sql.functions.dense_rank
pyspark.sql.functions.lag
pyspark.sql.functions.lead
pyspark.sql.functions.nth_value
pyspark.sql.functions.ntile
pyspark.sql.functions.percent_rank
pyspark.sql.functions.rank
pyspark.sql.functions.row_number
pyspark.sql.functions.explode
pyspark.sql.functions.explode_outer
pyspark.sql.functions.inline
pyspark.sql.functions.inline_outer
pyspark.sql.functions.posexplode
pyspark.sql.functions.posexplode_outer
pyspark.sql.functions.stack
pyspark.sql.functions.partitioning.years
pyspark.sql.functions.partitioning.months
pyspark.sql.functions.partitioning.days
pyspark.sql.functions.partitioning.hours
pyspark.sql.functions.partitioning.bucket
pyspark.sql.functions.from_csv
pyspark.sql.functions.schema_of_csv
pyspark.sql.functions.to_csv
pyspark.sql.functions.from_json
pyspark.sql.functions.get_json_object
pyspark.sql.functions.json_array_length
pyspark.sql.functions.json_object_keys
pyspark.sql.functions.json_tuple
pyspark.sql.functions.schema_of_json
pyspark.sql.functions.to_json
pyspark.sql.functions.is_variant_null
pyspark.sql.functions.parse_json
pyspark.sql.functions.schema_of_variant
pyspark.sql.functions.schema_of_variant_agg
pyspark.sql.functions.try_variant_get
pyspark.sql.functions.variant_get
pyspark.sql.functions.try_parse_json
pyspark.sql.functions.from_xml
pyspark.sql.functions.schema_of_xml
pyspark.sql.functions.to_xml
pyspark.sql.functions.xpath
pyspark.sql.functions.xpath_boolean
pyspark.sql.functions.xpath_double
pyspark.sql.functions.xpath_float
pyspark.sql.functions.xpath_int
pyspark.sql.functions.xpath_long
pyspark.sql.functions.xpath_number
pyspark.sql.functions.xpath_short
pyspark.sql.functions.xpath_string
pyspark.sql.functions.parse_url
pyspark.sql.functions.url_decode
pyspark.sql.functions.url_encode
pyspark.sql.functions.aes_decrypt
pyspark.sql.functions.aes_encrypt
pyspark.sql.functions.assert_true
pyspark.sql.functions.bitmap_bit_position
pyspark.sql.functions.bitmap_bucket_number
pyspark.sql.functions.bitmap_count
pyspark.sql.functions.current_catalog
pyspark.sql.functions.current_database
pyspark.sql.functions.current_schema
pyspark.sql.functions.current_user
pyspark.sql.functions.hll_sketch_estimate
pyspark.sql.functions.hll_union
pyspark.sql.functions.input_file_block_length
pyspark.sql.functions.input_file_block_start
pyspark.sql.functions.input_file_name
pyspark.sql.functions.java_method
pyspark.sql.functions.monotonically_increasing_id
pyspark.sql.functions.raise_error
pyspark.sql.functions.reflect
pyspark.sql.functions.session_user
pyspark.sql.functions.spark_partition_id
pyspark.sql.functions.try_aes_decrypt
pyspark.sql.functions.try_reflect
pyspark.sql.functions.typeof
pyspark.sql.functions.user
pyspark.sql.functions.version
pyspark.sql.functions.call_udf
pyspark.sql.functions.pandas_udf
pyspark.sql.functions.udf
pyspark.sql.functions.udtf
pyspark.sql.functions.unwrap_udt
Window
pyspark.sql.Window.currentRow
pyspark.sql.Window.orderBy
pyspark.sql.Window.partitionBy
pyspark.sql.Window.rangeBetween
pyspark.sql.Window.rowsBetween
pyspark.sql.Window.unboundedFollowing
pyspark.sql.Window.unboundedPreceding
pyspark.sql.WindowSpec.orderBy
pyspark.sql.WindowSpec.partitionBy
pyspark.sql.WindowSpec.rangeBetween
pyspark.sql.WindowSpec.rowsBetween
Grouping
pyspark.sql.GroupedData.agg
pyspark.sql.GroupedData.apply
pyspark.sql.GroupedData.applyInArrow
pyspark.sql.GroupedData.applyInPandas
pyspark.sql.GroupedData.applyInPandasWithState
pyspark.sql.GroupedData.avg
pyspark.sql.GroupedData.cogroup
pyspark.sql.GroupedData.count
pyspark.sql.GroupedData.max
pyspark.sql.GroupedData.mean
pyspark.sql.GroupedData.min
pyspark.sql.GroupedData.pivot
pyspark.sql.GroupedData.sum
pyspark.sql.PandasCogroupedOps.applyInArrow
pyspark.sql.PandasCogroupedOps.applyInPandas
Catalog
pyspark.sql.Catalog.cacheTable
pyspark.sql.Catalog.clearCache
pyspark.sql.Catalog.createExternalTable
pyspark.sql.Catalog.createTable
pyspark.sql.Catalog.currentCatalog
pyspark.sql.Catalog.currentDatabase
pyspark.sql.Catalog.databaseExists
pyspark.sql.Catalog.dropGlobalTempView
pyspark.sql.Catalog.dropTempView
pyspark.sql.Catalog.functionExists
pyspark.sql.Catalog.getDatabase
pyspark.sql.Catalog.getFunction
pyspark.sql.Catalog.getTable
pyspark.sql.Catalog.isCached
pyspark.sql.Catalog.listCatalogs
pyspark.sql.Catalog.listColumns
pyspark.sql.Catalog.listDatabases
pyspark.sql.Catalog.listFunctions
pyspark.sql.Catalog.listTables
pyspark.sql.Catalog.recoverPartitions
pyspark.sql.Catalog.refreshByPath
pyspark.sql.Catalog.refreshTable
pyspark.sql.Catalog.registerFunction
pyspark.sql.Catalog.setCurrentCatalog
pyspark.sql.Catalog.setCurrentDatabase
pyspark.sql.Catalog.tableExists
pyspark.sql.Catalog.uncacheTable
Avro
pyspark.sql.avro.functions.from_avro
pyspark.sql.avro.functions.to_avro
Observation
pyspark.sql.Observation.get
UDF
pyspark.sql.udf.UserDefinedFunction.asNondeterministic
pyspark.sql.udf.UserDefinedFunction.returnType
pyspark.sql.UDFRegistration.register
pyspark.sql.UDFRegistration.registerJavaFunction
pyspark.sql.UDFRegistration.registerJavaUDAF
UDTF
pyspark.sql.udtf.UserDefinedTableFunction.asDeterministic
pyspark.sql.udtf.UserDefinedTableFunction.returnType
pyspark.sql.UDTFRegistration.register
VariantVal
pyspark.sql.VariantVal.toPython
pyspark.sql.VariantVal.toJson
Protobuf
pyspark.sql.protobuf.functions.from_protobuf
pyspark.sql.protobuf.functions.to_protobuf
Python Data Source
pyspark.sql.datasource.DataSource.name
pyspark.sql.datasource.DataSource.reader
pyspark.sql.datasource.DataSource.schema
pyspark.sql.datasource.DataSource.streamReader
pyspark.sql.datasource.DataSource.writer
pyspark.sql.datasource.DataSourceReader.partitions
pyspark.sql.datasource.DataSourceReader.read
pyspark.sql.datasource.DataSourceRegistration.register
pyspark.sql.datasource.DataSourceStreamReader.commit
pyspark.sql.datasource.DataSourceStreamReader.initialOffset
pyspark.sql.datasource.DataSourceStreamReader.latestOffset
pyspark.sql.datasource.DataSourceStreamReader.partitions
pyspark.sql.datasource.DataSourceStreamReader.read
pyspark.sql.datasource.DataSourceStreamReader.stop
pyspark.sql.datasource.DataSourceWriter.abort
pyspark.sql.datasource.DataSourceWriter.commit
pyspark.sql.datasource.DataSourceWriter.write
Pandas API on Spark
Input/Output
pyspark.pandas.range
pyspark.pandas.read_table
pyspark.pandas.DataFrame.to_table
pyspark.pandas.read_delta
pyspark.pandas.DataFrame.to_delta
pyspark.pandas.read_parquet
pyspark.pandas.DataFrame.to_parquet
pyspark.pandas.read_orc
pyspark.pandas.DataFrame.to_orc
pyspark.pandas.read_spark_io
pyspark.pandas.DataFrame.spark.to_spark_io
pyspark.pandas.read_csv
pyspark.pandas.DataFrame.to_csv
pyspark.pandas.read_clipboard
pyspark.pandas.DataFrame.to_clipboard
pyspark.pandas.read_excel
pyspark.pandas.DataFrame.to_excel
pyspark.pandas.read_json
pyspark.pandas.DataFrame.to_json
pyspark.pandas.read_html
pyspark.pandas.DataFrame.to_html
pyspark.pandas.read_sql_table
pyspark.pandas.read_sql_query
pyspark.pandas.read_sql
General functions
pyspark.pandas.melt
pyspark.pandas.merge
pyspark.pandas.merge_asof
pyspark.pandas.get_dummies
pyspark.pandas.concat
pyspark.pandas.sql
pyspark.pandas.broadcast
pyspark.pandas.isna
pyspark.pandas.isnull
pyspark.pandas.notna
pyspark.pandas.notnull
pyspark.pandas.to_numeric
pyspark.pandas.to_datetime
pyspark.pandas.date_range
pyspark.pandas.to_timedelta
pyspark.pandas.timedelta_range
Series
pyspark.pandas.Series
pyspark.pandas.Series.index
pyspark.pandas.Series.dtype
pyspark.pandas.Series.dtypes
pyspark.pandas.Series.ndim
pyspark.pandas.Series.name
pyspark.pandas.Series.shape
pyspark.pandas.Series.axes
pyspark.pandas.Series.size
pyspark.pandas.Series.empty
pyspark.pandas.Series.T
pyspark.pandas.Series.hasnans
pyspark.pandas.Series.values
pyspark.pandas.Series.astype
pyspark.pandas.Series.copy
pyspark.pandas.Series.bool
pyspark.pandas.Series.at
pyspark.pandas.Series.iat
pyspark.pandas.Series.loc
pyspark.pandas.Series.iloc
pyspark.pandas.Series.keys
pyspark.pandas.Series.pop
pyspark.pandas.Series.items
pyspark.pandas.Series.item
pyspark.pandas.Series.xs
pyspark.pandas.Series.get
pyspark.pandas.Series.add
pyspark.pandas.Series.div
pyspark.pandas.Series.mul
pyspark.pandas.Series.radd
pyspark.pandas.Series.rdiv
pyspark.pandas.Series.rmul
pyspark.pandas.Series.rsub
pyspark.pandas.Series.rtruediv
pyspark.pandas.Series.sub
pyspark.pandas.Series.truediv
pyspark.pandas.Series.pow
pyspark.pandas.Series.rpow
pyspark.pandas.Series.mod
pyspark.pandas.Series.rmod
pyspark.pandas.Series.floordiv
pyspark.pandas.Series.rfloordiv
pyspark.pandas.Series.divmod
pyspark.pandas.Series.rdivmod
pyspark.pandas.Series.combine_first
pyspark.pandas.Series.lt
pyspark.pandas.Series.gt
pyspark.pandas.Series.le
pyspark.pandas.Series.ge
pyspark.pandas.Series.ne
pyspark.pandas.Series.eq
pyspark.pandas.Series.product
pyspark.pandas.Series.dot
pyspark.pandas.Series.apply
pyspark.pandas.Series.agg
pyspark.pandas.Series.aggregate
pyspark.pandas.Series.transform
pyspark.pandas.Series.map
pyspark.pandas.Series.groupby
pyspark.pandas.Series.rolling
pyspark.pandas.Series.expanding
pyspark.pandas.Series.pipe
pyspark.pandas.Series.abs
pyspark.pandas.Series.all
pyspark.pandas.Series.any
pyspark.pandas.Series.autocorr
pyspark.pandas.Series.between
pyspark.pandas.Series.clip
pyspark.pandas.Series.corr
pyspark.pandas.Series.count
pyspark.pandas.Series.cov
pyspark.pandas.Series.cummax
pyspark.pandas.Series.cummin
pyspark.pandas.Series.cumsum
pyspark.pandas.Series.cumprod
pyspark.pandas.Series.describe
pyspark.pandas.Series.ewm
pyspark.pandas.Series.filter
pyspark.pandas.Series.kurt
pyspark.pandas.Series.max
pyspark.pandas.Series.mean
pyspark.pandas.Series.min
pyspark.pandas.Series.mode
pyspark.pandas.Series.nlargest
pyspark.pandas.Series.nsmallest
pyspark.pandas.Series.pct_change
pyspark.pandas.Series.prod
pyspark.pandas.Series.nunique
pyspark.pandas.Series.is_unique
pyspark.pandas.Series.quantile
pyspark.pandas.Series.rank
pyspark.pandas.Series.sem
pyspark.pandas.Series.skew
pyspark.pandas.Series.std
pyspark.pandas.Series.sum
pyspark.pandas.Series.median
pyspark.pandas.Series.var
pyspark.pandas.Series.kurtosis
pyspark.pandas.Series.unique
pyspark.pandas.Series.value_counts
pyspark.pandas.Series.round
pyspark.pandas.Series.diff
pyspark.pandas.Series.is_monotonic_increasing
pyspark.pandas.Series.is_monotonic_decreasing
pyspark.pandas.Series.align
pyspark.pandas.Series.drop
pyspark.pandas.Series.droplevel
pyspark.pandas.Series.drop_duplicates
pyspark.pandas.Series.duplicated
pyspark.pandas.Series.equals
pyspark.pandas.Series.add_prefix
pyspark.pandas.Series.add_suffix
pyspark.pandas.Series.first
pyspark.pandas.Series.head
pyspark.pandas.Series.idxmax
pyspark.pandas.Series.idxmin
pyspark.pandas.Series.isin
pyspark.pandas.Series.last
pyspark.pandas.Series.rename
pyspark.pandas.Series.rename_axis
pyspark.pandas.Series.reindex
pyspark.pandas.Series.reindex_like
pyspark.pandas.Series.reset_index
pyspark.pandas.Series.sample
pyspark.pandas.Series.searchsorted
pyspark.pandas.Series.swaplevel
pyspark.pandas.Series.swapaxes
pyspark.pandas.Series.take
pyspark.pandas.Series.tail
pyspark.pandas.Series.where
pyspark.pandas.Series.mask
pyspark.pandas.Series.truncate
pyspark.pandas.Series.backfill
pyspark.pandas.Series.bfill
pyspark.pandas.Series.ffill
pyspark.pandas.Series.isna
pyspark.pandas.Series.isnull
pyspark.pandas.Series.notna
pyspark.pandas.Series.notnull
pyspark.pandas.Series.pad
pyspark.pandas.Series.dropna
pyspark.pandas.Series.fillna
pyspark.pandas.Series.interpolate
pyspark.pandas.Series.argsort
pyspark.pandas.Series.argmin
pyspark.pandas.Series.argmax
pyspark.pandas.Series.sort_index
pyspark.pandas.Series.sort_values
pyspark.pandas.Series.unstack
pyspark.pandas.Series.explode
pyspark.pandas.Series.repeat
pyspark.pandas.Series.squeeze
pyspark.pandas.Series.factorize
pyspark.pandas.Series.compare
pyspark.pandas.Series.replace
pyspark.pandas.Series.update
pyspark.pandas.Series.asof
pyspark.pandas.Series.resample
pyspark.pandas.Series.shift
pyspark.pandas.Series.first_valid_index
pyspark.pandas.Series.last_valid_index
pyspark.pandas.Series.at_time
pyspark.pandas.Series.between_time
pyspark.pandas.Series.spark.column
pyspark.pandas.Series.spark.transform
pyspark.pandas.Series.spark.apply
pyspark.pandas.Series.dt.date
pyspark.pandas.Series.dt.year
pyspark.pandas.Series.dt.month
pyspark.pandas.Series.dt.day
pyspark.pandas.Series.dt.hour
pyspark.pandas.Series.dt.minute
pyspark.pandas.Series.dt.second
pyspark.pandas.Series.dt.microsecond
pyspark.pandas.Series.dt.isocalendar
pyspark.pandas.Series.dt.dayofweek
pyspark.pandas.Series.dt.weekday
pyspark.pandas.Series.dt.dayofyear
pyspark.pandas.Series.dt.quarter
pyspark.pandas.Series.dt.is_month_start
pyspark.pandas.Series.dt.is_month_end
pyspark.pandas.Series.dt.is_quarter_start
pyspark.pandas.Series.dt.is_quarter_end
pyspark.pandas.Series.dt.is_year_start
pyspark.pandas.Series.dt.is_year_end
pyspark.pandas.Series.dt.is_leap_year
pyspark.pandas.Series.dt.daysinmonth
pyspark.pandas.Series.dt.days_in_month
pyspark.pandas.Series.dt.normalize
pyspark.pandas.Series.dt.strftime
pyspark.pandas.Series.dt.round
pyspark.pandas.Series.dt.floor
pyspark.pandas.Series.dt.ceil
pyspark.pandas.Series.dt.month_name
pyspark.pandas.Series.dt.day_name
pyspark.pandas.Series.str.capitalize
pyspark.pandas.Series.str.cat
pyspark.pandas.Series.str.center
pyspark.pandas.Series.str.contains
pyspark.pandas.Series.str.count
pyspark.pandas.Series.str.decode
pyspark.pandas.Series.str.encode
pyspark.pandas.Series.str.endswith
pyspark.pandas.Series.str.extract
pyspark.pandas.Series.str.extractall
pyspark.pandas.Series.str.find
pyspark.pandas.Series.str.findall
pyspark.pandas.Series.str.get
pyspark.pandas.Series.str.get_dummies
pyspark.pandas.Series.str.index
pyspark.pandas.Series.str.isalnum
pyspark.pandas.Series.str.isalpha
pyspark.pandas.Series.str.isdigit
pyspark.pandas.Series.str.isspace
pyspark.pandas.Series.str.islower
pyspark.pandas.Series.str.isupper
pyspark.pandas.Series.str.istitle
pyspark.pandas.Series.str.isnumeric
pyspark.pandas.Series.str.isdecimal
pyspark.pandas.Series.str.join
pyspark.pandas.Series.str.len
pyspark.pandas.Series.str.ljust
pyspark.pandas.Series.str.lower
pyspark.pandas.Series.str.lstrip
pyspark.pandas.Series.str.match
pyspark.pandas.Series.str.normalize
pyspark.pandas.Series.str.pad
pyspark.pandas.Series.str.partition
pyspark.pandas.Series.str.repeat
pyspark.pandas.Series.str.replace
pyspark.pandas.Series.str.rfind
pyspark.pandas.Series.str.rindex
pyspark.pandas.Series.str.rjust
pyspark.pandas.Series.str.rpartition
pyspark.pandas.Series.str.rsplit
pyspark.pandas.Series.str.rstrip
pyspark.pandas.Series.str.slice
pyspark.pandas.Series.str.slice_replace
pyspark.pandas.Series.str.split
pyspark.pandas.Series.str.startswith
pyspark.pandas.Series.str.strip
pyspark.pandas.Series.str.swapcase
pyspark.pandas.Series.str.title
pyspark.pandas.Series.str.translate
pyspark.pandas.Series.str.upper
pyspark.pandas.Series.str.wrap
pyspark.pandas.Series.str.zfill
pyspark.pandas.Series.cat.categories
pyspark.pandas.Series.cat.ordered
pyspark.pandas.Series.cat.codes
pyspark.pandas.Series.cat.rename_categories
pyspark.pandas.Series.cat.reorder_categories
pyspark.pandas.Series.cat.add_categories
pyspark.pandas.Series.cat.remove_categories
pyspark.pandas.Series.cat.remove_unused_categories
pyspark.pandas.Series.cat.set_categories
pyspark.pandas.Series.cat.as_ordered
pyspark.pandas.Series.cat.as_unordered
pyspark.pandas.Series.plot.area
pyspark.pandas.Series.plot.bar
pyspark.pandas.Series.plot.barh
pyspark.pandas.Series.plot.box
pyspark.pandas.Series.plot.density
pyspark.pandas.Series.plot.hist
pyspark.pandas.Series.plot.line
pyspark.pandas.Series.plot.pie
pyspark.pandas.Series.plot.kde
pyspark.pandas.Series.hist
pyspark.pandas.Series.to_pandas
pyspark.pandas.Series.to_numpy
pyspark.pandas.Series.to_list
pyspark.pandas.Series.to_string
pyspark.pandas.Series.to_dict
pyspark.pandas.Series.to_clipboard
pyspark.pandas.Series.to_latex
pyspark.pandas.Series.to_markdown
pyspark.pandas.Series.to_json
pyspark.pandas.Series.to_csv
pyspark.pandas.Series.to_excel
pyspark.pandas.Series.to_hdf
pyspark.pandas.Series.to_frame
pyspark.pandas.Series.pandas_on_spark.transform_batch
DataFrame
pyspark.pandas.DataFrame
pyspark.pandas.DataFrame.index
pyspark.pandas.DataFrame.info
pyspark.pandas.DataFrame.columns
pyspark.pandas.DataFrame.empty
pyspark.pandas.DataFrame.dtypes
pyspark.pandas.DataFrame.shape
pyspark.pandas.DataFrame.axes
pyspark.pandas.DataFrame.ndim
pyspark.pandas.DataFrame.size
pyspark.pandas.DataFrame.select_dtypes
pyspark.pandas.DataFrame.values
pyspark.pandas.DataFrame.copy
pyspark.pandas.DataFrame.isna
pyspark.pandas.DataFrame.astype
pyspark.pandas.DataFrame.isnull
pyspark.pandas.DataFrame.notna
pyspark.pandas.DataFrame.notnull
pyspark.pandas.DataFrame.bool
pyspark.pandas.DataFrame.at
pyspark.pandas.DataFrame.iat
pyspark.pandas.DataFrame.head
pyspark.pandas.DataFrame.idxmax
pyspark.pandas.DataFrame.idxmin
pyspark.pandas.DataFrame.loc
pyspark.pandas.DataFrame.iloc
pyspark.pandas.DataFrame.insert
pyspark.pandas.DataFrame.items
pyspark.pandas.DataFrame.iterrows
pyspark.pandas.DataFrame.itertuples
pyspark.pandas.DataFrame.keys
pyspark.pandas.DataFrame.pop
pyspark.pandas.DataFrame.tail
pyspark.pandas.DataFrame.xs
pyspark.pandas.DataFrame.get
pyspark.pandas.DataFrame.where
pyspark.pandas.DataFrame.mask
pyspark.pandas.DataFrame.query
pyspark.pandas.DataFrame.add
pyspark.pandas.DataFrame.radd
pyspark.pandas.DataFrame.div
pyspark.pandas.DataFrame.rdiv
pyspark.pandas.DataFrame.truediv
pyspark.pandas.DataFrame.rtruediv
pyspark.pandas.DataFrame.mul
pyspark.pandas.DataFrame.rmul
pyspark.pandas.DataFrame.sub
pyspark.pandas.DataFrame.rsub
pyspark.pandas.DataFrame.pow
pyspark.pandas.DataFrame.rpow
pyspark.pandas.DataFrame.mod
pyspark.pandas.DataFrame.rmod
pyspark.pandas.DataFrame.floordiv
pyspark.pandas.DataFrame.rfloordiv
pyspark.pandas.DataFrame.lt
pyspark.pandas.DataFrame.gt
pyspark.pandas.DataFrame.le
pyspark.pandas.DataFrame.ge
pyspark.pandas.DataFrame.ne
pyspark.pandas.DataFrame.eq
pyspark.pandas.DataFrame.dot
pyspark.pandas.DataFrame.combine_first
pyspark.pandas.DataFrame.apply
pyspark.pandas.DataFrame.applymap
pyspark.pandas.DataFrame.map
pyspark.pandas.DataFrame.pipe
pyspark.pandas.DataFrame.agg
pyspark.pandas.DataFrame.aggregate
pyspark.pandas.DataFrame.groupby
pyspark.pandas.DataFrame.rolling
pyspark.pandas.DataFrame.expanding
pyspark.pandas.DataFrame.transform
pyspark.pandas.DataFrame.abs
pyspark.pandas.DataFrame.all
pyspark.pandas.DataFrame.any
pyspark.pandas.DataFrame.clip
pyspark.pandas.DataFrame.corr
pyspark.pandas.DataFrame.corrwith
pyspark.pandas.DataFrame.count
pyspark.pandas.DataFrame.cov
pyspark.pandas.DataFrame.describe
pyspark.pandas.DataFrame.ewm
pyspark.pandas.DataFrame.kurt
pyspark.pandas.DataFrame.kurtosis
pyspark.pandas.DataFrame.max
pyspark.pandas.DataFrame.mean
pyspark.pandas.DataFrame.min
pyspark.pandas.DataFrame.median
pyspark.pandas.DataFrame.mode
pyspark.pandas.DataFrame.pct_change
pyspark.pandas.DataFrame.prod
pyspark.pandas.DataFrame.product
pyspark.pandas.DataFrame.quantile
pyspark.pandas.DataFrame.rank
pyspark.pandas.DataFrame.nunique
pyspark.pandas.DataFrame.sem
pyspark.pandas.DataFrame.skew
pyspark.pandas.DataFrame.sum
pyspark.pandas.DataFrame.std
pyspark.pandas.DataFrame.var
pyspark.pandas.DataFrame.cummin
pyspark.pandas.DataFrame.cummax
pyspark.pandas.DataFrame.cumsum
pyspark.pandas.DataFrame.cumprod
pyspark.pandas.DataFrame.round
pyspark.pandas.DataFrame.diff
pyspark.pandas.DataFrame.eval
pyspark.pandas.DataFrame.add_prefix
pyspark.pandas.DataFrame.add_suffix
pyspark.pandas.DataFrame.align
pyspark.pandas.DataFrame.at_time
pyspark.pandas.DataFrame.between_time
pyspark.pandas.DataFrame.drop
pyspark.pandas.DataFrame.droplevel
pyspark.pandas.DataFrame.drop_duplicates
pyspark.pandas.DataFrame.duplicated
pyspark.pandas.DataFrame.equals
pyspark.pandas.DataFrame.filter
pyspark.pandas.DataFrame.first
pyspark.pandas.DataFrame.head
pyspark.pandas.DataFrame.last
pyspark.pandas.DataFrame.reindex
pyspark.pandas.DataFrame.reindex_like
pyspark.pandas.DataFrame.rename
pyspark.pandas.DataFrame.rename_axis
pyspark.pandas.DataFrame.reset_index
pyspark.pandas.DataFrame.set_index
pyspark.pandas.DataFrame.swapaxes
pyspark.pandas.DataFrame.swaplevel
pyspark.pandas.DataFrame.take
pyspark.pandas.DataFrame.isin
pyspark.pandas.DataFrame.sample
pyspark.pandas.DataFrame.truncate
pyspark.pandas.DataFrame.backfill
pyspark.pandas.DataFrame.dropna
pyspark.pandas.DataFrame.fillna
pyspark.pandas.DataFrame.replace
pyspark.pandas.DataFrame.bfill
pyspark.pandas.DataFrame.ffill
pyspark.pandas.DataFrame.interpolate
pyspark.pandas.DataFrame.pad
pyspark.pandas.DataFrame.pivot_table
pyspark.pandas.DataFrame.pivot
pyspark.pandas.DataFrame.sort_index
pyspark.pandas.DataFrame.sort_values
pyspark.pandas.DataFrame.nlargest
pyspark.pandas.DataFrame.nsmallest
pyspark.pandas.DataFrame.stack
pyspark.pandas.DataFrame.unstack
pyspark.pandas.DataFrame.melt
pyspark.pandas.DataFrame.explode
pyspark.pandas.DataFrame.squeeze
pyspark.pandas.DataFrame.T
pyspark.pandas.DataFrame.transpose
pyspark.pandas.DataFrame.assign
pyspark.pandas.DataFrame.merge
pyspark.pandas.DataFrame.join
pyspark.pandas.DataFrame.update
pyspark.pandas.DataFrame.resample
pyspark.pandas.DataFrame.shift
pyspark.pandas.DataFrame.first_valid_index
pyspark.pandas.DataFrame.last_valid_index
pyspark.pandas.DataFrame.from_dict
pyspark.pandas.DataFrame.from_records
pyspark.pandas.DataFrame.to_table
pyspark.pandas.DataFrame.to_delta
pyspark.pandas.DataFrame.to_parquet
pyspark.pandas.DataFrame.to_csv
pyspark.pandas.DataFrame.to_orc
pyspark.pandas.DataFrame.to_pandas
pyspark.pandas.DataFrame.to_html
pyspark.pandas.DataFrame.to_numpy
pyspark.pandas.DataFrame.to_spark
pyspark.pandas.DataFrame.to_string
pyspark.pandas.DataFrame.to_feather
pyspark.pandas.DataFrame.to_stata
pyspark.pandas.DataFrame.to_json
pyspark.pandas.DataFrame.to_dict
pyspark.pandas.DataFrame.to_excel
pyspark.pandas.DataFrame.to_hdf
pyspark.pandas.DataFrame.to_clipboard
pyspark.pandas.DataFrame.to_markdown
pyspark.pandas.DataFrame.to_records
pyspark.pandas.DataFrame.to_latex
pyspark.pandas.DataFrame.style
pyspark.pandas.DataFrame.spark.frame
pyspark.pandas.DataFrame.spark.cache
pyspark.pandas.DataFrame.spark.persist
pyspark.pandas.DataFrame.spark.hint
pyspark.pandas.DataFrame.spark.to_table
pyspark.pandas.DataFrame.spark.to_spark_io
pyspark.pandas.DataFrame.spark.apply
pyspark.pandas.DataFrame.spark.repartition
pyspark.pandas.DataFrame.spark.coalesce
pyspark.pandas.DataFrame.plot.area
pyspark.pandas.DataFrame.plot.barh
pyspark.pandas.DataFrame.plot.bar
pyspark.pandas.DataFrame.plot.hist
pyspark.pandas.DataFrame.plot.box
pyspark.pandas.DataFrame.plot.line
pyspark.pandas.DataFrame.plot.pie
pyspark.pandas.DataFrame.plot.scatter
pyspark.pandas.DataFrame.plot.density
pyspark.pandas.DataFrame.hist
pyspark.pandas.DataFrame.boxplot
pyspark.pandas.DataFrame.kde
pyspark.pandas.DataFrame.pandas_on_spark.apply_batch
pyspark.pandas.DataFrame.pandas_on_spark.transform_batch
Index objects
pyspark.pandas.Index
pyspark.pandas.Index.is_monotonic_increasing
pyspark.pandas.Index.is_monotonic_decreasing
pyspark.pandas.Index.is_unique
pyspark.pandas.Index.has_duplicates
pyspark.pandas.Index.hasnans
pyspark.pandas.Index.dtype
pyspark.pandas.Index.inferred_type
pyspark.pandas.Index.shape
pyspark.pandas.Index.name
pyspark.pandas.Index.names
pyspark.pandas.Index.ndim
pyspark.pandas.Index.size
pyspark.pandas.Index.nlevels
pyspark.pandas.Index.empty
pyspark.pandas.Index.T
pyspark.pandas.Index.values
pyspark.pandas.Index.all
pyspark.pandas.Index.any
pyspark.pandas.Index.argmin
pyspark.pandas.Index.argmax
pyspark.pandas.Index.copy
pyspark.pandas.Index.delete
pyspark.pandas.Index.equals
pyspark.pandas.Index.factorize
pyspark.pandas.Index.identical
pyspark.pandas.Index.insert
pyspark.pandas.Index.is_boolean
pyspark.pandas.Index.is_categorical
pyspark.pandas.Index.is_floating
pyspark.pandas.Index.is_integer
pyspark.pandas.Index.is_interval
pyspark.pandas.Index.is_numeric
pyspark.pandas.Index.is_object
pyspark.pandas.Index.drop
pyspark.pandas.Index.drop_duplicates
pyspark.pandas.Index.min
pyspark.pandas.Index.max
pyspark.pandas.Index.map
pyspark.pandas.Index.rename
pyspark.pandas.Index.repeat
pyspark.pandas.Index.take
pyspark.pandas.Index.unique
pyspark.pandas.Index.nunique
pyspark.pandas.Index.value_counts
pyspark.pandas.Index.set_names
pyspark.pandas.Index.droplevel
pyspark.pandas.Index.fillna
pyspark.pandas.Index.dropna
pyspark.pandas.Index.isna
pyspark.pandas.Index.isnull
pyspark.pandas.Index.notna
pyspark.pandas.Index.notnull
pyspark.pandas.Index.astype
pyspark.pandas.Index.item
pyspark.pandas.Index.to_list
pyspark.pandas.Index.to_series
pyspark.pandas.Index.to_frame
pyspark.pandas.Index.view
pyspark.pandas.Index.to_numpy
pyspark.pandas.Index.spark.column
pyspark.pandas.Index.spark.transform
pyspark.pandas.Index.sort_values
pyspark.pandas.Index.shift
pyspark.pandas.Index.append
pyspark.pandas.Index.intersection
pyspark.pandas.Index.union
pyspark.pandas.Index.difference
pyspark.pandas.Index.symmetric_difference
pyspark.pandas.Index.asof
pyspark.pandas.Index.isin
pyspark.pandas.CategoricalIndex
pyspark.pandas.CategoricalIndex.codes
pyspark.pandas.CategoricalIndex.categories
pyspark.pandas.CategoricalIndex.ordered
pyspark.pandas.CategoricalIndex.rename_categories
pyspark.pandas.CategoricalIndex.reorder_categories
pyspark.pandas.CategoricalIndex.add_categories
pyspark.pandas.CategoricalIndex.remove_categories
pyspark.pandas.CategoricalIndex.remove_unused_categories
pyspark.pandas.CategoricalIndex.set_categories
pyspark.pandas.CategoricalIndex.as_ordered
pyspark.pandas.CategoricalIndex.as_unordered
pyspark.pandas.CategoricalIndex.map
pyspark.pandas.CategoricalIndex.equals
pyspark.pandas.CategoricalIndex.max
pyspark.pandas.CategoricalIndex.min
pyspark.pandas.CategoricalIndex.tolist
pyspark.pandas.MultiIndex
pyspark.pandas.MultiIndex.from_arrays
pyspark.pandas.MultiIndex.from_tuples
pyspark.pandas.MultiIndex.from_product
pyspark.pandas.MultiIndex.from_frame
pyspark.pandas.MultiIndex.has_duplicates
pyspark.pandas.MultiIndex.hasnans
pyspark.pandas.MultiIndex.inferred_type
pyspark.pandas.MultiIndex.shape
pyspark.pandas.MultiIndex.names
pyspark.pandas.MultiIndex.ndim
pyspark.pandas.MultiIndex.empty
pyspark.pandas.MultiIndex.T
pyspark.pandas.MultiIndex.size
pyspark.pandas.MultiIndex.nlevels
pyspark.pandas.MultiIndex.levshape
pyspark.pandas.MultiIndex.values
pyspark.pandas.MultiIndex.dtypes
pyspark.pandas.MultiIndex.swaplevel
pyspark.pandas.MultiIndex.droplevel
pyspark.pandas.MultiIndex.fillna
pyspark.pandas.MultiIndex.dropna
pyspark.pandas.MultiIndex.equals
pyspark.pandas.MultiIndex.equal_levels
pyspark.pandas.MultiIndex.identical
pyspark.pandas.MultiIndex.insert
pyspark.pandas.MultiIndex.drop
pyspark.pandas.MultiIndex.copy
pyspark.pandas.MultiIndex.delete
pyspark.pandas.MultiIndex.rename
pyspark.pandas.MultiIndex.repeat
pyspark.pandas.MultiIndex.take
pyspark.pandas.MultiIndex.unique
pyspark.pandas.MultiIndex.min
pyspark.pandas.MultiIndex.max
pyspark.pandas.MultiIndex.value_counts
pyspark.pandas.MultiIndex.append
pyspark.pandas.MultiIndex.intersection
pyspark.pandas.MultiIndex.union
pyspark.pandas.MultiIndex.difference
pyspark.pandas.MultiIndex.symmetric_difference
pyspark.pandas.MultiIndex.astype
pyspark.pandas.MultiIndex.item
pyspark.pandas.MultiIndex.to_list
pyspark.pandas.MultiIndex.to_series
pyspark.pandas.MultiIndex.to_frame
pyspark.pandas.MultiIndex.view
pyspark.pandas.MultiIndex.to_numpy
pyspark.pandas.MultiIndex.spark.data_type
pyspark.pandas.MultiIndex.spark.column
pyspark.pandas.MultiIndex.spark.transform
pyspark.pandas.MultiIndex.sort_values
pyspark.pandas.DatetimeIndex
pyspark.pandas.DatetimeIndex.year
pyspark.pandas.DatetimeIndex.month
pyspark.pandas.DatetimeIndex.day
pyspark.pandas.DatetimeIndex.hour
pyspark.pandas.DatetimeIndex.minute
pyspark.pandas.DatetimeIndex.second
pyspark.pandas.DatetimeIndex.microsecond
pyspark.pandas.DatetimeIndex.isocalendar
pyspark.pandas.DatetimeIndex.dayofweek
pyspark.pandas.DatetimeIndex.day_of_week
pyspark.pandas.DatetimeIndex.weekday
pyspark.pandas.DatetimeIndex.dayofyear
pyspark.pandas.DatetimeIndex.day_of_year
pyspark.pandas.DatetimeIndex.quarter
pyspark.pandas.DatetimeIndex.is_month_start
pyspark.pandas.DatetimeIndex.is_month_end
pyspark.pandas.DatetimeIndex.is_quarter_start
pyspark.pandas.DatetimeIndex.is_quarter_end
pyspark.pandas.DatetimeIndex.is_year_start
pyspark.pandas.DatetimeIndex.is_year_end
pyspark.pandas.DatetimeIndex.is_leap_year
pyspark.pandas.DatetimeIndex.daysinmonth
pyspark.pandas.DatetimeIndex.days_in_month
pyspark.pandas.DatetimeIndex.indexer_between_time
pyspark.pandas.DatetimeIndex.indexer_at_time
pyspark.pandas.DatetimeIndex.normalize
pyspark.pandas.DatetimeIndex.strftime
pyspark.pandas.DatetimeIndex.round
pyspark.pandas.DatetimeIndex.floor
pyspark.pandas.DatetimeIndex.ceil
pyspark.pandas.DatetimeIndex.month_name
pyspark.pandas.DatetimeIndex.day_name
pyspark.pandas.TimedeltaIndex
pyspark.pandas.TimedeltaIndex.days
pyspark.pandas.TimedeltaIndex.seconds
pyspark.pandas.TimedeltaIndex.microseconds
Window
pyspark.pandas.window.Rolling.count
pyspark.pandas.window.Rolling.sum
pyspark.pandas.window.Rolling.min
pyspark.pandas.window.Rolling.max
pyspark.pandas.window.Rolling.mean
pyspark.pandas.window.Rolling.quantile
pyspark.pandas.window.Expanding.count
pyspark.pandas.window.Expanding.sum
pyspark.pandas.window.Expanding.min
pyspark.pandas.window.Expanding.max
pyspark.pandas.window.Expanding.mean
pyspark.pandas.window.Expanding.quantile
pyspark.pandas.window.ExponentialMoving.mean
GroupBy
pyspark.pandas.groupby.GroupBy.get_group
pyspark.pandas.groupby.GroupBy.apply
pyspark.pandas.groupby.GroupBy.transform
pyspark.pandas.groupby.DataFrameGroupBy.agg
pyspark.pandas.groupby.DataFrameGroupBy.aggregate
pyspark.pandas.groupby.GroupBy.all
pyspark.pandas.groupby.GroupBy.any
pyspark.pandas.groupby.GroupBy.count
pyspark.pandas.groupby.GroupBy.cumcount
pyspark.pandas.groupby.GroupBy.cummax
pyspark.pandas.groupby.GroupBy.cummin
pyspark.pandas.groupby.GroupBy.cumprod
pyspark.pandas.groupby.GroupBy.cumsum
pyspark.pandas.groupby.GroupBy.ewm
pyspark.pandas.groupby.GroupBy.filter
pyspark.pandas.groupby.GroupBy.first
pyspark.pandas.groupby.GroupBy.last
pyspark.pandas.groupby.GroupBy.max
pyspark.pandas.groupby.GroupBy.mean
pyspark.pandas.groupby.GroupBy.median
pyspark.pandas.groupby.GroupBy.min
pyspark.pandas.groupby.GroupBy.nth
pyspark.pandas.groupby.GroupBy.prod
pyspark.pandas.groupby.GroupBy.rank
pyspark.pandas.groupby.GroupBy.sem
pyspark.pandas.groupby.GroupBy.std
pyspark.pandas.groupby.GroupBy.sum
pyspark.pandas.groupby.GroupBy.var
pyspark.pandas.groupby.GroupBy.nunique
pyspark.pandas.groupby.GroupBy.quantile
pyspark.pandas.groupby.GroupBy.size
pyspark.pandas.groupby.GroupBy.diff
pyspark.pandas.groupby.GroupBy.idxmax
pyspark.pandas.groupby.GroupBy.idxmin
pyspark.pandas.groupby.GroupBy.fillna
pyspark.pandas.groupby.GroupBy.bfill
pyspark.pandas.groupby.GroupBy.ffill
pyspark.pandas.groupby.GroupBy.head
pyspark.pandas.groupby.GroupBy.shift
pyspark.pandas.groupby.GroupBy.tail
pyspark.pandas.groupby.DataFrameGroupBy.describe
pyspark.pandas.groupby.SeriesGroupBy.nsmallest
pyspark.pandas.groupby.SeriesGroupBy.nlargest
pyspark.pandas.groupby.SeriesGroupBy.value_counts
pyspark.pandas.groupby.SeriesGroupBy.unique
Resampling
pyspark.pandas.resample.Resampler.max
pyspark.pandas.resample.Resampler.mean
pyspark.pandas.resample.Resampler.min
pyspark.pandas.resample.Resampler.std
pyspark.pandas.resample.Resampler.sum
pyspark.pandas.resample.Resampler.var
Options and settings
pyspark.pandas.reset_option
pyspark.pandas.get_option
pyspark.pandas.set_option
pyspark.pandas.option_context
Machine Learning utilities
pyspark.pandas.mlflow.PythonModelWrapper
pyspark.pandas.mlflow.load_model
Extensions
pyspark.pandas.extensions.register_dataframe_accessor
pyspark.pandas.extensions.register_series_accessor
pyspark.pandas.extensions.register_index_accessor
Testing
pyspark.pandas.testing.assert_frame_equal
pyspark.pandas.testing.assert_series_equal
pyspark.pandas.testing.assert_index_equal
Structured Streaming
Core Classes
pyspark.sql.streaming.DataStreamReader
pyspark.sql.streaming.DataStreamWriter
pyspark.sql.streaming.StreamingQuery
pyspark.sql.streaming.StreamingQueryManager
pyspark.sql.streaming.StreamingQueryListener
Input/Output
pyspark.sql.streaming.DataStreamReader.csv
pyspark.sql.streaming.DataStreamReader.format
pyspark.sql.streaming.DataStreamReader.json
pyspark.sql.streaming.DataStreamReader.load
pyspark.sql.streaming.DataStreamReader.option
pyspark.sql.streaming.DataStreamReader.options
pyspark.sql.streaming.DataStreamReader.orc
pyspark.sql.streaming.DataStreamReader.parquet
pyspark.sql.streaming.DataStreamReader.schema
pyspark.sql.streaming.DataStreamReader.table
pyspark.sql.streaming.DataStreamReader.text
pyspark.sql.streaming.DataStreamWriter.foreach
pyspark.sql.streaming.DataStreamWriter.foreachBatch
pyspark.sql.streaming.DataStreamWriter.format
pyspark.sql.streaming.DataStreamWriter.option
pyspark.sql.streaming.DataStreamWriter.options
pyspark.sql.streaming.DataStreamWriter.outputMode
pyspark.sql.streaming.DataStreamWriter.partitionBy
pyspark.sql.streaming.DataStreamWriter.queryName
pyspark.sql.streaming.DataStreamWriter.start
pyspark.sql.streaming.DataStreamWriter.toTable
pyspark.sql.streaming.DataStreamWriter.trigger
Query Management
pyspark.sql.streaming.StreamingQuery.awaitTermination
pyspark.sql.streaming.StreamingQuery.exception
pyspark.sql.streaming.StreamingQuery.explain
pyspark.sql.streaming.StreamingQuery.id
pyspark.sql.streaming.StreamingQuery.isActive
pyspark.sql.streaming.StreamingQuery.lastProgress
pyspark.sql.streaming.StreamingQuery.name
pyspark.sql.streaming.StreamingQuery.processAllAvailable
pyspark.sql.streaming.StreamingQuery.recentProgress
pyspark.sql.streaming.StreamingQuery.runId
pyspark.sql.streaming.StreamingQuery.status
pyspark.sql.streaming.StreamingQuery.stop
pyspark.sql.streaming.StreamingQueryManager.active
pyspark.sql.streaming.StreamingQueryManager.addListener
pyspark.sql.streaming.StreamingQueryManager.awaitAnyTermination
pyspark.sql.streaming.StreamingQueryManager.get
pyspark.sql.streaming.StreamingQueryManager.removeListener
pyspark.sql.streaming.StreamingQueryManager.resetTerminated
MLlib (DataFrame-based)
Transformer
UnaryTransformer
Estimator
Model
Predictor
PredictionModel
Pipeline
PipelineModel
Param
Params
TypeConverters
Binarizer
BucketedRandomProjectionLSH
BucketedRandomProjectionLSHModel
Bucketizer
ChiSqSelector
ChiSqSelectorModel
CountVectorizer
CountVectorizerModel
DCT
ElementwiseProduct
FeatureHasher
HashingTF
IDF
IDFModel
Imputer
ImputerModel
IndexToString
Interaction
MaxAbsScaler
MaxAbsScalerModel
MinHashLSH
MinHashLSHModel
MinMaxScaler
MinMaxScalerModel
NGram
Normalizer
OneHotEncoder
OneHotEncoderModel
PCA
PCAModel
PolynomialExpansion
QuantileDiscretizer
RobustScaler
RobustScalerModel
RegexTokenizer
RFormula
RFormulaModel
SQLTransformer
StandardScaler
StandardScalerModel
StopWordsRemover
StringIndexer
StringIndexerModel
Tokenizer
UnivariateFeatureSelector
UnivariateFeatureSelectorModel
VarianceThresholdSelector
VarianceThresholdSelectorModel
VectorAssembler
VectorIndexer
VectorIndexerModel
VectorSizeHint
VectorSlicer
Word2Vec
Word2VecModel
LinearSVC
LinearSVCModel
LinearSVCSummary
LinearSVCTrainingSummary
LogisticRegression
LogisticRegressionModel
LogisticRegressionSummary
LogisticRegressionTrainingSummary
BinaryLogisticRegressionSummary
BinaryLogisticRegressionTrainingSummary
DecisionTreeClassifier
DecisionTreeClassificationModel
GBTClassifier
GBTClassificationModel
RandomForestClassifier
RandomForestClassificationModel
RandomForestClassificationSummary
RandomForestClassificationTrainingSummary
BinaryRandomForestClassificationSummary
BinaryRandomForestClassificationTrainingSummary
NaiveBayes
NaiveBayesModel
MultilayerPerceptronClassifier
MultilayerPerceptronClassificationModel
MultilayerPerceptronClassificationSummary
MultilayerPerceptronClassificationTrainingSummary
OneVsRest
OneVsRestModel
FMClassifier
FMClassificationModel
FMClassificationSummary
FMClassificationTrainingSummary
BisectingKMeans
BisectingKMeansModel
BisectingKMeansSummary
KMeans
KMeansModel
KMeansSummary
GaussianMixture
GaussianMixtureModel
GaussianMixtureSummary
LDA
LDAModel
LocalLDAModel
DistributedLDAModel
PowerIterationClustering
pyspark.ml.functions.array_to_vector
pyspark.ml.functions.vector_to_array
pyspark.ml.functions.predict_batch_udf
Vector
DenseVector
SparseVector
Vectors
Matrix
DenseMatrix
SparseMatrix
Matrices
ALS
ALSModel
AFTSurvivalRegression
AFTSurvivalRegressionModel
DecisionTreeRegressor
DecisionTreeRegressionModel
GBTRegressor
GBTRegressionModel
GeneralizedLinearRegression
GeneralizedLinearRegressionModel
GeneralizedLinearRegressionSummary
GeneralizedLinearRegressionTrainingSummary
IsotonicRegression
IsotonicRegressionModel
LinearRegression
LinearRegressionModel
LinearRegressionSummary
LinearRegressionTrainingSummary
RandomForestRegressor
RandomForestRegressionModel
FMRegressor
FMRegressionModel
ChiSquareTest
Correlation
KolmogorovSmirnovTest
MultivariateGaussian
Summarizer
SummaryBuilder
ParamGridBuilder
CrossValidator
CrossValidatorModel
TrainValidationSplit
TrainValidationSplitModel
Evaluator
BinaryClassificationEvaluator
RegressionEvaluator
MulticlassClassificationEvaluator
MultilabelClassificationEvaluator
ClusteringEvaluator
RankingEvaluator
FPGrowth
FPGrowthModel
PrefixSpan
ImageSchema
_ImageSchema
TorchDistributor
DeepspeedTorchDistributor
BaseReadWrite
DefaultParamsReadable
DefaultParamsReader
DefaultParamsWritable
DefaultParamsWriter
GeneralMLWriter
HasTrainingSummary
Identifiable
MLReadable
MLReader
MLWritable
MLWriter
MLlib (DataFrame-based) for Spark Connect
Transformer
Estimator
Model
Evaluator
Pipeline
PipelineModel
MaxAbsScaler
MaxAbsScalerModel
StandardScaler
StandardScalerModel
ArrayAssembler
LogisticRegression
LogisticRegressionModel
pyspark.ml.connect.functions.array_to_vector
pyspark.ml.connect.functions.vector_to_array
CrossValidator
CrossValidatorModel
RegressionEvaluator
BinaryClassificationEvaluator
MulticlassClassificationEvaluator
ParamsReadWrite
CoreModelReadWrite
MetaAlgorithmReadWrite
Spark Streaming (Legacy)
pyspark.streaming.StreamingContext
pyspark.streaming.DStream
pyspark.streaming.StreamingContext.addStreamingListener
pyspark.streaming.StreamingContext.awaitTermination
pyspark.streaming.StreamingContext.awaitTerminationOrTimeout
pyspark.streaming.StreamingContext.checkpoint
pyspark.streaming.StreamingContext.getActive
pyspark.streaming.StreamingContext.getActiveOrCreate
pyspark.streaming.StreamingContext.getOrCreate
pyspark.streaming.StreamingContext.remember
pyspark.streaming.StreamingContext.sparkContext
pyspark.streaming.StreamingContext.start
pyspark.streaming.StreamingContext.stop
pyspark.streaming.StreamingContext.transform
pyspark.streaming.StreamingContext.union
pyspark.streaming.StreamingContext.binaryRecordsStream
pyspark.streaming.StreamingContext.queueStream
pyspark.streaming.StreamingContext.socketTextStream
pyspark.streaming.StreamingContext.textFileStream
pyspark.streaming.DStream.pprint
pyspark.streaming.DStream.saveAsTextFiles
pyspark.streaming.DStream.cache
pyspark.streaming.DStream.checkpoint
pyspark.streaming.DStream.cogroup
pyspark.streaming.DStream.combineByKey
pyspark.streaming.DStream.context
pyspark.streaming.DStream.count
pyspark.streaming.DStream.countByValue
pyspark.streaming.DStream.countByValueAndWindow
pyspark.streaming.DStream.countByWindow
pyspark.streaming.DStream.filter
pyspark.streaming.DStream.flatMap
pyspark.streaming.DStream.flatMapValues
pyspark.streaming.DStream.foreachRDD
pyspark.streaming.DStream.fullOuterJoin
pyspark.streaming.DStream.glom
pyspark.streaming.DStream.groupByKey
pyspark.streaming.DStream.groupByKeyAndWindow
pyspark.streaming.DStream.join
pyspark.streaming.DStream.leftOuterJoin
pyspark.streaming.DStream.map
pyspark.streaming.DStream.mapPartitions
pyspark.streaming.DStream.mapPartitionsWithIndex
pyspark.streaming.DStream.mapValues
pyspark.streaming.DStream.partitionBy
pyspark.streaming.DStream.persist
pyspark.streaming.DStream.reduce
pyspark.streaming.DStream.reduceByKey
pyspark.streaming.DStream.reduceByKeyAndWindow
pyspark.streaming.DStream.reduceByWindow
pyspark.streaming.DStream.repartition
pyspark.streaming.DStream.rightOuterJoin
pyspark.streaming.DStream.slice
pyspark.streaming.DStream.transform
pyspark.streaming.DStream.transformWith
pyspark.streaming.DStream.union
pyspark.streaming.DStream.updateStateByKey
pyspark.streaming.DStream.window
pyspark.streaming.kinesis.KinesisUtils.createStream
pyspark.streaming.kinesis.InitialPositionInStream.LATEST
pyspark.streaming.kinesis.InitialPositionInStream.TRIM_HORIZON
MLlib (RDD-based)
LogisticRegressionModel
LogisticRegressionWithSGD
LogisticRegressionWithLBFGS
SVMModel
SVMWithSGD
NaiveBayesModel
NaiveBayes
StreamingLogisticRegressionWithSGD
BisectingKMeansModel
BisectingKMeans
KMeansModel
KMeans
GaussianMixtureModel
GaussianMixture
PowerIterationClusteringModel
PowerIterationClustering
StreamingKMeans
StreamingKMeansModel
LDA
LDAModel
BinaryClassificationMetrics
RegressionMetrics
MulticlassMetrics
RankingMetrics
Normalizer
StandardScalerModel
StandardScaler
HashingTF
IDFModel
IDF
Word2Vec
Word2VecModel
ChiSqSelector
ChiSqSelectorModel
ElementwiseProduct
FPGrowth
FPGrowthModel
PrefixSpan
PrefixSpanModel
Vector
DenseVector
SparseVector
Vectors
Matrix
DenseMatrix
SparseMatrix
Matrices
QRDecomposition
BlockMatrix
CoordinateMatrix
DistributedMatrix
IndexedRow
IndexedRowMatrix
MatrixEntry
RowMatrix
SingularValueDecomposition
RandomRDDs
MatrixFactorizationModel
ALS
Rating
LabeledPoint
LinearModel
LinearRegressionModel
LinearRegressionWithSGD
RidgeRegressionModel
RidgeRegressionWithSGD
LassoModel
LassoWithSGD
IsotonicRegressionModel
IsotonicRegression
StreamingLinearAlgorithm
StreamingLinearRegressionWithSGD
Statistics
MultivariateStatisticalSummary
ChiSqTestResult
MultivariateGaussian
KernelDensity
ChiSqTestResult
KolmogorovSmirnovTestResult
DecisionTreeModel
DecisionTree
RandomForestModel
RandomForest
GradientBoostedTreesModel
GradientBoostedTrees
JavaLoader
JavaSaveable
LinearDataGenerator
Loader
MLUtils
Saveable
Spark Core
pyspark.SparkContext
pyspark.RDD
pyspark.Broadcast
pyspark.Accumulator
pyspark.AccumulatorParam
pyspark.SparkConf
pyspark.SparkFiles
pyspark.StorageLevel
pyspark.TaskContext
pyspark.RDDBarrier
pyspark.BarrierTaskContext
pyspark.BarrierTaskInfo
pyspark.InheritableThread
pyspark.util.VersionUtils
pyspark.SparkContext.PACKAGE_EXTENSIONS
pyspark.SparkContext.accumulator
pyspark.SparkContext.addArchive
pyspark.SparkContext.addFile
pyspark.SparkContext.addJobTag
pyspark.SparkContext.addPyFile
pyspark.SparkContext.applicationId
pyspark.SparkContext.binaryFiles
pyspark.SparkContext.binaryRecords
pyspark.SparkContext.broadcast
pyspark.SparkContext.cancelAllJobs
pyspark.SparkContext.cancelJobGroup
pyspark.SparkContext.cancelJobsWithTag
pyspark.SparkContext.clearJobTags
pyspark.SparkContext.defaultMinPartitions
pyspark.SparkContext.defaultParallelism
pyspark.SparkContext.dump_profiles
pyspark.SparkContext.emptyRDD
pyspark.SparkContext.getCheckpointDir
pyspark.SparkContext.getConf
pyspark.SparkContext.getJobTags
pyspark.SparkContext.getLocalProperty
pyspark.SparkContext.getOrCreate
pyspark.SparkContext.hadoopFile
pyspark.SparkContext.hadoopRDD
pyspark.SparkContext.listArchives
pyspark.SparkContext.listFiles
pyspark.SparkContext.newAPIHadoopFile
pyspark.SparkContext.newAPIHadoopRDD
pyspark.SparkContext.parallelize
pyspark.SparkContext.pickleFile
pyspark.SparkContext.range
pyspark.SparkContext.resources
pyspark.SparkContext.removeJobTag
pyspark.SparkContext.runJob
pyspark.SparkContext.sequenceFile
pyspark.SparkContext.setCheckpointDir
pyspark.SparkContext.setInterruptOnCancel
pyspark.SparkContext.setJobDescription
pyspark.SparkContext.setJobGroup
pyspark.SparkContext.setLocalProperty
pyspark.SparkContext.setLogLevel
pyspark.SparkContext.setSystemProperty
pyspark.SparkContext.show_profiles
pyspark.SparkContext.sparkUser
pyspark.SparkContext.startTime
pyspark.SparkContext.statusTracker
pyspark.SparkContext.stop
pyspark.SparkContext.textFile
pyspark.SparkContext.uiWebUrl
pyspark.SparkContext.union
pyspark.SparkContext.version
pyspark.SparkContext.wholeTextFiles
pyspark.RDD.aggregate
pyspark.RDD.aggregateByKey
pyspark.RDD.barrier
pyspark.RDD.cache
pyspark.RDD.cartesian
pyspark.RDD.checkpoint
pyspark.RDD.cleanShuffleDependencies
pyspark.RDD.coalesce
pyspark.RDD.cogroup
pyspark.RDD.collect
pyspark.RDD.collectAsMap
pyspark.RDD.collectWithJobGroup
pyspark.RDD.combineByKey
pyspark.RDD.context
pyspark.RDD.count
pyspark.RDD.countApprox
pyspark.RDD.countApproxDistinct
pyspark.RDD.countByKey
pyspark.RDD.countByValue
pyspark.RDD.distinct
pyspark.RDD.filter
pyspark.RDD.first
pyspark.RDD.flatMap
pyspark.RDD.flatMapValues
pyspark.RDD.fold
pyspark.RDD.foldByKey
pyspark.RDD.foreach
pyspark.RDD.foreachPartition
pyspark.RDD.fullOuterJoin
pyspark.RDD.getCheckpointFile
pyspark.RDD.getNumPartitions
pyspark.RDD.getResourceProfile
pyspark.RDD.getStorageLevel
pyspark.RDD.glom
pyspark.RDD.groupBy
pyspark.RDD.groupByKey
pyspark.RDD.groupWith
pyspark.RDD.histogram
pyspark.RDD.id
pyspark.RDD.intersection
pyspark.RDD.isCheckpointed
pyspark.RDD.isEmpty
pyspark.RDD.isLocallyCheckpointed
pyspark.RDD.join
pyspark.RDD.keyBy
pyspark.RDD.keys
pyspark.RDD.leftOuterJoin
pyspark.RDD.localCheckpoint
pyspark.RDD.lookup
pyspark.RDD.map
pyspark.RDD.mapPartitions
pyspark.RDD.mapPartitionsWithIndex
pyspark.RDD.mapPartitionsWithSplit
pyspark.RDD.mapValues
pyspark.RDD.max
pyspark.RDD.mean
pyspark.RDD.meanApprox
pyspark.RDD.min
pyspark.RDD.name
pyspark.RDD.partitionBy
pyspark.RDD.persist
pyspark.RDD.pipe
pyspark.RDD.randomSplit
pyspark.RDD.reduce
pyspark.RDD.reduceByKey
pyspark.RDD.reduceByKeyLocally
pyspark.RDD.repartition
pyspark.RDD.repartitionAndSortWithinPartitions
pyspark.RDD.rightOuterJoin
pyspark.RDD.sample
pyspark.RDD.sampleByKey
pyspark.RDD.sampleStdev
pyspark.RDD.sampleVariance
pyspark.RDD.saveAsHadoopDataset
pyspark.RDD.saveAsHadoopFile
pyspark.RDD.saveAsNewAPIHadoopDataset
pyspark.RDD.saveAsNewAPIHadoopFile
pyspark.RDD.saveAsPickleFile
pyspark.RDD.saveAsSequenceFile
pyspark.RDD.saveAsTextFile
pyspark.RDD.setName
pyspark.RDD.sortBy
pyspark.RDD.sortByKey
pyspark.RDD.stats
pyspark.RDD.stdev
pyspark.RDD.subtract
pyspark.RDD.subtractByKey
pyspark.RDD.sum
pyspark.RDD.sumApprox
pyspark.RDD.take
pyspark.RDD.takeOrdered
pyspark.RDD.takeSample
pyspark.RDD.toDebugString
pyspark.RDD.toLocalIterator
pyspark.RDD.top
pyspark.RDD.treeAggregate
pyspark.RDD.treeReduce
pyspark.RDD.union
pyspark.RDD.unpersist
pyspark.RDD.values
pyspark.RDD.variance
pyspark.RDD.withResources
pyspark.RDD.zip
pyspark.RDD.zipWithIndex
pyspark.RDD.zipWithUniqueId
pyspark.Broadcast.destroy
pyspark.Broadcast.dump
pyspark.Broadcast.load
pyspark.Broadcast.load_from_path
pyspark.Broadcast.unpersist
pyspark.Broadcast.value
pyspark.Accumulator.add
pyspark.Accumulator.value
pyspark.AccumulatorParam.addInPlace
pyspark.AccumulatorParam.zero
pyspark.inheritable_thread_target
pyspark.SparkConf.contains
pyspark.SparkConf.get
pyspark.SparkConf.getAll
pyspark.SparkConf.set
pyspark.SparkConf.setAll
pyspark.SparkConf.setAppName
pyspark.SparkConf.setExecutorEnv
pyspark.SparkConf.setIfMissing
pyspark.SparkConf.setMaster
pyspark.SparkConf.setSparkHome
pyspark.SparkConf.toDebugString
pyspark.SparkFiles.get
pyspark.SparkFiles.getRootDirectory
pyspark.StorageLevel.DISK_ONLY
pyspark.StorageLevel.DISK_ONLY_2
pyspark.StorageLevel.DISK_ONLY_3
pyspark.StorageLevel.MEMORY_AND_DISK
pyspark.StorageLevel.MEMORY_AND_DISK_2
pyspark.StorageLevel.MEMORY_AND_DISK_DESER
pyspark.StorageLevel.MEMORY_ONLY
pyspark.StorageLevel.MEMORY_ONLY_2
pyspark.StorageLevel.OFF_HEAP
pyspark.TaskContext.attemptNumber
pyspark.TaskContext.cpus
pyspark.TaskContext.get
pyspark.TaskContext.getLocalProperty
pyspark.TaskContext.partitionId
pyspark.TaskContext.resources
pyspark.TaskContext.stageId
pyspark.TaskContext.taskAttemptId
pyspark.RDDBarrier.mapPartitions
pyspark.RDDBarrier.mapPartitionsWithIndex
pyspark.BarrierTaskContext.allGather
pyspark.BarrierTaskContext.attemptNumber
pyspark.BarrierTaskContext.barrier
pyspark.BarrierTaskContext.cpus
pyspark.BarrierTaskContext.get
pyspark.BarrierTaskContext.getLocalProperty
pyspark.BarrierTaskContext.getTaskInfos
pyspark.BarrierTaskContext.partitionId
pyspark.BarrierTaskContext.resources
pyspark.BarrierTaskContext.stageId
pyspark.BarrierTaskContext.taskAttemptId
pyspark.util.VersionUtils.majorMinorVersion
Resource Management
pyspark.resource.ResourceInformation
pyspark.resource.ResourceProfile
pyspark.resource.ResourceProfileBuilder
pyspark.resource.ExecutorResourceRequest
pyspark.resource.ExecutorResourceRequests
pyspark.resource.TaskResourceRequest
pyspark.resource.TaskResourceRequests
Errors
pyspark.errors.AnalysisException
pyspark.errors.ArithmeticException
pyspark.errors.ArrayIndexOutOfBoundsException
pyspark.errors.DateTimeException
pyspark.errors.IllegalArgumentException
pyspark.errors.NumberFormatException
pyspark.errors.ParseException
pyspark.errors.PySparkAssertionError
pyspark.errors.PySparkAttributeError
pyspark.errors.PySparkException
pyspark.errors.PySparkKeyError
pyspark.errors.PySparkNotImplementedError
pyspark.errors.PySparkPicklingError
pyspark.errors.PySparkRuntimeError
pyspark.errors.PySparkTypeError
pyspark.errors.PySparkValueError
pyspark.errors.PySparkImportError
pyspark.errors.PySparkIndexError
pyspark.errors.PythonException
pyspark.errors.QueryContext
pyspark.errors.QueryContextType
pyspark.errors.QueryExecutionException
pyspark.errors.RetriesExceeded
pyspark.errors.SessionNotSameException
pyspark.errors.SparkRuntimeException
pyspark.errors.SparkUpgradeException
pyspark.errors.SparkNoSuchElementException
pyspark.errors.StreamingQueryException
pyspark.errors.TempTableAlreadyExistsException
pyspark.errors.UnknownException
pyspark.errors.UnsupportedOperationException
pyspark.errors.PySparkException.getErrorClass
pyspark.errors.PySparkException.getMessage
pyspark.errors.PySparkException.getMessageParameters
pyspark.errors.PySparkException.getQueryContext
pyspark.errors.PySparkException.getSqlState
Testing
pyspark.testing.assertDataFrameEqual
pyspark.testing.assertSchemaEqual
API Reference
Spark Core
pyspark.SparkContext.PACKAGE_EXTENSIONS
pyspark.SparkContext.PACKAGE_EXTENSIONS
#
SparkContext.PACKAGE_EXTENSIONS = ('.zip', '.egg', '.jar')
#
Show Source