sqlglot.dialects.clickhouse
1from __future__ import annotations 2 3import typing as t 4import datetime 5 6from sqlglot import exp, generator, parser, tokens 7from sqlglot.dialects.dialect import ( 8 Dialect, 9 NormalizationStrategy, 10 arg_max_or_min_no_count, 11 build_date_delta, 12 build_formatted_time, 13 inline_array_sql, 14 json_extract_segments, 15 json_path_key_only_name, 16 no_pivot_sql, 17 build_json_extract_path, 18 rename_func, 19 sha256_sql, 20 var_map_sql, 21 timestamptrunc_sql, 22 unit_to_var, 23 trim_sql, 24) 25from sqlglot.generator import Generator 26from sqlglot.helper import is_int, seq_get 27from sqlglot.tokens import Token, TokenType 28 29DATEΤΙΜΕ_DELTA = t.Union[exp.DateAdd, exp.DateDiff, exp.DateSub, exp.TimestampSub, exp.TimestampAdd] 30 31 32def _build_date_format(args: t.List) -> exp.TimeToStr: 33 expr = build_formatted_time(exp.TimeToStr, "clickhouse")(args) 34 35 timezone = seq_get(args, 2) 36 if timezone: 37 expr.set("zone", timezone) 38 39 return expr 40 41 42def _unix_to_time_sql(self: ClickHouse.Generator, expression: exp.UnixToTime) -> str: 43 scale = expression.args.get("scale") 44 timestamp = expression.this 45 46 if scale in (None, exp.UnixToTime.SECONDS): 47 return self.func("fromUnixTimestamp", exp.cast(timestamp, exp.DataType.Type.BIGINT)) 48 if scale == exp.UnixToTime.MILLIS: 49 return self.func("fromUnixTimestamp64Milli", exp.cast(timestamp, exp.DataType.Type.BIGINT)) 50 if scale == exp.UnixToTime.MICROS: 51 return self.func("fromUnixTimestamp64Micro", exp.cast(timestamp, exp.DataType.Type.BIGINT)) 52 if scale == exp.UnixToTime.NANOS: 53 return self.func("fromUnixTimestamp64Nano", exp.cast(timestamp, exp.DataType.Type.BIGINT)) 54 55 return self.func( 56 "fromUnixTimestamp", 57 exp.cast( 58 exp.Div(this=timestamp, expression=exp.func("POW", 10, scale)), exp.DataType.Type.BIGINT 59 ), 60 ) 61 62 63def _lower_func(sql: str) -> str: 64 index = sql.index("(") 65 return sql[:index].lower() + sql[index:] 66 67 68def _quantile_sql(self: ClickHouse.Generator, expression: exp.Quantile) -> str: 69 quantile = expression.args["quantile"] 70 args = f"({self.sql(expression, 'this')})" 71 72 if isinstance(quantile, exp.Array): 73 func = self.func("quantiles", *quantile) 74 else: 75 func = self.func("quantile", quantile) 76 77 return func + args 78 79 80def _build_count_if(args: t.List) -> exp.CountIf | exp.CombinedAggFunc: 81 if len(args) == 1: 82 return exp.CountIf(this=seq_get(args, 0)) 83 84 return exp.CombinedAggFunc(this="countIf", expressions=args, parts=("count", "If")) 85 86 87def _build_str_to_date(args: t.List) -> exp.Cast | exp.Anonymous: 88 if len(args) == 3: 89 return exp.Anonymous(this="STR_TO_DATE", expressions=args) 90 91 strtodate = exp.StrToDate.from_arg_list(args) 92 return exp.cast(strtodate, exp.DataType.build(exp.DataType.Type.DATETIME)) 93 94 95def _datetime_delta_sql(name: str) -> t.Callable[[Generator, DATEΤΙΜΕ_DELTA], str]: 96 def _delta_sql(self: Generator, expression: DATEΤΙΜΕ_DELTA) -> str: 97 if not expression.unit: 98 return rename_func(name)(self, expression) 99 100 return self.func( 101 name, 102 unit_to_var(expression), 103 expression.expression, 104 expression.this, 105 ) 106 107 return _delta_sql 108 109 110def _timestrtotime_sql(self: ClickHouse.Generator, expression: exp.TimeStrToTime): 111 tz = expression.args.get("zone") 112 datatype = exp.DataType.build(exp.DataType.Type.TIMESTAMP) 113 ts = expression.this 114 if tz: 115 # build a datatype that encodes the timezone as a type parameter, eg DateTime('America/Los_Angeles') 116 datatype = exp.DataType.build( 117 exp.DataType.Type.TIMESTAMPTZ, # Type.TIMESTAMPTZ maps to DateTime 118 expressions=[exp.DataTypeParam(this=tz)], 119 ) 120 121 if isinstance(ts, exp.Literal): 122 # strip the timezone out of the literal, eg turn '2020-01-01 12:13:14-08:00' into '2020-01-01 12:13:14' 123 # this is because Clickhouse encodes the timezone as a data type parameter and throws an error if it's part of the timestamp string 124 ts_without_tz = ( 125 datetime.datetime.fromisoformat(ts.name).replace(tzinfo=None).isoformat(sep=" ") 126 ) 127 ts = exp.Literal.string(ts_without_tz) 128 129 return self.sql(exp.cast(ts, datatype, dialect=self.dialect)) 130 131 132class ClickHouse(Dialect): 133 NORMALIZE_FUNCTIONS: bool | str = False 134 NULL_ORDERING = "nulls_are_last" 135 SUPPORTS_USER_DEFINED_TYPES = False 136 SAFE_DIVISION = True 137 LOG_BASE_FIRST: t.Optional[bool] = None 138 FORCE_EARLY_ALIAS_REF_EXPANSION = True 139 140 # https://github.com/ClickHouse/ClickHouse/issues/33935#issue-1112165779 141 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_SENSITIVE 142 143 UNESCAPED_SEQUENCES = { 144 "\\0": "\0", 145 } 146 147 CREATABLE_KIND_MAPPING = {"DATABASE": "SCHEMA"} 148 149 SET_OP_DISTINCT_BY_DEFAULT: t.Dict[t.Type[exp.Expression], t.Optional[bool]] = { 150 exp.Except: False, 151 exp.Intersect: False, 152 exp.Union: None, 153 } 154 155 class Tokenizer(tokens.Tokenizer): 156 COMMENTS = ["--", "#", "#!", ("/*", "*/")] 157 IDENTIFIERS = ['"', "`"] 158 STRING_ESCAPES = ["'", "\\"] 159 BIT_STRINGS = [("0b", "")] 160 HEX_STRINGS = [("0x", ""), ("0X", "")] 161 HEREDOC_STRINGS = ["$"] 162 163 KEYWORDS = { 164 **tokens.Tokenizer.KEYWORDS, 165 "ATTACH": TokenType.COMMAND, 166 "DATE32": TokenType.DATE32, 167 "DATETIME64": TokenType.DATETIME64, 168 "DICTIONARY": TokenType.DICTIONARY, 169 "ENUM8": TokenType.ENUM8, 170 "ENUM16": TokenType.ENUM16, 171 "FINAL": TokenType.FINAL, 172 "FIXEDSTRING": TokenType.FIXEDSTRING, 173 "FLOAT32": TokenType.FLOAT, 174 "FLOAT64": TokenType.DOUBLE, 175 "GLOBAL": TokenType.GLOBAL, 176 "INT256": TokenType.INT256, 177 "LOWCARDINALITY": TokenType.LOWCARDINALITY, 178 "MAP": TokenType.MAP, 179 "NESTED": TokenType.NESTED, 180 "SAMPLE": TokenType.TABLE_SAMPLE, 181 "TUPLE": TokenType.STRUCT, 182 "UINT128": TokenType.UINT128, 183 "UINT16": TokenType.USMALLINT, 184 "UINT256": TokenType.UINT256, 185 "UINT32": TokenType.UINT, 186 "UINT64": TokenType.UBIGINT, 187 "UINT8": TokenType.UTINYINT, 188 "IPV4": TokenType.IPV4, 189 "IPV6": TokenType.IPV6, 190 "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION, 191 "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION, 192 "SYSTEM": TokenType.COMMAND, 193 "PREWHERE": TokenType.PREWHERE, 194 } 195 KEYWORDS.pop("/*+") 196 197 SINGLE_TOKENS = { 198 **tokens.Tokenizer.SINGLE_TOKENS, 199 "$": TokenType.HEREDOC_STRING, 200 } 201 202 class Parser(parser.Parser): 203 # Tested in ClickHouse's playground, it seems that the following two queries do the same thing 204 # * select x from t1 union all select x from t2 limit 1; 205 # * select x from t1 union all (select x from t2 limit 1); 206 MODIFIERS_ATTACHED_TO_SET_OP = False 207 INTERVAL_SPANS = False 208 209 FUNCTIONS = { 210 **parser.Parser.FUNCTIONS, 211 "ANY": exp.AnyValue.from_arg_list, 212 "ARRAYSUM": exp.ArraySum.from_arg_list, 213 "COUNTIF": _build_count_if, 214 "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None), 215 "DATEADD": build_date_delta(exp.DateAdd, default_unit=None), 216 "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None), 217 "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None), 218 "DATE_FORMAT": _build_date_format, 219 "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None), 220 "DATESUB": build_date_delta(exp.DateSub, default_unit=None), 221 "FORMATDATETIME": _build_date_format, 222 "JSONEXTRACTSTRING": build_json_extract_path( 223 exp.JSONExtractScalar, zero_based_indexing=False 224 ), 225 "MAP": parser.build_var_map, 226 "MATCH": exp.RegexpLike.from_arg_list, 227 "RANDCANONICAL": exp.Rand.from_arg_list, 228 "STR_TO_DATE": _build_str_to_date, 229 "TUPLE": exp.Struct.from_arg_list, 230 "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None), 231 "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None), 232 "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None), 233 "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None), 234 "UNIQ": exp.ApproxDistinct.from_arg_list, 235 "XOR": lambda args: exp.Xor(expressions=args), 236 "MD5": exp.MD5Digest.from_arg_list, 237 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 238 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 239 } 240 241 AGG_FUNCTIONS = { 242 "count", 243 "min", 244 "max", 245 "sum", 246 "avg", 247 "any", 248 "stddevPop", 249 "stddevSamp", 250 "varPop", 251 "varSamp", 252 "corr", 253 "covarPop", 254 "covarSamp", 255 "entropy", 256 "exponentialMovingAverage", 257 "intervalLengthSum", 258 "kolmogorovSmirnovTest", 259 "mannWhitneyUTest", 260 "median", 261 "rankCorr", 262 "sumKahan", 263 "studentTTest", 264 "welchTTest", 265 "anyHeavy", 266 "anyLast", 267 "boundingRatio", 268 "first_value", 269 "last_value", 270 "argMin", 271 "argMax", 272 "avgWeighted", 273 "topK", 274 "topKWeighted", 275 "deltaSum", 276 "deltaSumTimestamp", 277 "groupArray", 278 "groupArrayLast", 279 "groupUniqArray", 280 "groupArrayInsertAt", 281 "groupArrayMovingAvg", 282 "groupArrayMovingSum", 283 "groupArraySample", 284 "groupBitAnd", 285 "groupBitOr", 286 "groupBitXor", 287 "groupBitmap", 288 "groupBitmapAnd", 289 "groupBitmapOr", 290 "groupBitmapXor", 291 "sumWithOverflow", 292 "sumMap", 293 "minMap", 294 "maxMap", 295 "skewSamp", 296 "skewPop", 297 "kurtSamp", 298 "kurtPop", 299 "uniq", 300 "uniqExact", 301 "uniqCombined", 302 "uniqCombined64", 303 "uniqHLL12", 304 "uniqTheta", 305 "quantile", 306 "quantiles", 307 "quantileExact", 308 "quantilesExact", 309 "quantileExactLow", 310 "quantilesExactLow", 311 "quantileExactHigh", 312 "quantilesExactHigh", 313 "quantileExactWeighted", 314 "quantilesExactWeighted", 315 "quantileTiming", 316 "quantilesTiming", 317 "quantileTimingWeighted", 318 "quantilesTimingWeighted", 319 "quantileDeterministic", 320 "quantilesDeterministic", 321 "quantileTDigest", 322 "quantilesTDigest", 323 "quantileTDigestWeighted", 324 "quantilesTDigestWeighted", 325 "quantileBFloat16", 326 "quantilesBFloat16", 327 "quantileBFloat16Weighted", 328 "quantilesBFloat16Weighted", 329 "simpleLinearRegression", 330 "stochasticLinearRegression", 331 "stochasticLogisticRegression", 332 "categoricalInformationValue", 333 "contingency", 334 "cramersV", 335 "cramersVBiasCorrected", 336 "theilsU", 337 "maxIntersections", 338 "maxIntersectionsPosition", 339 "meanZTest", 340 "quantileInterpolatedWeighted", 341 "quantilesInterpolatedWeighted", 342 "quantileGK", 343 "quantilesGK", 344 "sparkBar", 345 "sumCount", 346 "largestTriangleThreeBuckets", 347 "histogram", 348 "sequenceMatch", 349 "sequenceCount", 350 "windowFunnel", 351 "retention", 352 "uniqUpTo", 353 "sequenceNextNode", 354 "exponentialTimeDecayedAvg", 355 } 356 357 AGG_FUNCTIONS_SUFFIXES = [ 358 "If", 359 "Array", 360 "ArrayIf", 361 "Map", 362 "SimpleState", 363 "State", 364 "Merge", 365 "MergeState", 366 "ForEach", 367 "Distinct", 368 "OrDefault", 369 "OrNull", 370 "Resample", 371 "ArgMin", 372 "ArgMax", 373 ] 374 375 FUNC_TOKENS = { 376 *parser.Parser.FUNC_TOKENS, 377 TokenType.SET, 378 } 379 380 RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT} 381 382 ID_VAR_TOKENS = { 383 *parser.Parser.ID_VAR_TOKENS, 384 TokenType.LIKE, 385 } 386 387 AGG_FUNC_MAPPING = ( 388 lambda functions, suffixes: { 389 f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions 390 } 391 )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES) 392 393 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"} 394 395 FUNCTION_PARSERS = { 396 **parser.Parser.FUNCTION_PARSERS, 397 "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()), 398 "QUANTILE": lambda self: self._parse_quantile(), 399 "COLUMNS": lambda self: self._parse_columns(), 400 } 401 402 FUNCTION_PARSERS.pop("MATCH") 403 404 NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy() 405 NO_PAREN_FUNCTION_PARSERS.pop("ANY") 406 407 NO_PAREN_FUNCTIONS = parser.Parser.NO_PAREN_FUNCTIONS.copy() 408 NO_PAREN_FUNCTIONS.pop(TokenType.CURRENT_TIMESTAMP) 409 410 RANGE_PARSERS = { 411 **parser.Parser.RANGE_PARSERS, 412 TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN) 413 and self._parse_in(this, is_global=True), 414 } 415 416 # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to 417 # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler. 418 COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy() 419 COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER) 420 421 JOIN_KINDS = { 422 *parser.Parser.JOIN_KINDS, 423 TokenType.ANY, 424 TokenType.ASOF, 425 TokenType.ARRAY, 426 } 427 428 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 429 TokenType.ANY, 430 TokenType.ARRAY, 431 TokenType.FINAL, 432 TokenType.FORMAT, 433 TokenType.SETTINGS, 434 } 435 436 ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - { 437 TokenType.FORMAT, 438 } 439 440 LOG_DEFAULTS_TO_LN = True 441 442 QUERY_MODIFIER_PARSERS = { 443 **parser.Parser.QUERY_MODIFIER_PARSERS, 444 TokenType.SETTINGS: lambda self: ( 445 "settings", 446 self._advance() or self._parse_csv(self._parse_assignment), 447 ), 448 TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()), 449 } 450 451 CONSTRAINT_PARSERS = { 452 **parser.Parser.CONSTRAINT_PARSERS, 453 "INDEX": lambda self: self._parse_index_constraint(), 454 "CODEC": lambda self: self._parse_compress(), 455 } 456 457 ALTER_PARSERS = { 458 **parser.Parser.ALTER_PARSERS, 459 "REPLACE": lambda self: self._parse_alter_table_replace(), 460 } 461 462 SCHEMA_UNNAMED_CONSTRAINTS = { 463 *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS, 464 "INDEX", 465 } 466 467 PLACEHOLDER_PARSERS = { 468 **parser.Parser.PLACEHOLDER_PARSERS, 469 TokenType.L_BRACE: lambda self: self._parse_query_parameter(), 470 } 471 472 def _parse_types( 473 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 474 ) -> t.Optional[exp.Expression]: 475 dtype = super()._parse_types( 476 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 477 ) 478 if isinstance(dtype, exp.DataType) and dtype.args.get("nullable") is not True: 479 # Mark every type as non-nullable which is ClickHouse's default, unless it's 480 # already marked as nullable. This marker helps us transpile types from other 481 # dialects to ClickHouse, so that we can e.g. produce `CAST(x AS Nullable(String))` 482 # from `CAST(x AS TEXT)`. If there is a `NULL` value in `x`, the former would 483 # fail in ClickHouse without the `Nullable` type constructor. 484 dtype.set("nullable", False) 485 486 return dtype 487 488 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 489 index = self._index 490 this = self._parse_bitwise() 491 if self._match(TokenType.FROM): 492 self._retreat(index) 493 return super()._parse_extract() 494 495 # We return Anonymous here because extract and regexpExtract have different semantics, 496 # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g., 497 # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`. 498 # 499 # TODO: can we somehow convert the former into an equivalent `regexpExtract` call? 500 self._match(TokenType.COMMA) 501 return self.expression( 502 exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()] 503 ) 504 505 def _parse_assignment(self) -> t.Optional[exp.Expression]: 506 this = super()._parse_assignment() 507 508 if self._match(TokenType.PLACEHOLDER): 509 return self.expression( 510 exp.If, 511 this=this, 512 true=self._parse_assignment(), 513 false=self._match(TokenType.COLON) and self._parse_assignment(), 514 ) 515 516 return this 517 518 def _parse_query_parameter(self) -> t.Optional[exp.Expression]: 519 """ 520 Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier} 521 https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters 522 """ 523 this = self._parse_id_var() 524 self._match(TokenType.COLON) 525 kind = self._parse_types(check_func=False, allow_identifiers=False) or ( 526 self._match_text_seq("IDENTIFIER") and "Identifier" 527 ) 528 529 if not kind: 530 self.raise_error("Expecting a placeholder type or 'Identifier' for tables") 531 elif not self._match(TokenType.R_BRACE): 532 self.raise_error("Expecting }") 533 534 return self.expression(exp.Placeholder, this=this, kind=kind) 535 536 def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In: 537 this = super()._parse_in(this) 538 this.set("is_global", is_global) 539 return this 540 541 def _parse_table( 542 self, 543 schema: bool = False, 544 joins: bool = False, 545 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 546 parse_bracket: bool = False, 547 is_db_reference: bool = False, 548 parse_partition: bool = False, 549 ) -> t.Optional[exp.Expression]: 550 this = super()._parse_table( 551 schema=schema, 552 joins=joins, 553 alias_tokens=alias_tokens, 554 parse_bracket=parse_bracket, 555 is_db_reference=is_db_reference, 556 ) 557 558 if self._match(TokenType.FINAL): 559 this = self.expression(exp.Final, this=this) 560 561 return this 562 563 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 564 return super()._parse_position(haystack_first=True) 565 566 # https://clickhouse.com/docs/en/sql-reference/statements/select/with/ 567 def _parse_cte(self) -> exp.CTE: 568 # WITH <identifier> AS <subquery expression> 569 cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte) 570 571 if not cte: 572 # WITH <expression> AS <identifier> 573 cte = self.expression( 574 exp.CTE, 575 this=self._parse_assignment(), 576 alias=self._parse_table_alias(), 577 scalar=True, 578 ) 579 580 return cte 581 582 def _parse_join_parts( 583 self, 584 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 585 is_global = self._match(TokenType.GLOBAL) and self._prev 586 kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev 587 588 if kind_pre: 589 kind = self._match_set(self.JOIN_KINDS) and self._prev 590 side = self._match_set(self.JOIN_SIDES) and self._prev 591 return is_global, side, kind 592 593 return ( 594 is_global, 595 self._match_set(self.JOIN_SIDES) and self._prev, 596 self._match_set(self.JOIN_KINDS) and self._prev, 597 ) 598 599 def _parse_join( 600 self, skip_join_token: bool = False, parse_bracket: bool = False 601 ) -> t.Optional[exp.Join]: 602 join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True) 603 if join: 604 join.set("global", join.args.pop("method", None)) 605 606 # tbl ARRAY JOIN arr <-- this should be a `Column` reference, not a `Table` 607 # https://clickhouse.com/docs/en/sql-reference/statements/select/array-join 608 if join.kind == "ARRAY": 609 for table in join.find_all(exp.Table): 610 table.replace(table.to_column()) 611 612 return join 613 614 def _parse_function( 615 self, 616 functions: t.Optional[t.Dict[str, t.Callable]] = None, 617 anonymous: bool = False, 618 optional_parens: bool = True, 619 any_token: bool = False, 620 ) -> t.Optional[exp.Expression]: 621 expr = super()._parse_function( 622 functions=functions, 623 anonymous=anonymous, 624 optional_parens=optional_parens, 625 any_token=any_token, 626 ) 627 628 func = expr.this if isinstance(expr, exp.Window) else expr 629 630 # Aggregate functions can be split in 2 parts: <func_name><suffix> 631 parts = ( 632 self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None 633 ) 634 635 if parts: 636 anon_func: exp.Anonymous = t.cast(exp.Anonymous, func) 637 params = self._parse_func_params(anon_func) 638 639 kwargs = { 640 "this": anon_func.this, 641 "expressions": anon_func.expressions, 642 } 643 if parts[1]: 644 kwargs["parts"] = parts 645 exp_class: t.Type[exp.Expression] = ( 646 exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc 647 ) 648 else: 649 exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc 650 651 kwargs["exp_class"] = exp_class 652 if params: 653 kwargs["params"] = params 654 655 func = self.expression(**kwargs) 656 657 if isinstance(expr, exp.Window): 658 # The window's func was parsed as Anonymous in base parser, fix its 659 # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc 660 expr.set("this", func) 661 elif params: 662 # Params have blocked super()._parse_function() from parsing the following window 663 # (if that exists) as they're standing between the function call and the window spec 664 expr = self._parse_window(func) 665 else: 666 expr = func 667 668 return expr 669 670 def _parse_func_params( 671 self, this: t.Optional[exp.Func] = None 672 ) -> t.Optional[t.List[exp.Expression]]: 673 if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN): 674 return self._parse_csv(self._parse_lambda) 675 676 if self._match(TokenType.L_PAREN): 677 params = self._parse_csv(self._parse_lambda) 678 self._match_r_paren(this) 679 return params 680 681 return None 682 683 def _parse_quantile(self) -> exp.Quantile: 684 this = self._parse_lambda() 685 params = self._parse_func_params() 686 if params: 687 return self.expression(exp.Quantile, this=params[0], quantile=this) 688 return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5)) 689 690 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 691 return super()._parse_wrapped_id_vars(optional=True) 692 693 def _parse_primary_key( 694 self, wrapped_optional: bool = False, in_props: bool = False 695 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 696 return super()._parse_primary_key( 697 wrapped_optional=wrapped_optional or in_props, in_props=in_props 698 ) 699 700 def _parse_on_property(self) -> t.Optional[exp.Expression]: 701 index = self._index 702 if self._match_text_seq("CLUSTER"): 703 this = self._parse_id_var() 704 if this: 705 return self.expression(exp.OnCluster, this=this) 706 else: 707 self._retreat(index) 708 return None 709 710 def _parse_index_constraint( 711 self, kind: t.Optional[str] = None 712 ) -> exp.IndexColumnConstraint: 713 # INDEX name1 expr TYPE type1(args) GRANULARITY value 714 this = self._parse_id_var() 715 expression = self._parse_assignment() 716 717 index_type = self._match_text_seq("TYPE") and ( 718 self._parse_function() or self._parse_var() 719 ) 720 721 granularity = self._match_text_seq("GRANULARITY") and self._parse_term() 722 723 return self.expression( 724 exp.IndexColumnConstraint, 725 this=this, 726 expression=expression, 727 index_type=index_type, 728 granularity=granularity, 729 ) 730 731 def _parse_partition(self) -> t.Optional[exp.Partition]: 732 # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression 733 if not self._match(TokenType.PARTITION): 734 return None 735 736 if self._match_text_seq("ID"): 737 # Corresponds to the PARTITION ID <string_value> syntax 738 expressions: t.List[exp.Expression] = [ 739 self.expression(exp.PartitionId, this=self._parse_string()) 740 ] 741 else: 742 expressions = self._parse_expressions() 743 744 return self.expression(exp.Partition, expressions=expressions) 745 746 def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]: 747 partition = self._parse_partition() 748 749 if not partition or not self._match(TokenType.FROM): 750 return None 751 752 return self.expression( 753 exp.ReplacePartition, expression=partition, source=self._parse_table_parts() 754 ) 755 756 def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]: 757 if not self._match_text_seq("PROJECTION"): 758 return None 759 760 return self.expression( 761 exp.ProjectionDef, 762 this=self._parse_id_var(), 763 expression=self._parse_wrapped(self._parse_statement), 764 ) 765 766 def _parse_constraint(self) -> t.Optional[exp.Expression]: 767 return super()._parse_constraint() or self._parse_projection_def() 768 769 def _parse_alias( 770 self, this: t.Optional[exp.Expression], explicit: bool = False 771 ) -> t.Optional[exp.Expression]: 772 # In clickhouse "SELECT <expr> APPLY(...)" is a query modifier, 773 # so "APPLY" shouldn't be parsed as <expr>'s alias. However, "SELECT <expr> apply" is a valid alias 774 if self._match_pair(TokenType.APPLY, TokenType.L_PAREN, advance=False): 775 return this 776 777 return super()._parse_alias(this=this, explicit=explicit) 778 779 def _parse_expression(self) -> t.Optional[exp.Expression]: 780 this = super()._parse_expression() 781 782 # Clickhouse allows "SELECT <expr> [APPLY(func)] [...]]" modifier 783 while self._match_pair(TokenType.APPLY, TokenType.L_PAREN): 784 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 785 self._match(TokenType.R_PAREN) 786 787 return this 788 789 def _parse_columns(self) -> exp.Expression: 790 this: exp.Expression = self.expression(exp.Columns, this=self._parse_lambda()) 791 792 while self._next and self._match_text_seq(")", "APPLY", "("): 793 self._match(TokenType.R_PAREN) 794 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 795 return this 796 797 class Generator(generator.Generator): 798 QUERY_HINTS = False 799 STRUCT_DELIMITER = ("(", ")") 800 NVL2_SUPPORTED = False 801 TABLESAMPLE_REQUIRES_PARENS = False 802 TABLESAMPLE_SIZE_IS_ROWS = False 803 TABLESAMPLE_KEYWORDS = "SAMPLE" 804 LAST_DAY_SUPPORTS_DATE_PART = False 805 CAN_IMPLEMENT_ARRAY_ANY = True 806 SUPPORTS_TO_NUMBER = False 807 JOIN_HINTS = False 808 TABLE_HINTS = False 809 GROUPINGS_SEP = "" 810 SET_OP_MODIFIERS = False 811 SUPPORTS_TABLE_ALIAS_COLUMNS = False 812 VALUES_AS_TABLE = False 813 814 STRING_TYPE_MAPPING = { 815 exp.DataType.Type.CHAR: "String", 816 exp.DataType.Type.LONGBLOB: "String", 817 exp.DataType.Type.LONGTEXT: "String", 818 exp.DataType.Type.MEDIUMBLOB: "String", 819 exp.DataType.Type.MEDIUMTEXT: "String", 820 exp.DataType.Type.TINYBLOB: "String", 821 exp.DataType.Type.TINYTEXT: "String", 822 exp.DataType.Type.TEXT: "String", 823 exp.DataType.Type.VARBINARY: "String", 824 exp.DataType.Type.VARCHAR: "String", 825 } 826 827 SUPPORTED_JSON_PATH_PARTS = { 828 exp.JSONPathKey, 829 exp.JSONPathRoot, 830 exp.JSONPathSubscript, 831 } 832 833 TYPE_MAPPING = { 834 **generator.Generator.TYPE_MAPPING, 835 **STRING_TYPE_MAPPING, 836 exp.DataType.Type.ARRAY: "Array", 837 exp.DataType.Type.BOOLEAN: "Bool", 838 exp.DataType.Type.BIGINT: "Int64", 839 exp.DataType.Type.DATE32: "Date32", 840 exp.DataType.Type.DATETIME: "DateTime", 841 exp.DataType.Type.DATETIME64: "DateTime64", 842 exp.DataType.Type.TIMESTAMP: "DateTime", 843 exp.DataType.Type.TIMESTAMPTZ: "DateTime", 844 exp.DataType.Type.DOUBLE: "Float64", 845 exp.DataType.Type.ENUM: "Enum", 846 exp.DataType.Type.ENUM8: "Enum8", 847 exp.DataType.Type.ENUM16: "Enum16", 848 exp.DataType.Type.FIXEDSTRING: "FixedString", 849 exp.DataType.Type.FLOAT: "Float32", 850 exp.DataType.Type.INT: "Int32", 851 exp.DataType.Type.MEDIUMINT: "Int32", 852 exp.DataType.Type.INT128: "Int128", 853 exp.DataType.Type.INT256: "Int256", 854 exp.DataType.Type.LOWCARDINALITY: "LowCardinality", 855 exp.DataType.Type.MAP: "Map", 856 exp.DataType.Type.NESTED: "Nested", 857 exp.DataType.Type.SMALLINT: "Int16", 858 exp.DataType.Type.STRUCT: "Tuple", 859 exp.DataType.Type.TINYINT: "Int8", 860 exp.DataType.Type.UBIGINT: "UInt64", 861 exp.DataType.Type.UINT: "UInt32", 862 exp.DataType.Type.UINT128: "UInt128", 863 exp.DataType.Type.UINT256: "UInt256", 864 exp.DataType.Type.USMALLINT: "UInt16", 865 exp.DataType.Type.UTINYINT: "UInt8", 866 exp.DataType.Type.IPV4: "IPv4", 867 exp.DataType.Type.IPV6: "IPv6", 868 exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction", 869 exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction", 870 } 871 872 TRANSFORMS = { 873 **generator.Generator.TRANSFORMS, 874 exp.AnyValue: rename_func("any"), 875 exp.ApproxDistinct: rename_func("uniq"), 876 exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this), 877 exp.ArraySize: rename_func("LENGTH"), 878 exp.ArraySum: rename_func("arraySum"), 879 exp.ArgMax: arg_max_or_min_no_count("argMax"), 880 exp.ArgMin: arg_max_or_min_no_count("argMin"), 881 exp.Array: inline_array_sql, 882 exp.CastToStrType: rename_func("CAST"), 883 exp.CountIf: rename_func("countIf"), 884 exp.CompressColumnConstraint: lambda self, 885 e: f"CODEC({self.expressions(e, key='this', flat=True)})", 886 exp.ComputedColumnConstraint: lambda self, 887 e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}", 888 exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"), 889 exp.DateAdd: _datetime_delta_sql("DATE_ADD"), 890 exp.DateDiff: _datetime_delta_sql("DATE_DIFF"), 891 exp.DateStrToDate: rename_func("toDate"), 892 exp.DateSub: _datetime_delta_sql("DATE_SUB"), 893 exp.Explode: rename_func("arrayJoin"), 894 exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL", 895 exp.IsNan: rename_func("isNaN"), 896 exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False), 897 exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False), 898 exp.JSONPathKey: json_path_key_only_name, 899 exp.JSONPathRoot: lambda *_: "", 900 exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)), 901 exp.Nullif: rename_func("nullIf"), 902 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 903 exp.Pivot: no_pivot_sql, 904 exp.Quantile: _quantile_sql, 905 exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression), 906 exp.Rand: rename_func("randCanonical"), 907 exp.StartsWith: rename_func("startsWith"), 908 exp.StrPosition: lambda self, e: self.func( 909 "position", e.this, e.args.get("substr"), e.args.get("position") 910 ), 911 exp.TimeToStr: lambda self, e: self.func( 912 "formatDateTime", e.this, self.format_time(e), e.args.get("zone") 913 ), 914 exp.TimeStrToTime: _timestrtotime_sql, 915 exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"), 916 exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"), 917 exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)), 918 exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions), 919 exp.MD5Digest: rename_func("MD5"), 920 exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))), 921 exp.SHA: rename_func("SHA1"), 922 exp.SHA2: sha256_sql, 923 exp.UnixToTime: _unix_to_time_sql, 924 exp.TimestampTrunc: timestamptrunc_sql(zone=True), 925 exp.Trim: trim_sql, 926 exp.Variance: rename_func("varSamp"), 927 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 928 exp.Stddev: rename_func("stddevSamp"), 929 exp.Chr: rename_func("CHAR"), 930 exp.Lag: lambda self, e: self.func( 931 "lagInFrame", e.this, e.args.get("offset"), e.args.get("default") 932 ), 933 exp.Lead: lambda self, e: self.func( 934 "leadInFrame", e.this, e.args.get("offset"), e.args.get("default") 935 ), 936 } 937 938 PROPERTIES_LOCATION = { 939 **generator.Generator.PROPERTIES_LOCATION, 940 exp.OnCluster: exp.Properties.Location.POST_NAME, 941 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 942 exp.ToTableProperty: exp.Properties.Location.POST_NAME, 943 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 944 } 945 946 # There's no list in docs, but it can be found in Clickhouse code 947 # see `ClickHouse/src/Parsers/ParserCreate*.cpp` 948 ON_CLUSTER_TARGETS = { 949 "SCHEMA", # Transpiled CREATE SCHEMA may have OnCluster property set 950 "DATABASE", 951 "TABLE", 952 "VIEW", 953 "DICTIONARY", 954 "INDEX", 955 "FUNCTION", 956 "NAMED COLLECTION", 957 } 958 959 # https://clickhouse.com/docs/en/sql-reference/data-types/nullable 960 NON_NULLABLE_TYPES = { 961 exp.DataType.Type.ARRAY, 962 exp.DataType.Type.MAP, 963 exp.DataType.Type.STRUCT, 964 } 965 966 def strtodate_sql(self, expression: exp.StrToDate) -> str: 967 strtodate_sql = self.function_fallback_sql(expression) 968 969 if not isinstance(expression.parent, exp.Cast): 970 # StrToDate returns DATEs in other dialects (eg. postgres), so 971 # this branch aims to improve the transpilation to clickhouse 972 return f"CAST({strtodate_sql} AS DATE)" 973 974 return strtodate_sql 975 976 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 977 this = expression.this 978 979 if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"): 980 return self.sql(this) 981 982 return super().cast_sql(expression, safe_prefix=safe_prefix) 983 984 def trycast_sql(self, expression: exp.TryCast) -> str: 985 dtype = expression.to 986 if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True): 987 # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T) 988 dtype.set("nullable", True) 989 990 return super().cast_sql(expression) 991 992 def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str: 993 this = self.json_path_part(expression.this) 994 return str(int(this) + 1) if is_int(this) else this 995 996 def likeproperty_sql(self, expression: exp.LikeProperty) -> str: 997 return f"AS {self.sql(expression, 'this')}" 998 999 def _any_to_has( 1000 self, 1001 expression: exp.EQ | exp.NEQ, 1002 default: t.Callable[[t.Any], str], 1003 prefix: str = "", 1004 ) -> str: 1005 if isinstance(expression.left, exp.Any): 1006 arr = expression.left 1007 this = expression.right 1008 elif isinstance(expression.right, exp.Any): 1009 arr = expression.right 1010 this = expression.left 1011 else: 1012 return default(expression) 1013 1014 return prefix + self.func("has", arr.this.unnest(), this) 1015 1016 def eq_sql(self, expression: exp.EQ) -> str: 1017 return self._any_to_has(expression, super().eq_sql) 1018 1019 def neq_sql(self, expression: exp.NEQ) -> str: 1020 return self._any_to_has(expression, super().neq_sql, "NOT ") 1021 1022 def regexpilike_sql(self, expression: exp.RegexpILike) -> str: 1023 # Manually add a flag to make the search case-insensitive 1024 regex = self.func("CONCAT", "'(?i)'", expression.expression) 1025 return self.func("match", expression.this, regex) 1026 1027 def datatype_sql(self, expression: exp.DataType) -> str: 1028 # String is the standard ClickHouse type, every other variant is just an alias. 1029 # Additionally, any supplied length parameter will be ignored. 1030 # 1031 # https://clickhouse.com/docs/en/sql-reference/data-types/string 1032 if expression.this in self.STRING_TYPE_MAPPING: 1033 dtype = "String" 1034 else: 1035 dtype = super().datatype_sql(expression) 1036 1037 # This section changes the type to `Nullable(...)` if the following conditions hold: 1038 # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable` 1039 # and change their semantics 1040 # - It's not the key type of a `Map`. This is because ClickHouse enforces the following 1041 # constraint: "Type of Map key must be a type, that can be represented by integer or 1042 # String or FixedString (possibly LowCardinality) or UUID or IPv6" 1043 # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type 1044 parent = expression.parent 1045 nullable = expression.args.get("nullable") 1046 if nullable is True or ( 1047 nullable is None 1048 and not ( 1049 isinstance(parent, exp.DataType) 1050 and parent.is_type(exp.DataType.Type.MAP, check_nullable=True) 1051 and expression.index in (None, 0) 1052 ) 1053 and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True) 1054 ): 1055 dtype = f"Nullable({dtype})" 1056 1057 return dtype 1058 1059 def cte_sql(self, expression: exp.CTE) -> str: 1060 if expression.args.get("scalar"): 1061 this = self.sql(expression, "this") 1062 alias = self.sql(expression, "alias") 1063 return f"{this} AS {alias}" 1064 1065 return super().cte_sql(expression) 1066 1067 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 1068 return super().after_limit_modifiers(expression) + [ 1069 ( 1070 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 1071 if expression.args.get("settings") 1072 else "" 1073 ), 1074 ( 1075 self.seg("FORMAT ") + self.sql(expression, "format") 1076 if expression.args.get("format") 1077 else "" 1078 ), 1079 ] 1080 1081 def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str: 1082 params = self.expressions(expression, key="params", flat=True) 1083 return self.func(expression.name, *expression.expressions) + f"({params})" 1084 1085 def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str: 1086 return self.func(expression.name, *expression.expressions) 1087 1088 def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str: 1089 return self.anonymousaggfunc_sql(expression) 1090 1091 def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str: 1092 return self.parameterizedagg_sql(expression) 1093 1094 def placeholder_sql(self, expression: exp.Placeholder) -> str: 1095 return f"{{{expression.name}: {self.sql(expression, 'kind')}}}" 1096 1097 def oncluster_sql(self, expression: exp.OnCluster) -> str: 1098 return f"ON CLUSTER {self.sql(expression, 'this')}" 1099 1100 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1101 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 1102 exp.Properties.Location.POST_NAME 1103 ): 1104 this_name = self.sql( 1105 expression.this if isinstance(expression.this, exp.Schema) else expression, 1106 "this", 1107 ) 1108 this_properties = " ".join( 1109 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 1110 ) 1111 this_schema = self.schema_columns_sql(expression.this) 1112 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1113 1114 return f"{this_name}{self.sep()}{this_properties}{this_schema}" 1115 1116 return super().createable_sql(expression, locations) 1117 1118 def create_sql(self, expression: exp.Create) -> str: 1119 # The comment property comes last in CTAS statements, i.e. after the query 1120 query = expression.expression 1121 if isinstance(query, exp.Query): 1122 comment_prop = expression.find(exp.SchemaCommentProperty) 1123 if comment_prop: 1124 comment_prop.pop() 1125 query.replace(exp.paren(query)) 1126 else: 1127 comment_prop = None 1128 1129 create_sql = super().create_sql(expression) 1130 1131 comment_sql = self.sql(comment_prop) 1132 comment_sql = f" {comment_sql}" if comment_sql else "" 1133 1134 return f"{create_sql}{comment_sql}" 1135 1136 def prewhere_sql(self, expression: exp.PreWhere) -> str: 1137 this = self.indent(self.sql(expression, "this")) 1138 return f"{self.seg('PREWHERE')}{self.sep()}{this}" 1139 1140 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1141 this = self.sql(expression, "this") 1142 this = f" {this}" if this else "" 1143 expr = self.sql(expression, "expression") 1144 expr = f" {expr}" if expr else "" 1145 index_type = self.sql(expression, "index_type") 1146 index_type = f" TYPE {index_type}" if index_type else "" 1147 granularity = self.sql(expression, "granularity") 1148 granularity = f" GRANULARITY {granularity}" if granularity else "" 1149 1150 return f"INDEX{this}{expr}{index_type}{granularity}" 1151 1152 def partition_sql(self, expression: exp.Partition) -> str: 1153 return f"PARTITION {self.expressions(expression, flat=True)}" 1154 1155 def partitionid_sql(self, expression: exp.PartitionId) -> str: 1156 return f"ID {self.sql(expression.this)}" 1157 1158 def replacepartition_sql(self, expression: exp.ReplacePartition) -> str: 1159 return ( 1160 f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}" 1161 ) 1162 1163 def projectiondef_sql(self, expression: exp.ProjectionDef) -> str: 1164 return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"
133class ClickHouse(Dialect): 134 NORMALIZE_FUNCTIONS: bool | str = False 135 NULL_ORDERING = "nulls_are_last" 136 SUPPORTS_USER_DEFINED_TYPES = False 137 SAFE_DIVISION = True 138 LOG_BASE_FIRST: t.Optional[bool] = None 139 FORCE_EARLY_ALIAS_REF_EXPANSION = True 140 141 # https://github.com/ClickHouse/ClickHouse/issues/33935#issue-1112165779 142 NORMALIZATION_STRATEGY = NormalizationStrategy.CASE_SENSITIVE 143 144 UNESCAPED_SEQUENCES = { 145 "\\0": "\0", 146 } 147 148 CREATABLE_KIND_MAPPING = {"DATABASE": "SCHEMA"} 149 150 SET_OP_DISTINCT_BY_DEFAULT: t.Dict[t.Type[exp.Expression], t.Optional[bool]] = { 151 exp.Except: False, 152 exp.Intersect: False, 153 exp.Union: None, 154 } 155 156 class Tokenizer(tokens.Tokenizer): 157 COMMENTS = ["--", "#", "#!", ("/*", "*/")] 158 IDENTIFIERS = ['"', "`"] 159 STRING_ESCAPES = ["'", "\\"] 160 BIT_STRINGS = [("0b", "")] 161 HEX_STRINGS = [("0x", ""), ("0X", "")] 162 HEREDOC_STRINGS = ["$"] 163 164 KEYWORDS = { 165 **tokens.Tokenizer.KEYWORDS, 166 "ATTACH": TokenType.COMMAND, 167 "DATE32": TokenType.DATE32, 168 "DATETIME64": TokenType.DATETIME64, 169 "DICTIONARY": TokenType.DICTIONARY, 170 "ENUM8": TokenType.ENUM8, 171 "ENUM16": TokenType.ENUM16, 172 "FINAL": TokenType.FINAL, 173 "FIXEDSTRING": TokenType.FIXEDSTRING, 174 "FLOAT32": TokenType.FLOAT, 175 "FLOAT64": TokenType.DOUBLE, 176 "GLOBAL": TokenType.GLOBAL, 177 "INT256": TokenType.INT256, 178 "LOWCARDINALITY": TokenType.LOWCARDINALITY, 179 "MAP": TokenType.MAP, 180 "NESTED": TokenType.NESTED, 181 "SAMPLE": TokenType.TABLE_SAMPLE, 182 "TUPLE": TokenType.STRUCT, 183 "UINT128": TokenType.UINT128, 184 "UINT16": TokenType.USMALLINT, 185 "UINT256": TokenType.UINT256, 186 "UINT32": TokenType.UINT, 187 "UINT64": TokenType.UBIGINT, 188 "UINT8": TokenType.UTINYINT, 189 "IPV4": TokenType.IPV4, 190 "IPV6": TokenType.IPV6, 191 "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION, 192 "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION, 193 "SYSTEM": TokenType.COMMAND, 194 "PREWHERE": TokenType.PREWHERE, 195 } 196 KEYWORDS.pop("/*+") 197 198 SINGLE_TOKENS = { 199 **tokens.Tokenizer.SINGLE_TOKENS, 200 "$": TokenType.HEREDOC_STRING, 201 } 202 203 class Parser(parser.Parser): 204 # Tested in ClickHouse's playground, it seems that the following two queries do the same thing 205 # * select x from t1 union all select x from t2 limit 1; 206 # * select x from t1 union all (select x from t2 limit 1); 207 MODIFIERS_ATTACHED_TO_SET_OP = False 208 INTERVAL_SPANS = False 209 210 FUNCTIONS = { 211 **parser.Parser.FUNCTIONS, 212 "ANY": exp.AnyValue.from_arg_list, 213 "ARRAYSUM": exp.ArraySum.from_arg_list, 214 "COUNTIF": _build_count_if, 215 "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None), 216 "DATEADD": build_date_delta(exp.DateAdd, default_unit=None), 217 "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None), 218 "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None), 219 "DATE_FORMAT": _build_date_format, 220 "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None), 221 "DATESUB": build_date_delta(exp.DateSub, default_unit=None), 222 "FORMATDATETIME": _build_date_format, 223 "JSONEXTRACTSTRING": build_json_extract_path( 224 exp.JSONExtractScalar, zero_based_indexing=False 225 ), 226 "MAP": parser.build_var_map, 227 "MATCH": exp.RegexpLike.from_arg_list, 228 "RANDCANONICAL": exp.Rand.from_arg_list, 229 "STR_TO_DATE": _build_str_to_date, 230 "TUPLE": exp.Struct.from_arg_list, 231 "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None), 232 "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None), 233 "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None), 234 "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None), 235 "UNIQ": exp.ApproxDistinct.from_arg_list, 236 "XOR": lambda args: exp.Xor(expressions=args), 237 "MD5": exp.MD5Digest.from_arg_list, 238 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 239 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 240 } 241 242 AGG_FUNCTIONS = { 243 "count", 244 "min", 245 "max", 246 "sum", 247 "avg", 248 "any", 249 "stddevPop", 250 "stddevSamp", 251 "varPop", 252 "varSamp", 253 "corr", 254 "covarPop", 255 "covarSamp", 256 "entropy", 257 "exponentialMovingAverage", 258 "intervalLengthSum", 259 "kolmogorovSmirnovTest", 260 "mannWhitneyUTest", 261 "median", 262 "rankCorr", 263 "sumKahan", 264 "studentTTest", 265 "welchTTest", 266 "anyHeavy", 267 "anyLast", 268 "boundingRatio", 269 "first_value", 270 "last_value", 271 "argMin", 272 "argMax", 273 "avgWeighted", 274 "topK", 275 "topKWeighted", 276 "deltaSum", 277 "deltaSumTimestamp", 278 "groupArray", 279 "groupArrayLast", 280 "groupUniqArray", 281 "groupArrayInsertAt", 282 "groupArrayMovingAvg", 283 "groupArrayMovingSum", 284 "groupArraySample", 285 "groupBitAnd", 286 "groupBitOr", 287 "groupBitXor", 288 "groupBitmap", 289 "groupBitmapAnd", 290 "groupBitmapOr", 291 "groupBitmapXor", 292 "sumWithOverflow", 293 "sumMap", 294 "minMap", 295 "maxMap", 296 "skewSamp", 297 "skewPop", 298 "kurtSamp", 299 "kurtPop", 300 "uniq", 301 "uniqExact", 302 "uniqCombined", 303 "uniqCombined64", 304 "uniqHLL12", 305 "uniqTheta", 306 "quantile", 307 "quantiles", 308 "quantileExact", 309 "quantilesExact", 310 "quantileExactLow", 311 "quantilesExactLow", 312 "quantileExactHigh", 313 "quantilesExactHigh", 314 "quantileExactWeighted", 315 "quantilesExactWeighted", 316 "quantileTiming", 317 "quantilesTiming", 318 "quantileTimingWeighted", 319 "quantilesTimingWeighted", 320 "quantileDeterministic", 321 "quantilesDeterministic", 322 "quantileTDigest", 323 "quantilesTDigest", 324 "quantileTDigestWeighted", 325 "quantilesTDigestWeighted", 326 "quantileBFloat16", 327 "quantilesBFloat16", 328 "quantileBFloat16Weighted", 329 "quantilesBFloat16Weighted", 330 "simpleLinearRegression", 331 "stochasticLinearRegression", 332 "stochasticLogisticRegression", 333 "categoricalInformationValue", 334 "contingency", 335 "cramersV", 336 "cramersVBiasCorrected", 337 "theilsU", 338 "maxIntersections", 339 "maxIntersectionsPosition", 340 "meanZTest", 341 "quantileInterpolatedWeighted", 342 "quantilesInterpolatedWeighted", 343 "quantileGK", 344 "quantilesGK", 345 "sparkBar", 346 "sumCount", 347 "largestTriangleThreeBuckets", 348 "histogram", 349 "sequenceMatch", 350 "sequenceCount", 351 "windowFunnel", 352 "retention", 353 "uniqUpTo", 354 "sequenceNextNode", 355 "exponentialTimeDecayedAvg", 356 } 357 358 AGG_FUNCTIONS_SUFFIXES = [ 359 "If", 360 "Array", 361 "ArrayIf", 362 "Map", 363 "SimpleState", 364 "State", 365 "Merge", 366 "MergeState", 367 "ForEach", 368 "Distinct", 369 "OrDefault", 370 "OrNull", 371 "Resample", 372 "ArgMin", 373 "ArgMax", 374 ] 375 376 FUNC_TOKENS = { 377 *parser.Parser.FUNC_TOKENS, 378 TokenType.SET, 379 } 380 381 RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT} 382 383 ID_VAR_TOKENS = { 384 *parser.Parser.ID_VAR_TOKENS, 385 TokenType.LIKE, 386 } 387 388 AGG_FUNC_MAPPING = ( 389 lambda functions, suffixes: { 390 f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions 391 } 392 )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES) 393 394 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"} 395 396 FUNCTION_PARSERS = { 397 **parser.Parser.FUNCTION_PARSERS, 398 "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()), 399 "QUANTILE": lambda self: self._parse_quantile(), 400 "COLUMNS": lambda self: self._parse_columns(), 401 } 402 403 FUNCTION_PARSERS.pop("MATCH") 404 405 NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy() 406 NO_PAREN_FUNCTION_PARSERS.pop("ANY") 407 408 NO_PAREN_FUNCTIONS = parser.Parser.NO_PAREN_FUNCTIONS.copy() 409 NO_PAREN_FUNCTIONS.pop(TokenType.CURRENT_TIMESTAMP) 410 411 RANGE_PARSERS = { 412 **parser.Parser.RANGE_PARSERS, 413 TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN) 414 and self._parse_in(this, is_global=True), 415 } 416 417 # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to 418 # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler. 419 COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy() 420 COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER) 421 422 JOIN_KINDS = { 423 *parser.Parser.JOIN_KINDS, 424 TokenType.ANY, 425 TokenType.ASOF, 426 TokenType.ARRAY, 427 } 428 429 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 430 TokenType.ANY, 431 TokenType.ARRAY, 432 TokenType.FINAL, 433 TokenType.FORMAT, 434 TokenType.SETTINGS, 435 } 436 437 ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - { 438 TokenType.FORMAT, 439 } 440 441 LOG_DEFAULTS_TO_LN = True 442 443 QUERY_MODIFIER_PARSERS = { 444 **parser.Parser.QUERY_MODIFIER_PARSERS, 445 TokenType.SETTINGS: lambda self: ( 446 "settings", 447 self._advance() or self._parse_csv(self._parse_assignment), 448 ), 449 TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()), 450 } 451 452 CONSTRAINT_PARSERS = { 453 **parser.Parser.CONSTRAINT_PARSERS, 454 "INDEX": lambda self: self._parse_index_constraint(), 455 "CODEC": lambda self: self._parse_compress(), 456 } 457 458 ALTER_PARSERS = { 459 **parser.Parser.ALTER_PARSERS, 460 "REPLACE": lambda self: self._parse_alter_table_replace(), 461 } 462 463 SCHEMA_UNNAMED_CONSTRAINTS = { 464 *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS, 465 "INDEX", 466 } 467 468 PLACEHOLDER_PARSERS = { 469 **parser.Parser.PLACEHOLDER_PARSERS, 470 TokenType.L_BRACE: lambda self: self._parse_query_parameter(), 471 } 472 473 def _parse_types( 474 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 475 ) -> t.Optional[exp.Expression]: 476 dtype = super()._parse_types( 477 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 478 ) 479 if isinstance(dtype, exp.DataType) and dtype.args.get("nullable") is not True: 480 # Mark every type as non-nullable which is ClickHouse's default, unless it's 481 # already marked as nullable. This marker helps us transpile types from other 482 # dialects to ClickHouse, so that we can e.g. produce `CAST(x AS Nullable(String))` 483 # from `CAST(x AS TEXT)`. If there is a `NULL` value in `x`, the former would 484 # fail in ClickHouse without the `Nullable` type constructor. 485 dtype.set("nullable", False) 486 487 return dtype 488 489 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 490 index = self._index 491 this = self._parse_bitwise() 492 if self._match(TokenType.FROM): 493 self._retreat(index) 494 return super()._parse_extract() 495 496 # We return Anonymous here because extract and regexpExtract have different semantics, 497 # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g., 498 # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`. 499 # 500 # TODO: can we somehow convert the former into an equivalent `regexpExtract` call? 501 self._match(TokenType.COMMA) 502 return self.expression( 503 exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()] 504 ) 505 506 def _parse_assignment(self) -> t.Optional[exp.Expression]: 507 this = super()._parse_assignment() 508 509 if self._match(TokenType.PLACEHOLDER): 510 return self.expression( 511 exp.If, 512 this=this, 513 true=self._parse_assignment(), 514 false=self._match(TokenType.COLON) and self._parse_assignment(), 515 ) 516 517 return this 518 519 def _parse_query_parameter(self) -> t.Optional[exp.Expression]: 520 """ 521 Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier} 522 https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters 523 """ 524 this = self._parse_id_var() 525 self._match(TokenType.COLON) 526 kind = self._parse_types(check_func=False, allow_identifiers=False) or ( 527 self._match_text_seq("IDENTIFIER") and "Identifier" 528 ) 529 530 if not kind: 531 self.raise_error("Expecting a placeholder type or 'Identifier' for tables") 532 elif not self._match(TokenType.R_BRACE): 533 self.raise_error("Expecting }") 534 535 return self.expression(exp.Placeholder, this=this, kind=kind) 536 537 def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In: 538 this = super()._parse_in(this) 539 this.set("is_global", is_global) 540 return this 541 542 def _parse_table( 543 self, 544 schema: bool = False, 545 joins: bool = False, 546 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 547 parse_bracket: bool = False, 548 is_db_reference: bool = False, 549 parse_partition: bool = False, 550 ) -> t.Optional[exp.Expression]: 551 this = super()._parse_table( 552 schema=schema, 553 joins=joins, 554 alias_tokens=alias_tokens, 555 parse_bracket=parse_bracket, 556 is_db_reference=is_db_reference, 557 ) 558 559 if self._match(TokenType.FINAL): 560 this = self.expression(exp.Final, this=this) 561 562 return this 563 564 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 565 return super()._parse_position(haystack_first=True) 566 567 # https://clickhouse.com/docs/en/sql-reference/statements/select/with/ 568 def _parse_cte(self) -> exp.CTE: 569 # WITH <identifier> AS <subquery expression> 570 cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte) 571 572 if not cte: 573 # WITH <expression> AS <identifier> 574 cte = self.expression( 575 exp.CTE, 576 this=self._parse_assignment(), 577 alias=self._parse_table_alias(), 578 scalar=True, 579 ) 580 581 return cte 582 583 def _parse_join_parts( 584 self, 585 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 586 is_global = self._match(TokenType.GLOBAL) and self._prev 587 kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev 588 589 if kind_pre: 590 kind = self._match_set(self.JOIN_KINDS) and self._prev 591 side = self._match_set(self.JOIN_SIDES) and self._prev 592 return is_global, side, kind 593 594 return ( 595 is_global, 596 self._match_set(self.JOIN_SIDES) and self._prev, 597 self._match_set(self.JOIN_KINDS) and self._prev, 598 ) 599 600 def _parse_join( 601 self, skip_join_token: bool = False, parse_bracket: bool = False 602 ) -> t.Optional[exp.Join]: 603 join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True) 604 if join: 605 join.set("global", join.args.pop("method", None)) 606 607 # tbl ARRAY JOIN arr <-- this should be a `Column` reference, not a `Table` 608 # https://clickhouse.com/docs/en/sql-reference/statements/select/array-join 609 if join.kind == "ARRAY": 610 for table in join.find_all(exp.Table): 611 table.replace(table.to_column()) 612 613 return join 614 615 def _parse_function( 616 self, 617 functions: t.Optional[t.Dict[str, t.Callable]] = None, 618 anonymous: bool = False, 619 optional_parens: bool = True, 620 any_token: bool = False, 621 ) -> t.Optional[exp.Expression]: 622 expr = super()._parse_function( 623 functions=functions, 624 anonymous=anonymous, 625 optional_parens=optional_parens, 626 any_token=any_token, 627 ) 628 629 func = expr.this if isinstance(expr, exp.Window) else expr 630 631 # Aggregate functions can be split in 2 parts: <func_name><suffix> 632 parts = ( 633 self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None 634 ) 635 636 if parts: 637 anon_func: exp.Anonymous = t.cast(exp.Anonymous, func) 638 params = self._parse_func_params(anon_func) 639 640 kwargs = { 641 "this": anon_func.this, 642 "expressions": anon_func.expressions, 643 } 644 if parts[1]: 645 kwargs["parts"] = parts 646 exp_class: t.Type[exp.Expression] = ( 647 exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc 648 ) 649 else: 650 exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc 651 652 kwargs["exp_class"] = exp_class 653 if params: 654 kwargs["params"] = params 655 656 func = self.expression(**kwargs) 657 658 if isinstance(expr, exp.Window): 659 # The window's func was parsed as Anonymous in base parser, fix its 660 # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc 661 expr.set("this", func) 662 elif params: 663 # Params have blocked super()._parse_function() from parsing the following window 664 # (if that exists) as they're standing between the function call and the window spec 665 expr = self._parse_window(func) 666 else: 667 expr = func 668 669 return expr 670 671 def _parse_func_params( 672 self, this: t.Optional[exp.Func] = None 673 ) -> t.Optional[t.List[exp.Expression]]: 674 if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN): 675 return self._parse_csv(self._parse_lambda) 676 677 if self._match(TokenType.L_PAREN): 678 params = self._parse_csv(self._parse_lambda) 679 self._match_r_paren(this) 680 return params 681 682 return None 683 684 def _parse_quantile(self) -> exp.Quantile: 685 this = self._parse_lambda() 686 params = self._parse_func_params() 687 if params: 688 return self.expression(exp.Quantile, this=params[0], quantile=this) 689 return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5)) 690 691 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 692 return super()._parse_wrapped_id_vars(optional=True) 693 694 def _parse_primary_key( 695 self, wrapped_optional: bool = False, in_props: bool = False 696 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 697 return super()._parse_primary_key( 698 wrapped_optional=wrapped_optional or in_props, in_props=in_props 699 ) 700 701 def _parse_on_property(self) -> t.Optional[exp.Expression]: 702 index = self._index 703 if self._match_text_seq("CLUSTER"): 704 this = self._parse_id_var() 705 if this: 706 return self.expression(exp.OnCluster, this=this) 707 else: 708 self._retreat(index) 709 return None 710 711 def _parse_index_constraint( 712 self, kind: t.Optional[str] = None 713 ) -> exp.IndexColumnConstraint: 714 # INDEX name1 expr TYPE type1(args) GRANULARITY value 715 this = self._parse_id_var() 716 expression = self._parse_assignment() 717 718 index_type = self._match_text_seq("TYPE") and ( 719 self._parse_function() or self._parse_var() 720 ) 721 722 granularity = self._match_text_seq("GRANULARITY") and self._parse_term() 723 724 return self.expression( 725 exp.IndexColumnConstraint, 726 this=this, 727 expression=expression, 728 index_type=index_type, 729 granularity=granularity, 730 ) 731 732 def _parse_partition(self) -> t.Optional[exp.Partition]: 733 # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression 734 if not self._match(TokenType.PARTITION): 735 return None 736 737 if self._match_text_seq("ID"): 738 # Corresponds to the PARTITION ID <string_value> syntax 739 expressions: t.List[exp.Expression] = [ 740 self.expression(exp.PartitionId, this=self._parse_string()) 741 ] 742 else: 743 expressions = self._parse_expressions() 744 745 return self.expression(exp.Partition, expressions=expressions) 746 747 def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]: 748 partition = self._parse_partition() 749 750 if not partition or not self._match(TokenType.FROM): 751 return None 752 753 return self.expression( 754 exp.ReplacePartition, expression=partition, source=self._parse_table_parts() 755 ) 756 757 def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]: 758 if not self._match_text_seq("PROJECTION"): 759 return None 760 761 return self.expression( 762 exp.ProjectionDef, 763 this=self._parse_id_var(), 764 expression=self._parse_wrapped(self._parse_statement), 765 ) 766 767 def _parse_constraint(self) -> t.Optional[exp.Expression]: 768 return super()._parse_constraint() or self._parse_projection_def() 769 770 def _parse_alias( 771 self, this: t.Optional[exp.Expression], explicit: bool = False 772 ) -> t.Optional[exp.Expression]: 773 # In clickhouse "SELECT <expr> APPLY(...)" is a query modifier, 774 # so "APPLY" shouldn't be parsed as <expr>'s alias. However, "SELECT <expr> apply" is a valid alias 775 if self._match_pair(TokenType.APPLY, TokenType.L_PAREN, advance=False): 776 return this 777 778 return super()._parse_alias(this=this, explicit=explicit) 779 780 def _parse_expression(self) -> t.Optional[exp.Expression]: 781 this = super()._parse_expression() 782 783 # Clickhouse allows "SELECT <expr> [APPLY(func)] [...]]" modifier 784 while self._match_pair(TokenType.APPLY, TokenType.L_PAREN): 785 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 786 self._match(TokenType.R_PAREN) 787 788 return this 789 790 def _parse_columns(self) -> exp.Expression: 791 this: exp.Expression = self.expression(exp.Columns, this=self._parse_lambda()) 792 793 while self._next and self._match_text_seq(")", "APPLY", "("): 794 self._match(TokenType.R_PAREN) 795 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 796 return this 797 798 class Generator(generator.Generator): 799 QUERY_HINTS = False 800 STRUCT_DELIMITER = ("(", ")") 801 NVL2_SUPPORTED = False 802 TABLESAMPLE_REQUIRES_PARENS = False 803 TABLESAMPLE_SIZE_IS_ROWS = False 804 TABLESAMPLE_KEYWORDS = "SAMPLE" 805 LAST_DAY_SUPPORTS_DATE_PART = False 806 CAN_IMPLEMENT_ARRAY_ANY = True 807 SUPPORTS_TO_NUMBER = False 808 JOIN_HINTS = False 809 TABLE_HINTS = False 810 GROUPINGS_SEP = "" 811 SET_OP_MODIFIERS = False 812 SUPPORTS_TABLE_ALIAS_COLUMNS = False 813 VALUES_AS_TABLE = False 814 815 STRING_TYPE_MAPPING = { 816 exp.DataType.Type.CHAR: "String", 817 exp.DataType.Type.LONGBLOB: "String", 818 exp.DataType.Type.LONGTEXT: "String", 819 exp.DataType.Type.MEDIUMBLOB: "String", 820 exp.DataType.Type.MEDIUMTEXT: "String", 821 exp.DataType.Type.TINYBLOB: "String", 822 exp.DataType.Type.TINYTEXT: "String", 823 exp.DataType.Type.TEXT: "String", 824 exp.DataType.Type.VARBINARY: "String", 825 exp.DataType.Type.VARCHAR: "String", 826 } 827 828 SUPPORTED_JSON_PATH_PARTS = { 829 exp.JSONPathKey, 830 exp.JSONPathRoot, 831 exp.JSONPathSubscript, 832 } 833 834 TYPE_MAPPING = { 835 **generator.Generator.TYPE_MAPPING, 836 **STRING_TYPE_MAPPING, 837 exp.DataType.Type.ARRAY: "Array", 838 exp.DataType.Type.BOOLEAN: "Bool", 839 exp.DataType.Type.BIGINT: "Int64", 840 exp.DataType.Type.DATE32: "Date32", 841 exp.DataType.Type.DATETIME: "DateTime", 842 exp.DataType.Type.DATETIME64: "DateTime64", 843 exp.DataType.Type.TIMESTAMP: "DateTime", 844 exp.DataType.Type.TIMESTAMPTZ: "DateTime", 845 exp.DataType.Type.DOUBLE: "Float64", 846 exp.DataType.Type.ENUM: "Enum", 847 exp.DataType.Type.ENUM8: "Enum8", 848 exp.DataType.Type.ENUM16: "Enum16", 849 exp.DataType.Type.FIXEDSTRING: "FixedString", 850 exp.DataType.Type.FLOAT: "Float32", 851 exp.DataType.Type.INT: "Int32", 852 exp.DataType.Type.MEDIUMINT: "Int32", 853 exp.DataType.Type.INT128: "Int128", 854 exp.DataType.Type.INT256: "Int256", 855 exp.DataType.Type.LOWCARDINALITY: "LowCardinality", 856 exp.DataType.Type.MAP: "Map", 857 exp.DataType.Type.NESTED: "Nested", 858 exp.DataType.Type.SMALLINT: "Int16", 859 exp.DataType.Type.STRUCT: "Tuple", 860 exp.DataType.Type.TINYINT: "Int8", 861 exp.DataType.Type.UBIGINT: "UInt64", 862 exp.DataType.Type.UINT: "UInt32", 863 exp.DataType.Type.UINT128: "UInt128", 864 exp.DataType.Type.UINT256: "UInt256", 865 exp.DataType.Type.USMALLINT: "UInt16", 866 exp.DataType.Type.UTINYINT: "UInt8", 867 exp.DataType.Type.IPV4: "IPv4", 868 exp.DataType.Type.IPV6: "IPv6", 869 exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction", 870 exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction", 871 } 872 873 TRANSFORMS = { 874 **generator.Generator.TRANSFORMS, 875 exp.AnyValue: rename_func("any"), 876 exp.ApproxDistinct: rename_func("uniq"), 877 exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this), 878 exp.ArraySize: rename_func("LENGTH"), 879 exp.ArraySum: rename_func("arraySum"), 880 exp.ArgMax: arg_max_or_min_no_count("argMax"), 881 exp.ArgMin: arg_max_or_min_no_count("argMin"), 882 exp.Array: inline_array_sql, 883 exp.CastToStrType: rename_func("CAST"), 884 exp.CountIf: rename_func("countIf"), 885 exp.CompressColumnConstraint: lambda self, 886 e: f"CODEC({self.expressions(e, key='this', flat=True)})", 887 exp.ComputedColumnConstraint: lambda self, 888 e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}", 889 exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"), 890 exp.DateAdd: _datetime_delta_sql("DATE_ADD"), 891 exp.DateDiff: _datetime_delta_sql("DATE_DIFF"), 892 exp.DateStrToDate: rename_func("toDate"), 893 exp.DateSub: _datetime_delta_sql("DATE_SUB"), 894 exp.Explode: rename_func("arrayJoin"), 895 exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL", 896 exp.IsNan: rename_func("isNaN"), 897 exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False), 898 exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False), 899 exp.JSONPathKey: json_path_key_only_name, 900 exp.JSONPathRoot: lambda *_: "", 901 exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)), 902 exp.Nullif: rename_func("nullIf"), 903 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 904 exp.Pivot: no_pivot_sql, 905 exp.Quantile: _quantile_sql, 906 exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression), 907 exp.Rand: rename_func("randCanonical"), 908 exp.StartsWith: rename_func("startsWith"), 909 exp.StrPosition: lambda self, e: self.func( 910 "position", e.this, e.args.get("substr"), e.args.get("position") 911 ), 912 exp.TimeToStr: lambda self, e: self.func( 913 "formatDateTime", e.this, self.format_time(e), e.args.get("zone") 914 ), 915 exp.TimeStrToTime: _timestrtotime_sql, 916 exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"), 917 exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"), 918 exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)), 919 exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions), 920 exp.MD5Digest: rename_func("MD5"), 921 exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))), 922 exp.SHA: rename_func("SHA1"), 923 exp.SHA2: sha256_sql, 924 exp.UnixToTime: _unix_to_time_sql, 925 exp.TimestampTrunc: timestamptrunc_sql(zone=True), 926 exp.Trim: trim_sql, 927 exp.Variance: rename_func("varSamp"), 928 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 929 exp.Stddev: rename_func("stddevSamp"), 930 exp.Chr: rename_func("CHAR"), 931 exp.Lag: lambda self, e: self.func( 932 "lagInFrame", e.this, e.args.get("offset"), e.args.get("default") 933 ), 934 exp.Lead: lambda self, e: self.func( 935 "leadInFrame", e.this, e.args.get("offset"), e.args.get("default") 936 ), 937 } 938 939 PROPERTIES_LOCATION = { 940 **generator.Generator.PROPERTIES_LOCATION, 941 exp.OnCluster: exp.Properties.Location.POST_NAME, 942 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 943 exp.ToTableProperty: exp.Properties.Location.POST_NAME, 944 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 945 } 946 947 # There's no list in docs, but it can be found in Clickhouse code 948 # see `ClickHouse/src/Parsers/ParserCreate*.cpp` 949 ON_CLUSTER_TARGETS = { 950 "SCHEMA", # Transpiled CREATE SCHEMA may have OnCluster property set 951 "DATABASE", 952 "TABLE", 953 "VIEW", 954 "DICTIONARY", 955 "INDEX", 956 "FUNCTION", 957 "NAMED COLLECTION", 958 } 959 960 # https://clickhouse.com/docs/en/sql-reference/data-types/nullable 961 NON_NULLABLE_TYPES = { 962 exp.DataType.Type.ARRAY, 963 exp.DataType.Type.MAP, 964 exp.DataType.Type.STRUCT, 965 } 966 967 def strtodate_sql(self, expression: exp.StrToDate) -> str: 968 strtodate_sql = self.function_fallback_sql(expression) 969 970 if not isinstance(expression.parent, exp.Cast): 971 # StrToDate returns DATEs in other dialects (eg. postgres), so 972 # this branch aims to improve the transpilation to clickhouse 973 return f"CAST({strtodate_sql} AS DATE)" 974 975 return strtodate_sql 976 977 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 978 this = expression.this 979 980 if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"): 981 return self.sql(this) 982 983 return super().cast_sql(expression, safe_prefix=safe_prefix) 984 985 def trycast_sql(self, expression: exp.TryCast) -> str: 986 dtype = expression.to 987 if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True): 988 # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T) 989 dtype.set("nullable", True) 990 991 return super().cast_sql(expression) 992 993 def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str: 994 this = self.json_path_part(expression.this) 995 return str(int(this) + 1) if is_int(this) else this 996 997 def likeproperty_sql(self, expression: exp.LikeProperty) -> str: 998 return f"AS {self.sql(expression, 'this')}" 999 1000 def _any_to_has( 1001 self, 1002 expression: exp.EQ | exp.NEQ, 1003 default: t.Callable[[t.Any], str], 1004 prefix: str = "", 1005 ) -> str: 1006 if isinstance(expression.left, exp.Any): 1007 arr = expression.left 1008 this = expression.right 1009 elif isinstance(expression.right, exp.Any): 1010 arr = expression.right 1011 this = expression.left 1012 else: 1013 return default(expression) 1014 1015 return prefix + self.func("has", arr.this.unnest(), this) 1016 1017 def eq_sql(self, expression: exp.EQ) -> str: 1018 return self._any_to_has(expression, super().eq_sql) 1019 1020 def neq_sql(self, expression: exp.NEQ) -> str: 1021 return self._any_to_has(expression, super().neq_sql, "NOT ") 1022 1023 def regexpilike_sql(self, expression: exp.RegexpILike) -> str: 1024 # Manually add a flag to make the search case-insensitive 1025 regex = self.func("CONCAT", "'(?i)'", expression.expression) 1026 return self.func("match", expression.this, regex) 1027 1028 def datatype_sql(self, expression: exp.DataType) -> str: 1029 # String is the standard ClickHouse type, every other variant is just an alias. 1030 # Additionally, any supplied length parameter will be ignored. 1031 # 1032 # https://clickhouse.com/docs/en/sql-reference/data-types/string 1033 if expression.this in self.STRING_TYPE_MAPPING: 1034 dtype = "String" 1035 else: 1036 dtype = super().datatype_sql(expression) 1037 1038 # This section changes the type to `Nullable(...)` if the following conditions hold: 1039 # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable` 1040 # and change their semantics 1041 # - It's not the key type of a `Map`. This is because ClickHouse enforces the following 1042 # constraint: "Type of Map key must be a type, that can be represented by integer or 1043 # String or FixedString (possibly LowCardinality) or UUID or IPv6" 1044 # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type 1045 parent = expression.parent 1046 nullable = expression.args.get("nullable") 1047 if nullable is True or ( 1048 nullable is None 1049 and not ( 1050 isinstance(parent, exp.DataType) 1051 and parent.is_type(exp.DataType.Type.MAP, check_nullable=True) 1052 and expression.index in (None, 0) 1053 ) 1054 and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True) 1055 ): 1056 dtype = f"Nullable({dtype})" 1057 1058 return dtype 1059 1060 def cte_sql(self, expression: exp.CTE) -> str: 1061 if expression.args.get("scalar"): 1062 this = self.sql(expression, "this") 1063 alias = self.sql(expression, "alias") 1064 return f"{this} AS {alias}" 1065 1066 return super().cte_sql(expression) 1067 1068 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 1069 return super().after_limit_modifiers(expression) + [ 1070 ( 1071 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 1072 if expression.args.get("settings") 1073 else "" 1074 ), 1075 ( 1076 self.seg("FORMAT ") + self.sql(expression, "format") 1077 if expression.args.get("format") 1078 else "" 1079 ), 1080 ] 1081 1082 def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str: 1083 params = self.expressions(expression, key="params", flat=True) 1084 return self.func(expression.name, *expression.expressions) + f"({params})" 1085 1086 def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str: 1087 return self.func(expression.name, *expression.expressions) 1088 1089 def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str: 1090 return self.anonymousaggfunc_sql(expression) 1091 1092 def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str: 1093 return self.parameterizedagg_sql(expression) 1094 1095 def placeholder_sql(self, expression: exp.Placeholder) -> str: 1096 return f"{{{expression.name}: {self.sql(expression, 'kind')}}}" 1097 1098 def oncluster_sql(self, expression: exp.OnCluster) -> str: 1099 return f"ON CLUSTER {self.sql(expression, 'this')}" 1100 1101 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1102 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 1103 exp.Properties.Location.POST_NAME 1104 ): 1105 this_name = self.sql( 1106 expression.this if isinstance(expression.this, exp.Schema) else expression, 1107 "this", 1108 ) 1109 this_properties = " ".join( 1110 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 1111 ) 1112 this_schema = self.schema_columns_sql(expression.this) 1113 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1114 1115 return f"{this_name}{self.sep()}{this_properties}{this_schema}" 1116 1117 return super().createable_sql(expression, locations) 1118 1119 def create_sql(self, expression: exp.Create) -> str: 1120 # The comment property comes last in CTAS statements, i.e. after the query 1121 query = expression.expression 1122 if isinstance(query, exp.Query): 1123 comment_prop = expression.find(exp.SchemaCommentProperty) 1124 if comment_prop: 1125 comment_prop.pop() 1126 query.replace(exp.paren(query)) 1127 else: 1128 comment_prop = None 1129 1130 create_sql = super().create_sql(expression) 1131 1132 comment_sql = self.sql(comment_prop) 1133 comment_sql = f" {comment_sql}" if comment_sql else "" 1134 1135 return f"{create_sql}{comment_sql}" 1136 1137 def prewhere_sql(self, expression: exp.PreWhere) -> str: 1138 this = self.indent(self.sql(expression, "this")) 1139 return f"{self.seg('PREWHERE')}{self.sep()}{this}" 1140 1141 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1142 this = self.sql(expression, "this") 1143 this = f" {this}" if this else "" 1144 expr = self.sql(expression, "expression") 1145 expr = f" {expr}" if expr else "" 1146 index_type = self.sql(expression, "index_type") 1147 index_type = f" TYPE {index_type}" if index_type else "" 1148 granularity = self.sql(expression, "granularity") 1149 granularity = f" GRANULARITY {granularity}" if granularity else "" 1150 1151 return f"INDEX{this}{expr}{index_type}{granularity}" 1152 1153 def partition_sql(self, expression: exp.Partition) -> str: 1154 return f"PARTITION {self.expressions(expression, flat=True)}" 1155 1156 def partitionid_sql(self, expression: exp.PartitionId) -> str: 1157 return f"ID {self.sql(expression.this)}" 1158 1159 def replacepartition_sql(self, expression: exp.ReplacePartition) -> str: 1160 return ( 1161 f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}" 1162 ) 1163 1164 def projectiondef_sql(self, expression: exp.ProjectionDef) -> str: 1165 return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"
Determines how function names are going to be normalized.
Possible values:
"upper" or True: Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
Default NULL
ordering method to use if not explicitly set.
Possible values: "nulls_are_small"
, "nulls_are_large"
, "nulls_are_last"
Whether the base comes first in the LOG
function.
Possible values: True
, False
, None
(two arguments are not supported by LOG
)
Whether alias reference expansion (_expand_alias_refs()) should run before column qualification (_qualify_columns()).
For example:
WITH data AS ( SELECT 1 AS id, 2 AS my_id ) SELECT id AS my_id FROM data WHERE my_id = 1 GROUP BY my_id, HAVING my_id = 1
In most dialects, "my_id" would refer to "data.my_id" across the query, except: - BigQuery, which will forward the alias to GROUP BY + HAVING clauses i.e it resolves to "WHERE my_id = 1 GROUP BY id HAVING id = 1" - Clickhouse, which will forward the alias across the query i.e it resolves to "WHERE id = 1 GROUP BY id HAVING id = 1"
Specifies the strategy according to which identifiers should be normalized.
Mapping of an escaped sequence (\n
) to its unescaped version (
).
Helper for dialects that use a different name for the same creatable kind. For example, the Clickhouse equivalent of CREATE SCHEMA is CREATE DATABASE.
Whether a set operation uses DISTINCT by default. This is None
when either DISTINCT
or ALL
must be explicitly specified.
Inherited Members
- sqlglot.dialects.dialect.Dialect
- Dialect
- INDEX_OFFSET
- WEEK_OFFSET
- UNNEST_COLUMN_ONLY
- ALIAS_POST_TABLESAMPLE
- TABLESAMPLE_SIZE_IS_PERCENT
- IDENTIFIERS_CAN_START_WITH_DIGIT
- DPIPE_IS_STRING_CONCAT
- STRICT_STRING_CONCAT
- SUPPORTS_SEMI_ANTI_JOIN
- COPY_PARAMS_ARE_CSV
- TYPED_DIVISION
- CONCAT_COALESCE
- HEX_LOWERCASE
- DATE_FORMAT
- DATEINT_FORMAT
- TIME_FORMAT
- TIME_MAPPING
- FORMAT_MAPPING
- PSEUDOCOLUMNS
- PREFER_CTE_ALIAS_COLUMN
- EXPAND_ALIAS_REFS_EARLY_ONLY_IN_GROUP_BY
- SUPPORTS_ORDER_BY_ALL
- HAS_DISTINCT_ARRAY_CONSTRUCTORS
- SUPPORTS_FIXED_SIZE_ARRAYS
- STRICT_JSON_PATH_SYNTAX
- ON_CONDITION_EMPTY_BEFORE_ERROR
- ARRAY_AGG_INCLUDES_NULLS
- REGEXP_EXTRACT_DEFAULT_GROUP
- DATE_PART_MAPPING
- TYPE_TO_EXPRESSIONS
- ANNOTATORS
- get_or_raise
- format_time
- settings
- normalize_identifier
- case_sensitive
- can_identify
- quote_identifier
- to_json_path
- parse
- parse_into
- generate
- transpile
- tokenize
- tokenizer
- jsonpath_tokenizer
- parser
- generator
156 class Tokenizer(tokens.Tokenizer): 157 COMMENTS = ["--", "#", "#!", ("/*", "*/")] 158 IDENTIFIERS = ['"', "`"] 159 STRING_ESCAPES = ["'", "\\"] 160 BIT_STRINGS = [("0b", "")] 161 HEX_STRINGS = [("0x", ""), ("0X", "")] 162 HEREDOC_STRINGS = ["$"] 163 164 KEYWORDS = { 165 **tokens.Tokenizer.KEYWORDS, 166 "ATTACH": TokenType.COMMAND, 167 "DATE32": TokenType.DATE32, 168 "DATETIME64": TokenType.DATETIME64, 169 "DICTIONARY": TokenType.DICTIONARY, 170 "ENUM8": TokenType.ENUM8, 171 "ENUM16": TokenType.ENUM16, 172 "FINAL": TokenType.FINAL, 173 "FIXEDSTRING": TokenType.FIXEDSTRING, 174 "FLOAT32": TokenType.FLOAT, 175 "FLOAT64": TokenType.DOUBLE, 176 "GLOBAL": TokenType.GLOBAL, 177 "INT256": TokenType.INT256, 178 "LOWCARDINALITY": TokenType.LOWCARDINALITY, 179 "MAP": TokenType.MAP, 180 "NESTED": TokenType.NESTED, 181 "SAMPLE": TokenType.TABLE_SAMPLE, 182 "TUPLE": TokenType.STRUCT, 183 "UINT128": TokenType.UINT128, 184 "UINT16": TokenType.USMALLINT, 185 "UINT256": TokenType.UINT256, 186 "UINT32": TokenType.UINT, 187 "UINT64": TokenType.UBIGINT, 188 "UINT8": TokenType.UTINYINT, 189 "IPV4": TokenType.IPV4, 190 "IPV6": TokenType.IPV6, 191 "AGGREGATEFUNCTION": TokenType.AGGREGATEFUNCTION, 192 "SIMPLEAGGREGATEFUNCTION": TokenType.SIMPLEAGGREGATEFUNCTION, 193 "SYSTEM": TokenType.COMMAND, 194 "PREWHERE": TokenType.PREWHERE, 195 } 196 KEYWORDS.pop("/*+") 197 198 SINGLE_TOKENS = { 199 **tokens.Tokenizer.SINGLE_TOKENS, 200 "$": TokenType.HEREDOC_STRING, 201 }
Inherited Members
- sqlglot.tokens.Tokenizer
- Tokenizer
- BYTE_STRINGS
- RAW_STRINGS
- UNICODE_STRINGS
- IDENTIFIER_ESCAPES
- QUOTES
- VAR_SINGLE_TOKENS
- HEREDOC_TAG_IS_IDENTIFIER
- HEREDOC_STRING_ALTERNATIVE
- STRING_ESCAPES_ALLOWED_IN_RAW_STRINGS
- NESTED_COMMENTS
- WHITE_SPACE
- COMMANDS
- COMMAND_PREFIX_TOKENS
- NUMERIC_LITERALS
- dialect
- reset
- tokenize
- tokenize_rs
- size
- sql
- tokens
203 class Parser(parser.Parser): 204 # Tested in ClickHouse's playground, it seems that the following two queries do the same thing 205 # * select x from t1 union all select x from t2 limit 1; 206 # * select x from t1 union all (select x from t2 limit 1); 207 MODIFIERS_ATTACHED_TO_SET_OP = False 208 INTERVAL_SPANS = False 209 210 FUNCTIONS = { 211 **parser.Parser.FUNCTIONS, 212 "ANY": exp.AnyValue.from_arg_list, 213 "ARRAYSUM": exp.ArraySum.from_arg_list, 214 "COUNTIF": _build_count_if, 215 "DATE_ADD": build_date_delta(exp.DateAdd, default_unit=None), 216 "DATEADD": build_date_delta(exp.DateAdd, default_unit=None), 217 "DATE_DIFF": build_date_delta(exp.DateDiff, default_unit=None), 218 "DATEDIFF": build_date_delta(exp.DateDiff, default_unit=None), 219 "DATE_FORMAT": _build_date_format, 220 "DATE_SUB": build_date_delta(exp.DateSub, default_unit=None), 221 "DATESUB": build_date_delta(exp.DateSub, default_unit=None), 222 "FORMATDATETIME": _build_date_format, 223 "JSONEXTRACTSTRING": build_json_extract_path( 224 exp.JSONExtractScalar, zero_based_indexing=False 225 ), 226 "MAP": parser.build_var_map, 227 "MATCH": exp.RegexpLike.from_arg_list, 228 "RANDCANONICAL": exp.Rand.from_arg_list, 229 "STR_TO_DATE": _build_str_to_date, 230 "TUPLE": exp.Struct.from_arg_list, 231 "TIMESTAMP_SUB": build_date_delta(exp.TimestampSub, default_unit=None), 232 "TIMESTAMPSUB": build_date_delta(exp.TimestampSub, default_unit=None), 233 "TIMESTAMP_ADD": build_date_delta(exp.TimestampAdd, default_unit=None), 234 "TIMESTAMPADD": build_date_delta(exp.TimestampAdd, default_unit=None), 235 "UNIQ": exp.ApproxDistinct.from_arg_list, 236 "XOR": lambda args: exp.Xor(expressions=args), 237 "MD5": exp.MD5Digest.from_arg_list, 238 "SHA256": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(256)), 239 "SHA512": lambda args: exp.SHA2(this=seq_get(args, 0), length=exp.Literal.number(512)), 240 } 241 242 AGG_FUNCTIONS = { 243 "count", 244 "min", 245 "max", 246 "sum", 247 "avg", 248 "any", 249 "stddevPop", 250 "stddevSamp", 251 "varPop", 252 "varSamp", 253 "corr", 254 "covarPop", 255 "covarSamp", 256 "entropy", 257 "exponentialMovingAverage", 258 "intervalLengthSum", 259 "kolmogorovSmirnovTest", 260 "mannWhitneyUTest", 261 "median", 262 "rankCorr", 263 "sumKahan", 264 "studentTTest", 265 "welchTTest", 266 "anyHeavy", 267 "anyLast", 268 "boundingRatio", 269 "first_value", 270 "last_value", 271 "argMin", 272 "argMax", 273 "avgWeighted", 274 "topK", 275 "topKWeighted", 276 "deltaSum", 277 "deltaSumTimestamp", 278 "groupArray", 279 "groupArrayLast", 280 "groupUniqArray", 281 "groupArrayInsertAt", 282 "groupArrayMovingAvg", 283 "groupArrayMovingSum", 284 "groupArraySample", 285 "groupBitAnd", 286 "groupBitOr", 287 "groupBitXor", 288 "groupBitmap", 289 "groupBitmapAnd", 290 "groupBitmapOr", 291 "groupBitmapXor", 292 "sumWithOverflow", 293 "sumMap", 294 "minMap", 295 "maxMap", 296 "skewSamp", 297 "skewPop", 298 "kurtSamp", 299 "kurtPop", 300 "uniq", 301 "uniqExact", 302 "uniqCombined", 303 "uniqCombined64", 304 "uniqHLL12", 305 "uniqTheta", 306 "quantile", 307 "quantiles", 308 "quantileExact", 309 "quantilesExact", 310 "quantileExactLow", 311 "quantilesExactLow", 312 "quantileExactHigh", 313 "quantilesExactHigh", 314 "quantileExactWeighted", 315 "quantilesExactWeighted", 316 "quantileTiming", 317 "quantilesTiming", 318 "quantileTimingWeighted", 319 "quantilesTimingWeighted", 320 "quantileDeterministic", 321 "quantilesDeterministic", 322 "quantileTDigest", 323 "quantilesTDigest", 324 "quantileTDigestWeighted", 325 "quantilesTDigestWeighted", 326 "quantileBFloat16", 327 "quantilesBFloat16", 328 "quantileBFloat16Weighted", 329 "quantilesBFloat16Weighted", 330 "simpleLinearRegression", 331 "stochasticLinearRegression", 332 "stochasticLogisticRegression", 333 "categoricalInformationValue", 334 "contingency", 335 "cramersV", 336 "cramersVBiasCorrected", 337 "theilsU", 338 "maxIntersections", 339 "maxIntersectionsPosition", 340 "meanZTest", 341 "quantileInterpolatedWeighted", 342 "quantilesInterpolatedWeighted", 343 "quantileGK", 344 "quantilesGK", 345 "sparkBar", 346 "sumCount", 347 "largestTriangleThreeBuckets", 348 "histogram", 349 "sequenceMatch", 350 "sequenceCount", 351 "windowFunnel", 352 "retention", 353 "uniqUpTo", 354 "sequenceNextNode", 355 "exponentialTimeDecayedAvg", 356 } 357 358 AGG_FUNCTIONS_SUFFIXES = [ 359 "If", 360 "Array", 361 "ArrayIf", 362 "Map", 363 "SimpleState", 364 "State", 365 "Merge", 366 "MergeState", 367 "ForEach", 368 "Distinct", 369 "OrDefault", 370 "OrNull", 371 "Resample", 372 "ArgMin", 373 "ArgMax", 374 ] 375 376 FUNC_TOKENS = { 377 *parser.Parser.FUNC_TOKENS, 378 TokenType.SET, 379 } 380 381 RESERVED_TOKENS = parser.Parser.RESERVED_TOKENS - {TokenType.SELECT} 382 383 ID_VAR_TOKENS = { 384 *parser.Parser.ID_VAR_TOKENS, 385 TokenType.LIKE, 386 } 387 388 AGG_FUNC_MAPPING = ( 389 lambda functions, suffixes: { 390 f"{f}{sfx}": (f, sfx) for sfx in (suffixes + [""]) for f in functions 391 } 392 )(AGG_FUNCTIONS, AGG_FUNCTIONS_SUFFIXES) 393 394 FUNCTIONS_WITH_ALIASED_ARGS = {*parser.Parser.FUNCTIONS_WITH_ALIASED_ARGS, "TUPLE"} 395 396 FUNCTION_PARSERS = { 397 **parser.Parser.FUNCTION_PARSERS, 398 "ARRAYJOIN": lambda self: self.expression(exp.Explode, this=self._parse_expression()), 399 "QUANTILE": lambda self: self._parse_quantile(), 400 "COLUMNS": lambda self: self._parse_columns(), 401 } 402 403 FUNCTION_PARSERS.pop("MATCH") 404 405 NO_PAREN_FUNCTION_PARSERS = parser.Parser.NO_PAREN_FUNCTION_PARSERS.copy() 406 NO_PAREN_FUNCTION_PARSERS.pop("ANY") 407 408 NO_PAREN_FUNCTIONS = parser.Parser.NO_PAREN_FUNCTIONS.copy() 409 NO_PAREN_FUNCTIONS.pop(TokenType.CURRENT_TIMESTAMP) 410 411 RANGE_PARSERS = { 412 **parser.Parser.RANGE_PARSERS, 413 TokenType.GLOBAL: lambda self, this: self._match(TokenType.IN) 414 and self._parse_in(this, is_global=True), 415 } 416 417 # The PLACEHOLDER entry is popped because 1) it doesn't affect Clickhouse (it corresponds to 418 # the postgres-specific JSONBContains parser) and 2) it makes parsing the ternary op simpler. 419 COLUMN_OPERATORS = parser.Parser.COLUMN_OPERATORS.copy() 420 COLUMN_OPERATORS.pop(TokenType.PLACEHOLDER) 421 422 JOIN_KINDS = { 423 *parser.Parser.JOIN_KINDS, 424 TokenType.ANY, 425 TokenType.ASOF, 426 TokenType.ARRAY, 427 } 428 429 TABLE_ALIAS_TOKENS = parser.Parser.TABLE_ALIAS_TOKENS - { 430 TokenType.ANY, 431 TokenType.ARRAY, 432 TokenType.FINAL, 433 TokenType.FORMAT, 434 TokenType.SETTINGS, 435 } 436 437 ALIAS_TOKENS = parser.Parser.ALIAS_TOKENS - { 438 TokenType.FORMAT, 439 } 440 441 LOG_DEFAULTS_TO_LN = True 442 443 QUERY_MODIFIER_PARSERS = { 444 **parser.Parser.QUERY_MODIFIER_PARSERS, 445 TokenType.SETTINGS: lambda self: ( 446 "settings", 447 self._advance() or self._parse_csv(self._parse_assignment), 448 ), 449 TokenType.FORMAT: lambda self: ("format", self._advance() or self._parse_id_var()), 450 } 451 452 CONSTRAINT_PARSERS = { 453 **parser.Parser.CONSTRAINT_PARSERS, 454 "INDEX": lambda self: self._parse_index_constraint(), 455 "CODEC": lambda self: self._parse_compress(), 456 } 457 458 ALTER_PARSERS = { 459 **parser.Parser.ALTER_PARSERS, 460 "REPLACE": lambda self: self._parse_alter_table_replace(), 461 } 462 463 SCHEMA_UNNAMED_CONSTRAINTS = { 464 *parser.Parser.SCHEMA_UNNAMED_CONSTRAINTS, 465 "INDEX", 466 } 467 468 PLACEHOLDER_PARSERS = { 469 **parser.Parser.PLACEHOLDER_PARSERS, 470 TokenType.L_BRACE: lambda self: self._parse_query_parameter(), 471 } 472 473 def _parse_types( 474 self, check_func: bool = False, schema: bool = False, allow_identifiers: bool = True 475 ) -> t.Optional[exp.Expression]: 476 dtype = super()._parse_types( 477 check_func=check_func, schema=schema, allow_identifiers=allow_identifiers 478 ) 479 if isinstance(dtype, exp.DataType) and dtype.args.get("nullable") is not True: 480 # Mark every type as non-nullable which is ClickHouse's default, unless it's 481 # already marked as nullable. This marker helps us transpile types from other 482 # dialects to ClickHouse, so that we can e.g. produce `CAST(x AS Nullable(String))` 483 # from `CAST(x AS TEXT)`. If there is a `NULL` value in `x`, the former would 484 # fail in ClickHouse without the `Nullable` type constructor. 485 dtype.set("nullable", False) 486 487 return dtype 488 489 def _parse_extract(self) -> exp.Extract | exp.Anonymous: 490 index = self._index 491 this = self._parse_bitwise() 492 if self._match(TokenType.FROM): 493 self._retreat(index) 494 return super()._parse_extract() 495 496 # We return Anonymous here because extract and regexpExtract have different semantics, 497 # so parsing extract(foo, bar) into RegexpExtract can potentially break queries. E.g., 498 # `extract('foobar', 'b')` works, but ClickHouse crashes for `regexpExtract('foobar', 'b')`. 499 # 500 # TODO: can we somehow convert the former into an equivalent `regexpExtract` call? 501 self._match(TokenType.COMMA) 502 return self.expression( 503 exp.Anonymous, this="extract", expressions=[this, self._parse_bitwise()] 504 ) 505 506 def _parse_assignment(self) -> t.Optional[exp.Expression]: 507 this = super()._parse_assignment() 508 509 if self._match(TokenType.PLACEHOLDER): 510 return self.expression( 511 exp.If, 512 this=this, 513 true=self._parse_assignment(), 514 false=self._match(TokenType.COLON) and self._parse_assignment(), 515 ) 516 517 return this 518 519 def _parse_query_parameter(self) -> t.Optional[exp.Expression]: 520 """ 521 Parse a placeholder expression like SELECT {abc: UInt32} or FROM {table: Identifier} 522 https://clickhouse.com/docs/en/sql-reference/syntax#defining-and-using-query-parameters 523 """ 524 this = self._parse_id_var() 525 self._match(TokenType.COLON) 526 kind = self._parse_types(check_func=False, allow_identifiers=False) or ( 527 self._match_text_seq("IDENTIFIER") and "Identifier" 528 ) 529 530 if not kind: 531 self.raise_error("Expecting a placeholder type or 'Identifier' for tables") 532 elif not self._match(TokenType.R_BRACE): 533 self.raise_error("Expecting }") 534 535 return self.expression(exp.Placeholder, this=this, kind=kind) 536 537 def _parse_in(self, this: t.Optional[exp.Expression], is_global: bool = False) -> exp.In: 538 this = super()._parse_in(this) 539 this.set("is_global", is_global) 540 return this 541 542 def _parse_table( 543 self, 544 schema: bool = False, 545 joins: bool = False, 546 alias_tokens: t.Optional[t.Collection[TokenType]] = None, 547 parse_bracket: bool = False, 548 is_db_reference: bool = False, 549 parse_partition: bool = False, 550 ) -> t.Optional[exp.Expression]: 551 this = super()._parse_table( 552 schema=schema, 553 joins=joins, 554 alias_tokens=alias_tokens, 555 parse_bracket=parse_bracket, 556 is_db_reference=is_db_reference, 557 ) 558 559 if self._match(TokenType.FINAL): 560 this = self.expression(exp.Final, this=this) 561 562 return this 563 564 def _parse_position(self, haystack_first: bool = False) -> exp.StrPosition: 565 return super()._parse_position(haystack_first=True) 566 567 # https://clickhouse.com/docs/en/sql-reference/statements/select/with/ 568 def _parse_cte(self) -> exp.CTE: 569 # WITH <identifier> AS <subquery expression> 570 cte: t.Optional[exp.CTE] = self._try_parse(super()._parse_cte) 571 572 if not cte: 573 # WITH <expression> AS <identifier> 574 cte = self.expression( 575 exp.CTE, 576 this=self._parse_assignment(), 577 alias=self._parse_table_alias(), 578 scalar=True, 579 ) 580 581 return cte 582 583 def _parse_join_parts( 584 self, 585 ) -> t.Tuple[t.Optional[Token], t.Optional[Token], t.Optional[Token]]: 586 is_global = self._match(TokenType.GLOBAL) and self._prev 587 kind_pre = self._match_set(self.JOIN_KINDS, advance=False) and self._prev 588 589 if kind_pre: 590 kind = self._match_set(self.JOIN_KINDS) and self._prev 591 side = self._match_set(self.JOIN_SIDES) and self._prev 592 return is_global, side, kind 593 594 return ( 595 is_global, 596 self._match_set(self.JOIN_SIDES) and self._prev, 597 self._match_set(self.JOIN_KINDS) and self._prev, 598 ) 599 600 def _parse_join( 601 self, skip_join_token: bool = False, parse_bracket: bool = False 602 ) -> t.Optional[exp.Join]: 603 join = super()._parse_join(skip_join_token=skip_join_token, parse_bracket=True) 604 if join: 605 join.set("global", join.args.pop("method", None)) 606 607 # tbl ARRAY JOIN arr <-- this should be a `Column` reference, not a `Table` 608 # https://clickhouse.com/docs/en/sql-reference/statements/select/array-join 609 if join.kind == "ARRAY": 610 for table in join.find_all(exp.Table): 611 table.replace(table.to_column()) 612 613 return join 614 615 def _parse_function( 616 self, 617 functions: t.Optional[t.Dict[str, t.Callable]] = None, 618 anonymous: bool = False, 619 optional_parens: bool = True, 620 any_token: bool = False, 621 ) -> t.Optional[exp.Expression]: 622 expr = super()._parse_function( 623 functions=functions, 624 anonymous=anonymous, 625 optional_parens=optional_parens, 626 any_token=any_token, 627 ) 628 629 func = expr.this if isinstance(expr, exp.Window) else expr 630 631 # Aggregate functions can be split in 2 parts: <func_name><suffix> 632 parts = ( 633 self.AGG_FUNC_MAPPING.get(func.this) if isinstance(func, exp.Anonymous) else None 634 ) 635 636 if parts: 637 anon_func: exp.Anonymous = t.cast(exp.Anonymous, func) 638 params = self._parse_func_params(anon_func) 639 640 kwargs = { 641 "this": anon_func.this, 642 "expressions": anon_func.expressions, 643 } 644 if parts[1]: 645 kwargs["parts"] = parts 646 exp_class: t.Type[exp.Expression] = ( 647 exp.CombinedParameterizedAgg if params else exp.CombinedAggFunc 648 ) 649 else: 650 exp_class = exp.ParameterizedAgg if params else exp.AnonymousAggFunc 651 652 kwargs["exp_class"] = exp_class 653 if params: 654 kwargs["params"] = params 655 656 func = self.expression(**kwargs) 657 658 if isinstance(expr, exp.Window): 659 # The window's func was parsed as Anonymous in base parser, fix its 660 # type to be ClickHouse style CombinedAnonymousAggFunc / AnonymousAggFunc 661 expr.set("this", func) 662 elif params: 663 # Params have blocked super()._parse_function() from parsing the following window 664 # (if that exists) as they're standing between the function call and the window spec 665 expr = self._parse_window(func) 666 else: 667 expr = func 668 669 return expr 670 671 def _parse_func_params( 672 self, this: t.Optional[exp.Func] = None 673 ) -> t.Optional[t.List[exp.Expression]]: 674 if self._match_pair(TokenType.R_PAREN, TokenType.L_PAREN): 675 return self._parse_csv(self._parse_lambda) 676 677 if self._match(TokenType.L_PAREN): 678 params = self._parse_csv(self._parse_lambda) 679 self._match_r_paren(this) 680 return params 681 682 return None 683 684 def _parse_quantile(self) -> exp.Quantile: 685 this = self._parse_lambda() 686 params = self._parse_func_params() 687 if params: 688 return self.expression(exp.Quantile, this=params[0], quantile=this) 689 return self.expression(exp.Quantile, this=this, quantile=exp.Literal.number(0.5)) 690 691 def _parse_wrapped_id_vars(self, optional: bool = False) -> t.List[exp.Expression]: 692 return super()._parse_wrapped_id_vars(optional=True) 693 694 def _parse_primary_key( 695 self, wrapped_optional: bool = False, in_props: bool = False 696 ) -> exp.PrimaryKeyColumnConstraint | exp.PrimaryKey: 697 return super()._parse_primary_key( 698 wrapped_optional=wrapped_optional or in_props, in_props=in_props 699 ) 700 701 def _parse_on_property(self) -> t.Optional[exp.Expression]: 702 index = self._index 703 if self._match_text_seq("CLUSTER"): 704 this = self._parse_id_var() 705 if this: 706 return self.expression(exp.OnCluster, this=this) 707 else: 708 self._retreat(index) 709 return None 710 711 def _parse_index_constraint( 712 self, kind: t.Optional[str] = None 713 ) -> exp.IndexColumnConstraint: 714 # INDEX name1 expr TYPE type1(args) GRANULARITY value 715 this = self._parse_id_var() 716 expression = self._parse_assignment() 717 718 index_type = self._match_text_seq("TYPE") and ( 719 self._parse_function() or self._parse_var() 720 ) 721 722 granularity = self._match_text_seq("GRANULARITY") and self._parse_term() 723 724 return self.expression( 725 exp.IndexColumnConstraint, 726 this=this, 727 expression=expression, 728 index_type=index_type, 729 granularity=granularity, 730 ) 731 732 def _parse_partition(self) -> t.Optional[exp.Partition]: 733 # https://clickhouse.com/docs/en/sql-reference/statements/alter/partition#how-to-set-partition-expression 734 if not self._match(TokenType.PARTITION): 735 return None 736 737 if self._match_text_seq("ID"): 738 # Corresponds to the PARTITION ID <string_value> syntax 739 expressions: t.List[exp.Expression] = [ 740 self.expression(exp.PartitionId, this=self._parse_string()) 741 ] 742 else: 743 expressions = self._parse_expressions() 744 745 return self.expression(exp.Partition, expressions=expressions) 746 747 def _parse_alter_table_replace(self) -> t.Optional[exp.Expression]: 748 partition = self._parse_partition() 749 750 if not partition or not self._match(TokenType.FROM): 751 return None 752 753 return self.expression( 754 exp.ReplacePartition, expression=partition, source=self._parse_table_parts() 755 ) 756 757 def _parse_projection_def(self) -> t.Optional[exp.ProjectionDef]: 758 if not self._match_text_seq("PROJECTION"): 759 return None 760 761 return self.expression( 762 exp.ProjectionDef, 763 this=self._parse_id_var(), 764 expression=self._parse_wrapped(self._parse_statement), 765 ) 766 767 def _parse_constraint(self) -> t.Optional[exp.Expression]: 768 return super()._parse_constraint() or self._parse_projection_def() 769 770 def _parse_alias( 771 self, this: t.Optional[exp.Expression], explicit: bool = False 772 ) -> t.Optional[exp.Expression]: 773 # In clickhouse "SELECT <expr> APPLY(...)" is a query modifier, 774 # so "APPLY" shouldn't be parsed as <expr>'s alias. However, "SELECT <expr> apply" is a valid alias 775 if self._match_pair(TokenType.APPLY, TokenType.L_PAREN, advance=False): 776 return this 777 778 return super()._parse_alias(this=this, explicit=explicit) 779 780 def _parse_expression(self) -> t.Optional[exp.Expression]: 781 this = super()._parse_expression() 782 783 # Clickhouse allows "SELECT <expr> [APPLY(func)] [...]]" modifier 784 while self._match_pair(TokenType.APPLY, TokenType.L_PAREN): 785 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 786 self._match(TokenType.R_PAREN) 787 788 return this 789 790 def _parse_columns(self) -> exp.Expression: 791 this: exp.Expression = self.expression(exp.Columns, this=self._parse_lambda()) 792 793 while self._next and self._match_text_seq(")", "APPLY", "("): 794 self._match(TokenType.R_PAREN) 795 this = exp.Apply(this=this, expression=self._parse_var(any_token=True)) 796 return this
Parser consumes a list of tokens produced by the Tokenizer and produces a parsed syntax tree.
Arguments:
- error_level: The desired error level. Default: ErrorLevel.IMMEDIATE
- error_message_context: The amount of context to capture from a query string when displaying the error message (in number of characters). Default: 100
- max_errors: Maximum number of error messages to include in a raised ParseError. This is only relevant if error_level is ErrorLevel.RAISE. Default: 3
Inherited Members
- sqlglot.parser.Parser
- Parser
- STRUCT_TYPE_TOKENS
- NESTED_TYPE_TOKENS
- ENUM_TYPE_TOKENS
- AGGREGATE_TYPE_TOKENS
- TYPE_TOKENS
- SIGNED_TO_UNSIGNED_TYPE_TOKEN
- SUBQUERY_PREDICATES
- DB_CREATABLES
- CREATABLES
- ALTERABLES
- INTERVAL_VARS
- ARRAY_CONSTRUCTORS
- COMMENT_TABLE_ALIAS_TOKENS
- UPDATE_ALIAS_TOKENS
- TRIM_TYPES
- CONJUNCTION
- ASSIGNMENT
- DISJUNCTION
- EQUALITY
- COMPARISON
- BITWISE
- TERM
- FACTOR
- EXPONENT
- TIMES
- TIMESTAMPS
- SET_OPERATIONS
- JOIN_METHODS
- JOIN_SIDES
- JOIN_HINTS
- LAMBDAS
- EXPRESSION_PARSERS
- STATEMENT_PARSERS
- UNARY_PARSERS
- STRING_PARSERS
- NUMERIC_PARSERS
- PRIMARY_PARSERS
- PROPERTY_PARSERS
- ALTER_ALTER_PARSERS
- INVALID_FUNC_NAME_TOKENS
- KEY_VALUE_DEFINITIONS
- SET_PARSERS
- SHOW_PARSERS
- TYPE_LITERAL_PARSERS
- TYPE_CONVERTERS
- DDL_SELECT_TOKENS
- PRE_VOLATILE_TOKENS
- TRANSACTION_KIND
- TRANSACTION_CHARACTERISTICS
- CONFLICT_ACTIONS
- CREATE_SEQUENCE
- ISOLATED_LOADING_OPTIONS
- USABLES
- CAST_ACTIONS
- SCHEMA_BINDING_OPTIONS
- PROCEDURE_OPTIONS
- EXECUTE_AS_OPTIONS
- KEY_CONSTRAINT_OPTIONS
- INSERT_ALTERNATIVES
- CLONE_KEYWORDS
- HISTORICAL_DATA_PREFIX
- HISTORICAL_DATA_KIND
- OPCLASS_FOLLOW_KEYWORDS
- OPTYPE_FOLLOW_TOKENS
- TABLE_INDEX_HINT_TOKENS
- VIEW_ATTRIBUTES
- WINDOW_ALIAS_TOKENS
- WINDOW_BEFORE_PAREN_TOKENS
- WINDOW_SIDES
- JSON_KEY_VALUE_SEPARATOR_TOKENS
- FETCH_TOKENS
- ADD_CONSTRAINT_TOKENS
- DISTINCT_TOKENS
- NULL_TOKENS
- UNNEST_OFFSET_ALIAS_TOKENS
- SELECT_START_TOKENS
- COPY_INTO_VARLEN_OPTIONS
- IS_JSON_PREDICATE_KIND
- ODBC_DATETIME_LITERALS
- ON_CONDITION_TOKENS
- PRIVILEGE_FOLLOW_TOKENS
- DESCRIBE_STYLES
- OPERATION_MODIFIERS
- STRICT_CAST
- PREFIXED_PIVOT_COLUMNS
- IDENTIFY_PIVOT_STRINGS
- ALTER_TABLE_ADD_REQUIRED_FOR_EACH_COLUMN
- TABLESAMPLE_CSV
- DEFAULT_SAMPLING_METHOD
- SET_REQUIRES_ASSIGNMENT_DELIMITER
- TRIM_PATTERN_FIRST
- STRING_ALIASES
- SET_OP_MODIFIERS
- NO_PAREN_IF_COMMANDS
- JSON_ARROWS_REQUIRE_JSON_TYPE
- COLON_IS_VARIANT_EXTRACT
- VALUES_FOLLOWED_BY_PAREN
- SUPPORTS_IMPLICIT_UNNEST
- SUPPORTS_PARTITION_SELECTION
- error_level
- error_message_context
- max_errors
- dialect
- reset
- parse
- parse_into
- check_errors
- raise_error
- expression
- validate_expression
- errors
- sql
798 class Generator(generator.Generator): 799 QUERY_HINTS = False 800 STRUCT_DELIMITER = ("(", ")") 801 NVL2_SUPPORTED = False 802 TABLESAMPLE_REQUIRES_PARENS = False 803 TABLESAMPLE_SIZE_IS_ROWS = False 804 TABLESAMPLE_KEYWORDS = "SAMPLE" 805 LAST_DAY_SUPPORTS_DATE_PART = False 806 CAN_IMPLEMENT_ARRAY_ANY = True 807 SUPPORTS_TO_NUMBER = False 808 JOIN_HINTS = False 809 TABLE_HINTS = False 810 GROUPINGS_SEP = "" 811 SET_OP_MODIFIERS = False 812 SUPPORTS_TABLE_ALIAS_COLUMNS = False 813 VALUES_AS_TABLE = False 814 815 STRING_TYPE_MAPPING = { 816 exp.DataType.Type.CHAR: "String", 817 exp.DataType.Type.LONGBLOB: "String", 818 exp.DataType.Type.LONGTEXT: "String", 819 exp.DataType.Type.MEDIUMBLOB: "String", 820 exp.DataType.Type.MEDIUMTEXT: "String", 821 exp.DataType.Type.TINYBLOB: "String", 822 exp.DataType.Type.TINYTEXT: "String", 823 exp.DataType.Type.TEXT: "String", 824 exp.DataType.Type.VARBINARY: "String", 825 exp.DataType.Type.VARCHAR: "String", 826 } 827 828 SUPPORTED_JSON_PATH_PARTS = { 829 exp.JSONPathKey, 830 exp.JSONPathRoot, 831 exp.JSONPathSubscript, 832 } 833 834 TYPE_MAPPING = { 835 **generator.Generator.TYPE_MAPPING, 836 **STRING_TYPE_MAPPING, 837 exp.DataType.Type.ARRAY: "Array", 838 exp.DataType.Type.BOOLEAN: "Bool", 839 exp.DataType.Type.BIGINT: "Int64", 840 exp.DataType.Type.DATE32: "Date32", 841 exp.DataType.Type.DATETIME: "DateTime", 842 exp.DataType.Type.DATETIME64: "DateTime64", 843 exp.DataType.Type.TIMESTAMP: "DateTime", 844 exp.DataType.Type.TIMESTAMPTZ: "DateTime", 845 exp.DataType.Type.DOUBLE: "Float64", 846 exp.DataType.Type.ENUM: "Enum", 847 exp.DataType.Type.ENUM8: "Enum8", 848 exp.DataType.Type.ENUM16: "Enum16", 849 exp.DataType.Type.FIXEDSTRING: "FixedString", 850 exp.DataType.Type.FLOAT: "Float32", 851 exp.DataType.Type.INT: "Int32", 852 exp.DataType.Type.MEDIUMINT: "Int32", 853 exp.DataType.Type.INT128: "Int128", 854 exp.DataType.Type.INT256: "Int256", 855 exp.DataType.Type.LOWCARDINALITY: "LowCardinality", 856 exp.DataType.Type.MAP: "Map", 857 exp.DataType.Type.NESTED: "Nested", 858 exp.DataType.Type.SMALLINT: "Int16", 859 exp.DataType.Type.STRUCT: "Tuple", 860 exp.DataType.Type.TINYINT: "Int8", 861 exp.DataType.Type.UBIGINT: "UInt64", 862 exp.DataType.Type.UINT: "UInt32", 863 exp.DataType.Type.UINT128: "UInt128", 864 exp.DataType.Type.UINT256: "UInt256", 865 exp.DataType.Type.USMALLINT: "UInt16", 866 exp.DataType.Type.UTINYINT: "UInt8", 867 exp.DataType.Type.IPV4: "IPv4", 868 exp.DataType.Type.IPV6: "IPv6", 869 exp.DataType.Type.AGGREGATEFUNCTION: "AggregateFunction", 870 exp.DataType.Type.SIMPLEAGGREGATEFUNCTION: "SimpleAggregateFunction", 871 } 872 873 TRANSFORMS = { 874 **generator.Generator.TRANSFORMS, 875 exp.AnyValue: rename_func("any"), 876 exp.ApproxDistinct: rename_func("uniq"), 877 exp.ArrayFilter: lambda self, e: self.func("arrayFilter", e.expression, e.this), 878 exp.ArraySize: rename_func("LENGTH"), 879 exp.ArraySum: rename_func("arraySum"), 880 exp.ArgMax: arg_max_or_min_no_count("argMax"), 881 exp.ArgMin: arg_max_or_min_no_count("argMin"), 882 exp.Array: inline_array_sql, 883 exp.CastToStrType: rename_func("CAST"), 884 exp.CountIf: rename_func("countIf"), 885 exp.CompressColumnConstraint: lambda self, 886 e: f"CODEC({self.expressions(e, key='this', flat=True)})", 887 exp.ComputedColumnConstraint: lambda self, 888 e: f"{'MATERIALIZED' if e.args.get('persisted') else 'ALIAS'} {self.sql(e, 'this')}", 889 exp.CurrentDate: lambda self, e: self.func("CURRENT_DATE"), 890 exp.DateAdd: _datetime_delta_sql("DATE_ADD"), 891 exp.DateDiff: _datetime_delta_sql("DATE_DIFF"), 892 exp.DateStrToDate: rename_func("toDate"), 893 exp.DateSub: _datetime_delta_sql("DATE_SUB"), 894 exp.Explode: rename_func("arrayJoin"), 895 exp.Final: lambda self, e: f"{self.sql(e, 'this')} FINAL", 896 exp.IsNan: rename_func("isNaN"), 897 exp.JSONExtract: json_extract_segments("JSONExtractString", quoted_index=False), 898 exp.JSONExtractScalar: json_extract_segments("JSONExtractString", quoted_index=False), 899 exp.JSONPathKey: json_path_key_only_name, 900 exp.JSONPathRoot: lambda *_: "", 901 exp.Map: lambda self, e: _lower_func(var_map_sql(self, e)), 902 exp.Nullif: rename_func("nullIf"), 903 exp.PartitionedByProperty: lambda self, e: f"PARTITION BY {self.sql(e, 'this')}", 904 exp.Pivot: no_pivot_sql, 905 exp.Quantile: _quantile_sql, 906 exp.RegexpLike: lambda self, e: self.func("match", e.this, e.expression), 907 exp.Rand: rename_func("randCanonical"), 908 exp.StartsWith: rename_func("startsWith"), 909 exp.StrPosition: lambda self, e: self.func( 910 "position", e.this, e.args.get("substr"), e.args.get("position") 911 ), 912 exp.TimeToStr: lambda self, e: self.func( 913 "formatDateTime", e.this, self.format_time(e), e.args.get("zone") 914 ), 915 exp.TimeStrToTime: _timestrtotime_sql, 916 exp.TimestampAdd: _datetime_delta_sql("TIMESTAMP_ADD"), 917 exp.TimestampSub: _datetime_delta_sql("TIMESTAMP_SUB"), 918 exp.VarMap: lambda self, e: _lower_func(var_map_sql(self, e)), 919 exp.Xor: lambda self, e: self.func("xor", e.this, e.expression, *e.expressions), 920 exp.MD5Digest: rename_func("MD5"), 921 exp.MD5: lambda self, e: self.func("LOWER", self.func("HEX", self.func("MD5", e.this))), 922 exp.SHA: rename_func("SHA1"), 923 exp.SHA2: sha256_sql, 924 exp.UnixToTime: _unix_to_time_sql, 925 exp.TimestampTrunc: timestamptrunc_sql(zone=True), 926 exp.Trim: trim_sql, 927 exp.Variance: rename_func("varSamp"), 928 exp.SchemaCommentProperty: lambda self, e: self.naked_property(e), 929 exp.Stddev: rename_func("stddevSamp"), 930 exp.Chr: rename_func("CHAR"), 931 exp.Lag: lambda self, e: self.func( 932 "lagInFrame", e.this, e.args.get("offset"), e.args.get("default") 933 ), 934 exp.Lead: lambda self, e: self.func( 935 "leadInFrame", e.this, e.args.get("offset"), e.args.get("default") 936 ), 937 } 938 939 PROPERTIES_LOCATION = { 940 **generator.Generator.PROPERTIES_LOCATION, 941 exp.OnCluster: exp.Properties.Location.POST_NAME, 942 exp.PartitionedByProperty: exp.Properties.Location.POST_SCHEMA, 943 exp.ToTableProperty: exp.Properties.Location.POST_NAME, 944 exp.VolatileProperty: exp.Properties.Location.UNSUPPORTED, 945 } 946 947 # There's no list in docs, but it can be found in Clickhouse code 948 # see `ClickHouse/src/Parsers/ParserCreate*.cpp` 949 ON_CLUSTER_TARGETS = { 950 "SCHEMA", # Transpiled CREATE SCHEMA may have OnCluster property set 951 "DATABASE", 952 "TABLE", 953 "VIEW", 954 "DICTIONARY", 955 "INDEX", 956 "FUNCTION", 957 "NAMED COLLECTION", 958 } 959 960 # https://clickhouse.com/docs/en/sql-reference/data-types/nullable 961 NON_NULLABLE_TYPES = { 962 exp.DataType.Type.ARRAY, 963 exp.DataType.Type.MAP, 964 exp.DataType.Type.STRUCT, 965 } 966 967 def strtodate_sql(self, expression: exp.StrToDate) -> str: 968 strtodate_sql = self.function_fallback_sql(expression) 969 970 if not isinstance(expression.parent, exp.Cast): 971 # StrToDate returns DATEs in other dialects (eg. postgres), so 972 # this branch aims to improve the transpilation to clickhouse 973 return f"CAST({strtodate_sql} AS DATE)" 974 975 return strtodate_sql 976 977 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 978 this = expression.this 979 980 if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"): 981 return self.sql(this) 982 983 return super().cast_sql(expression, safe_prefix=safe_prefix) 984 985 def trycast_sql(self, expression: exp.TryCast) -> str: 986 dtype = expression.to 987 if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True): 988 # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T) 989 dtype.set("nullable", True) 990 991 return super().cast_sql(expression) 992 993 def _jsonpathsubscript_sql(self, expression: exp.JSONPathSubscript) -> str: 994 this = self.json_path_part(expression.this) 995 return str(int(this) + 1) if is_int(this) else this 996 997 def likeproperty_sql(self, expression: exp.LikeProperty) -> str: 998 return f"AS {self.sql(expression, 'this')}" 999 1000 def _any_to_has( 1001 self, 1002 expression: exp.EQ | exp.NEQ, 1003 default: t.Callable[[t.Any], str], 1004 prefix: str = "", 1005 ) -> str: 1006 if isinstance(expression.left, exp.Any): 1007 arr = expression.left 1008 this = expression.right 1009 elif isinstance(expression.right, exp.Any): 1010 arr = expression.right 1011 this = expression.left 1012 else: 1013 return default(expression) 1014 1015 return prefix + self.func("has", arr.this.unnest(), this) 1016 1017 def eq_sql(self, expression: exp.EQ) -> str: 1018 return self._any_to_has(expression, super().eq_sql) 1019 1020 def neq_sql(self, expression: exp.NEQ) -> str: 1021 return self._any_to_has(expression, super().neq_sql, "NOT ") 1022 1023 def regexpilike_sql(self, expression: exp.RegexpILike) -> str: 1024 # Manually add a flag to make the search case-insensitive 1025 regex = self.func("CONCAT", "'(?i)'", expression.expression) 1026 return self.func("match", expression.this, regex) 1027 1028 def datatype_sql(self, expression: exp.DataType) -> str: 1029 # String is the standard ClickHouse type, every other variant is just an alias. 1030 # Additionally, any supplied length parameter will be ignored. 1031 # 1032 # https://clickhouse.com/docs/en/sql-reference/data-types/string 1033 if expression.this in self.STRING_TYPE_MAPPING: 1034 dtype = "String" 1035 else: 1036 dtype = super().datatype_sql(expression) 1037 1038 # This section changes the type to `Nullable(...)` if the following conditions hold: 1039 # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable` 1040 # and change their semantics 1041 # - It's not the key type of a `Map`. This is because ClickHouse enforces the following 1042 # constraint: "Type of Map key must be a type, that can be represented by integer or 1043 # String or FixedString (possibly LowCardinality) or UUID or IPv6" 1044 # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type 1045 parent = expression.parent 1046 nullable = expression.args.get("nullable") 1047 if nullable is True or ( 1048 nullable is None 1049 and not ( 1050 isinstance(parent, exp.DataType) 1051 and parent.is_type(exp.DataType.Type.MAP, check_nullable=True) 1052 and expression.index in (None, 0) 1053 ) 1054 and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True) 1055 ): 1056 dtype = f"Nullable({dtype})" 1057 1058 return dtype 1059 1060 def cte_sql(self, expression: exp.CTE) -> str: 1061 if expression.args.get("scalar"): 1062 this = self.sql(expression, "this") 1063 alias = self.sql(expression, "alias") 1064 return f"{this} AS {alias}" 1065 1066 return super().cte_sql(expression) 1067 1068 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 1069 return super().after_limit_modifiers(expression) + [ 1070 ( 1071 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 1072 if expression.args.get("settings") 1073 else "" 1074 ), 1075 ( 1076 self.seg("FORMAT ") + self.sql(expression, "format") 1077 if expression.args.get("format") 1078 else "" 1079 ), 1080 ] 1081 1082 def parameterizedagg_sql(self, expression: exp.ParameterizedAgg) -> str: 1083 params = self.expressions(expression, key="params", flat=True) 1084 return self.func(expression.name, *expression.expressions) + f"({params})" 1085 1086 def anonymousaggfunc_sql(self, expression: exp.AnonymousAggFunc) -> str: 1087 return self.func(expression.name, *expression.expressions) 1088 1089 def combinedaggfunc_sql(self, expression: exp.CombinedAggFunc) -> str: 1090 return self.anonymousaggfunc_sql(expression) 1091 1092 def combinedparameterizedagg_sql(self, expression: exp.CombinedParameterizedAgg) -> str: 1093 return self.parameterizedagg_sql(expression) 1094 1095 def placeholder_sql(self, expression: exp.Placeholder) -> str: 1096 return f"{{{expression.name}: {self.sql(expression, 'kind')}}}" 1097 1098 def oncluster_sql(self, expression: exp.OnCluster) -> str: 1099 return f"ON CLUSTER {self.sql(expression, 'this')}" 1100 1101 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1102 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 1103 exp.Properties.Location.POST_NAME 1104 ): 1105 this_name = self.sql( 1106 expression.this if isinstance(expression.this, exp.Schema) else expression, 1107 "this", 1108 ) 1109 this_properties = " ".join( 1110 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 1111 ) 1112 this_schema = self.schema_columns_sql(expression.this) 1113 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1114 1115 return f"{this_name}{self.sep()}{this_properties}{this_schema}" 1116 1117 return super().createable_sql(expression, locations) 1118 1119 def create_sql(self, expression: exp.Create) -> str: 1120 # The comment property comes last in CTAS statements, i.e. after the query 1121 query = expression.expression 1122 if isinstance(query, exp.Query): 1123 comment_prop = expression.find(exp.SchemaCommentProperty) 1124 if comment_prop: 1125 comment_prop.pop() 1126 query.replace(exp.paren(query)) 1127 else: 1128 comment_prop = None 1129 1130 create_sql = super().create_sql(expression) 1131 1132 comment_sql = self.sql(comment_prop) 1133 comment_sql = f" {comment_sql}" if comment_sql else "" 1134 1135 return f"{create_sql}{comment_sql}" 1136 1137 def prewhere_sql(self, expression: exp.PreWhere) -> str: 1138 this = self.indent(self.sql(expression, "this")) 1139 return f"{self.seg('PREWHERE')}{self.sep()}{this}" 1140 1141 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1142 this = self.sql(expression, "this") 1143 this = f" {this}" if this else "" 1144 expr = self.sql(expression, "expression") 1145 expr = f" {expr}" if expr else "" 1146 index_type = self.sql(expression, "index_type") 1147 index_type = f" TYPE {index_type}" if index_type else "" 1148 granularity = self.sql(expression, "granularity") 1149 granularity = f" GRANULARITY {granularity}" if granularity else "" 1150 1151 return f"INDEX{this}{expr}{index_type}{granularity}" 1152 1153 def partition_sql(self, expression: exp.Partition) -> str: 1154 return f"PARTITION {self.expressions(expression, flat=True)}" 1155 1156 def partitionid_sql(self, expression: exp.PartitionId) -> str: 1157 return f"ID {self.sql(expression.this)}" 1158 1159 def replacepartition_sql(self, expression: exp.ReplacePartition) -> str: 1160 return ( 1161 f"REPLACE {self.sql(expression.expression)} FROM {self.sql(expression, 'source')}" 1162 ) 1163 1164 def projectiondef_sql(self, expression: exp.ProjectionDef) -> str: 1165 return f"PROJECTION {self.sql(expression.this)} {self.wrap(expression.expression)}"
Generator converts a given syntax tree to the corresponding SQL string.
Arguments:
- pretty: Whether to format the produced SQL string. Default: False.
- identify: Determines when an identifier should be quoted. Possible values are: False (default): Never quote, except in cases where it's mandatory by the dialect. True or 'always': Always quote. 'safe': Only quote identifiers that are case insensitive.
- normalize: Whether to normalize identifiers to lowercase. Default: False.
- pad: The pad size in a formatted string. For example, this affects the indentation of a projection in a query, relative to its nesting level. Default: 2.
- indent: The indentation size in a formatted string. For example, this affects the
indentation of subqueries and filters under a
WHERE
clause. Default: 2. - normalize_functions: How to normalize function names. Possible values are: "upper" or True (default): Convert names to uppercase. "lower": Convert names to lowercase. False: Disables function name normalization.
- unsupported_level: Determines the generator's behavior when it encounters unsupported expressions. Default ErrorLevel.WARN.
- max_unsupported: Maximum number of unsupported messages to include in a raised UnsupportedError. This is only relevant if unsupported_level is ErrorLevel.RAISE. Default: 3
- leading_comma: Whether the comma is leading or trailing in select expressions. This is only relevant when generating in pretty mode. Default: False
- max_text_width: The max number of characters in a segment before creating new lines in pretty mode. The default is on the smaller end because the length only represents a segment and not the true line length. Default: 80
- comments: Whether to preserve comments in the output SQL code. Default: True
967 def strtodate_sql(self, expression: exp.StrToDate) -> str: 968 strtodate_sql = self.function_fallback_sql(expression) 969 970 if not isinstance(expression.parent, exp.Cast): 971 # StrToDate returns DATEs in other dialects (eg. postgres), so 972 # this branch aims to improve the transpilation to clickhouse 973 return f"CAST({strtodate_sql} AS DATE)" 974 975 return strtodate_sql
977 def cast_sql(self, expression: exp.Cast, safe_prefix: t.Optional[str] = None) -> str: 978 this = expression.this 979 980 if isinstance(this, exp.StrToDate) and expression.to == exp.DataType.build("datetime"): 981 return self.sql(this) 982 983 return super().cast_sql(expression, safe_prefix=safe_prefix)
985 def trycast_sql(self, expression: exp.TryCast) -> str: 986 dtype = expression.to 987 if not dtype.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True): 988 # Casting x into Nullable(T) appears to behave similarly to TRY_CAST(x AS T) 989 dtype.set("nullable", True) 990 991 return super().cast_sql(expression)
1028 def datatype_sql(self, expression: exp.DataType) -> str: 1029 # String is the standard ClickHouse type, every other variant is just an alias. 1030 # Additionally, any supplied length parameter will be ignored. 1031 # 1032 # https://clickhouse.com/docs/en/sql-reference/data-types/string 1033 if expression.this in self.STRING_TYPE_MAPPING: 1034 dtype = "String" 1035 else: 1036 dtype = super().datatype_sql(expression) 1037 1038 # This section changes the type to `Nullable(...)` if the following conditions hold: 1039 # - It's marked as nullable - this ensures we won't wrap ClickHouse types with `Nullable` 1040 # and change their semantics 1041 # - It's not the key type of a `Map`. This is because ClickHouse enforces the following 1042 # constraint: "Type of Map key must be a type, that can be represented by integer or 1043 # String or FixedString (possibly LowCardinality) or UUID or IPv6" 1044 # - It's not a composite type, e.g. `Nullable(Array(...))` is not a valid type 1045 parent = expression.parent 1046 nullable = expression.args.get("nullable") 1047 if nullable is True or ( 1048 nullable is None 1049 and not ( 1050 isinstance(parent, exp.DataType) 1051 and parent.is_type(exp.DataType.Type.MAP, check_nullable=True) 1052 and expression.index in (None, 0) 1053 ) 1054 and not expression.is_type(*self.NON_NULLABLE_TYPES, check_nullable=True) 1055 ): 1056 dtype = f"Nullable({dtype})" 1057 1058 return dtype
1068 def after_limit_modifiers(self, expression: exp.Expression) -> t.List[str]: 1069 return super().after_limit_modifiers(expression) + [ 1070 ( 1071 self.seg("SETTINGS ") + self.expressions(expression, key="settings", flat=True) 1072 if expression.args.get("settings") 1073 else "" 1074 ), 1075 ( 1076 self.seg("FORMAT ") + self.sql(expression, "format") 1077 if expression.args.get("format") 1078 else "" 1079 ), 1080 ]
1101 def createable_sql(self, expression: exp.Create, locations: t.DefaultDict) -> str: 1102 if expression.kind in self.ON_CLUSTER_TARGETS and locations.get( 1103 exp.Properties.Location.POST_NAME 1104 ): 1105 this_name = self.sql( 1106 expression.this if isinstance(expression.this, exp.Schema) else expression, 1107 "this", 1108 ) 1109 this_properties = " ".join( 1110 [self.sql(prop) for prop in locations[exp.Properties.Location.POST_NAME]] 1111 ) 1112 this_schema = self.schema_columns_sql(expression.this) 1113 this_schema = f"{self.sep()}{this_schema}" if this_schema else "" 1114 1115 return f"{this_name}{self.sep()}{this_properties}{this_schema}" 1116 1117 return super().createable_sql(expression, locations)
1119 def create_sql(self, expression: exp.Create) -> str: 1120 # The comment property comes last in CTAS statements, i.e. after the query 1121 query = expression.expression 1122 if isinstance(query, exp.Query): 1123 comment_prop = expression.find(exp.SchemaCommentProperty) 1124 if comment_prop: 1125 comment_prop.pop() 1126 query.replace(exp.paren(query)) 1127 else: 1128 comment_prop = None 1129 1130 create_sql = super().create_sql(expression) 1131 1132 comment_sql = self.sql(comment_prop) 1133 comment_sql = f" {comment_sql}" if comment_sql else "" 1134 1135 return f"{create_sql}{comment_sql}"
1141 def indexcolumnconstraint_sql(self, expression: exp.IndexColumnConstraint) -> str: 1142 this = self.sql(expression, "this") 1143 this = f" {this}" if this else "" 1144 expr = self.sql(expression, "expression") 1145 expr = f" {expr}" if expr else "" 1146 index_type = self.sql(expression, "index_type") 1147 index_type = f" TYPE {index_type}" if index_type else "" 1148 granularity = self.sql(expression, "granularity") 1149 granularity = f" GRANULARITY {granularity}" if granularity else "" 1150 1151 return f"INDEX{this}{expr}{index_type}{granularity}"
Inherited Members
- sqlglot.generator.Generator
- Generator
- NULL_ORDERING_SUPPORTED
- IGNORE_NULLS_IN_FUNC
- LOCKING_READS_SUPPORTED
- EXCEPT_INTERSECT_SUPPORT_ALL_CLAUSE
- WRAP_DERIVED_VALUES
- CREATE_FUNCTION_RETURN_AS
- MATCHED_BY_SOURCE
- SINGLE_STRING_INTERVAL
- INTERVAL_ALLOWS_PLURAL_FORM
- LIMIT_FETCH
- LIMIT_ONLY_LITERALS
- RENAME_TABLE_WITH_DB
- INDEX_ON
- QUERY_HINT_SEP
- IS_BOOL_ALLOWED
- DUPLICATE_KEY_UPDATE_WITH_SET
- LIMIT_IS_TOP
- RETURNING_END
- EXTRACT_ALLOWS_QUOTES
- TZ_TO_WITH_TIME_ZONE
- ALTER_TABLE_INCLUDE_COLUMN_KEYWORD
- UNNEST_WITH_ORDINALITY
- AGGREGATE_FILTER_SUPPORTED
- SEMI_ANTI_JOIN_WITH_SIDE
- COMPUTED_COLUMN_WITH_TYPE
- SUPPORTS_TABLE_COPY
- TABLESAMPLE_WITH_METHOD
- TABLESAMPLE_SEED_KEYWORD
- COLLATE_IS_FUNC
- DATA_TYPE_SPECIFIERS_ALLOWED
- ENSURE_BOOLS
- CTE_RECURSIVE_KEYWORD_REQUIRED
- SUPPORTS_SINGLE_ARG_CONCAT
- UNPIVOT_ALIASES_ARE_IDENTIFIERS
- JSON_KEY_VALUE_PAIR_SEP
- INSERT_OVERWRITE
- SUPPORTS_SELECT_INTO
- SUPPORTS_UNLOGGED_TABLES
- SUPPORTS_CREATE_TABLE_LIKE
- LIKE_PROPERTY_INSIDE_SCHEMA
- MULTI_ARG_DISTINCT
- JSON_TYPE_REQUIRED_FOR_EXTRACTION
- JSON_PATH_BRACKETED_KEY_SUPPORTED
- JSON_PATH_SINGLE_QUOTE_ESCAPE
- COPY_PARAMS_ARE_WRAPPED
- COPY_PARAMS_EQ_REQUIRED
- COPY_HAS_INTO_KEYWORD
- STAR_EXCEPT
- HEX_FUNC
- WITH_PROPERTIES_PREFIX
- QUOTE_JSON_PATH
- PAD_FILL_PATTERN_IS_REQUIRED
- SUPPORTS_EXPLODING_PROJECTIONS
- ARRAY_CONCAT_IS_VAR_LEN
- SUPPORTS_CONVERT_TIMEZONE
- PARSE_JSON_NAME
- TIME_PART_SINGULARS
- TOKEN_MAPPING
- PARAMETER_TOKEN
- NAMED_PLACEHOLDER_TOKEN
- RESERVED_KEYWORDS
- WITH_SEPARATED_COMMENTS
- EXCLUDE_COMMENTS
- UNWRAPPED_INTERVAL_VALUES
- PARAMETERIZABLE_TEXT_TYPES
- EXPRESSIONS_WITHOUT_NESTED_CTES
- SENTINEL_LINE_BREAK
- pretty
- identify
- normalize
- pad
- unsupported_level
- max_unsupported
- leading_comma
- max_text_width
- comments
- dialect
- normalize_functions
- unsupported_messages
- generate
- preprocess
- unsupported
- sep
- seg
- pad_comment
- maybe_comment
- wrap
- no_identify
- normalize_func
- indent
- sql
- uncache_sql
- cache_sql
- characterset_sql
- column_parts
- column_sql
- columnposition_sql
- columndef_sql
- columnconstraint_sql
- computedcolumnconstraint_sql
- autoincrementcolumnconstraint_sql
- compresscolumnconstraint_sql
- generatedasidentitycolumnconstraint_sql
- generatedasrowcolumnconstraint_sql
- periodforsystemtimeconstraint_sql
- notnullcolumnconstraint_sql
- transformcolumnconstraint_sql
- primarykeycolumnconstraint_sql
- uniquecolumnconstraint_sql
- sequenceproperties_sql
- clone_sql
- describe_sql
- heredoc_sql
- prepend_ctes
- with_sql
- tablealias_sql
- bitstring_sql
- hexstring_sql
- bytestring_sql
- unicodestring_sql
- rawstring_sql
- datatypeparam_sql
- directory_sql
- delete_sql
- drop_sql
- set_operation
- set_operations
- fetch_sql
- filter_sql
- hint_sql
- indexparameters_sql
- index_sql
- identifier_sql
- hex_sql
- lowerhex_sql
- inputoutputformat_sql
- national_sql
- properties_sql
- root_properties
- properties
- with_properties
- locate_properties
- property_name
- property_sql
- fallbackproperty_sql
- journalproperty_sql
- freespaceproperty_sql
- checksumproperty_sql
- mergeblockratioproperty_sql
- datablocksizeproperty_sql
- blockcompressionproperty_sql
- isolatedloadingproperty_sql
- partitionboundspec_sql
- partitionedofproperty_sql
- lockingproperty_sql
- withdataproperty_sql
- withsystemversioningproperty_sql
- insert_sql
- introducer_sql
- kill_sql
- pseudotype_sql
- objectidentifier_sql
- onconflict_sql
- returning_sql
- rowformatdelimitedproperty_sql
- withtablehint_sql
- indextablehint_sql
- historicaldata_sql
- table_parts
- table_sql
- tablesample_sql
- pivot_sql
- version_sql
- tuple_sql
- update_sql
- values_sql
- var_sql
- into_sql
- from_sql
- groupingsets_sql
- rollup_sql
- cube_sql
- group_sql
- having_sql
- connect_sql
- prior_sql
- join_sql
- lambda_sql
- lateral_op
- lateral_sql
- limit_sql
- offset_sql
- setitem_sql
- set_sql
- pragma_sql
- lock_sql
- literal_sql
- escape_str
- loaddata_sql
- null_sql
- boolean_sql
- order_sql
- withfill_sql
- cluster_sql
- distribute_sql
- sort_sql
- ordered_sql
- matchrecognizemeasure_sql
- matchrecognize_sql
- query_modifiers
- options_modifier
- queryoption_sql
- offset_limit_modifiers
- select_sql
- schema_sql
- schema_columns_sql
- star_sql
- parameter_sql
- sessionparameter_sql
- subquery_sql
- qualify_sql
- unnest_sql
- where_sql
- window_sql
- partition_by_sql
- windowspec_sql
- withingroup_sql
- between_sql
- bracket_offset_expressions
- bracket_sql
- all_sql
- any_sql
- exists_sql
- case_sql
- constraint_sql
- nextvaluefor_sql
- extract_sql
- trim_sql
- convert_concat_args
- concat_sql
- concatws_sql
- check_sql
- foreignkey_sql
- primarykey_sql
- if_sql
- matchagainst_sql
- jsonkeyvalue_sql
- jsonpath_sql
- json_path_part
- formatjson_sql
- jsonobject_sql
- jsonobjectagg_sql
- jsonarray_sql
- jsonarrayagg_sql
- jsoncolumndef_sql
- jsonschema_sql
- jsontable_sql
- openjsoncolumndef_sql
- openjson_sql
- in_sql
- in_unnest_op
- interval_sql
- return_sql
- reference_sql
- anonymous_sql
- paren_sql
- neg_sql
- not_sql
- alias_sql
- pivotalias_sql
- aliases_sql
- atindex_sql
- attimezone_sql
- fromtimezone_sql
- add_sql
- and_sql
- or_sql
- xor_sql
- connector_sql
- bitwiseand_sql
- bitwiseleftshift_sql
- bitwisenot_sql
- bitwiseor_sql
- bitwiserightshift_sql
- bitwisexor_sql
- currentdate_sql
- collate_sql
- command_sql
- comment_sql
- mergetreettlaction_sql
- mergetreettl_sql
- transaction_sql
- commit_sql
- rollback_sql
- altercolumn_sql
- alterdiststyle_sql
- altersortkey_sql
- alterrename_sql
- renamecolumn_sql
- alterset_sql
- alter_sql
- add_column_sql
- droppartition_sql
- addconstraint_sql
- distinct_sql
- ignorenulls_sql
- respectnulls_sql
- havingmax_sql
- intdiv_sql
- dpipe_sql
- div_sql
- overlaps_sql
- distance_sql
- dot_sql
- propertyeq_sql
- escape_sql
- glob_sql
- gt_sql
- gte_sql
- ilike_sql
- ilikeany_sql
- is_sql
- like_sql
- likeany_sql
- similarto_sql
- lt_sql
- lte_sql
- mod_sql
- mul_sql
- nullsafeeq_sql
- nullsafeneq_sql
- slice_sql
- sub_sql
- try_sql
- log_sql
- use_sql
- binary
- function_fallback_sql
- func
- format_args
- too_wide
- format_time
- expressions
- op_expressions
- naked_property
- tag_sql
- token_sql
- userdefinedfunction_sql
- joinhint_sql
- kwarg_sql
- when_sql
- merge_sql
- tochar_sql
- tonumber_sql
- dictproperty_sql
- dictrange_sql
- dictsubproperty_sql
- duplicatekeyproperty_sql
- distributedbyproperty_sql
- clusteredbyproperty_sql
- anyvalue_sql
- querytransform_sql
- indexconstraintoption_sql
- checkcolumnconstraint_sql
- nvl2_sql
- comprehension_sql
- columnprefix_sql
- opclass_sql
- predict_sql
- forin_sql
- refresh_sql
- toarray_sql
- tsordstotime_sql
- tsordstotimestamp_sql
- tsordstodate_sql
- unixdate_sql
- lastday_sql
- dateadd_sql
- arrayany_sql
- struct_sql
- partitionrange_sql
- truncatetable_sql
- convert_sql
- copyparameter_sql
- credentials_sql
- copy_sql
- semicolon_sql
- datadeletionproperty_sql
- maskingpolicycolumnconstraint_sql
- gapfill_sql
- scope_resolution
- scoperesolution_sql
- parsejson_sql
- rand_sql
- changes_sql
- pad_sql
- summarize_sql
- explodinggenerateseries_sql
- arrayconcat_sql
- converttimezone_sql
- json_sql
- jsonvalue_sql
- conditionalinsert_sql
- multitableinserts_sql
- oncondition_sql
- jsonexists_sql
- arrayagg_sql
- apply_sql
- grant_sql
- grantprivilege_sql
- grantprincipal_sql
- columns_sql
- overlay_sql
- todouble_sql