Skip to content

feat: Extend ARRAY_AGG to support ORDER BY #35

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
May 19, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
42 changes: 42 additions & 0 deletions src/ast/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -343,6 +343,8 @@ pub enum Expr {
Subquery(Box<Query>),
/// The `LISTAGG` function `SELECT LISTAGG(...) WITHIN GROUP (ORDER BY ...)`
ListAgg(ListAgg),
/// The `ARRAY_AGG` function `SELECT ARRAY_AGG(... ORDER BY ...)`
ArrayAgg(ArrayAgg),
/// The `GROUPING SETS` expr.
GroupingSets(Vec<Vec<Expr>>),
/// The `CUBE` expr.
Expand Down Expand Up @@ -542,6 +544,7 @@ impl fmt::Display for Expr {
Expr::Subquery(s) => write!(f, "({})", s),
Expr::ArraySubquery(s) => write!(f, "ARRAY({})", s),
Expr::ListAgg(listagg) => write!(f, "{}", listagg),
Expr::ArrayAgg(arrayagg) => write!(f, "{}", arrayagg),
Expr::GroupingSets(sets) => {
write!(f, "GROUPING SETS (")?;
let mut sep = "";
Expand Down Expand Up @@ -2448,6 +2451,45 @@ impl fmt::Display for ListAggOnOverflow {
}
}

/// An `ARRAY_AGG` invocation `ARRAY_AGG( [ DISTINCT ] <expr> [ORDER BY <expr>] [LIMIT <n>] )`
/// Or `ARRAY_AGG( [ DISTINCT ] <expr> ) [ WITHIN GROUP ( ORDER BY <expr> ) ]`
/// ORDER BY position is defined differently for BigQuery, Postgres and Snowflake.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct ArrayAgg {
pub distinct: bool,
pub expr: Box<Expr>,
pub order_by: Option<Box<OrderByExpr>>,
pub limit: Option<Box<Expr>>,
pub within_group: bool, // order by is used inside a within group or not
}

impl fmt::Display for ArrayAgg {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(
f,
"ARRAY_AGG({}{}",
if self.distinct { "DISTINCT " } else { "" },
self.expr
)?;
if !self.within_group {
if let Some(order_by) = &self.order_by {
write!(f, " ORDER BY {}", order_by)?;
}
if let Some(limit) = &self.limit {
write!(f, " LIMIT {}", limit)?;
}
}
write!(f, ")")?;
if self.within_group {
if let Some(order_by) = &self.order_by {
write!(f, " WITHIN GROUP (ORDER BY {})", order_by)?;
}
}
Ok(())
}
}

#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum ObjectType {
Expand Down
6 changes: 6 additions & 0 deletions src/dialect/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,12 @@ pub trait Dialect: Debug + Any {
fn is_identifier_start(&self, ch: char) -> bool;
/// Determine if a character is a valid unquoted identifier character
fn is_identifier_part(&self, ch: char) -> bool;
/// Returns true if the dialect supports ARRAY_AGG() [WITHIN GROUP (ORDER BY)] expressions.
/// Otherwise, the dialect should expect an `ORDER BY` without the `WITHIN GROUP` clause, e.g. `ANSI` [(1)].
/// [(1)]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#array-aggregate-function
fn supports_within_after_array_aggregation(&self) -> bool {
false
}
}

impl dyn Dialect {
Expand Down
4 changes: 4 additions & 0 deletions src/dialect/snowflake.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 +28,8 @@ impl Dialect for SnowflakeDialect {
|| ch == '$'
|| ch == '_'
}

fn supports_within_after_array_aggregation(&self) -> bool {
true
}
}
49 changes: 49 additions & 0 deletions src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -476,6 +476,7 @@ impl<'a> Parser<'a> {
self.expect_token(&Token::LParen)?;
self.parse_array_subquery()
}
Keyword::ARRAY_AGG => self.parse_array_agg_expr(),
Keyword::NOT => Ok(Expr::UnaryOp {
op: UnaryOperator::Not,
expr: Box::new(self.parse_subexpr(Self::UNARY_NOT_PREC)?),
Expand Down Expand Up @@ -1006,6 +1007,54 @@ impl<'a> Parser<'a> {
}))
}

pub fn parse_array_agg_expr(&mut self) -> Result<Expr, ParserError> {
self.expect_token(&Token::LParen)?;
let distinct = self.parse_keyword(Keyword::DISTINCT);
let expr = Box::new(self.parse_expr()?);
// ANSI SQL and BigQuery define ORDER BY inside function.
if !self.dialect.supports_within_after_array_aggregation() {
let order_by = if self.parse_keywords(&[Keyword::ORDER, Keyword::BY]) {
let order_by_expr = self.parse_order_by_expr()?;
Some(Box::new(order_by_expr))
} else {
None
};
let limit = if self.parse_keyword(Keyword::LIMIT) {
self.parse_limit()?.map(Box::new)
} else {
None
};
self.expect_token(&Token::RParen)?;
return Ok(Expr::ArrayAgg(ArrayAgg {
distinct,
expr,
order_by,
limit,
within_group: false,
}));
}
// Snowflake defines ORDERY BY in within group instead of inside the function like
// ANSI SQL.
self.expect_token(&Token::RParen)?;
let within_group = if self.parse_keywords(&[Keyword::WITHIN, Keyword::GROUP]) {
self.expect_token(&Token::LParen)?;
self.expect_keywords(&[Keyword::ORDER, Keyword::BY])?;
let order_by_expr = self.parse_order_by_expr()?;
self.expect_token(&Token::RParen)?;
Some(Box::new(order_by_expr))
} else {
None
};

Ok(Expr::ArrayAgg(ArrayAgg {
distinct,
expr,
order_by: within_group,
limit: None,
within_group: true,
}))
}

// This function parses date/time fields for both the EXTRACT function-like
// operator and interval qualifiers. EXTRACT supports a wider set of
// date/time fields than interval qualifiers, so this function may need to
Expand Down
25 changes: 23 additions & 2 deletions tests/sqlparser_common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ mod test_utils;
use matches::assert_matches;
use sqlparser::ast::*;
use sqlparser::dialect::{
AnsiDialect, ClickHouseDialect, GenericDialect, MsSqlDialect, PostgreSqlDialect, SQLiteDialect,
SnowflakeDialect,
AnsiDialect, ClickHouseDialect, GenericDialect, HiveDialect, MsSqlDialect, PostgreSqlDialect,
SQLiteDialect, SnowflakeDialect,
};
use sqlparser::keywords::ALL_KEYWORDS;
use sqlparser::parser::{Parser, ParserError};
Expand Down Expand Up @@ -1644,6 +1644,27 @@ fn parse_listagg() {
);
}

#[test]
fn parse_array_agg_func() {
let supported_dialects = TestedDialects {
dialects: vec![
Box::new(GenericDialect {}),
Box::new(PostgreSqlDialect {}),
Box::new(MsSqlDialect {}),
Box::new(AnsiDialect {}),
Box::new(HiveDialect {}),
],
};

for sql in [
"SELECT ARRAY_AGG(x ORDER BY x) AS a FROM T",
"SELECT ARRAY_AGG(x ORDER BY x LIMIT 2) FROM tbl",
"SELECT ARRAY_AGG(DISTINCT x ORDER BY x LIMIT 2) FROM tbl",
] {
supported_dialects.verified_stmt(sql);
}
}

#[test]
fn parse_create_table() {
let sql = "CREATE TABLE uk_cities (\
Expand Down
19 changes: 19 additions & 0 deletions tests/sqlparser_snowflake.rs
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,25 @@ fn test_single_table_in_parenthesis_with_alias() {
);
}

#[test]
fn test_array_agg_func() {
for sql in [
"SELECT ARRAY_AGG(x) WITHIN GROUP (ORDER BY x) AS a FROM T",
"SELECT ARRAY_AGG(DISTINCT x) WITHIN GROUP (ORDER BY x ASC) FROM tbl",
] {
snowflake().verified_stmt(sql);
}

let sql = "select array_agg(x order by x) as a from T";
let result = snowflake().parse_sql_statements(sql);
assert_eq!(
result,
Err(ParserError::ParserError(String::from(
"Expected ), found: order"
)))
)
}

fn snowflake() -> TestedDialects {
TestedDialects {
dialects: vec![Box::new(SnowflakeDialect {})],
Expand Down