mismatched row count

【详述】在SQL查询中出现mismatched row count 错误,去掉关联仅查询union all是没问题的,但是去掉union 查询关联是报错的;另有一个相似的查询是没有union,查询是没问题的。
【背景】查询是2个聚合union all在一起作为主表,去关联2个子查询,子查询结果concat转为数组存储,详细SQL如下

SELECT
m.tenant_id AS 租户id
, m.market_id AS 站点id
, m.msku
, array_concat(cast(coalesce(pera.uv, ‘0’) AS array < INT>),
cast(coalesce(per1.uv, ‘0’) AS array < INT>), [9999]) AS user_array
FROM (
SELECT
tenant_id
, msku AS msku
, market_id
, sum(returns) AS returns
FROM dws.dws_sales_operations_df
GROUP BY
tenant_id
, msku
, market_id
UNION ALL
SELECT
tenant_id
, msku AS msku
, market_id
, sum(orders) AS returns
FROM dws.dws_sales_return_statistics_df
GROUP BY
tenant_id
, msku
, market_id
) m
LEFT JOIN (
SELECT
tenant_id
, group_concat(cast(user_id AS Varchar)) AS uv
FROM dim.dim_common_data_permission
WHERE all_flag = 1
GROUP BY tenant_id
) pera
ON pera.tenant_id = m.tenant_id
LEFT JOIN (
SELECT
tenant_id
, msku
, market_id
, group_concat(cast(user_id AS Varchar)) AS uv
FROM dim.dim_common_data_permission
WHERE all_flag = 0
GROUP BY
msku
, market_id
, tenant_id
) per1
ON per1.msku = m.msku AND per1.market_id = m.market_id AND per1.tenant_id = m.tenant_id
;

【StarRocks版本】2.4

我测试了下,好像跟cast varchar as array 有关,语句中改为 concat_ws(’,’, coalesce(pera.uv, ‘0’), coalesce(per1.uv, ‘0’), ‘9999’) 就不会报错,加上cast就是这个错误

那应该是有问题的,把涉及的表结构发我下,我们复现 下

也帮发个执行计划 explain verbose 这个SQL

具体的版本号是什么?2.4.0?

PLAN FRAGMENT 0
OUTPUT EXPRS:116: tenant_id | 118: market_id | 117: msku | 136: array_concat
PARTITION: UNPARTITIONED

RESULT SINK

22:EXCHANGE
limit: 200

PLAN FRAGMENT 1
OUTPUT EXPRS:
PARTITION: RANDOM

STREAM DATA SINK
EXCHANGE ID: 22
UNPARTITIONED

21:Project
| <slot 116> : 116: tenant_id
| <slot 117> : 117: msku
| <slot 118> : 118: market_id
| <slot 136> : array_concat(CAST(coalesce(127: group_concat, ‘0’) AS ARRAY), CAST(coalesce(135: group_concat, ‘0’) AS ARRAY), CAST(ARRAY<smallint(6)>[9999] AS ARRAY))
| limit: 200
|
20:HASH JOIN
| join op: RIGHT OUTER JOIN (BUCKET_SHUFFLE)
| colocate: false, reason:
| equal join conjunct: 132: msku = 117: msku
| equal join conjunct: 131: market_id = 118: market_id
| equal join conjunct: 128: tenant_id = 116: tenant_id
| limit: 200
|
|----19:EXCHANGE
| limit: 200
|
2:AGGREGATE (update finalize)
| output: group_concat(134: cast)
| group by: 132: msku, 131: market_id, 128: tenant_id
|
1:Project
| <slot 128> : 128: tenant_id
| <slot 131> : 131: market_id
| <slot 132> : 132: msku
| <slot 134> : CAST(129: user_id AS VARCHAR)
|
0:OlapScanNode
TABLE: dim_common_data_permission
PREAGGREGATION: ON
PREDICATES: 133: all_flag = 0
partitions=1/1
rollup: dim_common_data_permission
tabletRatio=10/10
tabletList=3515060,3515064,3515068,3515072,3515076,3515080,3515084,3515088,3515092,3515096
cardinality=76235
avgRowSize=46.52272
numNodes=0

PLAN FRAGMENT 2
OUTPUT EXPRS:
PARTITION: RANDOM

STREAM DATA SINK
EXCHANGE ID: 19
BUCKET_SHUFFLE_HASH_PARTITIONED: 116: tenant_id

18:Project
| <slot 116> : 116: tenant_id
| <slot 117> : 117: msku
| <slot 118> : 118: market_id
| <slot 127> : 127: group_concat
| limit: 200
|
17:HASH JOIN
| join op: LEFT OUTER JOIN (BROADCAST)
| colocate: false, reason:
| equal join conjunct: 116: tenant_id = 120: tenant_id
| limit: 200
|
|----16:EXCHANGE
|
3:UNION
| limit: 200
|
|----12:EXCHANGE
| limit: 200
|
8:EXCHANGE
limit: 200

PLAN FRAGMENT 3
OUTPUT EXPRS:
PARTITION: RANDOM

STREAM DATA SINK
EXCHANGE ID: 16
UNPARTITIONED

15:AGGREGATE (update finalize)
| output: group_concat(126: cast)
| group by: 120: tenant_id
|
14:Project
| <slot 120> : 120: tenant_id
| <slot 126> : CAST(121: user_id AS VARCHAR)
|
13:OlapScanNode
TABLE: dim_common_data_permission
PREAGGREGATION: ON
PREDICATES: 125: all_flag = 1
partitions=1/1
rollup: dim_common_data_permission
tabletRatio=10/10
tabletList=3515060,3515064,3515068,3515072,3515076,3515080,3515084,3515088,3515092,3515096
cardinality=76235
avgRowSize=21.0
numNodes=0

PLAN FRAGMENT 4
OUTPUT EXPRS:
PARTITION: RANDOM

STREAM DATA SINK
EXCHANGE ID: 12
RANDOM

11:Project
| <slot 104> : 104: tenant_id
| <slot 105> : 105: market_id
| <slot 115> : CAST(106: msku AS VARCHAR(65533))
| limit: 200
|
10:AGGREGATE (update finalize)
| group by: 104: tenant_id, 106: msku, 105: market_id
| limit: 200
|
9:OlapScanNode
TABLE: dws_sales_return_statistics_df
PREAGGREGATION: ON
partitions=1/1
rollup: dws_sales_return_statistics_df
tabletRatio=10/10
tabletList=3492739,3492743,3492747,3492751,3492755,3492759,3492763,3492767,3492771,3492775
cardinality=1653040
avgRowSize=23.982697
numNodes=0

PLAN FRAGMENT 5
OUTPUT EXPRS:
PARTITION: HASH_PARTITIONED: 2: tenant_id, 6: msku, 4: market_id

STREAM DATA SINK
EXCHANGE ID: 08
RANDOM

7:AGGREGATE (merge finalize)
| group by: 2: tenant_id, 6: msku, 4: market_id
| limit: 200
|
6:EXCHANGE

PLAN FRAGMENT 6
OUTPUT EXPRS:
PARTITION: RANDOM

STREAM DATA SINK
EXCHANGE ID: 06
HASH_PARTITIONED: 2: tenant_id, 6: msku, 4: market_id

5:AGGREGATE (update serialize)
| STREAMING
| group by: 2: tenant_id, 6: msku, 4: market_id
|
4:OlapScanNode
TABLE: dws_sales_operations_df
PREAGGREGATION: ON
partitions=1/1
rollup: dws_sales_operations_df
tabletRatio=10/10
tabletList=3491934,3491938,3491942,3491946,3491950,3491954,3491958,3491962,3491966,3491970
cardinality=12353024
avgRowSize=27.185394
numNodes=0

涉及3个表

CREATE TABLE dim_common_data_permission
(
tenant_id Int NOT NULL,
user_id Bigint NOT NULL,
user_name Varchar(256) NOT NULL,
market_id Int NOT NULL,
msku Varchar(1048576) NOT NULL,
all_flag Tinyint NOT NULL
)
COMMENT ‘OLAP’ ENGINE = StarRocks;

CREATE TABLE dws_sales_return_statistics_df
(
etl_time Datetime NOT NULL,
tenant_id Int NOT NULL,
market_id Int NOT NULL,
msku Varchar(400) NOT NULL,
asin Varchar(80) NOT NULL,
statistics_date Datetime NOT NULL,
return_reson Varchar(200) NOT NULL,
orders Bigint NOT NULL,
disposition Varchar(200) NOT NULL,
license_plate_number Varchar(2000) NOT NULL,
returns_sellable Bigint NOT NULL COMMENT ‘可用退货量’
)
COMMENT ‘OLAP’ ENGINE = StarRocks;

– auto-generated definition
CREATE TABLE dws_sales_operations_df
(
etl_time Datetime NOT NULL,
tenant_id Int NOT NULL,
statistics_date Datetime NOT NULL,
market_id Int NOT NULL,
id Bigint NOT NULL,
msku Varchar(65533) NOT NULL,
spu Varchar(65533) NOT NULL,
asin Varchar(65533) NOT NULL,
market_id_msku Varchar(65533) NOT NULL,
server_id Int NOT NULL,
warehouse_id Int NOT NULL,
country Int NOT NULL,
currency Varchar(65533) NOT NULL,
product_name Varchar(65533) NOT NULL,
brand Varchar(65533) NOT NULL,
category Varchar(65533) NOT NULL,
sku Varchar(65533) NOT NULL,
variation_asin Varchar(65533) NOT NULL,
listing_state Varchar(65533) NOT NULL,
listing_level Varchar(65533) NOT NULL,
crawl_title Varchar(65533) NOT NULL,
first_sales_date Datetime NOT NULL,
add_date Datetime NOT NULL,
update_date Datetime NOT NULL,
create_time Datetime NOT NULL,
order_product_sales Decimal(20, 8) NOT NULL,
order_product_sales_b2b Decimal(20, 8) NOT NULL,
orders Int NOT NULL,
order_items Int NOT NULL,
order_items_b2b Int NOT NULL,
units_ordered Int NOT NULL,
units_ordered_b2b Int NOT NULL,
units_ordered_traffic Int NOT NULL,
patch_units_ordered Int NOT NULL,
multi_channel_orders Int NOT NULL,
refunds Int NOT NULL,
returns Int NOT NULL,
returns_sellable Int NOT NULL,
returns_purchase_cost Decimal(20, 8) NOT NULL,
returns_arrive_cost Decimal(20, 8) NOT NULL,
refund_cost Decimal(20, 8) NOT NULL,
return_product_sales Decimal(20, 8) NOT NULL,
refund_discount_cost Decimal(20, 8) NOT NULL,
refund_tax_cost Decimal(20, 8) NOT NULL,
order_ids Varchar(65533) NOT NULL,
ads_impressions Int NOT NULL,
ads_clicks Int NOT NULL,
ads_orders Int NOT NULL,
ads_item_orders Int NOT NULL,
ads_sales Decimal(20, 8) NOT NULL,
ads_item_sales Decimal(20, 8) NOT NULL,
ads_spend Decimal(20, 8) NOT NULL,
sessions Int NOT NULL,
page_views Int NOT NULL,
buy_box_percentage Decimal(12, 2) NOT NULL,
commission_cost Decimal(20, 8) NOT NULL,
shipping_cost Decimal(20, 8) NOT NULL,
fba_shipping_cost Decimal(20, 8) NOT NULL,
shipping_charge Decimal(20, 8) NOT NULL,
discount_cost Decimal(20, 8) NOT NULL,
amazon_tax Decimal(20, 8) NOT NULL,
others_cost Decimal(20, 8) NOT NULL,
purchase_cost Decimal(20, 8) NOT NULL,
arrive_cost Decimal(20, 8) NOT NULL,
product_shipping_cost Decimal(20, 8) NOT NULL,
vat_cost Decimal(20, 8) NOT NULL,
sales_gross_profit Decimal(20, 8) NOT NULL,
sales_net_profit Decimal(20, 8) NOT NULL,
patch_amazon_cost Decimal(20, 8) NOT NULL,
patch_purchase_cost Decimal(20, 8) NOT NULL,
patch_arrive_cost Decimal(20, 8) NOT NULL,
patch_cost Decimal(20, 8) NOT NULL,
multichannel_amazon_cost Decimal(20, 8) NOT NULL,
multichannel_purchase_cost Decimal(20, 8) NOT NULL,
multichannel_arrive_cost Decimal(20, 8) NOT NULL,
multichannel_cost Decimal(20, 8) NOT NULL,
bogus_orders Int NOT NULL,
bogus_quantity Int NOT NULL,
bogus_sales_cost Decimal(20, 8) NOT NULL,
bogus_others_cost Decimal(20, 8) NOT NULL,
bogus_discount_cost Decimal(20, 8) NOT NULL,
bogus_purchase_cost Decimal(20, 8) NOT NULL,
bogus_arrive_cost Decimal(20, 8) NOT NULL,
bogus_vat_cost Decimal(20, 8) NOT NULL,
bogus_principal_price Decimal(20, 8) NOT NULL,
bogus_price Decimal(20, 8) NOT NULL,
storage_fee Decimal(20, 8) NOT NULL,
platform_fee Decimal(20, 8) NOT NULL,
total_revenue Decimal(20, 8) NOT NULL,
total_spend Decimal(20, 8) NOT NULL,
product_manager_account Varchar(65533) NOT NULL,
product_manager_account_id Int NOT NULL,
purchase_account Varchar(65533) NOT NULL,
purchase_account_id Int NOT NULL,
selling_manager_name Varchar(65533) NOT NULL,
selling_manager_id Int NOT NULL,
sessions_asin Int NOT NULL,
page_views_asin Int NOT NULL,
buy_box_percentage_asin Decimal(12, 2) NOT NULL,
origin Tinyint NOT NULL,
cost_origin Varchar(65533) NOT NULL
)
COMMENT ‘OLAP’ ENGINE = StarRocks;

具体的版本号2.4.1

用使用到视图(view)吗?

没有使用视图,是用到了union

我们先本地复现下,方便加个联系方式吗,要是我们本地复现不来话,再联系你

可以 微信号 ifansay