主键模型的表使用date_trunc分区后,物化视图查询改写失败请问是什么原因?

如题,版本是3.1.3


测试1: 主键模型 , order_list 不使用分区 , 物化视图可以正常改写

CREATE TABLE goods(
    item_id1          INT,
    item_name         STRING,
    price             FLOAT
) 
PRIMARY KEY (item_id1)
DISTRIBUTED BY HASH(item_id1)
PROPERTIES (
"replication_num" = "1"
);

INSERT INTO goods
VALUES
    (1001,"apple",6.5),
    (1002,"pear",8.0),
    (1003,"potato",2.2);

CREATE TABLE order_list(
    order_id          INT,
    client_id         INT,
    item_id2          INT,
    order_date        DATE
)
PRIMARY KEY (order_id)
DISTRIBUTED BY HASH(order_id)
PROPERTIES (
"replication_num" = "1"
);

INSERT INTO order_list
VALUES
    (10001,101,1001,"2022-03-13"),
    (10001,101,1002,"2022-03-13"),
    (10002,103,1002,"2022-03-13"),
    (10002,103,1003,"2022-03-14"),
    (10003,102,1003,"2022-03-14"),
    (10003,102,1001,"2022-03-14");

CREATE MATERIALIZED VIEW order_mv
DISTRIBUTED BY HASH(`order_id`)
REFRESH ASYNC START('2022-09-01 10:00:00') EVERY (interval 1 SECOND)
AS SELECT
    order_list.order_id,
    sum(goods.price) as total
FROM order_list INNER JOIN goods ON goods.item_id1 = order_list.item_id2
GROUP BY order_id;

explain
SELECT
    a.order_id,
    sum(goods.price) as total
FROM order_list a INNER JOIN goods ON goods.item_id1 = a.item_id2
GROUP BY a.order_id;

测试1 执行 explain 结果

Explain String                    |
----------------------------------+
PLAN FRAGMENT 0                   |
 OUTPUT EXPRS:1: order_id | 8: sum|
  PARTITION: UNPARTITIONED        |
                                  |
  RESULT SINK                     |
                                  |
  2:EXCHANGE                      |
                                  |
PLAN FRAGMENT 1                   |
 OUTPUT EXPRS:                    |
  PARTITION: RANDOM               |
                                  |
  STREAM DATA SINK                |
    EXCHANGE ID: 02               |
    UNPARTITIONED                 |
                                  |
  1:Project                       |
  |  <slot 1> : 9: order_id       |
  |  <slot 8> : 10: total         |
  |                               |
  0:OlapScanNode                  |
     TABLE: order_mv              |
     PREAGGREGATION: ON           |
     partitions=1/1               |
     rollup: order_mv             |
     tabletRatio=2/2              |
     tabletList=150467,150469     |
     cardinality=3                |
     avgRowSize=12.0              |
     numNodes=0                   |
     MaterializedView: true       |

测试2: 主键模型 , order_list_2 使用 order_date 进行分区 , 物化视图改写失败

CREATE TABLE order_list_2(
    order_id          INT,
    order_date        DATE,
    client_id         INT,
    item_id2          INT
)
PRIMARY KEY (order_id,order_date)
PARTITION BY date_trunc('day',order_date)
DISTRIBUTED BY HASH(order_id)
PROPERTIES (
"replication_num" = "1"
);

INSERT INTO order_list_2
VALUES
(10001,"2022-03-13",101,1001),
(10001,"2022-03-13",101,1002),
(10002,"2022-03-13",103,1002),
(10002,"2022-03-14",103,1003),
(10003,"2022-03-14",102,1003),
(10003,"2022-03-14",102,1001);

CREATE MATERIALIZED VIEW order_mv_2
DISTRIBUTED BY HASH(`order_id`)
REFRESH ASYNC START('2022-09-01 10:00:00') EVERY (interval 1 SECOND)
AS SELECT
    a.order_id,
    sum(goods.price) as total
FROM order_list_2 a INNER JOIN goods ON goods.item_id1 = a.item_id2
GROUP BY order_id;

explain
SELECT
    a.order_id,
    sum(goods.price) as total
FROM order_list_2 a INNER JOIN goods ON goods.item_id1 = a.item_id2
GROUP BY order_id;

测试2 执行 explain 结果

Explain String                                     |
---------------------------------------------------+
PLAN FRAGMENT 0                                    |
 OUTPUT EXPRS:1: order_id | 8: sum                 |
  PARTITION: UNPARTITIONED                         |
                                                   |
  RESULT SINK                                      |
                                                   |
  7:EXCHANGE                                       |
                                                   |
PLAN FRAGMENT 1                                    |
 OUTPUT EXPRS:                                     |
  PARTITION: HASH_PARTITIONED: 1: order_id         |
                                                   |
  STREAM DATA SINK                                 |
    EXCHANGE ID: 07                                |
    UNPARTITIONED                                  |
                                                   |
  6:AGGREGATE (update finalize)                    |
  |  output: sum(7: price)                         |
  |  group by: 1: order_id                         |
  |                                                |
  5:EXCHANGE                                       |
                                                   |
PLAN FRAGMENT 2                                    |
 OUTPUT EXPRS:                                     |
  PARTITION: RANDOM                                |
                                                   |
  STREAM DATA SINK                                 |
    EXCHANGE ID: 05                                |
    HASH_PARTITIONED: 1: order_id                  |
                                                   |
  4:Project                                        |
  |  <slot 1> : 1: order_id                        |
  |  <slot 7> : 7: price                           |
  |                                                |
  3:HASH JOIN                                      |
  |  join op: INNER JOIN (BUCKET_SHUFFLE)          |
  |  colocate: false, reason:                      |
  |  equal join conjunct: 5: item_id1 = 4: item_id2|
  |                                                |
  |----2:EXCHANGE                                  |
  |                                                |
  0:OlapScanNode                                   |
     TABLE: goods                                  |
     PREAGGREGATION: ON                            |
     partitions=1/1                                |
     rollup: goods                                 |
     tabletRatio=2/2                               |
     tabletList=150336,150338                      |
     cardinality=3                                 |
     avgRowSize=12.0                               |
     numNodes=0                                    |
                                                   |
PLAN FRAGMENT 3                                    |
 OUTPUT EXPRS:                                     |
  PARTITION: RANDOM                                |
                                                   |
  STREAM DATA SINK                                 |
    EXCHANGE ID: 02                                |
    BUCKET_SHUFFLE_HASH_PARTITIONED: 4: item_id2   |
                                                   |
  1:OlapScanNode                                   |
     TABLE: order_list_2                           |
     PREAGGREGATION: ON                            |
     PREDICATES: 4: item_id2 IS NOT NULL           |
     partitions=2/3                                |
     rollup: order_list_2                          |
     tabletRatio=4/4                               |
     tabletList=151314,151316,151309,151311        |
     cardinality=4                                 |
     avgRowSize=8.0                                |
     numNodes=0                                    |

我也有同样的问题…

发愁 , 你后续有找到原因吗

这个是因为当有分区表达式处理的不是很鲁棒。最新的3.1.8(应该很快release)已经解决了该问题。