Single-query latency rises sharply under high concurrency

[Details] A single query normally finishes in a few tens of ms; with 50 concurrent queries it takes 300-400 ms.
[StarRocks version] 2.5.14
[Cluster size] 3 FE (1 follower + 2 observers) + 6 BE (dedicated nodes)
[Machine specs] 32 vCPU / 256 GB RAM / 10 GbE NIC
profile.txt (81.0 KB)
Looking at the profile, most of the time goes to PendingTime. Is there any parameter that can reduce PendingTime?

This profile only covers 166 ms overall. Please regenerate it and post the profile of a slow query captured while concurrency is high. You are on 2.5.14, right? Please also confirm what pipeline_dop is set to.
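For reference, capturing a profile under load can be done per session. A minimal sketch (enable_profile and pipeline_profile_level are the session variables visible in the dump below; reading the result from the FE web UI is the usual route on 2.5):

    SET enable_profile = true;       -- collect runtime profiles for queries in this session
    SET pipeline_profile_level = 2;  -- optional: more detailed pipeline counters
    -- run the slow query while the 50-way load is active, then open the profile
    -- from the FE web UI (http://<fe_host>:8030/query_profile)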

Yes, 2.5.14, and pipeline_dop is set to 1. The cluster session variables are listed below. From the profile, most of the time is in PendingTime, and I don't quite understand what that stage is doing. Is it waiting to be scheduled because of the high concurrency, or waiting for data?
+----------------------------------------------+---------------------------+
| Variable_name | Value |
+----------------------------------------------+---------------------------+
| SQL_AUTO_IS_NULL | false |
| auto_increment_increment | 1 |
| autocommit | true |
| big_query_profile_second_threshold | 0 |
| broadcast_row_limit | 15000000 |
| cbo_cte_reuse | true |
| cbo_enable_low_cardinality_optimize | true |
| cbo_max_reorder_node_use_dp | 10 |
| cbo_max_reorder_node_use_exhaustive | 4 |
| character_set_client | utf8 |
| character_set_connection | utf8 |
| character_set_database | utf8 |
| character_set_results | utf8 |
| character_set_server | utf8 |
| collation_connection | utf8_general_ci |
| collation_database | utf8_general_ci |
| collation_server | utf8_general_ci |
| connector_io_tasks_per_scan_operator | 16 |
| connector_scan_use_query_mem_ratio | 0.3 |
| count_distinct_column_buckets | 1024 |
| default_rowset_type | alpha |
| disable_colocate_join | false |
| disable_join_reorder | false |
| disable_streaming_preaggregations | false |
| div_precision_increment | 4 |
| enable_adaptive_sink_dop | true |
| enable_collect_table_level_scan_stats | true |
| enable_connector_adaptive_io_tasks | true |
| enable_deliver_batch_fragments | true |
| enable_distinct_column_bucketization | false |
| enable_filter_unused_columns_in_scan_stage | true |
| enable_global_runtime_filter | true |
| enable_groupby_use_output_alias | false |
| enable_hive_column_stats | true |
| enable_insert_strict | true |
| enable_local_shuffle_agg | true |
| enable_materialized_view_rewrite | true |
| enable_materialized_view_union_rewrite | true |
| enable_materialized_view_view_delta_rewrite | true |
| enable_multicolumn_global_runtime_filter | false |
| enable_mv_planner | false |
| enable_pipeline_engine | true |
| enable_pipeline_query_statistic | true |
| enable_populate_block_cache | true |
| enable_predicate_reorder | false |
| enable_profile | false |
| enable_prune_complex_types | true |
| enable_query_cache | false |
| enable_query_dump | false |
| enable_query_queue_load | false |
| enable_query_queue_select | false |
| enable_query_queue_statistic | false |
| enable_query_tablet_affinity | false |
| enable_realtime_mv | false |
| enable_resource_group | true |
| enable_rewrite_groupingsets_to_union_all | false |
| enable_rule_based_materialized_view_rewrite | true |
| enable_scan_block_cache | false |
| enable_shared_scan | false |
| enable_sort_aggregate | false |
| enable_tablet_internal_parallel | true |
| event_scheduler | OFF |
| exec_mem_limit | 2147483648 |
| force_schedule_local | false |
| forward_to_leader | false |
| full_sort_late_materialization | false |
| group_concat_max_len | 65535 |
| hash_join_push_down_right_table | true |
| hive_partition_stats_sample_size | 3000 |
| hudi_mor_force_jni_reader | false |
| init_connect | |
| innodb_read_only | true |
| interactive_timeout | 3600 |
| io_tasks_per_scan_operator | 4 |
| join_implementation_mode_v2 | auto |
| language | /starrocks/share/english/ |
| license | Elastic License 2.0 |
| load_mem_limit | 0 |
| load_transmission_compression_type | NO_COMPRESSION |
| lower_case_table_names | 0 |
| materialized_view_rewrite_mode | DEFAULT |
| max_allowed_packet | 33554432 |
| max_parallel_scan_instance_num | -1 |
| max_pushdown_conditions_per_column | -1 |
| max_scan_key_num | -1 |
| nested_mv_rewrite_max_level | 3 |
| net_buffer_length | 16384 |
| net_read_timeout | 60 |
| net_write_timeout | 60 |
| new_planner_agg_stage | 0 |
| new_planner_optimize_timeout | 3000 |
| parallel_exchange_instance_num | -1 |
| parallel_fragment_exec_instance_num | 2 |
| parse_tokens_limit | 3500000 |
| performance_schema | false |
| pipeline_dop | 1 |
| pipeline_profile_level | 1 |
| prefer_compute_node | false |
| query_cache_agg_cardinality_limit | 5000000 |
| query_cache_entry_max_bytes | 4194304 |
| query_cache_entry_max_rows | 409600 |
| query_cache_force_populate | false |
| query_cache_hot_partition_num | 3 |
| query_cache_size | 1048576 |
| query_cache_type | 0 |
| query_delivery_timeout | 300 |
| query_mem_limit | 0 |
| query_queue_concurrency_limit | 0 |
| query_queue_cpu_used_permille_limit | 0 |
| query_queue_fresh_resource_usage_interval_ms | 5000 |
| query_queue_max_queued_queries | 1024 |
| query_queue_mem_used_pct_limit | 0.0 |
| query_queue_pending_timeout_second | 300 |
| query_timeout | 300 |
| resource_group | |
| runtime_filter_on_exchange_node | false |
| runtime_join_filter_push_down_limit | 1024000 |
| scan_use_query_mem_ratio | 0.3 |
| sql_mode | ONLY_FULL_GROUP_BY |
| sql_quote_show_create | true |
| sql_safe_updates | 0 |
| sql_select_limit | 9223372036854775807 |
| statistic_collect_parallel | 1 |
| storage_engine | olap |
| streaming_preaggregation_mode | auto |
| system_time_zone | Asia/Shanghai |
| time_zone | Asia/Shanghai |
| transaction_isolation | REPEATABLE-READ |
| transmission_compression_type | NO_COMPRESSION |
| transmission_encode_level | 7 |
| tx_isolation | REPEATABLE-READ |
| tx_visible_wait_timeout | 10 |
| use_compute_nodes | -1 |
| use_page_cache | true |
| version | 5.1.0 |
| version_comment | StarRocks version 2.5.14 |
| wait_timeout | 28800 |
+----------------------------------------------+---------------------------+
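The variables most relevant here are enable_pipeline_engine = true, pipeline_dop = 1, and parallel_fragment_exec_instance_num = 2. To re-check just the scheduling-related ones without the full dump (plain SHOW VARIABLES, nothing version-specific):

    SHOW VARIABLES LIKE 'pipeline%';                            -- pipeline_dop, pipeline_profile_level, ...
    SHOW VARIABLES LIKE 'parallel_fragment_exec_instance_num';  -- fragment instances per BE

Note that how many ready drivers a BE can actually run at once is bounded by its pipeline execution thread pool, which by default is sized to the CPU core count; the exact BE config name (e.g. pipeline_exec_thread_pool_thread_num) may vary by version, so treat that name as an assumption and verify it against your be.conf.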

Looking at the profile, I suspect that under high concurrency the number of PipelineDrivers is large, so even a driver that is already in a runnable state can sit in the queue and not get executed promptly. That waiting time does not show up in any operator's execution time, but it does show up in PendingTime. How much higher is the QPS at peak compared with off-peak?
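To make the queueing argument concrete, a rough back-of-envelope estimate (the ~4 pipelines per query is an illustrative assumption; read the real count from the profile):

    50 concurrent queries × ~4 pipelines per query × pipeline_dop 1 ≈ 200 drivers
    vs. ~32 execution threads (the pool typically defaults to the core count on a 32C BE)

With several times more runnable drivers than threads, each driver spends most of its wall-clock time waiting for a thread, and that wait accumulates in PendingTime even though every operator's own execution time stays small.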

Was this issue ever resolved?