internal_service.cpp:205 tablet writer add chunk failed, message=Update memory limit exceed tablet:53099

【详述】问题描述:
使用 DataX 导入数据,目标端为主键模型的表(表结构见下文),导入一段时间后报如下错误:
delta_writer.cpp:80] Update memory limit exceed tablet:53099 8130750385 > 8117488189 top_tablets:53111(1851M)53107(1851M)53103(1811M)53099(1810M)55179(28M)
W0214 17:22:22.683980 29039 internal_service.cpp:205] tablet writer add chunk failed, message=Update memory limit exceed tablet:53099 8130750385 > 8117488189 top_tablets:53111(1851M)53107(1851M)53103(1811M)53099(1810M)55179(28M), id=1a29fb2c-5043-4c5f-987e-afa34bf14154, index_id=53098, sender_id=0
I0214 17:22:22.789904 29028 load_channel_mgr.cpp:301] Cancelled load channel load id=1a29fb2c50434c5f-987eafa34bf14154

表结构如下:
-- Primary Key model table that is the target of the failing load.
-- detail_id is both the primary key and the hash-distribution key; both
-- clauses must name a column that is declared in the column list below.
-- All string literals use plain ASCII quotes so the statement can be
-- pasted into a SQL client as-is.
CREATE TABLE u_detail (
    detail_id bigint(20) NOT NULL,
    user_id varchar(32) NOT NULL,
    service_range varchar(1024) NOT NULL,
    city varchar(255) NOT NULL,
    relname char(32) NOT NULL,
    tags varchar(64) NOT NULL,
    birthday date NOT NULL DEFAULT "1980-01-01",
    hometown varchar(128) NOT NULL,
    livetown varchar(128) NOT NULL,
    signature varchar(255) NOT NULL,
    tel char(16) NOT NULL,
    qq char(10) NOT NULL,
    weixin varchar(64) NOT NULL,
    education varchar(64) NOT NULL,
    job int(11) NOT NULL,
    corparation varchar(255) NOT NULL,
    job_position int(11) NOT NULL,
    income int(11) NOT NULL,
    certificate_type int(11) NOT NULL,
    certificate_number varchar(32) NOT NULL,
    post_address varchar(128) NOT NULL,
    zipcode varchar(16) NOT NULL,
    homepage varchar(64) NOT NULL,
    about_me varchar(128) NOT NULL,
    hobby varchar(128) NOT NULL,
    sightml varchar(256) NOT NULL,
    lng decimal64(9, 6) NOT NULL,
    lat decimal64(9, 6) NOT NULL,
    fax varchar(16) NOT NULL,
    admin_city varchar(64) NOT NULL,
    cart_cookie varchar(2000) NOT NULL DEFAULT "[]",
    privacy_setting tinyint(4) NOT NULL DEFAULT "0",
    app_from varchar(255) NOT NULL DEFAULT "",
    tag_switch varchar(255) NOT NULL DEFAULT ''
) ENGINE=OLAP
-- Key column name matches the first declared column (detail_id).
PRIMARY KEY(detail_id)
COMMENT "xxxx"
-- Rows are hash-distributed on the primary key into 15 buckets.
DISTRIBUTED BY HASH(detail_id) BUCKETS 15
PROPERTIES (
    "replication_num" = "3",
    "in_memory" = "false",
    "storage_format" = "DEFAULT"
);

【StarRocks版本】例如:2.0.0GA
【集群规模】例如:3fe(1 follower+2observer)+5be(fe与be混部)
【机器信息】CPU虚拟核/内存/网卡,例如:8C/16G/万兆
相关设置信息:

  • 并行度:4

  • cbo是否开启:show variables like '%cbo%';
    +-------------------------------------+-------+
    | Variable_name                       | Value |
    +-------------------------------------+-------+
    | cbo_enable_dp_join_reorder          | true  |
    | cbo_enable_greedy_join_reorder      | true  |
    | cbo_enable_low_cardinality_optimize | true  |
    | cbo_enable_replicated_join          | true  |
    | cbo_max_reorder_node_use_dp         | 10    |
    | cbo_max_reorder_node_use_exhaustive | 4     |
    | cbo_use_correlated_join_estimate    | true  |
    +-------------------------------------+-------+

  • be节点cpu和内存使用率截图

be节点相关参数配置如下

alter_tablet_worker_count=3
base_compaction_check_interval_seconds=60
base_compaction_interval_seconds_since_last_operation=86400
base_compaction_num_cumulative_deltas=1
base_compaction_num_threads_per_disk=8
base_compaction_trace_threshold=120
base_compaction_write_mbytes_per_sec=1800
base_cumulative_delta_ratio=0.3
be_port=9060
be_service_threads=64
bitmap_filter_enable_not_equal=0
bitmap_max_filter_items=30
bitmap_max_filter_ratio=1
bitmap_serialize_version=1
broker_write_timeout_seconds=30
brpc_max_body_size=2147483648
brpc_num_threads=-1
brpc_port=8060
brpc_socket_max_unwritten_bytes=1073741824
buffer_stream_reserve_size=8192000
check_consistency_worker_count=1
chunk_reserved_bytes_limit=2147483648
clear_transaction_task_worker_count=1
clone_worker_count=3
cluster_id=-1
column_dictionary_key_ratio_threshold=0
column_dictionary_key_size_threshold=0
compaction_max_memory_limit=-1
compaction_max_memory_limit_percent=100
compaction_memory_limit_per_worker=2147483648
compress_rowbatches=1
consistency_max_memory_limit=10G
consistency_max_memory_limit_percent=20
create_tablet_worker_count=3
cumulative_compaction_budgeted_bytes=104857600
cumulative_compaction_check_interval_seconds=1
cumulative_compaction_num_threads_per_disk=8
cumulative_compaction_skip_window_seconds=30
cumulative_compaction_trace_threshold=60
default_num_rows_per_column_file_block=1024
default_query_options=
delete_worker_count=3
dictionary_encoding_ratio=0.7
dictionary_speculate_min_chunk_size=10000
disable_column_pool=0
disable_mem_pools=0
disable_storage_page_cache=1
disk_stat_monitor_interval=5
doris_max_pushdown_conjuncts_return_rate=90
doris_max_scan_key_num=1024
doris_scan_range_row_count=524288
doris_scanner_queue_size=1024
doris_scanner_row_num=16384
doris_scanner_thread_pool_queue_size=102400
doris_scanner_thread_pool_thread_num=48
download_low_speed_limit_kbps=50
download_low_speed_time=300
download_worker_count=1
drop_tablet_worker_count=3
enable_bitmap_union_disk_format_with_set=0
enable_metric_calculator=1
enable_partitioned_aggregation=1
enable_prefetch=1
enable_quadratic_probing=0
enable_schema_change_vectorized=1
enable_system_metrics=1
enable_token_check=1
es_http_timeout_ms=5000
es_scroll_keepalive=5m
etl_thread_pool_queue_size=256
etl_thread_pool_size=8
exchg_node_buffer_size_bytes=10485760
file_descriptor_cache_capacity=16384
file_descriptor_cache_clean_interval=3600
flush_thread_num_per_store=2
fragment_pool_queue_size=2048
fragment_pool_thread_num_max=4096
fragment_pool_thread_num_min=64
heartbeat_service_port=9050
heartbeat_service_thread_count=1
ignore_broken_disk=0
ignore_load_tablet_failure=0
ignore_rowset_stale_unconsistent_delete=0
inc_rowset_expired_sec=1800
index_stream_cache_capacity=10737418240
late_materialization_ratio=10
load_data_reserve_hours=4
load_error_log_reserve_hours=48
load_process_max_memory_limit_bytes=107374182400
load_process_max_memory_limit_percent=90
local_library_dir=/data/startrocks/be/lib/udf-runtime
log_buffer_level=
madvise_huge_pages=0
make_snapshot_worker_count=5
max_client_cache_size_per_host=10
max_compaction_concurrency=-1
max_consumer_num_per_group=3
max_cumulative_compaction_num_singleton_deltas=1000
max_download_speed_kbps=50000
max_free_io_buffers=128
max_garbage_sweep_interval=3600
max_hdfs_file_handle=1000
max_memory_sink_batch_count=20
max_percentage_of_error_disk=0
max_pushdown_conditions_per_column=1024
max_row_source_mask_memory_bytes=209715200
max_runnings_transactions_per_txn_map=100
max_tablet_num_per_shard=1024
max_transmit_batched_bytes=65536
mem_limit=14G
memory_limitation_per_thread_for_schema_change=2
memory_maintenance_sleep_time_s=10
memory_max_alignment=16
metric_late_materialization_ratio=1000
min_buffer_size=1024
min_compaction_failure_interval_sec=120
min_cumulative_compaction_num_singleton_deltas=5
min_file_descriptor_number=60000
min_garbage_sweep_interval=180
mmap_buffers=0
null_encoding=0
num_cores=0
num_disks=0
num_threads_per_core=3
num_threads_per_disk=0
number_tablet_writer_threads=16
olap_table_sink_send_interval_ms=10
path_gc_check=1
path_gc_check_interval_second=86400
path_gc_check_step=1000
path_gc_check_step_interval_ms=10
path_scan_interval_second=86400
pending_data_expire_time_sec=1800
periodic_counter_update_period_ms=500
pipeline_exec_thread_pool_thread_num=3
pipeline_io_buffer_size=64
pipeline_io_thread_pool_queue_size=102400
pipeline_io_thread_pool_thread_num=3
pipeline_yield_max_chunks_moved=100
pipeline_yield_max_time_spent=100000000
plugin_path=/data/startrocks/be/plugin
port=20001
pprof_profile_dir=/data/startrocks/be/log
pre_aggregate_factor=80
priority_networks=
priority_queue_remaining_tasks_increased_frequency=512
publish_version_worker_count=8
pull_load_task_dir=/data/startrocks/be/var/pull_load
push_worker_count_high_priority=3
push_worker_count_normal_priority=3
push_write_mbytes_per_sec=10
query_scratch_dirs=/data/startrocks/be
read_size=8388608
release_snapshot_worker_count=5
report_disk_state_interval_seconds=60
report_tablet_interval_seconds=60
report_task_interval_seconds=10
result_buffer_cancelled_interval_time=300
routine_load_thread_pool_size=10
row_nums_check=1
rpc_compress_ratio_threshold=1.1
scan_context_gc_interval_min=5
scratch_dirs=/tmp
serialize_batch=0
sleep_five_seconds=5
sleep_one_second=1
small_file_dir=/data/startrocks/be/lib/small_file/
snapshot_expire_time_sec=172800
sorter_block_size=8388608
status_report_interval=5
storage_flood_stage_left_capacity_bytes=1073741824
storage_flood_stage_usage_percent=95
storage_format_version=2
storage_medium_migrate_count=1
storage_page_cache_limit=0
storage_root_path=/data/startrocks/be/storage
streaming_load_max_batch_size_mb=100
streaming_load_max_mb=10240
streaming_load_rpc_max_alive_time_sec=1200
sync_tablet_meta=0
sys_log_dir=/data/startrocks/be/log
sys_log_level=INFO
sys_log_roll_mode=SIZE-MB-1024
sys_log_roll_num=10
sys_log_verbose_level=10
sys_log_verbose_modules=
sys_minidump_dir=/data/startrocks/be
sys_minidump_enable=0
sys_minidump_interval=600
sys_minidump_limit=20480
sys_minidump_max_files=16
tablet_map_shard_size=32
tablet_max_versions=10000
tablet_meta_checkpoint_min_interval_secs=600
tablet_meta_checkpoint_min_new_rowsets_num=10
tablet_rowset_stale_sweep_time_sec=1800
tablet_stat_cache_update_interval_second=300
tablet_writer_open_rpc_timeout_sec=60
tc_free_memory_rate=20
tc_gc_period=60
tc_max_total_thread_cache_bytes=1073741824
tc_use_memory_min=10737418240
thrift_client_retry_interval_ms=100
thrift_connect_timeout_seconds=3
thrift_rpc_timeout_ms=5000
trash_file_expire_time_sec=259200
txn_commit_rpc_timeout_ms=10000
txn_map_shard_size=128
txn_shard_size=1024
unused_rowset_monitor_interval=30
update_cache_expire_sec=360
update_compaction_check_interval_seconds=60
update_compaction_num_threads_per_disk=1
update_compaction_per_tablet_min_interval_seconds=120
update_compaction_trace_threshold=20
upload_worker_count=1
use_mmap_allocate_chunk=0
user_function_dir=/data/startrocks/be/lib/udf
vector_chunk_size=4096
web_log_bytes=1048576
webserver_num_workers=48
webserver_port=8040
write_buffer_size=104857600

memory tracker


我也遇到这个报错了,请问是怎么解决的?

可以执行 show variables like '%exec_mem_limit%'; 查看这个参数当前设置的值。这个报错的原因是:主键模型在导入数据时会将索引加载到内存中,占用的内存超过了限制。