主机 | 系统 | CPU | 内存 | 硬盘 |
---|---|---|---|---|
duckdb-test | AlmaLinux 9.4 PG: 16 DuckDB: 1.0.0 | 8 | 32GB | 500GB SSD |
dnf install -y wget
wget https://github.com/duckdb/duckdb/releases/download/v1.0.0/duckdb_cli-linux-amd64.zip
unzip duckdb_cli-linux-amd64.zip
mv duckdb /usr/local/bin
配置系统环境
systemctl stop firewalld
systemctl disable firewalld
cat >> /etc/sysctl.conf << EOF
vm.overcommit_memory=1
EOF
sysctl -p
echo madvise | sudo tee /sys/kernel/mm/transparent_hugepage/enabled
echo madvise | sudo tee /sys/kernel/mm/transparent_hugepage/defrag
cat >> /etc/rc.d/rc.local << EOF
if test -f /sys/kernel/mm/transparent_hugepage/enabled; then
echo madvise > /sys/kernel/mm/transparent_hugepage/enabled
fi
if test -f /sys/kernel/mm/transparent_hugepage/defrag; then
echo madvise > /sys/kernel/mm/transparent_hugepage/defrag
fi
EOF
chmod +x /etc/rc.d/rc.local
swapoff -a
cat >> /etc/sysctl.conf << EOF
vm.swappiness=0
EOF
sysctl -p
setenforce 0
sed -i 's/SELINUX=.*/SELINUX=disabled/' /etc/selinux/config
sed -i 's/SELINUXTYPE/#SELINUXTYPE/' /etc/selinux/config
echo "export LANG=en_US.UTF8" >> /etc/profile
source /etc/profile
cp -f /usr/share/zoneinfo/Asia/Shanghai /etc/localtime
hwclock
cat >> /etc/security/limits.conf << EOF
* soft nproc 65535
* hard nproc 65535
* soft nofile 655350
* hard nofile 655350
* soft stack unlimited
* hard stack unlimited
* hard memlock unlimited
* soft memlock unlimited
EOF
cat >> /etc/security/limits.d/20-nproc.conf << EOF
* soft nproc 65535
root soft nproc 65535
EOF
cat >> /etc/sysctl.conf << EOF
net.ipv4.tcp_abort_on_overflow=1
EOF
sysctl -p
cat >> /etc/sysctl.conf << EOF
net.core.somaxconn=1024
EOF
sysctl -p
cat >> /etc/sysctl.conf << EOF
vm.max_map_count = 262144
EOF
sysctl -p
echo 120000 > /proc/sys/kernel/threads-max
echo 200000 > /proc/sys/kernel/pid_max
安装JDK 11
dnf install java-11-openjdk java-11-openjdk-devel -y
编辑/etc/profile
文件
export JAVA_HOME=/usr/lib/jvm/java-11
export PATH=$JAVA_HOME/bin:$PATH
刷新配置
source /etc/profile
下载StarRocks二进制包
wget https://releases.starrocks.io/starrocks/StarRocks-3.2.7.tar.gz
tar -xzvf StarRocks-3.2.7.tar.gz
创建元数据目录
mkdir /data/meta
修改FE配置文件fe/conf/fe.conf
priority_networks = 10.1.0.0/20
meta_dir = /data/meta
JAVA_OPTS="-Xmx4096m -XX:+UseMembar -XX:SurvivorRatio=8 -XX:MaxTenuringThreshold=7 -XX:+PrintGCDateStamps -XX:+PrintGCDetails -XX:+UseConcMarkSweepGC -XX:+UseParNewGC -XX:+CMSClassUnloadingEnabled -XX:-CMSParallelRemarkEnabled -XX:CMSInitiatingOccupancyFraction=80 -XX:SoftRefLRUPolicyMSPerMB=0 -Xloggc:$DORIS_HOME/log/fe.gc.log.$DATE"
JAVA_HOME = /usr/lib/jvm/java-11
启动fe
./fe/bin/start_fe.sh --daemon
创建数据存储目录
mkdir /data/be
修改 BE 配置文件 be/conf/be.conf
storage_root_path = /data/be
priority_networks = 10.1.0.0/20
JAVA_HOME = /usr/lib/jvm/java-11
启动BE节点
./be/bin/start_be.sh --daemon
下载mysql客户端
dnf install mysql
在fe节点,使用MYSQL客户端连接
mysql -uroot -P9030 -h 127.0.0.1
添加BE节点
ALTER SYSTEM ADD BACKEND "10.1.2.14:9050";
查看FE节点和BE节点状态
SHOW FRONTENDS\G;
SHOW BACKENDS\G;
创建测试目录
mkdir -p /tpcds/duckdb/
DuckDB创建测试数据库并安装tpcds扩展
[root@duckdb-test ~]# duckdb /tpcds/duckdb/tpcds-100.db
v1.0.0 1f98600c2c
Enter ".help" for usage hints.
D INSTALL tpcds;
100% ▕████████████████████████████████████████████████████████████▏
D LOAD tpcds;
生成测试数据,这里使用sf=100,100GB的数据库:
D CALL dsdgen(sf = 100);
100% ▕████████████████████████████████████████████████████████████▏
┌─────────┐
│ Success │
│ boolean │
├─────────┤
│ 0 rows │
└─────────┘
查看测试数据库
[root@duckdb-test duckdb]# ls -lh tpcds-100.db
-rw-r--r--. 1 root root 27G Jun 11 10:03 tpcds-100.db
编写python测试脚本tpcds.py
,跑一个完整的TPCDS测试,为了防止OOM,将DuckDB使用的内存大小限制为16GB,并行线程数为4个,测试脚本如下:
import time
import duckdb
import logging
logger = logging.getLogger(__name__)
fh = logging.FileHandler('tpcds.log')
logger.setLevel(logging.INFO)
fh.setLevel(logging.INFO)
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
fh.setFormatter(formatter)
logger.addHandler(fh)
database_url = 'tpcds-100.db'
mem_limit = '16GB'
thread_limit = 4
con = duckdb.connect(database_url)
con.execute(f"SET memory_limit = '{mem_limit}';")
con.execute(f"SET threads TO {thread_limit};")
start_time = time.time()
logger.info("Starting TPC-DS queries")
print("Starting TPC-DS queries")
with open('tpcds-duckdb-results.csv', 'w+', encoding='utf-8') as f:
for query_id in range(1, 100):
query_start_time = time.time()
query = f"PRAGMA tpcds({query_id});"
con.execute(query)
query_end_time = time.time()
logger.info(f"Query {query_id} took {query_end_time - query_start_time} seconds")
f.write(f"{query_id},{round(query_end_time - query_start_time, 2)}\n")
f.flush()
end_time = time.time()
print(f"TPCDS 99 Query Time taken: {int(end_time - start_time)} seconds")
运行测试脚本,由于需要较久的时间,所以放在tmux里运行,在执行前清除内存中的buffer,防止缓存影响测试结果的准确性:
tmux new -s duckdb
pip install duckdb
sync
echo 1 > /proc/sys/vm/drop_caches
[root@duckdb-test duckdb]# python tpcds.py
Starting TPC-DS queries
TPCDS 99 Query Time taken: 906 seconds
可以看到,执行完99个TPC-DS测试共用906秒。
创建测试目录,下载TPCDS工具包
mkdir -p /tpcds/sr/
cd /tpcds/sr/
wget https://starrocks-public.oss-cn-zhangjiakou.aliyuncs.com/tpcds-poc-1.0.zip
unzip tpcds-poc-1.0
cd tpcds-poc-1.0
生成sf=100的数据
sh bin/gen_data/gen-tpcds.sh 100 data_100
修改配置文件conf/starrocks.conf
,指定mysql的host和port等配置,然后创建表结构
sh bin/create_db_table.sh ddl_100
导入数据
sh bin/stream_load.sh data_100
运行TPC-DS测试
sh bin/benchmark.sh -p -d tpcds
DuckDB和StarRocks跑完99个SQL测试的总耗时如下,DuckDB在一部分测试场景下性能会更好:
软件 | 总耗时/s |
---|---|
DuckDB | 700.4 |
StarRocks | 437.4 |
99条SQL执行耗时如下所示:
SQL | StarRocks(单位ms) | DuckDB(单位毫秒) |
---|---|---|
Q1 | 582 | 1340 |
Q2 | 924 | 5650 |
Q3 | 1928 | 2670 |
Q4 | 36953 | 23340 |
Q5 | 5274 | 4840 |
Q6 | 631 | 970 |
Q7 | 3135 | 2290 |
Q8 | 1569 | 550 |
Q9 | 4849 | 7430 |
Q10 | 819 | 1780 |
Q11 | 16042 | 12540 |
Q12 | 335 | 830 |
Q13 | 2917 | 4320 |
Q14 | 18950 | 136250 |
Q15 | 575 | 5430 |
Q16 | 1280 | 2520 |
Q17 | 2812 | 3230 |
Q18 | 1657 | 1570 |
Q19 | 1622 | 1190 |
Q20 | 511 | 950 |
Q21 | 972 | 1180 |
Q22 | 5206 | 96020 |
Q23 | 67877 | 126870 |
Q24 | 4531 | 4090 |
Q25 | 2658 | 2020 |
Q26 | 1396 | 1260 |
Q27 | 2327 | 3240 |
Q28 | 4035 | 5740 |
Q29 | 3018 | 1910 |
Q30 | 340 | 710 |
Q31 | 3402 | 4110 |
Q32 | 362 | 510 |
Q33 | 3452 | 1690 |
Q34 | 1516 | 620 |
Q35 | 1786 | 3440 |
Q36 | 1678 | 2530 |
Q37 | 947 | 1260 |
Q38 | 6548 | 3560 |
Q39 | 2508 | 1220 |
Q40 | 481 | 2330 |
Q41 | 41 | 70 |
Q42 | 1825 | 350 |
Q43 | 2400 | 1190 |
Q44 | 2645 | 1280 |
Q45 | 500 | 770 |
Q46 | 3795 | 1070 |
Q47 | 3661 | 18640 |
Q48 | 1997 | 270 |
Q49 | 3438 | 3630 |
Q50 | 2519 | 1810 |
Q51 | 9794 | 13350 |
Q52 | 1836 | 760 |
Q53 | 1581 | 960 |
Q54 | 2434 | 1550 |
Q55 | 1802 | 380 |
Q56 | 3375 | 160 |
Q57 | 1801 | 6070 |
Q58 | 2254 | 1810 |
Q59 | 7071 | 9440 |
Q60 | 3418 | 110 |
Q61 | 2859 | 1390 |
Q62 | 775 | 780 |
Q63 | 1553 | 530 |
Q64 | 5592 | 24960 |
Q65 | 5448 | 7920 |
Q66 | 1694 | 2250 |
Q67 | 32384 | 17890 |
Q68 | 2550 | 5180 |
Q69 | 1196 | 1310 |
Q70 | 5066 | 2490 |
Q71 | 1843 | 2510 |
Q72 | 6563 | 4620 |
Q73 | 1511 | 630 |
Q74 | 11243 | 7930 |
Q75 | 9425 | 5880 |
Q76 | 1653 | 1550 |
Q77 | 4284 | 210 |
Q78 | 24730 | 4280 |
Q79 | 3046 | 5120 |
Q80 | 5336 | 10590 |
Q81 | 467 | 1080 |
Q82 | 1634 | 1840 |
Q83 | 198 | 250 |
Q84 | 170 | 250 |
Q85 | 488 | 2620 |
Q86 | 614 | 840 |
Q87 | 6697 | 4290 |
Q88 | 4699 | 3270 |
Q89 | 1681 | 1070 |
Q90 | 261 | 210 |
Q91 | 110 | 90 |
Q92 | 278 | 240 |
Q93 | 2224 | 5070 |
Q94 | 555 | 930 |
Q95 | 1307 | 13230 |
Q96 | 573 | 1360 |
Q97 | 7288 | 5470 |
Q98 | 1605 | 1360 |
Q99 | 1269 | 1240 |