运行duckdb数据库
D:>duckdb
v1.2.0 5f5512b827
Enter “.help” for usage hints.
Connected to a transient in-memory database.
Use “.open FILENAME” to reopen on a persistent database.
生成模拟数据,10个列,100万行数据;
-- 步骤1:创建生成模拟数据的视图
CREATE OR REPLACE VIEW test_data_view AS
SELECT
-- 生成自增ID
CAST(generate_series AS INTEGER) AS id,
-- 随机字符串(3个大写字母)
(
CHR((RANDOM() * 26 + 65)::INTEGER) ||
CHR((RANDOM() * 26 + 65)::INTEGER) ||
CHR((RANDOM() * 26 + 65)::INTEGER)
) AS code,
-- 可变长度文本(5-10字符)
SUBSTRING(
REPLACE(RANDOM()::STRING, '0.', '') ||
REPLACE(RANDOM()::STRING, '0.', ''),
1, 5 + (RANDOM() * 5)::INTEGER
) AS dynamic_text,
-- 数值类型
ROUND(RANDOM() * 1000, 2) AS price, -- 带2位小数的数值
(RANDOM() * 10000)::INTEGER AS quantity, -- 整数库存量
RANDOM()::BOOLEAN AS is_available, -- 布尔值
-- 日期时间类型
DATE '2024-01-01' AS fixed_date, -- 固定日期
CURRENT_TIMESTAMP AT TIME ZONE 'UTC' AS utc_created, -- 当前UTC时间
-- 组合类型
MAP(ARRAY['A', 'B', 'C'],
ARRAY[RANDOM(), RANDOM(), RANDOM()]) AS attributes,
-- 生成范围约束数值(0-100)
LEAST(GREATEST(RANDOM() * 120, 0), 100) AS constrained_value
FROM GENERATE_SERIES(1, 10000000); -- 生成100万行测试数据
查看一下视图,发现实时随机生成100万行数据要3秒
所以又建一个t1表,复制这100万行数据
CREATE table t1 as select * from main.test_data_view;
基于T1表进行测试excel导出, 这样估计是最快的
1万行,用时0.4秒
5万行,用时2秒
10万行,用时4秒
20万行,用时8秒
100万行,用时35秒
D INSTALL excel;
100% ▕████████████████████████████████████████████████████████████▏
Run Time (s): real 33.381 user 13.625000 sys 0.093750
D LOAD excel;
Run Time (s): real 0.077 user 0.015625 sys 0.000000
D COPY t1 TO 'D:\100w.xlsx' WITH (FORMAT XLSX, HEADER TRUE);
100% ▕████████████████████████████████████████████████████████████▏
Run Time (s): real 34.868 user 26.968750 sys 0.062500
D COPY (SELECT * FROM t1 limit 10000 ) TO 'D:\test1w.xlsx' WITH (FORMAT XLSX, HEADER TRUE);
Run Time (s): real 0.386 user 0.640625 sys 0.000000
D COPY (SELECT * FROM t1 limit 50000 ) TO 'D:\test5w.xlsx' WITH (FORMAT XLSX, HEADER TRUE);
Run Time (s): real 1.825 user 1.390625 sys 0.015625
D COPY (SELECT * FROM t1 limit 100000 ) TO 'D:\test10w.xlsx' WITH (FORMAT XLSX, HEADER TRUE);
100% ▕████████████████████████████████████████████████████████████▏
Run Time (s): real 3.793 user 2.875000 sys 0.031250
D COPY (SELECT * FROM t1 limit 200000 ) TO 'D:\test20w.xlsx' WITH (FORMAT XLSX, HEADER TRUE);
100% ▕████████████████████████████████████████████████████████████▏
Run Time (s): real 7.815 user 12.984375 sys 0.031250
D COPY (SELECT * FROM t1 limit 500000 ) TO 'D:\test50w.xlsx' WITH (FORMAT XLSX, HEADER TRUE);
100% ▕████████████████████████████████████████████████████████████▏
Run Time (s): real 18.385 user 14.921875 sys 0.062500
D COPY (SELECT * FROM t1 limit 1000000 ) TO 'D:\test100w.xlsx' WITH (FORMAT XLSX, HEADER TRUE);
100% ▕████████████████████████████████████████████████████████████▏
Run Time (s): real 36.913 user 58.718750 sys 0.218750
D COPY t1 TO 'D:\test100w_2.xlsx' WITH (FORMAT XLSX, HEADER TRUE);
100% ▕████████████████████████████████████████████████████████████▏
Run Time (s): real 34.845 user 29.859375 sys 0.062500
导出CSV速度
1万行,0.05秒
10万行,0.4秒
50万行,1.2秒
100万行,3.2秒
D COPY (SELECT * FROM t1 limit 10000) TO 'D:\test1w.csv' (FORMAT CSV, HEADER);
Run Time (s): real 0.046 user 0.015625 sys 0.000000
D COPY (SELECT * FROM t1 limit 100000) TO 'D:\test10w.csv' (FORMAT CSV, HEADER);
Run Time (s): real 0.382 user 0.265625 sys 0.000000
D COPY (SELECT * FROM t1 limit 500000) TO 'D:\test50w.csv' (FORMAT CSV, HEADER);
Run Time (s): real 1.183 user 1.187500 sys 0.015625
D COPY (SELECT * FROM t1 limit 1000000) TO 'D:\test100w.csv' (FORMAT CSV, HEADER);
100% ▕████████████████████████████████████████████████████████████▏
Run Time (s): real 3.156 user 2.015625 sys 0.312500
最后也证明了猜测,从视图实时生成100万随机数据,再导出Excel差3秒,导出csv只差1.5秒
D select count(*) from test_data_view;
┌────────────────┐
│ count_star() │
│ int64 │
├────────────────┤
│ 1000000 │
│ (1.00 million) │
└────────────────┘
Run Time (s): real 0.007 user 0.000000 sys 0.000000
D COPY test_data_view TO 'D:\test100w_view.xlsx' WITH (FORMAT XLSX, HEADER TRUE);
100% ▕████████████████████████████████████████████████████████████▏
Run Time (s): real 37.115 user 27.640625 sys 0.093750
D COPY test_data_view TO 'D:\test100w_view.csv' (FORMAT CSV, HEADER);
100% ▕████████████████████████████████████████████████████████████▏
Run Time (s): real 4.641 user 3.656250 sys 0.140625