Postgresql 报表性能测试

1. 测试数据

 

表结构 一:

CREATE TABLE fact_data

(

  day_id integer,

  popdb_id integer,

  region_id integer,

  area_id integer,

  city_id integer,

  product_id integer,

  f1 integer,

  f2 integer,

  f3 integer,

  f4 integer,

  f5 integer,

  f6 integer,

  f7 integer,

  f8 integer,

  user_id integer

)

 

表结构二

CREATE TABLE fact_data_hstore

(

  day_id integer,

  popdb_id integer,

  region_id integer,

  area_id integer,

  city_id integer,

  product_id integer,

  user_id integer,

  data hstore

)

 

 

2.测试报告under Windows

2 CPUs, 每个CPU 4 核,3GHZ

磁盘 EVA 4400 36 disks

Postgresql 9.3

 

2.1测试结果(一个字段 四百万条记录)

SQLServer:4060579 records

 

 select region_id, avg(f1*1.0) from fact_data  group by region_id

 

结果集合:返回9条记录,2个字段,耗时 500毫秒

 

 

Postgresql:  4060579 records

 

 select region_id, avg(f1) from fact_data  group by region_id

 

结果集合:返回9条记录,2个字段,耗时 1310毫秒

 

 

Postgresql:  4060579 records不加索引

 select region_id, avg((data->‘f1‘)::integer ) from fact_data_hstore

group by region_id

 

结果集合:返回9条记录,2个字段,耗时 3051毫秒

 

 

Postgresql:  4060579 records 增加索引

 select region_id, avg((data->‘f1‘)::integer ) from fact_data_hstore

group by region_id

 

结果集合:返回9条记录,2个字段,耗时 3000毫秒

索引对这个SQL语句性能没有作用。

 

 

 

 

2.2测试结果(八个字段 四百万条记录)

SQLServer:4060579 records

 

select region_id,avg(f1) as af1

from fact_data

group by region_id

 

结果集合:返回9条记录,2个字段,耗时 1316毫秒

 

 

Postgresql:  4060579 records

 

 select region_id,avg(f1) as af1, avg(f2) as af2, avg(f3) as af3, avg(f4) as af4, avg(f5) as af5, avg(f6) as af6, avg(f7) as af7, avg(f8) as af8

from fact_data

group by region_id

结果集合:返回9条记录,9个字段,耗时 2850毫秒

 

 

Postgresql:  4060579 records不加索引

 

select region_id, avg((data->‘f1‘)::integer) as af1, avg((data->‘f2‘)::integer) as af2, avg((data->‘f3‘)::integer) as af3, avg((data->‘f4‘)::integer) as af4,

avg((data->‘f5‘)::integer) as af5, avg((data->‘f6‘)::integer) as af6, avg((data->‘f7‘)::integer) as af7, avg((data->‘f8‘)::integer) as af8

from fact_data_hstore

group by region_id

 

结果集合:返回9条记录,9个字段,耗时 12690毫秒

 

 

2.3测试结果(八个字段 一百万条记录)

 

Postgresql:  1000000 records

 

 select region_id,avg(f1) as af1, avg(f2) as af2, avg(f3) as af3, avg(f4) as af4, avg(f5) as af5, avg(f6) as af6, avg(f7) as af7, avg(f8) as af8

from fact_data_1m

group by region_id

结果集合:返回9条记录,9个字段,耗时 720毫秒

 

 

Postgresql:  1000000 records不加索引

 

select region_id, avg((data->‘f1‘)::integer) as af1, avg((data->‘f2‘)::integer) as af2, avg((data->‘f3‘)::integer) as af3, avg((data->‘f4‘)::integer) as af4,

avg((data->‘f5‘)::integer) as af5, avg((data->‘f6‘)::integer) as af6, avg((data->‘f7‘)::integer) as af7, avg((data->‘f8‘)::integer) as af8

from fact_data_hstore_1m

group by region_id

结果集合:返回9条记录,9个字段,耗时 3109毫秒

 

 

 

3.测试报告under Linux virtual machine

虚拟机 2个虚核,普通硬盘

3.1测试结果(一个字段 四百万条记录)

 

Postgresql:  4060579 records

 

 select region_id, avg(f1) from fact_data  group by region_id

 

结果集合:返回9条记录,2个字段,耗时 860毫秒

 

 

Postgresql:  4060579 records不加索引

 select region_id, avg((data->‘f1‘)::integer ) from fact_data_hstore

group by region_id

 

结果集合:返回9条记录,2个字段,耗时 1630毫秒

 

 

 

 

3.2测试结果(八个字段 四百万条记录)

Postgresql:  4060579 records

 

 select region_id,avg(f1) as af1, avg(f2) as af2, avg(f3) as af3, avg(f4) as af4, avg(f5) as af5, avg(f6) as af6, avg(f7) as af7, avg(f8) as af8

from fact_data

group by region_id

结果集合:返回9条记录,9个字段,耗时 1708毫秒

 

 

Postgresql:  4060579 records不加索引

 

select region_id, avg((data->‘f1‘)::integer) as af1, avg((data->‘f2‘)::integer) as af2, avg((data->‘f3‘)::integer) as af3, avg((data->‘f4‘)::integer) as af4,

avg((data->‘f5‘)::integer) as af5, avg((data->‘f6‘)::integer) as af6, avg((data->‘f7‘)::integer) as af7, avg((data->‘f8‘)::integer) as af8

from fact_data_hstore

group by region_id

 

结果集合:返回9条记录,9个字段,耗时 6771毫秒

 

 

3.3测试结果(八个字段 一百万条记录)

 

Postgresql:  1000000 records

 

 select region_id,avg(f1) as af1, avg(f2) as af2, avg(f3) as af3, avg(f4) as af4, avg(f5) as af5, avg(f6) as af6, avg(f7) as af7, avg(f8) as af8

from fact_data_1m

group by region_id

结果集合:返回9条记录,9个字段,耗时 470毫秒

 

 

Postgresql:  1000000 records不加索引

 

select region_id, avg((data->‘f1‘)::integer) as af1, avg((data->‘f2‘)::integer) as af2, avg((data->‘f3‘)::integer) as af3, avg((data->‘f4‘)::integer) as af4,

avg((data->‘f5‘)::integer) as af5, avg((data->‘f6‘)::integer) as af6, avg((data->‘f7‘)::integer) as af7, avg((data->‘f8‘)::integer) as af8

from fact_data_hstore_1m

group by region_id

结果集合:返回9条记录,9个字段,耗时 1700毫秒

 

 

4.LU表分拆测试

添加LU_POPDB

CREATE TABLE lu_pop

(

  popdb_id integer,

  region_id integer,

  area_id integer,

  city_id integer

)

WITH (

  OIDS=FALSE

);

ALTER TABLE lu_pop

  OWNER TO postgres;

 

 

  insert into lu_pop

  select distinct popdb_id,region_id,area_id,city_id from  fact_data

 

 

 

Postgresql:

select p.region_id, avg(f1) as af1, avg(f2) as af2, avg(f3) as af3, avg(f4) as af4, avg(f5) as af5, avg(f6) as af6, avg(f7) as af7, avg(f8) as af8 from

 fact_data  f

 inner join lu_pop p on f.popdb_id = p.popdb_id

 group by p.region_id

 

结果集合:

四百万 返回9条记录,9个字段,耗时 3201毫秒

一百万 返回9条记录,9个字段,耗时 820毫秒

 

 

Postgresql: 

select p.region_id, avg((data->‘f1‘)::integer) as af1, avg((data->‘f2‘)::integer) as af2, avg((data->‘f3‘)::integer) as af3, avg((data->‘f4‘)::integer) as af4,

avg((data->‘f5‘)::integer) as af5, avg((data->‘f6‘)::integer) as af6, avg((data->‘f7‘)::integer) as af7, avg((data->‘f8‘)::integer) as af8

from fact_data_hstore f

 inner join lu_pop p on f.popdb_id = p.popdb_id

group by p.region_id

 

结果集合:

四百万返回9条记录,9个字段,耗时 9101毫秒

一百万返回9条记录,9个字段,耗时 2310毫秒

 

 

 

添加LU_POPDB_hstore

 

CREATE TABLE lu_pop_hstore

(

  popdb_id integer,

  hierarchies hstore

)

WITH (

  OIDS=FALSE

);

ALTER TABLE lu_pop_hstore

  OWNER TO postgres;

 

  insert into lu_pop_hstore

  select popdb_id,hstore(‘region_id=>‘||region_id||‘,‘||‘area_id=>‘||area_id||‘,‘||‘city_id=>‘||city_id) from  lu_pop;

 

 

select p.hierarchies->‘region_id‘, avg(f1) as af1, avg(f2) as af2, avg(f3) as af3, avg(f4) as af4, avg(f5) as af5, avg(f6) as af6, avg(f7) as af7, avg(f8) as af8 from

 fact_data  f

 inner join lu_pop_hstore p on f.popdb_id = p.popdb_id

 group by p.hierarchies->‘region_id‘

 

结果集合:

四百万数据 返回9条记录,9个字段,耗时 3760毫秒

一百万数据 返回9条记录,9个字段,耗时 990毫秒

 

 

 

 select p.hierarchies->‘region_id‘, avg((data->‘f1‘)::integer) as af1, avg((data->‘f2‘)::integer) as af2, avg((data->‘f3‘)::integer) as af3, avg((data->‘f4‘)::integer) as af4,

avg((data->‘f5‘)::integer) as af5, avg((data->‘f6‘)::integer) as af6, avg((data->‘f7‘)::integer) as af7, avg((data->‘f8‘)::integer) as af8

from fact_data_hstore f

inner join lu_pop_hstore p on f.popdb_id = p.popdb_id

 group by p.hierarchies->‘region_id‘

 

结果集合:

四百万数据 返回9条记录,9个字段,耗时 9852毫秒

一百万数据 返回9条记录,9个字段,耗时 2500毫秒

 

5.测试结论

 

常规列 Fact (四百万)

Hstore Fact(四百万)

常规列 Fact (一百万)

Hstore Fact(一百万)

全打入事实表Windows

2,850

12,690

720

3,109

全打入事实表

1,708

6,771

470

1,700

拆分LU

3,200

9,101

820

2,310

拆分LUHstore格式

3,760

9,852

990

2,500

 

 

分列存放Fact数据 VS Hstore存放事实数据

分列存放的fact数据比Hstore存放的事实数据性能高150%300%倍左右。因而用Hstore存放事实数据性能较差。

 

独立LU表VS把LU全部放入事实表

拆分LU表与把LU全部放入事实表相比,全部放入事实表比拆分到LU表中性能高30%80%倍左右,把LU拆分出来的性能较差。

 

独立LU表VS 独立LU Hstore

按照分列存放LU的数据比用Hstore格式存放LU数据,性能高8% 到20% 左右。因而用HStore存放LU数据带来的性能降低还是可以接受的。

 

数据量四百万 VS 一百万

响应时间和数据量几乎成正比,因而一定要进行表分区,让数据量控制在可以接受的范围之内。

 

 

Windows VS Linux 虚拟机

即使Windows的硬件比较强大,但是性能还是比Linux虚拟机差 50% 到80%。

 

郑重声明:本站内容如果来自互联网及其他传播媒体,其版权均属原媒体及文章作者所有。转载目的在于传递更多信息及用于网络分享,并不代表本站赞同其观点和对其真实性负责,也不构成任何其他建议。