select attname,
       inherited,
       n_distinct,
       array_to_string(most_common_vals, E';') as most_common_vals,
       most_common_freqs
from pg_stats
where tablename = 'table_name';
Posts categorized as postgres
PostgreSQL explain plan operations
to be continued
> – what does “Bitmap Heap Scan” phase do?
A plain indexscan fetches one tuple-pointer at a time from the index,
and immediately visits that tuple in the table. A bitmap scan fetches
all the tuple-pointers from the index in one go, sorts them using an
in-memory “bitmap” data structure, and then visits the table tuples in
physical tuple-location order. The bitmap scan improves locality of
reference to the table at the cost of more bookkeeping overhead to
manage the “bitmap” data structure — and at the cost that the data
is no longer retrieved in index order, which doesn’t matter for your
query but would matter if you said ORDER BY.
> – what is “Recheck condition” and why is it needed?
If the bitmap gets too large we convert it to “lossy” style, in which we
only remember which pages contain matching tuples instead of remembering
each tuple individually. When that happens, the table-visiting phase
has to examine each tuple on the page and recheck the scan condition to
see which tuples to return.
> – why are proposed “width” fields in the plan different between the two
> plans?
Updated statistics about average column widths, presumably.
> (actually, a nice explanation what exactly are those widths would also
> be nice :) )
Sum of the average widths of the columns being fetched from the table.
> – I thought “Bitmap Index Scan” was only used when there are two or more
> applicable indexes in the plan, so I don’t understand why is it used
> now?
True, we can combine multiple bitmaps via AND/OR operations to merge
results from multiple indexes before visiting the table … but it’s
still potentially worthwhile even for one index. A rule of thumb is
that plain indexscan wins for fetching a small number of tuples, bitmap
scan wins for a somewhat larger number of tuples, and seqscan wins if
you’re fetching a large percentage of the whole table.
regards, tom lane
https://www.postgresql.org/message-id/12553.1135634231@sss.pgh.pa.us
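To illustrate the points above, here is an abridged, hypothetical EXPLAIN ANALYZE; the table demo and index demo_id_idx are made-up names, and the "Heap Blocks: lossy" line shows the pages that trigger the recheck once the bitmap exceeds work_mem (raising work_mem keeps the bitmap exact):

explain analyze select * from demo where id between 1 and 100000;

 Bitmap Heap Scan on demo  (actual time=12.3..140.5 rows=100000 loops=1)
   Recheck Cond: ((id >= 1) AND (id <= 100000))
   Rows Removed by Index Recheck: 381220
   Heap Blocks: exact=42 lossy=1290
   ->  Bitmap Index Scan on demo_id_idx  (actual time=10.1..10.1 rows=100000 loops=1)
         Index Cond: ((id >= 1) AND (id <= 100000))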
PostgreSQL how-to: find top queries for the last 15 seconds
DROP TABLE IF EXISTS pg_stat_statements_temp;

create table pg_stat_statements_temp as
SELECT d.datname, u.usename, s.queryid,
       round(s.total_time::numeric, 2) AS total_time,
       s.calls,
       round(s.mean_time::numeric, 2) AS mean,
       now() as cur_date
FROM pg_stat_statements s
join pg_database d on s.dbid = d.oid
join pg_user u on s.userid = u.usesysid;

select pg_sleep(15);

with stat as (
    SELECT d.datname, u.usename, s.queryid, s.query,
           round(s.total_time::numeric, 2) AS total_time,
           s.calls,
           round(s.mean_time::numeric, 2) AS mean
    FROM pg_stat_statements s
    join pg_database d on s.dbid = d.oid
    join pg_user u on s.userid = u.usesysid
)
select s.datname, s.usename, s.queryid,
       regexp_replace(s.query, E'[\n\r]+', ' ', 'g')::varchar(80) AS short_query,
       s.total_time - t.total_time as time,
       s.calls - t.calls as calls,
       s.mean - t.mean as mean,
       round((100 * (s.total_time - t.total_time) / sum(s.total_time - t.total_time) OVER ())::numeric, 2) as cpu,
       now() - t.cur_date as diff_time,
       CASE WHEN s.calls - t.calls > 0
            THEN (s.calls - t.calls) / (extract(second from now() - t.cur_date)
                                        + extract(minute from now() - t.cur_date) * 60
                                        + extract(hour from now() - t.cur_date) * 60 * 60)
            ELSE 0
       END as tps
from stat s
join pg_stat_statements_temp t
  on s.datname = t.datname and s.usename = t.usename and s.queryid = t.queryid
order by cpu desc
limit 20;
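A simpler variant, if you can afford to throw away the accumulated statistics, is to reset the counters, wait, and read them back; a sketch assuming the pg_stat_statements extension is already installed:

select pg_stat_statements_reset();  -- zeroes all counters (destructive!)
select pg_sleep(15);
select queryid, calls,
       round(total_time::numeric, 2) as total_time,
       regexp_replace(query, E'[\n\r]+', ' ', 'g')::varchar(80) as short_query
from pg_stat_statements
order by total_time desc
limit 20;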
postgresql top 10 tables by io operations
helps to keep focus on “hot” tables
top 10 tables by sum of io operations:
SELECT st.schemaname || '.' || st.relname,
       round(100 * (sum(coalesce(st.n_tup_ins,0) + coalesce(st.n_tup_upd,0) - coalesce(st.n_tup_hot_upd,0) + coalesce(st.n_tup_del,0)))
             / (sum(coalesce(st.n_tup_ins,0) + coalesce(st.n_tup_upd,0) - coalesce(st.n_tup_hot_upd,0) + coalesce(st.n_tup_del,0)) over ()), 2) as pct_io_ops
from pg_stat_user_tables st
JOIN pg_class c ON c.oid = st.relid
LEFT JOIN pg_tablespace t ON t.oid = c.reltablespace
WHERE coalesce(n_tup_ins,0) + coalesce(n_tup_upd,0) - coalesce(n_tup_hot_upd,0) + coalesce(n_tup_del,0) > 100
group by st.schemaname || '.' || st.relname, st.n_tup_ins, st.n_tup_upd, st.n_tup_hot_upd, st.n_tup_del
order by pct_io_ops desc
limit 10;
top 10 tables by index scans:
SELECT pg_stat_all_tables.schemaname || '.' || relid::regclass AS table,
       round(100 * (sum(pg_stat_user_indexes.idx_scan))
             / (sum(pg_stat_user_indexes.idx_scan) over ()), 2) pct_indx_scans
FROM pg_stat_user_indexes
JOIN pg_index USING (indexrelid)
join pg_stat_all_tables using (relid)
group by pg_stat_all_tables.schemaname || '.' || relid::regclass, pg_stat_user_indexes.idx_scan
order by 2 desc
limit 10;
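Both queries read cumulative counters, so they show totals since the statistics were last reset; to measure only current activity, you can zero the counters first (this discards all accumulated pg_stat_* data for the current database):

select pg_stat_reset();  -- reset statistics counters for the current database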
postgresql repmgr how-to
For best results, install the database following this document:
how-to install postgres database
I have a 4-node configuration: 2 nodes in one datacenter and 2 in the other.
The topology looks like this:
  DR1             DR2
 DB01   --->     DB11
  |               |
  \_DB02          \_DB12
create config files:
node1 /u01/postgres/9.5/main/conf/repmgr.conf
node=1
cluster=db_cluster
node_name=pg1
conninfo='host=pg01 user=repmgr dbname=postgres password=q1'
pg_bindir=/usr/pgsql-9.5/bin/
master_response_timeout=5
reconnect_attempts=2
reconnect_interval=2
failover=manual
promote_command='/usr/pgsql-9.5/bin/repmgr standby promote -f /u01/postgres/9.5/main/conf/repmgr.conf'
follow_command='/usr/pgsql-9.5/bin/repmgr standby follow -f /u01/postgres/9.5/main/conf/repmgr.conf'
use_replication_slots=1
node2 /u01/postgres/9.5/main/conf/repmgr.conf
node=2
cluster=db_cluster
node_name=pg2
conninfo='host=pg02 user=repmgr dbname=postgres password=q1'
pg_bindir=/usr/pgsql-9.5/bin/
master_response_timeout=5
reconnect_attempts=2
reconnect_interval=2
failover=manual
promote_command='/usr/pgsql-9.5/bin/repmgr standby promote -f /u01/postgres/9.5/main/conf/repmgr.conf'
follow_command='/usr/pgsql-9.5/bin/repmgr standby follow -f /u01/postgres/9.5/main/conf/repmgr.conf'
use_replication_slots=1
node3 /u01/postgres/9.5/main/conf/repmgr.conf
node=3
cluster=db_cluster
node_name=pg3
conninfo='host=pg03 user=repmgr dbname=postgres password=q1'
pg_bindir=/usr/pgsql-9.5/bin/
master_response_timeout=5
reconnect_attempts=2
reconnect_interval=2
failover=manual
promote_command='/usr/pgsql-9.5/bin/repmgr standby promote -f /u01/postgres/9.5/main/conf/repmgr.conf'
follow_command='/usr/pgsql-9.5/bin/repmgr standby follow -f /u01/postgres/9.5/main/conf/repmgr.conf'
use_replication_slots=1
node4 /u01/postgres/9.5/main/conf/repmgr.conf
node=4
cluster=db_cluster
node_name=pg4
conninfo='host=pg04 user=repmgr dbname=postgres password=q1'
pg_bindir=/usr/pgsql-9.5/bin/
master_response_timeout=5
reconnect_attempts=2
reconnect_interval=2
failover=manual
promote_command='/usr/pgsql-9.5/bin/repmgr standby promote -f /u01/postgres/9.5/main/conf/repmgr.conf'
follow_command='/usr/pgsql-9.5/bin/repmgr standby follow -f /u01/postgres/9.5/main/conf/repmgr.conf'
use_replication_slots=1
upstream_node=3
We also need to set up passwordless authentication for the repmgr user.
Create a .pgpass file in the postgres user's home directory with 0600 permissions.
All nodes:
cat ~/.pgpass
*:*:*:repmgr:q1

chmod 600 ~/.pgpass
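To check that passwordless auth works, you can try a connection between nodes (hostname per the config above); it should return without a password prompt:

psql -h pg02 -U repmgr -d postgres -c 'select 1;'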
create repmgr user
createuser -s repmgr
We also need to allow repmgr to connect to the database with the replication role.
Add these lines to your pg_hba.conf file on all nodes:
cat /u01/postgres/9.5/main/conf/pg_hba.conf | tail -3
local   replication,postgres   repmgr                   trust
host    replication,postgres   repmgr   127.0.0.1/32    trust
host    replication,postgres   repmgr   10.10.10.0/24   md5
Here I allow connections from the whole network, but you can restrict this to specific machines if you want.
The next step is to configure repmgr.
Create Master:
[postgres@pg1 ~]$ /usr/pgsql-9.5/bin/repmgr -f /u01/postgres/9.5/main/conf/repmgr.conf master register
[2016-12-05 19:06:36] [NOTICE] master node correctly registered for cluster db_cluster with id 1 (conninfo: host=pg1 user=repmgr dbname=postgres password=q1)
create standbys
on hosts pg2 and pg3:
/usr/pgsql-9.5/bin/repmgr -c -h pg1 -d postgres -U repmgr -D $PGDATA -f /u01/postgres/9.5/main/conf/repmgr.conf standby clone
start standby:
pg_ctl start -o "-c config_file=/u01/postgres/9.5/main/conf/postgresql.conf"
and register standby:
/usr/pgsql-9.5/bin/repmgr -f /u01/postgres/9.5/main/conf/repmgr.conf standby register
Create the last standby (which is going to be a cascading standby); we clone it from the pg3 database:
/usr/pgsql-9.5/bin/repmgr -c -h pg3 -d postgres -U repmgr -D $PGDATA -f /u01/postgres/9.5/main/conf/repmgr.conf standby clone
start and register database:
pg_ctl start -o "-c config_file=/u01/postgres/9.5/main/conf/postgresql.conf"
/usr/pgsql-9.5/bin/repmgr -f /u01/postgres/9.5/main/conf/repmgr.conf standby register
The final result is:
postgres=# select * from repmgr_db_cluster.repl_nodes ;
 id |  type   | upstream_node_id |  cluster   | name |                            conninfo                            |   slot_name   | priority | active
----+---------+------------------+------------+------+----------------------------------------------------------------+---------------+----------+--------
  1 | master  |                  | db_cluster | pg1  | host=pg1 user=repmgr dbname=postgres password=xxxxxxxxxxxxxxxx  | repmgr_slot_1 |      100 | t
  2 | standby |                1 | db_cluster | pg2  | host=pg2 user=repmgr dbname=postgres password=xxxxxxxxxxxxxxxx  | repmgr_slot_2 |      100 | t
  3 | standby |                1 | db_cluster | pg3  | host=pg3 user=repmgr dbname=postgres password=xxxxxxxxxxxxxxxx  | repmgr_slot_3 |      100 | t
  4 | standby |                3 | db_cluster | pg4  | host=pg4 user=repmgr dbname=postgres password=xxxxxxxxxxxxxxxx  | repmgr_slot_4 |      100 | t
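To double-check that WAL is actually streaming, you can also query pg_stat_replication on the master (and on pg3, which feeds the cascading standby); the view is standard in 9.5:

select pid, application_name, client_addr, state, sync_state
from pg_stat_replication;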
postgresql install how-to
I will split the material of the pgpool installation how-to into several logical parts;
here is the first one.
Start by installing the postgres packages:
yum install postgresql95-devel postgresql95 postgresql95-test postgresql95-libs postgresql95-server postgresql95-contrib pgbouncer.x86_64 repmgr95* pgpool-II-95 pgpool-II-95-extensions
for 9.6
yum install postgresql96-devel postgresql96 postgresql96-test postgresql96-libs postgresql96-server postgresql96-contrib pgbouncer.x86_64 repmgr96*
for 10
yum install postgresql10-devel postgresql10 postgresql10-test postgresql10-libs postgresql10-server postgresql10-contrib pgbouncer.x86_64
create directories and change the home directory of the postgres user:
mkdir /var/log/postgres
chown postgres: /var/log/postgres
usermod -m -d /u01 postgres
chown postgres: /u01 -R
su - postgres
cp /etc/skel/.bash* $HOME
mkdir -p /u01/postgres/main/pgdata/
mkdir -p /u01/postgres/main/pgdatats/
mkdir -p /u01/postgres/main/recovery
mkdir -p /u01/postgres/main/conf
mkdir /u01/postgres/main/recovery/backup
mkdir /u01/postgres/main/recovery/walarchive
mkdir /u01/postgres/main/recovery/dmp
configure bash_profile:
echo "export PGDATA=/u01/postgres/main/pgdata ">> ~/.bash_profile echo "export PGTBS=/u01/postgres/main/pgdatats ">> ~/.bash_profile echo "export PGCFG=/u01/postgres/main/conf ">> ~/.bash_profile echo "export HISTTIMEFORMAT='%Y-%m-%d %H:%M:%S ' ">> ~/.bash_profile echo "export HISTFILESIZE=9000 ">> ~/.bash_profile echo "export PROMPT_COMMAND='history -a' ">> ~/.bash_profile echo "export PS1='\[\e]0;\u@\h: \w\a\][\u@\h \W]$ ' ">> ~/.bash_profile
create dummy database:
/usr/pgsql-9.5/bin/initdb --locale=ru_RU.utf8 --data-checksums
configure the init scripts (or systemd unit) to work with our external config directory:
RH6:
/etc/init.d/postgresql-9.5

PGDATA=/u01/postgres/main/pgdata
PGOPTS="-c config_file=/u01/postgres/main/conf/postgresql.conf"
RH7:
vim /usr/lib/systemd/system/postgresql-9.5.service

# Note: changing PGDATA will typically require adjusting SELinux
# Note: do not use a PGDATA pathname containing spaces, or you will
Environment=PGDATA=/u01/postgres/main/pgdata
Environment=PGCONF=/u01/postgres/main/conf
ExecStartPre=/usr/pgsql-10/bin/postgresql-10-check-db-dir ${PGDATA}
ExecStart=/usr/pgsql-10/bin/postmaster -D ${PGDATA} -c config_file=${PGCONF}/postgresql.conf
don’t forget about:
systemctl daemon-reload
Create starter database:
/usr/pgsql-9.5/bin/initdb --lc-collate=ru_RU.UTF8 --locale=en_US.UTF8 --encoding=UTF8
/usr/pgsql-9.5/bin/pg_ctl stop
Move the config file postgresql.conf to the $PGCFG directory (/u01/postgres/main/conf) and modify it:
data_directory = '/u01/postgres/main/pgdata'
hba_file = '/u01/postgres/main/conf/pg_hba.conf'
ident_file = '/u01/postgres/main/conf/pg_ident.conf'
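After restarting, you can confirm that the server picked up the external config files with the standard SHOW commands:

psql -c "show config_file;"
psql -c "show hba_file;"
psql -c "show data_directory;"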
enable database startup:
chkconfig postgresql-9.5 on
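On RH7 the systemd equivalent is:

systemctl enable postgresql-9.5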
The resulting postgresql.conf:

data_directory = '/u01/postgres/main/pgdata'
hba_file = '/u01/postgres/main/conf/pg_hba.conf'
ident_file = '/u01/postgres/main/conf/pg_ident.conf'
listen_addresses = '*'                  # what IP address(es) to listen on;
max_connections = 500                   # (change requires restart)
shared_buffers = 8GB                    # min 128kB
work_mem = 64MB                         # min 64kB
maintenance_work_mem = 2GB              # min 1MB
dynamic_shared_memory_type = posix      # the default is the first option
wal_level = hot_standby                 # minimal, archive, hot_standby, or logical
max_wal_size = 3GB
min_wal_size = 1GB
checkpoint_completion_target = 0.9      # checkpoint target duration, 0.0 - 1.0
archive_mode = on                       # enables archiving; off, on, or always
archive_command = 'cp -i %p /u01/postgres/main/recovery/walarchive/%f < /dev/null'      # command to use to archive a logfile segment
max_wal_senders = 4                     # max number of walsender processes
max_replication_slots = 4               # max number of replication slots
hot_standby = on                        # "on" allows queries during recovery
hot_standby_feedback = on               # send info from standby to prevent
effective_cache_size = 24GB
log_destination = 'stderr'              # Valid values are combinations of
logging_collector = on                  # Enable capturing of stderr and csvlog
log_directory = '/var/log/postgres'     # directory where log files are written,
log_filename = 'postgresql-%Y-%m-%d.log'        # log file name pattern,
log_truncate_on_rotation = on           # If on, an existing log file with the
log_rotation_age = 1d                   # Automatic rotation of logfiles will
log_rotation_size = 0                   # Automatic rotation of logfiles will
log_min_duration_statement = 300        # -1 is disabled, 0 logs all statements
log_checkpoints = on
log_connections = on
log_disconnections = on
log_line_prefix = '%t [%p]: [%l-1] db=%d,appname=%q,user=%u,client=%h'  # special values:
log_statement = 'ddl'                   # none, ddl, mod, all
log_temp_files = 0                      # log temporary files equal or larger
log_timezone = 'Europe/Moscow'
cluster_name = 'p00nrd'                 # added to process titles if nonempty
track_io_timing = on
autovacuum_vacuum_scale_factor = 0.02   # fraction of table size before vacuum
autovacuum_analyze_scale_factor = 0.01  # fraction of table size before analyze
autovacuum_vacuum_cost_delay = 30ms     # default vacuum cost delay for
datestyle = 'iso, mdy'
timezone = 'Europe/Moscow'
lc_messages = 'en_US.UTF8'              # locale for system error message
lc_monetary = 'en_US.UTF8'              # locale for monetary formatting
lc_numeric = 'en_US.UTF8'               # locale for number formatting
lc_time = 'en_US.UTF8'                  # locale for time formatting
default_text_search_config = 'pg_catalog.english'
shared_preload_libraries = 'pg_stat_statements'         # (change requires restart)
pg_stat_statements.max = 10000
pg_stat_statements.track = top
pg_stat_statements.track_utility = off
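Because the config preloads pg_stat_statements, remember to create the extension once the server is up, otherwise the view will not exist:

psql -d postgres -c "create extension if not exists pg_stat_statements;"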
Upload csv files to postgresql database
Here is my draft of how to automate uploading a lot of CSV files to the database (I use text fields because there is no need to match all the column types in this case):
for i in $(ls -1 *.csv); do
    t=${i%.csv}    # table name = file name without the .csv extension
    echo "create table $t ("
    echo "$(head -1 "$i" | sed 's/,/ text, /g') text );"
    echo "copy $t from '/u01/import/$i' DELIMITER ',' CSV HEADER;"
done
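Run it from the directory with the CSV files and feed the generated SQL straight to psql; the script name gen_csv_load.sh and database target_db here are just examples:

cd /u01/import
bash gen_csv_load.sh > load.sql    # gen_csv_load.sh = the loop above, saved to a file
psql -d target_db -f load.sql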
postgresql query to find duplicate indexes
SELECT sub.table,
       pg_size_pretty(SUM(pg_relation_size(idx))::BIGINT) AS size,
       (array_agg(idx))[1] AS idx1,
       (array_agg(idx))[2] AS idx2,
       (array_agg(idx))[3] AS idx3,
       (array_agg(idx))[4] AS idx4
FROM (
    SELECT indrelid::regclass AS table,
           indexrelid::regclass AS idx,
           (indrelid::text || E'\n' || indclass::text || E'\n' || indkey::text || E'\n' ||
            COALESCE(indexprs::text,'') || E'\n' || COALESCE(indpred::text,'')) AS key
    FROM pg_index
    WHERE pg_relation_size(indexrelid::regclass) > 100*1024*1024
) sub
GROUP BY sub.table, key
HAVING COUNT(*) > 1
ORDER BY SUM(pg_relation_size(idx)) DESC;
sample output
       table       |  size  |            idx1             |                idx2                | idx3 | idx4
-------------------+--------+-----------------------------+------------------------------------+------+------
 mdm_record_column | 797 MB | mdm_record_column_column_fk | mdm_record_column_record_fk        |      |
 fin_bill_generate | 300 MB | fin_bill_generate_fk        | fin_bill_generate_spec_item_id_idx |      |
It is also useful to look at usage statistics for these indexes:
SELECT relid::regclass AS table,
       indexrelid::regclass AS index,
       pg_size_pretty(pg_relation_size(indexrelid::regclass)) AS index_size,
       idx_tup_read, idx_tup_fetch, idx_scan,
       pg_get_indexdef(pg_index.indexrelid) AS indexdef
FROM pg_stat_user_indexes
JOIN pg_index USING (indexrelid)
WHERE (relid::regclass)::text = 'mdm_record_column'
ORDER BY idx_scan DESC;

       table       |              index              | index_size | idx_tup_read | idx_tup_fetch | idx_scan |                                          indexdef
-------------------+---------------------------------+------------+--------------+---------------+----------+---------------------------------------------------------------------------------------------
 mdm_record_column | mdm_record_column_record_fk     | 399 MB     |       758024 |        758024 |     2992 | CREATE INDEX mdm_record_column_record_fk ON mdm_record_column USING btree (column_id)
 mdm_record_column | mdm_record_column_record_id_idx | 399 MB     |         1922 |          1922 |      442 | CREATE INDEX mdm_record_column_record_id_idx ON mdm_record_column USING btree (record_id)
 mdm_record_column | mdm_record_column_pk            | 399 MB     |            0 |             0 |        0 | CREATE UNIQUE INDEX mdm_record_column_pk ON mdm_record_column USING btree (id)
 mdm_record_column | mdm_record_column_column_fk     | 399 MB     |            0 |             0 |        0 | CREATE INDEX mdm_record_column_column_fk ON mdm_record_column USING btree (column_id)
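If one index of a duplicate pair shows zero scans over a long uptime (like mdm_record_column_column_fk above), it is usually safe to drop it; double-check the index definitions first:

drop index concurrently mdm_record_column_column_fk;  -- avoids taking an exclusive lock on the table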
postgresql cat effective non-default parameters from config file
cat postgresql.conf |egrep -v "^[[:blank:]]*#" | grep -v '^$'
data_directory = '/u01/postgres/9.5/main/pgdata'
hba_file = '/u01/postgres/9.5/main/conf/pg_hba.conf'
ident_file = '/u01/postgres/9.5/main/conf/pg_ident.conf'
listen_addresses = '*'                  # what IP address(es) to listen on;
max_connections = 300                   # (change requires restart)
shared_buffers = 4096MB                 # min 128kB
work_mem = 256MB                        # min 64kB
maintenance_work_mem = 512MB            # min 1MB
dynamic_shared_memory_type = posix      # the default is the first option
checkpoint_timeout = 15min              # range 30s-1h
max_wal_size = 10GB
min_wal_size = 1GB
checkpoint_completion_target = 0.9      # checkpoint target duration, 0.0 - 1.0
archive_mode = on                       # enables archiving; off, on, or always
archive_command = 'cp -i %p /u01/postgres/9.5/main/recovery/walarchive/%f </dev/null'   # command to use to archive a logfile segment
max_wal_senders = 3                     # max number of walsender processes
max_replication_slots = 2               # max number of replication slots
hot_standby = on                        # "on" allows queries during recovery
effective_cache_size = 8GB
log_destination = 'stderr'              # Valid values are combinations of
logging_collector = on                  # Enable capturing of stderr and csvlog
log_directory = 'pg_log'                # directory where log files are written,
log_filename = 'postgresql-%Y-%m-%d.log'        # log file name pattern,
log_truncate_on_rotation = on           # If on, an existing log file with the
log_rotation_age = 1d                   # Automatic rotation of logfiles will
log_rotation_size = 0                   # Automatic rotation of logfiles will
log_min_duration_statement = 300        # -1 is disabled, 0 logs all statements
log_checkpoints = on
log_connections = on
log_disconnections = on
log_line_prefix = '%t [%p]: [%l-1] db=%d,appname=%a,user=%u,client=%h '         # special values:
log_lock_waits = on                     # log lock waits >= deadlock_timeout
log_temp_files = 0                      # log temporary files equal or larger
log_timezone = 'Europe/Moscow'
autovacuum_max_workers = 5              # max number of autovacuum subprocesses
autovacuum_vacuum_scale_factor = 0.01   # fraction of table size before vacuum
autovacuum_analyze_scale_factor = 0.05  # fraction of table size before analyze
datestyle = 'iso, mdy'
timezone = 'Europe/Moscow'
lc_messages = 'en_US.utf8'              # locale for system error message
lc_monetary = 'en_US.utf8'              # locale for monetary formatting
lc_numeric = 'en_US.utf8'               # locale for number formatting
lc_time = 'en_US.utf8'                  # locale for time formatting
default_text_search_config = 'pg_catalog.english'
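The grep approach only parses the file; to ask the running server which settings actually differ from the built-in defaults, you can query pg_settings instead:

select name, setting, source
from pg_settings
where source not in ('default', 'override')
order by name;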
postgresql bloat monitoring script
select * from (
  SELECT current_database(), schemaname, tablename,
    /*reltuples::bigint, relpages::bigint, otta,*/
    ROUND((CASE WHEN otta=0 THEN 0.0 ELSE sml.relpages::FLOAT/otta END)::NUMERIC,1) AS tbloat,
    CASE WHEN relpages < otta THEN 0
         ELSE round(bs*((sml.relpages-otta)::bigint)/1024/1024,1) END AS wastedMBytes,
    iname, /*ituples::bigint, ipages::bigint, iotta,*/
    ROUND((CASE WHEN iotta=0 OR ipages=0 THEN 0.0 ELSE ipages::FLOAT/iotta END)::NUMERIC,1) AS ibloat,
    CASE WHEN ipages < iotta THEN 0
         ELSE round(bs*((ipages-iotta)/1024/1024)::bigint,1) END AS wastediMBytes
  FROM (
    SELECT schemaname, tablename, cc.reltuples, cc.relpages, bs,
      CEIL((cc.reltuples*((datahdr+ma-
        (CASE WHEN datahdr%ma=0 THEN ma ELSE datahdr%ma END))+nullhdr2+4))/(bs-20::FLOAT)) AS otta,
      COALESCE(c2.relname,'?') AS iname,
      COALESCE(c2.reltuples,0) AS ituples,
      COALESCE(c2.relpages,0) AS ipages,
      COALESCE(CEIL((c2.reltuples*(datahdr-12))/(bs-20::FLOAT)),0) AS iotta -- very rough approximation, assumes all cols
    FROM (
      SELECT ma, bs, schemaname, tablename,
        (datawidth+(hdr+ma-(CASE WHEN hdr%ma=0 THEN ma ELSE hdr%ma END)))::NUMERIC AS datahdr,
        (maxfracsum*(nullhdr+ma-(CASE WHEN nullhdr%ma=0 THEN ma ELSE nullhdr%ma END))) AS nullhdr2
      FROM (
        SELECT schemaname, tablename, hdr, ma, bs,
          SUM((1-null_frac)*avg_width) AS datawidth,
          MAX(null_frac) AS maxfracsum,
          hdr+(
            SELECT 1+COUNT(*)/8
            FROM pg_stats s2
            WHERE null_frac<>0 AND s2.schemaname = s.schemaname AND s2.tablename = s.tablename
          ) AS nullhdr
        FROM pg_stats s, (
          SELECT
            (SELECT current_setting('block_size')::NUMERIC) AS bs,
            CASE WHEN SUBSTRING(v,12,3) IN ('8.0','8.1','8.2') THEN 27 ELSE 23 END AS hdr,
            CASE WHEN v ~ 'mingw32' THEN 8 ELSE 4 END AS ma
          FROM (SELECT version() AS v) AS foo
        ) AS constants
        GROUP BY 1,2,3,4,5
      ) AS foo
    ) AS rs
    JOIN pg_class cc ON cc.relname = rs.tablename
    JOIN pg_namespace nn ON cc.relnamespace = nn.oid
         AND nn.nspname = rs.schemaname AND nn.nspname <> 'information_schema'
    LEFT JOIN pg_index i ON indrelid = cc.oid
    LEFT JOIN pg_class c2 ON c2.oid = i.indexrelid
  ) AS sml
) bloat
where (bloat.tbloat > 5 or ibloat > 5) and wastedMBytes > 100 or wastedMBytes > 1024
ORDER BY wastedMBytes DESC;
Taken from https://wiki.postgresql.org/wiki/Show_database_bloat
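Once a bloated table is identified, the blunt fix is a full rewrite; note that VACUUM FULL takes an exclusive lock for the duration (pg_repack is the usual online alternative). Using the table from the earlier examples:

vacuum (full, verbose, analyze) mdm_record_column;  -- rewrites the table and its indexes, locking it while it runs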