重庆思庄Oracle、KingBase、PostgreSQL、Redhat认证学习论坛

标题: PG库表定时巡检并清理的SQL语句及脚本 [打印本页]

作者: mahan 时间: 2025-12-7 20:13
标题: PG库表定时巡检并清理的SQL语句及脚本
本帖最后由 mahan 于 2025-12-7 20:14 编辑

Table of Contents
表与索引膨胀 TOP 10
库表统计信息长时间未更新 TOP 10
库表年龄 TOP 10
巡检处理脚本
定时任务策略

1、表与索引膨胀 TOP 10
CREATE SCHEMA IF NOT EXISTS monitor;

create OR REPLACE view monitor.pg_stat_bloat_tables as
-- 查看表膨胀
with tmp as (
SELECT current_database(), schemaname, tblname, bs*tblpages AS real_size,
  (tblpages-est_tblpages)*bs AS extra_size,
  CASE WHEN tblpages > 0 AND tblpages - est_tblpages > 0
THEN 100 * (tblpages - est_tblpages)/tblpages::float
ELSE 0
  END AS extra_pct, fillfactor,
  CASE WHEN tblpages - est_tblpages_ff > 0
THEN (tblpages-est_tblpages_ff)*bs
ELSE 0
  END AS bloat_size,
  CASE WHEN tblpages > 0 AND tblpages - est_tblpages_ff > 0
THEN 100 * (tblpages - est_tblpages_ff)/tblpages::float
ELSE 0
  END AS bloat_pct, is_na
  -- , tpl_hdr_size, tpl_data_size, (pst).free_percent + (pst).dead_tuple_percent AS real_frag -- (DEBUG INFO)
FROM (
  SELECT ceil( reltuples / ( (bs-page_hdr)/tpl_size ) ) + ceil( toasttuples / 4 ) AS est_tblpages,
ceil( reltuples / ( (bs-page_hdr)*fillfactor/(tpl_size*100) ) ) + ceil( toasttuples / 4 ) AS est_tblpages_ff,
tblpages, fillfactor, bs, tblid, schemaname, tblname, heappages, toastpages, is_na
-- , tpl_hdr_size, tpl_data_size, pgstattuple(tblid) AS pst -- (DEBUG INFO)
  FROM (
SELECT
   ( 4 + tpl_hdr_size + tpl_data_size + (2*ma)
      - CASE WHEN tpl_hdr_size%ma = 0 THEN ma ELSE tpl_hdr_size%ma END
      - CASE WHEN ceil(tpl_data_size)::int%ma = 0 THEN ma ELSE ceil(tpl_data_size)::int%ma END
   ) AS tpl_size, bs - page_hdr AS size_per_block, (heappages + toastpages) AS tblpages, heappages,
   toastpages, reltuples, toasttuples, bs, page_hdr, tblid, schemaname, tblname, fillfactor, is_na
   -- , tpl_hdr_size, tpl_data_size
FROM (
   SELECT
      tbl.oid AS tblid, ns.nspname AS schemaname, tbl.relname AS tblname, tbl.reltuples,
      tbl.relpages AS heappages, coalesce(toast.relpages, 0) AS toastpages,
      coalesce(toast.reltuples, 0) AS toasttuples,
      coalesce(substring(
      array_to_string(tbl.reloptions, ' ')
      FROM 'fillfactor=([0-9]+)')::smallint, 100) AS fillfactor,
      current_setting('block_size')::numeric AS bs,
      CASE WHEN version()~'mingw32' OR version()~'64-bit|x86_64|ppc64|ia64|amd64' THEN 8 ELSE 4 END AS ma,
      24 AS page_hdr,
      23 + CASE WHEN MAX(coalesce(s.null_frac,0)) > 0 THEN ( 7 + count(s.attname) ) / 8 ELSE 0::int END
         + CASE WHEN bool_or(att.attname = 'oid' and att.attnum < 0) THEN 4 ELSE 0 END AS tpl_hdr_size,
      sum( (1-coalesce(s.null_frac, 0)) * coalesce(s.avg_width, 0) ) AS tpl_data_size,
      bool_or(att.atttypid = 'pg_catalog.name'::regtype)
      OR sum(CASE WHEN att.attnum > 0 THEN 1 ELSE 0 END) <> count(s.attname) AS is_na
   FROM pg_attribute AS att
      JOIN pg_class AS tbl ON att.attrelid = tbl.oid
      JOIN pg_namespace AS ns ON ns.oid = tbl.relnamespace
      LEFT JOIN pg_stats AS s ON s.schemaname=ns.nspname
      AND s.tablename = tbl.relname AND s.inherited=false AND s.attname=att.attname
      LEFT JOIN pg_class AS toast ON tbl.reltoastrelid = toast.oid
   WHERE NOT att.attisdropped
      AND tbl.relkind in ('r','m','p')
   GROUP BY 1,2,3,4,5,6,7,8,9,10
   ORDER BY 2,3
) AS s
  ) AS s2
) AS s3
)
-- WHERE NOT is_na
-- AND tblpages*((pst).free_percent + (pst).dead_tuple_percent)::float4/100 >= 1
select * from tmp where schemaname='public';

create view  monitor.pg_stat_bloat_index_tables as
with tmp as (
SELECT current_database(), nspname AS schemaname, tblname, idxname, bs*(relpages)::bigint AS real_size,
  bs*(relpages-est_pages)::bigint AS extra_size,
  100 * (relpages-est_pages)::float / relpages AS extra_pct,
  fillfactor,
  CASE WHEN relpages > est_pages_ff
THEN bs*(relpages-est_pages_ff)
ELSE 0
  END AS bloat_size,
  100 * (relpages-est_pages_ff)::float / relpages AS bloat_pct,
  is_na
  -- , 100-(pst).avg_leaf_density AS pst_avg_bloat, est_pages, index_tuple_hdr_bm, maxalign, pagehdr, nulldatawidth, nulldatahdrwidth, reltuples, relpages -- (DEBUG INFO)
FROM (
  SELECT coalesce(1 +
      ceil(reltuples/floor((bs-pageopqdata-pagehdr)/(4+nulldatahdrwidth)::float)), 0 -- ItemIdData size + computed avg size of a tuple (nulldatahdrwidth)
   ) AS est_pages,
   coalesce(1 +
      ceil(reltuples/floor((bs-pageopqdata-pagehdr)*fillfactor/(100*(4+nulldatahdrwidth)::float))), 0
   ) AS est_pages_ff,
   bs, nspname, tblname, idxname, relpages, fillfactor, is_na
   -- , pgstatindex(idxoid) AS pst, index_tuple_hdr_bm, maxalign, pagehdr, nulldatawidth, nulldatahdrwidth, reltuples -- (DEBUG INFO)
  FROM (
   SELECT maxalign, bs, nspname, tblname, idxname, reltuples, relpages, idxoid, fillfactor,
         ( index_tuple_hdr_bm +
            maxalign - CASE -- Add padding to the index tuple header to align on MAXALIGN
               WHEN index_tuple_hdr_bm%maxalign = 0 THEN maxalign
               ELSE index_tuple_hdr_bm%maxalign
            END
            + nulldatawidth + maxalign - CASE -- Add padding to the data to align on MAXALIGN
               WHEN nulldatawidth = 0 THEN 0
               WHEN nulldatawidth::integer%maxalign = 0 THEN maxalign
               ELSE nulldatawidth::integer%maxalign
            END
         )::numeric AS nulldatahdrwidth, pagehdr, pageopqdata, is_na
         -- , index_tuple_hdr_bm, nulldatawidth -- (DEBUG INFO)
   FROM (
      SELECT n.nspname, ct.relname AS tblname, i.idxname, i.reltuples, i.relpages,
            i.idxoid, i.fillfactor, current_setting('block_size')::numeric AS bs,
            CASE -- MAXALIGN: 4 on 32bits, 8 on 64bits (and mingw32 ?)
            WHEN version() ~ 'mingw32' OR version() ~ '64-bit|x86_64|ppc64|ia64|amd64' THEN 8
            ELSE 4
            END AS maxalign,
            /* per page header, fixed size: 20 for 7.X, 24 for others */
            24 AS pagehdr,
            /* per page btree opaque data */
            16 AS pageopqdata,
            /* per tuple header: add IndexAttributeBitMapData if some cols are null-able */
            CASE WHEN max(coalesce(s.stanullfrac,0)) = 0
               THEN 8 -- IndexTupleData size
               ELSE 8 + (( 32 + 8 - 1 ) / 8) -- IndexTupleData size + IndexAttributeBitMapData size ( max num filed per index + 8 - 1 /8)
            END AS index_tuple_hdr_bm,
            /* data len: we remove null values save space using it fractionnal part from stats */
            sum( (1-coalesce(s.stanullfrac, 0)) * coalesce(s.stawidth, 1024)) AS nulldatawidth,
            max( CASE WHEN a.atttypid = 'pg_catalog.name'::regtype THEN 1 ELSE 0 END ) > 0 AS is_na
      FROM (
            SELECT idxname, reltuples, relpages, tbloid, idxoid, fillfactor,
               CASE WHEN indkey=0 THEN idxoid ELSE tbloid END AS att_rel,
               CASE WHEN indkey=0 THEN i ELSE indkey END AS att_pos
            FROM (
               SELECT idxname, reltuples, relpages, tbloid, idxoid, fillfactor, indkey, generate_series(1,indnatts) AS i
               FROM (
                  SELECT ci.relname AS idxname, ci.reltuples, ci.relpages, i.indrelid AS tbloid,
                        i.indexrelid AS idxoid,
                        coalesce(substring(
                           array_to_string(ci.reloptions, ' ')
                           from 'fillfactor=([0-9]+)')::smallint, 90) AS fillfactor,
                        i.indnatts,

                        string_to_array(textin(int2vectorout(i.indkey)),' ')::int[ ] AS indkey

                  FROM pg_index i
                  JOIN pg_class ci ON ci.oid=i.indexrelid
                  WHERE ci.relam=(SELECT oid FROM pg_am WHERE amname = 'btree')
                     AND ci.relpages > 0
               ) AS idx_data
            ) AS idx_data_cross
      ) i
      JOIN pg_attribute a ON a.attrelid = i.att_rel
                           AND a.attnum = i.att_pos
      JOIN pg_statistic s ON s.starelid = i.att_rel
                           AND s.staattnum = i.att_pos
      JOIN pg_class ct ON ct.oid = i.tbloid
      JOIN pg_namespace n ON ct.relnamespace = n.oid
      GROUP BY 1,2,3,4,5,6,7,8,9,10
   ) AS rows_data_stats
  ) AS rows_hdr_pdg_stats
) AS relation_stats
) select * from tmp where schemaname='public'
ORDER BY bloat_size desc,bloat_pct desc ;

--  表膨胀 TOP 10 的表, 并且表膨胀大小要超过 10MB, 膨胀率大于 30%
select tblname from  monitor.pg_stat_bloat_tables where bloat_size >=1000000 and bloat_pct>=30 order by bloat_size desc , bloat_pct desc limit 10;

-- 索引膨胀 TOP 10 的表, 并且膨胀大小要超过 10MB, 膨胀率大于 30%
with tmp as (select * from  monitor.pg_stat_bloat_index_tables where bloat_size >=10000000 and bloat_pct>=30 order by bloat_size desc , bloat_pct desc ) select tblname from tmp group by tblname limit 10;

psql -U postgres -d $db <<"EOF"
(select tblname from  monitor.pg_stat_bloat_tables where bloat_size >=1000000 and bloat_pct>=30 order by bloat_size desc , bloat_pct desc limit 10)
union
(with tmp as (select * from  monitor.pg_stat_bloat_index_tables where bloat_size >=10000000 and bloat_pct>=30 order by bloat_size desc , bloat_pct desc ) select tblname from tmp group by tblname limit 10)
EOF

2、库表统计信息长时间未更新 TOP 10
-- 查询统计信息最久未更新的前十张表
drop view if exists monitor.pg_stat_nonanalyze_tables;
create or replace view monitor.pg_stat_nonanalyze_tables
as
SELECT
      schemaname,
      relname AS tablename,
      last_analyze,
      last_autoanalyze,
      -- 计算距离现在的时间差（天数）
      EXTRACT(EPOCH FROM (
         NOW() - COALESCE(last_analyze, '1970-01-01')
      ))/86400 AS days_since_last_manual_analyze,
      EXTRACT(EPOCH FROM (
         NOW() - COALESCE(last_autoanalyze, '1970-01-01')
      ))/86400 AS days_since_last_auto_analyze,
      -- 取两个时间差中的最大值作为排序依据
      GREATEST(
         NOW() - COALESCE(last_analyze, '1970-01-01'),
         NOW() - COALESCE(last_autoanalyze, '1970-01-01')
      ) AS max_days_since_last_analyze
FROM pg_stat_user_tables
where schemaname='public'
ORDER BY max_days_since_last_analyze DESC
LIMIT 10
;

3、库表年龄 TOP 10
drop view if exists  monitor.pg_stat_nonfreeze_tables ;
create or replace view monitor.pg_stat_nonfreeze_tables
as
SELECT
      n.nspname AS schemaname,
      c.relname AS tablename,
      age(c.relfrozenxid) AS xid_age
FROM pg_class c
JOIN pg_namespace n ON n.oid = c.relnamespace
WHERE c.relkind in ('r')          -- 普通表
      AND n.nspname ='public'
      AND age(c.relfrozenxid) > 100000000 -- 超过1亿
ORDER BY age(c.relfrozenxid) DESC
LIMIT 10;

4、巡检处理脚本
# 膨胀清理
psql -Upostgres -d postgres > to_be_repack_tables.txt -qAtX <<"EOF"
--  表膨胀 TOP 10 的表, 并且表膨胀大小要超过 10MB, 膨胀率大于 30%
(select tblname from  monitor.pg_stat_bloat_tables where bloat_size >=1000000 and bloat_pct>=30 order by bloat_size desc , bloat_pct desc limit 10)
union
-- 索引膨胀 TOP 10 的表, 并且膨胀大小要超过 10MB, 膨胀率大于 30%
(with tmp as (select * from  monitor.pg_stat_bloat_index_tables where bloat_size >=10000000 and bloat_pct>=30 order by bloat_size desc , bloat_pct desc ) select tblname from tmp group by tblname limit 10)
EOF

循环处理 to_be_repack_tables.txt 文件每一行, 并 pg_repack ; windows 环境则是 vacuum (verbose)
rm -f to_be_repack_tables.txt

# 统计信息更新
psql -Upostgres -d postgres > to_be_analyse_tables.txt -qAtX <<"EOF"
select tablename from monitor.pg_stat_nonanalyze_tables
EOF
循环处理 to_be_analyse_tables.txt 文件每一行,, 并 analyze
rm -f to_be_analyse_tables.txt

# 老表冻结
psql -Upostgres -d postgres > to_be_freeze_tables.txt -qAtX <<"EOF"
select tablename from monitor.pg_stat_nonfreeze_tables;
EOF
循环处理 to_be_freeze_tables.txt 文件每一行, 并 vacuum (freeze,verbose)
rm -f to_be_freeze_tables.txt

5、定时任务策略
定时凌晨 02:30 开始触发

30 02 * * * root flock -xn /tmp/vacuum_db.lock -c /usr/local/bin/vacuum_db.sh &>/dev/null

欢迎光临重庆思庄Oracle、KingBase、PostgreSQL、Redhat认证学习论坛 (http://bbs.cqsztech.com/)