# ==========================================
# 2. 备库同步状态与延迟监控 (单位:秒)
# ==========================================
[[metric]]
# Data Guard apply/transport lag, expressed in seconds.
# One series per row of v$dataguard_stats whose NAME is 'apply lag' or
# 'transport lag'; the label value is the lower-cased NAME with spaces turned
# into underscores (apply_lag, transport_lag).
context = "dg_lag"
labels = ["lag_name"]
metricsdesc = { lag_seconds = "Data Guard lag time in seconds" }
# VALUE is cast to INTERVAL DAY(2) TO SECOND(0) and flattened to seconds by
# summing its day/hour/minute/second components.
request = """\
select replace(lower(name), ' ', '_') as lag_name, \
(extract(day from cast(value as interval day(2) to second(0))) * 86400 \
+ extract(hour from cast(value as interval day(2) to second(0))) * 3600 \
+ extract(minute from cast(value as interval day(2) to second(0))) * 60 \
+ extract(second from cast(value as interval day(2) to second(0)))) as lag_seconds \
from v$dataguard_stats \
where name in ('apply lag', 'transport lag')\
"""
[[metric]]
# Managed recovery (MRP0) health on the standby.
# Counts MRP0 rows in v$managed_standby that are in a healthy state.
# WAIT_FOR_LOG is accepted alongside APPLYING_LOG: a standby that has applied
# everything it received and is idle waiting for the next redo would otherwise
# be reported as "not syncing". Other states (for example WAIT_FOR_GAP) still
# yield 0.
context = "dg_mrp_status"
metricsdesc = { mrp_active = "MRP process status: 1=Applying or waiting for redo (Syncing), 0=Not Syncing" }
request = """\
select count(*) as mrp_active \
from v$managed_standby \
where process = 'MRP0' \
and status in ('APPLYING_LOG', 'WAIT_FOR_LOG')\
"""
# ==========================================
# 3. ASM 磁盘组容量与使用率监控
# ==========================================
[[metric]]
# ASM disk group capacity: total MB, free MB and used percentage, one series
# per disk group (label group_name).
# Reads v$asm_diskgroup_stat instead of v$asm_diskgroup: per the Oracle
# reference it exposes the same columns but does not run disk discovery on
# every query, which matters for a view polled on each scrape.
context = "asm_dg"
labels = ["group_name"]
metricsdesc = { total_mb = "ASM disk group total size in MB", free_mb = "ASM disk group free size in MB", use_pct = "ASM disk group utilization percentage" }
# decode(total_mb, 0, 1, total_mb) guards the division when total_mb is 0.
request = """\
select name as group_name, total_mb, free_mb, \
round((total_mb - free_mb) / decode(total_mb, 0, 1, total_mb) * 100, 2) as use_pct \
from v$asm_diskgroup_stat\
"""
# ==========================================
# 4. FRA (闪回区/归档区) 容量监控
# ==========================================
[[metric]]
# Fast recovery area usage: limit, used bytes and used percentage.
# fra_name is declared as a label so the selected NAME column is actually
# exported, consistent with the other labelled metrics in this file.
context = "fra_usage"
labels = ["fra_name"]
metricsdesc = { space_limit = "FRA total size in bytes", space_used = "FRA used size in bytes", used_pct = "FRA utilization percentage" }
# decode(space_limit, 0, 1, space_limit) guards the division when no limit is set.
request = """\
select name as fra_name, space_limit, space_used, \
round((space_used / decode(space_limit, 0, 1, space_limit)) * 100, 2) as used_pct \
from v$recovery_file_dest\
"""
# ==========================================
# 5. DG 归档传输链路状态 (仅在主库生效)
# ==========================================
[[metric]]
# Error flag per archive destination, labelled by dest_id.
# Only rows of v$archive_dest with target = 'STANDBY' and a status other than
# 'INACTIVE' are considered; has_error is 0 when ERROR is null, 1 otherwise.
context = "dg_transport_dest"
labels = ["dest_id"]
metricsdesc = { has_error = "Archive destination error status: 0=Normal, 1=Error(Broken Link)" }
request = """\
select to_char(dest_id) as dest_id, \
decode(error, null, 0, 1) as has_error \
from v$archive_dest \
where target = 'STANDBY' and status != 'INACTIVE'\
"""
# ==========================================
# 6. RAC 节点间通信延迟 (Cache Fusion)
# ==========================================
[[metric]]
# Global cache (Cache Fusion) average get times for CR and current blocks.
# The label value is the lower-cased metric name with spaces replaced by
# underscores.
# v$sysmetric can list a metric under more than one metric group (different
# sampling intervals); pinning group_id = 2 (the long-duration group) keeps
# exactly one row per label so the exporter never sees duplicate series.
context = "rac_interconnect"
labels = ["metric_name"]
metricsdesc = { value = "RAC Global Cache Get Time (Centiseconds)" }
request = """\
select replace(lower(metric_name), ' ', '_') as metric_name, value \
from v$sysmetric \
where group_id = 2 \
and metric_name in ('Global Cache Average CR Get Time', 'Global Cache Average Current Get Time')\
"""
# ==========================================
# 7. RMAN 备份状态监控
# ==========================================
[[metric]]
# Hours elapsed since the most recent successful RMAN BACKUP operation.
# A run that ended as 'COMPLETED WITH WARNINGS' still produced a backup, so it
# is counted together with 'COMPLETED'; 'COMPLETED WITH ERRORS' and 'FAILED'
# are not. When no qualifying row exists the query returns the sentinel 999.
context = "rman_backup"
metricsdesc = { hours_since_last_backup = "Hours since last successful RMAN backup" }
request = """\
select nvl(min(round((sysdate - end_time) * 24, 2)), 999) as hours_since_last_backup \
from v$rman_status \
where operation = 'BACKUP' \
and status in ('COMPLETED', 'COMPLETED WITH WARNINGS')\
"""
# ==========================================
# 8. 业务对象失效监控
# ==========================================
[[metric]]
# Number of INVALID objects in dba_objects, ignoring the owners listed in the
# NOT IN clause below.
context = "invalid_objects"
metricsdesc = { invalid_count = "Number of invalid objects in business schemas" }
request = """\
select count(*) as invalid_count \
from dba_objects \
where status = 'INVALID' \
and owner not in ('SYS', 'SYSTEM', 'OUTLN', 'DBSNMP', 'APPQOSSYS', \
'WMSYS', 'EXFSYS', 'CTXSYS', 'XDB', 'ORDSYS')\
"""
# ==========================================
# 9. 阻塞锁数量监控 (使用 v$session 极速版)
# ==========================================
[[metric]]
# Number of sessions currently blocked by another session: rows of v$session
# with a non-null blocking_session and status 'ACTIVE'.
context = "blocking_locks"
metricsdesc = { block_count = "Number of blocked sessions" }
request = """\
select count(*) as block_count \
from v$session \
where blocking_session is not null and status = 'ACTIVE'\
"""