http://www.oracle-ckpt.com/dataguard_troubleshoot_snapper/

######sample

primary scripts:

-- Data Guard health-check script: run on the PRIMARY database with SQL*Plus.
-- Output is captured to dg_Primary_output.log.
spool dg_Primary_output.log
set feedback off
set trimspool on
set linesize 500
set pagesize 50

-- Column formats for the reports below.
-- (The original repeated the ID/SRLs/active/type formats twice; duplicates removed.)
column name format a30
column display_value format a60
column ID format 99
column "SRLs" format 99
column active format 99
col type format a4
col PROTECTION_MODE format a20
col RECOVERY_MODE format a20
col db_mode format a15

-- Data Guard related initialization parameters.
SELECT name, display_value FROM v$parameter WHERE name IN ('db_name','db_unique_name','log_archive_config','log_archive_dest_2','log_archive_dest_state_2','log_archive_dest_3','log_archive_dest_state_3','fal_client','fal_server','standby_file_management','standby_archive_dest','db_file_name_convert','log_file_name_convert','remote_login_passwordfile','local_listener','dg_broker_start','dg_broker_config_file1','dg_broker_config_file2','log_archive_max_processes','db_recovery_file_dest') order by name;

-- Database role, protection mode, and switchover readiness.
col name format a10
col DATABASE_ROLE format a10
SELECT name, db_unique_name, protection_mode, DATABASE_ROLE, OPEN_MODE, switchover_status
FROM v$database;

-- Highest archived sequence per redo thread.
select thread#, max(sequence#) from v$archived_log group by thread#;

-- Recent Data Guard status messages for the remote destinations (2,3,4).
col severity format a15
col message format a70
col timestamp format a20
select severity, error_code,
       to_char(timestamp,'DD-MON-YYYY HH24:MI:SS') "timestamp",
       message
from v$dataguard_status
where dest_id in (2,3,4);

-- Archive destination status; skip destinations that are not configured.
select ds.dest_id id
     , ad.status
     , ds.database_mode db_mode
     , ad.archiver type
     , ds.recovery_mode
     , ds.protection_mode
     , ds.standby_logfile_count "SRLs"
     , ds.standby_logfile_active active
     , ds.archived_seq#
from v$archive_dest_status ds
   , v$archive_dest ad
where ds.dest_id = ad.dest_id
  and ad.status != 'INACTIVE'
order by ds.dest_id;

-- Fast (Flash) Recovery Area space usage.
column FILE_TYPE format a20
col name format a60
select name
     , floor(space_limit / 1024 / 1024) "Size MB"
     , ceil(space_used / 1024 / 1024) "Used MB"
from v$recovery_file_dest
order by name;

SELECT substr(name, 1, 30) name, space_limit/(1073741824) AS Quota_GB, space_used/(1073741824) AS Used_GB, space_reclaimable/(1073741824) AS Reclaimable_GB, number_of_files AS files FROM V$RECOVERY_FILE_DEST ;

select * from V$FLASH_RECOVERY_AREA_USAGE;

-- Archived-log reclaimability inside the FRA (joins fixed table x$kccagf;
-- rectype 11 marks a record whose space is reclaimable).
column deleted format a7
column reclaimable format a11
column FIRST_TIME format a15
column LAST_TIME format a15
set linesize 500
select applied, deleted, backup_count,
       decode(rectype,11,'YES','NO') reclaimable,
       count(*),
       to_char(min(completion_time),'dd-mon hh24:mi') first_time,
       to_char(max(completion_time),'dd-mon hh24:mi') last_time,
       min(sequence#) first_seq,
       max(sequence#) last_seq
from v$archived_log
left outer join sys.x$kccagf using(recid)
where is_recovery_dest_file='YES'
group by applied, deleted, backup_count, decode(rectype,11,'YES','NO')
order by min(sequence#);
-- BUG FIX: the query above had no terminating ';' in the original, so SQL*Plus
-- buffered it without executing and the report was silently missing.

spool off

standby scripts:
-- Data Guard health-check script: run on the STANDBY database with SQL*Plus.
-- Output is captured to dg_standby_output.log.
spool dg_standby_output.log
set feedback off
set trimspool on
set pagesize 50
-- NOTE: the original issued "set line 500" and immediately overrode it with
-- "set linesize 200"; only the effective setting is kept.
set linesize 200

column name format a30
column display_value format a30
col value format a10
col PROTECTION_MODE format a15
col DATABASE_Role format a15

-- Data Guard related initialization parameters.
SELECT name, display_value FROM v$parameter WHERE name IN ('db_name','db_unique_name','log_archive_config','log_archive_dest_2','log_archive_dest_state_2','log_archive_dest_3','log_archive_dest_state_3','fal_client','fal_server','standby_file_management','standby_archive_dest','db_file_name_convert','log_file_name_convert','remote_login_passwordfile','local_listener','dg_broker_start','dg_broker_config_file1','dg_broker_config_file2','log_archive_max_processes','db_recovery_file_dest') order by name;

-- Database role and open mode.
-- BUG FIX: the original put both col commands on one line
-- ("col name for a10 col DATABASE_ROLE for a10"), which SQL*Plus rejects.
col name format a10
col DATABASE_ROLE format a10
SELECT name, db_unique_name, protection_mode, DATABASE_ROLE, OPEN_MODE from v$database;

-- Highest APPLIED sequence per redo thread.
select thread#, max(sequence#) from v$archived_log where applied='YES' group by thread#;

-- Redo apply / transport processes (look for MRP0 and RFS).
select process, status, thread#, sequence# from v$managed_standby;

-- Gap between last sequence received and last sequence applied, per thread.
SELECT ARCH.THREAD# "Thread", ARCH.SEQUENCE# "Last Sequence Received", APPL.SEQUENCE# "Last Sequence Applied", (ARCH.SEQUENCE# - APPL.SEQUENCE#) "Difference" FROM (SELECT THREAD# ,SEQUENCE# FROM V$ARCHIVED_LOG WHERE (THREAD#,FIRST_TIME ) IN (SELECT THREAD#,MAX(FIRST_TIME) FROM V$ARCHIVED_LOG GROUP BY THREAD#)) ARCH, (SELECT THREAD# ,SEQUENCE# FROM V$LOG_HISTORY WHERE (THREAD#,FIRST_TIME ) IN (SELECT THREAD#,MAX(FIRST_TIME) FROM V$LOG_HISTORY GROUP BY THREAD#)) APPL WHERE ARCH.THREAD# = APPL.THREAD# ORDER BY 1;

-- Apply/transport lag statistics and any detected archive gap.
col name format a30
select * from v$dataguard_stats;
select * from v$archive_gap;

-- Fast (Flash) Recovery Area space usage.
col name format a60
select name, floor(space_limit / 1024 / 1024) "Size MB", ceil(space_used / 1024 / 1024) "Used MB" from v$recovery_file_dest order by name;
SELECT substr(name, 1, 30) name, space_limit/(1073741824) AS Quota_GB, space_used/(1073741824) AS Used_GB, space_reclaimable/(1073741824) AS Reclaimable_GB, number_of_files AS files FROM V$RECOVERY_FILE_DEST ;
select * from V$FLASH_RECOVERY_AREA_USAGE;
spool off

workaround:

- Cancel Managed Recovery
select open_mode from v$database;

SQL> alter database recover managed standby database cancel;
- Shutdown the Standby Database
SQL> shutdown immediate

SQL> startup mount;
SQL> alter database recover managed standby database using current logfile disconnect;

alter database recover managed standby database disconnect;

##########sample 0

启用lgwr传输redolog,在主库查询应该有lgwr进程传输日志,而不是arch进程,

->11G 但是在async模式下,使用的是LNS进程传输redolog.也就是real-time apply.  或者是arch传送。 
->11g 在sync模式下使用的是lgwr传输redolog如下:

SQL> select process,status,thread#,sequence#,block#,blocks from v$managed_standby;

PROCESS STATUS THREAD# SEQUENCE# BLOCK# BLOCKS

--------- ------------ ---------- ---------- ---------- ----------

ARCH CLOSING 1 11110 22528 304

ARCH CLOSING 1 11111 59392 1713

ARCH CLOSING 1 11106 1 1583

ARCH CLOSING 1 11109 12288 578

LNS WRITING 1 11112 42441 4

###### sample

REM Script to Report the Redo Log Switch History
REM Formatting/compute preamble for the two queries that follow:
REM one hNN column per hour of the day (h00..h23), per thread.

set linesize 200 pagesize 1000
column day format a3
column total format 9999
-- One 3-digit format per hourly bucket.
-- FIXES vs original: duplicated "column h04" removed; stray "column h24"
-- removed (the query produces no h24 column).
column h00 format 999
column h01 format 999
column h02 format 999
column h03 format 999
column h04 format 999
column h05 format 999
column h06 format 999
column h07 format 999
column h08 format 999
column h09 format 999
column h10 format 999
column h11 format 999
column h12 format 999
column h13 format 999
column h14 format 999
column h15 format 999
column h16 format 999
column h17 format 999
column h18 format 999
column h19 format 999
column h20 format 999
column h21 format 999
column h22 format 999
column h23 format 999
break on report
compute max of "total" on report
-- FIX: "h00" was missing a compute in the original while h01..h23 had one.
compute max of "h00" on report
compute max of "h01" on report
compute max of "h02" on report
compute max of "h03" on report
compute max of "h04" on report
compute max of "h05" on report
compute max of "h06" on report
compute max of "h07" on report
compute max of "h08" on report
compute max of "h09" on report
compute max of "h10" on report
compute max of "h11" on report
compute max of "h12" on report
compute max of "h13" on report
compute max of "h14" on report
compute max of "h15" on report
compute max of "h16" on report
compute max of "h17" on report
compute max of "h18" on report
compute max of "h19" on report
compute max of "h20" on report
compute max of "h21" on report
compute max of "h22" on report
compute max of "h23" on report
-- Totals for the per-day archive-size report (second query).
compute sum of NUM on report
compute sum of GB on report
compute sum of MB on report
compute sum of KB on report
alter session set nls_date_format='DD MON YYYY';

-- Hourly redo log switch counts per thread over the last 10 days.
-- Each hNN column counts the log switches whose FIRST_TIME fell in hour NN.
-- (ANSI CASE is used in place of Oracle DECODE; results are identical.)
select thread#,
       trunc(first_time) as "date",
       to_char(first_time, 'Dy') as "Day",
       count(1) as "total",
       sum(case when to_char(first_time, 'HH24') = '00' then 1 else 0 end) as "h00",
       sum(case when to_char(first_time, 'HH24') = '01' then 1 else 0 end) as "h01",
       sum(case when to_char(first_time, 'HH24') = '02' then 1 else 0 end) as "h02",
       sum(case when to_char(first_time, 'HH24') = '03' then 1 else 0 end) as "h03",
       sum(case when to_char(first_time, 'HH24') = '04' then 1 else 0 end) as "h04",
       sum(case when to_char(first_time, 'HH24') = '05' then 1 else 0 end) as "h05",
       sum(case when to_char(first_time, 'HH24') = '06' then 1 else 0 end) as "h06",
       sum(case when to_char(first_time, 'HH24') = '07' then 1 else 0 end) as "h07",
       sum(case when to_char(first_time, 'HH24') = '08' then 1 else 0 end) as "h08",
       sum(case when to_char(first_time, 'HH24') = '09' then 1 else 0 end) as "h09",
       sum(case when to_char(first_time, 'HH24') = '10' then 1 else 0 end) as "h10",
       sum(case when to_char(first_time, 'HH24') = '11' then 1 else 0 end) as "h11",
       sum(case when to_char(first_time, 'HH24') = '12' then 1 else 0 end) as "h12",
       sum(case when to_char(first_time, 'HH24') = '13' then 1 else 0 end) as "h13",
       sum(case when to_char(first_time, 'HH24') = '14' then 1 else 0 end) as "h14",
       sum(case when to_char(first_time, 'HH24') = '15' then 1 else 0 end) as "h15",
       sum(case when to_char(first_time, 'HH24') = '16' then 1 else 0 end) as "h16",
       sum(case when to_char(first_time, 'HH24') = '17' then 1 else 0 end) as "h17",
       sum(case when to_char(first_time, 'HH24') = '18' then 1 else 0 end) as "h18",
       sum(case when to_char(first_time, 'HH24') = '19' then 1 else 0 end) as "h19",
       sum(case when to_char(first_time, 'HH24') = '20' then 1 else 0 end) as "h20",
       sum(case when to_char(first_time, 'HH24') = '21' then 1 else 0 end) as "h21",
       sum(case when to_char(first_time, 'HH24') = '22' then 1 else 0 end) as "h22",
       sum(case when to_char(first_time, 'HH24') = '23' then 1 else 0 end) as "h23"
from v$archived_log
where first_time > trunc(sysdate-10)
group by thread#, trunc(first_time), to_char(first_time, 'Dy')
order by 2, 1;

REM Script to calculate the archive log size generated per day for each Instances

-- Daily archived-redo volume per thread for the last day
-- (GB and MB are truncated to whole units; KB is exact).
select thread#,
       trunc(first_time) as "DATE",
       count(1) num,
       trunc(sum(blocks * block_size) / 1024 / 1024 / 1024) as GB,
       trunc(sum(blocks * block_size) / 1024 / 1024) as MB,
       sum(blocks * block_size) / 1024 as KB
from v$archived_log
where first_time > trunc(sysdate-1)
group by thread#, trunc(first_time)
order by 2, 1
;

########

###sample

FRA 区 无法自动删除归档 。

今天检查发现,状态正常了,应该跟log_archive_dest_state_2 之前一直是开启的状态,但是指向的sdt1备库已不存在,把这个log_archive_dest_state_2参数关闭后,恢复正常了。同时删除修改参数之前的归档日志即可。

CONFIGURE ARCHIVELOG DELETION POLICY TO APPLIED ON ALL STANDBY;

SQL>  SELECT substr(name, 1, 30) name, space_limit/(1073741824) AS Quota_GB, space_used/(1073741824) AS Used_GB, space_reclaimable/(1073741824) AS Reclaimable_GB, number_of_files AS files FROM V$RECOVERY_FILE_DEST ;

NAME   QUOTA_GB    USED_GB RECLAIMABLE_GB      FILES

------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ ---------- ---------- -------------- ----------

/fra 36 .265626907     .248462677         80

SYMPTOMS

*** Reviewed for Relevance 15-Jul-2015 ***

May encounter different types of errors depending on the situations:

  • Logs are not shipped at log switch or fal archiving fails
  • Value of the APPLIED column on the primary is not consistent with that on the standby
  • RMAN does not delete logs and results in error RMAN-08137
  • In some rare cases ORA-12154 may show up in the primary alert log
  • Other possible errors ORA-3113, ORA-3135
  • WARN: ARCH: Terminating pid <PID> hung on an I/O operation
    krsv_proc_kill: Killing 1 processes (Process by index)

CHANGES

Problem usually occurs after OS or network errors, or restarting the primary or standby instance or reboot the primary or standby node that abruptly crashes log shipping between the primary and standby

CAUSE

ARCx processes on the primary get stuck on the network forever, or the processes responsible for updating the APPLIED column get stuck and cannot recover themselves.
Additionally, these processes may be used for local and remote archiving, the heartbeat, and FAL fetching of logs on the primary.
So when they are all stuck and the maximum number specified by log_archive_max_processes is reached, they can cause the ambiguous errors shown above.

The worst case would be that all ARCx processes on the primary are stuck and cannot do local archiving, so all online redo log files fill up, which causes the primary database to hang.

This may be due to standby db crash, network errors or some abrupt outage on the standby or primary.

The other common cause is the firewall.

SOLUTION

ARCx processes on the primary need to be restarted.

Assuming that log transport from the primary is configured by log_archive_dest_2.

Please perform the following:

1) If the Data Guard Broker is running, disable Data Guard Broker on both primary and standby:

SQL> alter system set dg_broker_start=FALSE;

2) On the Primary Database:

- Set log transport state to DEFER status:
SQL> alter system set log_archive_dest_state_2='defer';
SQL> alter system switch logfile;
- Reset log_archive_dest_2
SQL> show parameter log_archive_dest_2
SQL> alter system set log_archive_dest_2 = '';
- Switch logfiles on the Primary
SQL> alter system switch logfile;

3) On the Standby Database:

- Cancel Managed Recovery
SQL> alter database recover managed standby database cancel;
- Shutdown the Standby Database
SQL> shutdown immediate

4) On the Primary: kill the ARCx Processes and the Database will respawn them automatically immediately without harming it.

ps -ef | grep -i arc
kill -9 <ospid of ARC process> <another ospid of ARC process> ...

5) On standby server, startup Standby Database and resume Managed Recovery

SQL> startup mount;
SQL> alter database recover managed standby database [using current logfile] disconnect;

6) Re-enable Log Transport Services on the Primary:

SQL> alter system set log_archive_dest_state_2='enable' ;

At this point all the ARCx processes should be up and running on the Primary.

7) Re-enable the Data Guard Broker for both, Primary and Standby if applicable:

SQL> alter system set dg_broker_start=true;

8) Please work with your Network Administrator to make sure the following Firewall Features are disabled.

  • SQLNet fixup protocol
  • Deep Packet Inspection (DPI)
  • SQLNet packet inspection
  • SQL Fixup
  • SQL ALG (Juniper firewall)

NOTE : To enable SQLNET tracing refer,

Step by Step Method to Enable SQLNET (Server Tracing) Dynamically on Dataguard (Doc ID 2010476.1)

The wording and features can vary by vendor but all the above have some impact on some packets (not all packets are affected).
Some firewalls can have an adverse effect on certain SQL packets transported across them (again, some not all).

#####11G ADG 监控

http://blog.itpub.net/29376842/viewspace-1222115/

监控 real-time apply 实时同步的dataguard 是否正常同步:

select to_char(SYSDATE,'yyyymmdd hh24:mi:ss') CTIME,NAME,VALUE,DATUM_TIME 
 from V$DATAGUARD_STATS  
 WHERE NAME LIKE '%lag';

其中如果apply lag对应的value大于0,那么就需要注意检查是否同步正常。如果相差
时间非常多,value值等于几分钟,甚至达到1个小时,那可能实时同步有问题,需要
检查 alert log 文件,其中会提示是否启动了real time apply, 检查最近的log,可以
发现: 
alter database recover managed standby database using current logfile disconnect from session 
MRP0 started with pid=20, OS id=647263927 
Managed Standby Recovery starting Real Time Apply

查询备库是否有MRP0进程: 
select * from v$managed_standby where process = 'MRP0';

说明实时同步已经开启,那么如果apply lag 还是常常大于0,那么可能是DG上standby redo log
的问题,如果standby redo log增加不正确,那么可以在alert log中找到类似如下的语句: 
RFS[1]: No Standby redo logfiles created for thread 1

那么可能是standby redo log没有在DG上创建,也有可能创建了,但是thread错误导致。 可以通过
select GROUP#, THREAD#, SEQUENCE#, STATUS from  v$standby_log  查看, 如果创建了standby 
redo log , 那么在应用redo时,一定有至少1个standby redo log的状态是 ACTIVE 的,其他大多数
是 unassigned .  如果都是 UNASSIGNED ,那么就是standby redo log的问题。

可以通过如下方式重新加入: 
shut immediate  
startup mount

alter database add standby logfile thread 1 group 4  '/opt/oracle/oradata/test/standby_redo04.log ' size 1024M;                                   
alter database add standby logfile thread 1 group 5  '/opt/oracle/oradata/test/standby_redo05.log ' size 1024M;  
....

最后再开启为read only with apply的方式:

startup nomount
alter database mount standby database ; 
alter database open read only ; 
SQL> select open_mode from v$database; 
OPEN_MODE 
--------------------
READ ONLY

SQL> alter database recover managed standby database using current logfile disconnect from session;
SQL> select open_mode from v$database;  
OPEN_MODE
--------------------
READ ONLY WITH APPLY

还有一种影响同步的是,apply 同步出现错误 , 可以查询 select error from  v$archive_dest 查看是否有同步错误 。

如果需要暂停或取消同步apply :  
alter database recover managed standby database cancel ;

转 Logs are not shipped to the physical standby database的更多相关文章

  1. 【原】Configuring Oracle Data Guard In Physical Standby Database

    作者:david_zhang@sh [转载时请以超链接形式标明文章] http://www.cnblogs.com/david-zhang-index/p/5042640.html参照文档:https ...

  2. Physical Standby Database Failover

    1.物理standby failover 切换 故障转移时在一些糟糕的事情发生时执行的计划外事件,需要将生产库移动到DR站点.有意思的是,这时候人们通常忙来忙去,试图弄明白发生了什么,需要做些什么才能 ...

  3. Flashing Back a Failed Primary Database into a Physical Standby Database(闪回FAILOVER失败的物理备库)

    文档操作依据来自官方网址:https://docs.oracle.com/cd/E11882_01/server.112/e41134/scenarios.htm#SBYDB4888 闪回FAILOV ...

  4. Creating a Physical Standby Database 11g

    1.Environment Item Primary database standby database Platform Redhat 5.4 Redhat 5.4 Hostname gc1 gc2 ...

  5. Oracle_Data_Gard Create a physical standby database

    创建之前要对DG的环境有一个总体的规划和了解.                                                   规划 IP 192.168.3.161 192.16 ...

  6. Data Guard Physical Standby - RAC Primary to RAC Standby 使用第二个网络 (Doc ID 1349977.1)

    Data Guard Physical Standby - RAC Primary to RAC Standby using a second network (Doc ID 1349977.1) A ...

  7. 18c & 19c Physical Standby Switchover Best Practices using SQL*Plus (Doc ID 2485237.1)

    18c & 19c Physical Standby Switchover Best Practices using SQL*Plus (Doc ID 2485237.1) APPLIES T ...

  8. 11.2 Data Guard Physical Standby Switchover Best Practices using SQL*Plus (Doc ID 1304939.1)

    11.2 Data Guard Physical Standby Switchover Best Practices using SQL*Plus (Doc ID 1304939.1) APPLIES ...

  9. ORACLE 11gR2 DG(Physical Standby)日常维护01

    环境:RHEL 6.4 + Oracle 11.2.0.4 一.主备手工切换 1.1 主库,切换成备库并启动到mount 1.2 备库,切换成主库并启动到open 1.3 新的备库启动日志应用 二.重 ...

随机推荐

  1. 101334E Exploring Pyramids

    传送门 题目大意 看样例,懂题意 分析 实际就是个区间dp,我开始居然不会...详见代码(代码用的记忆化搜索) 代码 #include<iostream> #include<cstd ...

  2. Entity Framework Tutorial Basics(6):Model Browser

    Model Browser: We have created our first Entity Data Model for School database in the previous secti ...

  3. 5.移动渗透测试工具之drozer

    本篇博文主要来介绍drozer在移动渗透中的作用 本次实验环境所用工具为:夜神模拟器,drozer-2.3.4,sieve.apk,adb drozer安装这里不再多嘴,给出链接:https://ww ...

  4. Paint类的介绍

    * Paint即画笔,在绘图过程中起到了极其重要的作用,画笔主要保存了颜色, * 样式等绘制信息,指定了如何绘制文本和图形,画笔对象有很多设置方法, * 大体上可以分为两类,一类与图形绘制相关,一类与 ...

  5. 利用using和try/finally语句来清理资源

    使用非托管资源的类型必须实现IDisposable接口的Dispose()方法来精确的释放系统资源..Net环境的这一规则使得释放资源代码的职责 是类型的使用者,而不是类型或系统.因此,任何时候你在使 ...

  6. idea中解决Error:java: Compilation failed: internal java compiler error的问题

    项目中,使用gradle做项目构建,当我们想更改JDK的版本时,报以下错误: Information:Using javac 1.8.0_111 to compile java sourcesInfo ...

  7. 小小c#算法题 - 7 - 堆排序 (Heap Sort)

    在讨论堆排序之前,我们先来讨论一下另外一种排序算法——插入排序.插入排序的逻辑相当简单,先遍历一遍数组找到最小值,然后将这个最小值跟第一个元素交换.然后遍历第一个元素之后的n-1个元素,得到这n-1个 ...

  8. 在Tomcat启动时直接创建servlet(二)

  9. Codeforces 917B MADMAX (DP+博弈)

    <题目链接> 题目大意:给定一个DAG图,其中图的边权是给定的字符所对应的ascii码,现在A先手,B后手,每次沿DAG图走一步,但是第i次走的边权一定要大于等于第i-1次走的边权(这里是 ...

  10. ubuntu - 14.04,该如何分区安装(初学者或不用它作为生成环境使用)?

    ubuntu14.04,实际上现在它的安装很简单了,全图形界面,可以选择母语,但是实际使用起来如果分区不当,会让我们付出惨痛的代价,那么我们应该怎么分区安装呢? 如果我们并不是把它作为专业的服务器,或 ...