[Linux-HA] simple ip failover problem

Sun Jiang Dong hasjd at cn.ibm.com
Thu Oct 13 04:08:58 MDT 2005


Hartmaier Alexander wrote:
> Hi!
> We use heartbeat 1.x for several years now for our RedHat based firewall
> distro.
> I try to convert our current config to 2.x and have problems to do so.
Which 2.x version are you using? 2.0.2 is the latest stable version.

> 
> Using IPaddr2 doesn't work, because I get the error:
> Oct 11 14:46:30 hbtest1 lrmd: [6989]: ERROR: Process 6994 failed to redirect
> stdout for its background child (daemon) processes. This will likely cause
> those processes to die mysteriously at some later time (terminated by signal
> SIGPIPE).
I fixed the bug just now. Please try the CVS version of IPaddr2.

> 
> When using IPaddr I have to kill the process '/bin/sh 
> /usr/lib/ocf/resource.d//heartbeat/IPaddr start' to be able to stop heartbeat 
> cause it hangs.
How long did it hang?

From the log, it seems that too many operations are queued in lrmd,
but I cannot tell what causes this. If you enable the debug option in
/etc/ha.d/ha.cf (add "debug 1"), the log should be more helpful.


> 
> Here are my current config files:
> 
> /etc/ha.d/ha.cf
> 
> #logfacility    local0
> #keepalive 2
> #deadtime 10
> #initdead 20
> # heartbeat communication-port
> udpport         694
> # heartbeat communication.interface
> #bcast          eth1
> ucast           eth1    10.30.8.93
> 
> auto_failback   on
> 
> crm             yes
> use_logd        on
> 
> respawn         hacluster       /usr/lib/heartbeat/ipfail
> ping            10.30.8.90
> 
> # heartbeat nodes
> node            hbtest1.srv.dsh.at
> node            hbtest2.srv.dsh.at
> 
> 
> 
> /var/lib/heartbeat/crm/cib.xml:
> 
> <cib dc_uuid="bf4c20b8-6131-4c6d-8f10-3a512e012fa1" cib_feature_revision="1" 
> admin_epoch="0" epoch="9" num_updates="109" have_quorum="true" 
> last_written="Tue Oct 11 15:08:25 2005
> " generated="true" num_peers="1" origin="hbtest1.srv.dsh.at" 
> debug_source="finalize_join" ccm_transition="1">
>    <configuration>
>      <crm_config>
>        <nvpair id="transition_idle_timeout" name="transition_idle_timeout" 
> value="20s"/>
>        <nvpair id="symmetric_cluster" name="symmetric_cluster" value="true"/>
>        <nvpair id="stonith_enabled" name="stonith_enabled" value="false"/>
>        <nvpair id="no_quorum_policy" name="no_quorum_policy" value="stop"/>
>        <nvpair id="suppress_cib_writes" name="suppress_cib_writes" 
> value="false"/>
>        <nvpair id="default_resource_stickiness" 
> name="default_resource_stickiness" value="INFINITY"/>
>        <nvpair id="require_quorum" name="require_quorum" value="true"/>
>      </crm_config>
>      <nodes>
>        <node id="bf4c20b8-6131-4c6d-8f10-3a512e012fa1" 
> uname="hbtest1.srv.dsh.at" type="member"/>
>      </nodes>
>      <resources>
>        <group id="group_1">
>          <primitive id="IPaddr_1" class="ocf" type="IPaddr" 
> provider="heartbeat">
>            <operations>
>              <op id="1" name="monitor" interval="5s" timeout="3s"/>
>            </operations>
>            <instance_attributes>
>              <attributes>
>                <nvpair id="1" name="ip" value="10.30.8.93"/>
>              </attributes>
>            </instance_attributes>
>          </primitive>
>        </group>
>      </resources>
>      <constraints>
>        <rsc_location id="rsc_location_group_1" rsc="group_1">
>          <rule id="prefered_location_group_1" score="100">
>            <expression id="1" attribute="#uname" operation="eq" 
> value="hbtest1.srv.dsh.at"/>
>          </rule>
>        </rsc_location>
>      </constraints>
>    </configuration>
>    <status>
>      <node_state id="bf4c20b8-6131-4c6d-8f10-3a512e012fa1" 
> uname="hbtest1.srv.dsh.at" in_ccm="true" join="member" origin="do_lrm_query" 
> crmd="online" ha="active" expected="member">
> 
>        <lrm>
>          <lrm_resources>
>            <lrm_resource id="group_1:IPaddr_1" last_op="stop" 
> rsc_state="stopped" rc_code="0" op_status="0">
>              <lrm_rsc_op id="group_1:IPaddr_1_start_0" operation="start" 
> origin="do_update_resource" 
> transition_key="0:68c9ef46-ab25-4479-8fc5-545f249c3535" 
> transition_magic="0:0:68c9ef46-ab25-4479-8fc5-545f249c3535" rsc_state="running" call_id="2" 
> rc_code="0" op_status="0"/>
>              <lrm_rsc_op id="group_1:IPaddr_1_stop_0" operation="stop" 
> origin="do_update_resource" 
> transition_key="5:68c9ef46-ab25-4479-8fc5-545f249c3535" 
> transition_magic="0:5:68c9ef46-ab25-4479-8fc5-545f249c3535" rsc_state="stopped" call_id="7" 
> rc_code="0" op_status="0"/>
>            </lrm_resource>
>          </lrm_resources>
>        </lrm>
>      </node_state>
>    </status>
>  </cib>
> 
> 
> /var/log/cluster.log is attached.
> 
> With best regards
> Alexander Hartmaier
> 
> T-Systems Austria GesmbH
> Rennweg 97-99
> A-1030 Vienna
> 
> 
> 
> 
> ------------------------------------------------------------------------
> 
> _______________________________________________
> Linux-HA mailing list
> Linux-HA at lists.linux-ha.org
> http://lists.linux-ha.org/mailman/listinfo/linux-ha
> See also: http://linux-ha.org/ReportingProblems

-- 
BRs,

Sun Jiang Dong




More information about the Linux-HA mailing list