[Linux-HA] How to allow resources to ping-pong forever?

Dominik Klein dk at in-telegence.net
Mon Mar 3 03:56:11 MST 2008


Alex Spengler wrote:
> Hi,
> 
> I'm stuck in setting up my cluster.
> What I want to achieve is
> - run apache on whatever node together with cluster IP which is
> 172.23.100.200.
> - if apache fails -> switch over to other node
> - if gateway 172.23.100.1 is not reachable -> switch over to other node
> 
> AND allow unlimited number of switchovers!
> This is the problem, it does switch over but only once or twice and then
> it's stuck .. any ideas?

*unlimited* is pretty much impossible I think.

But you may start with this:

Set a #uname score of 5050 for one node, 5000 for the other node.
Set resource_failure_stickiness to -100.
Use a multiplier of 100 for pingd, and use pingd as a score_attribute (something like:
        <rsc_location id="rsc-loc-syslog" rsc="syslog">
                <rule id="syslog-connected" score_attribute="pingd">
                        <expression id="syslog-connected-rule-1" attribute="pingd" operation="defined"/>
                </rule>
        </rsc_location>
)

Then you will end up with:

Startup
node1: 5050 + 100 pingd
node2: 5000 + 100 pingd
decision: start on node1
now, whatever fails will reduce the score by 100, causing a failover.

With a start at 5000, you can have 50 failovers. Enlarge as needed.

Regards
Dominik

> thanks in advance
> Alex
> 
> *Here my config:*
> *Node1 - big-sSTATSfe1*
> eth0: 172.23.100.26
> eth1: 192.168.0.1
> 
> *Node2 - big-sSTATSfe2*
> eth0: 172.23.100.22
> eth1: 192.168.0.2
> 
> *ha.cf:*
> use_logd yes
> node big-sSTATSfe1 big-sSTATSfe2
> deadtime 5
> deadping 5
> initdead 60
> warntime 3
> crm true
> ucast eth0 172.23.100.22                 # 172.23.100.26 on the second node
> ucast eth1 192.168.0.2                # 192.168.0.1 on the second node
> ping 172.23.100.1
> 
> *cib.xml*
> <cib admin_epoch="1" epoch="1" num_updates="1" generated="true"
> have_quorum="true" ignore_dtd="false" num_peers="2"
> dc_uuid="68cd29ed-c7fe-44d9-9fe8-a1258e5b1d0f">
>  <configuration>
>   <crm_config>
>    <cluster_property_set id="cib-bootstrap-options">
>     <attributes>
>      <nvpair id="cib-bootstrap-options-symmetric-cluster"
> name="symmetric-cluster" value="true"/>
>      <nvpair id="cib-bootstrap-options-no-quorum-policy"
> name="no-quorum-policy" value="ignore"/>
>      <nvpair id="cib-bootstrap-options-default-resource-stickiness"
> name="default-resource-stickiness" value="0"/>
>      <nvpair id="cib-bootstrap-options-default-resource-failure-stickiness"
> name="default-resource-failure-stickiness" value="-100"/>
>      <nvpair id="cib-bootstrap-options-stonith-enabled"
> name="stonith-enabled" value="true"/>
>      <nvpair id="cib-bootstrap-options-stonith-action" name="stonith-action"
> value="reboot"/>
>      <nvpair id="cib-bootstrap-options-remove-after-stop"
> name="remove-after-stop" value="false"/>
>      <nvpair id="cib-bootstrap-options-short-resource-names"
> name="short-resource-names" value="true"/>
>      <nvpair id="cib-bootstrap-options-transition-idle-timeout"
> name="transition-idle-timeout" value="1min"/>
>      <nvpair id="cib-bootstrap-options-default-action-timeout"
> name="default-action-timeout" value="10s"/>
>      <nvpair id="cib-bootstrap-options-is-managed-default"
> name="is-managed-default" value="true"/>
>     </attributes>
>    </cluster_property_set>
>   </crm_config>
>   <nodes/>
>   <resources>
>    <group id="apache_group_p80" ordered="true" collocated="true">
>     <primitive class="ocf" provider="heartbeat" type="IPaddr"
> id="IPaddr_p80">
>      <instance_attributes id="IPaddr_1_inst_attr">
>       <attributes>
>        <nvpair id="IPaddr_p80_attr_0" name="ip" value="172.23.100.200"/>
>        <nvpair id="IPaddr_p80_attr_1" name="netmask" value="255.255.255.0"/>
>        <nvpair id="IPaddr_p80_attr_2" name="nic" value="eth0"/>
>        <nvpair id="IPaddr_p80_attr_3" name="broadcast" value="172.23.100.255"/>
>       </attributes>
>      </instance_attributes>
>      <operations>
>       <op id="IPaddr_p80_mon" name="monitor" interval="2s" timeout="3s"/>
>      </operations>
>     </primitive>
>     <primitive id="apache_p80" class="lsb" type="apache"
> provider="heartbeat">
>      <instance_attributes id="inatt_apache_p80">
>       <attributes>
>        <nvpair name="configfile" value="/etc/httpd/conf/httpd.conf"
> id="nvpb1_apache_p80"/>
>        <nvpair name="statusurl" value="http://172.23.100.200:80/server-status"
> id="nvpb2_apache_p80"/>
>       </attributes>
>      </instance_attributes>
>      <operations>
>       <op id="apache_p80:start" name="start" timeout="10s"/>
>       <op id="apache_p80:stop" name="stop" timeout="10s"/>
>       <op id="apache_p80:monitor" name="monitor" interval="2s"
> timeout="5s"/>
>      </operations>
>     </primitive>
>    </group>
>    <clone id="pingd">
>     <instance_attributes id="pingd">
>      <attributes>
>       <nvpair id="pingd-clone_max" name="clone_max" value="2"/>
>       <nvpair id="pingd-clone_node_max" name="clone_node_max" value="1"/>
>      </attributes>
>     </instance_attributes>
>     <primitive id="gateway" class="ocf" type="pingd" provider="heartbeat">
>      <operations>
>       <op id="gateway:child-monitor" name="monitor" interval="5s"
> timeout="5s" prereq="nothing"/>
>       <op id="gateway:child-start" name="start" prereq="nothing"/>
>      </operations>
>      <instance_attributes id="pingd_inst_attrs">
>       <attributes>
>        <nvpair id="pingd-dampen" name="dampen" value="5s"/>
>        <nvpair id="pingd-multiplier" name="multiplier" value="100"/>
>       </attributes>
>      </instance_attributes>
>     </primitive>
>    </clone>
>   </resources>
>   <constraints>
>    <rsc_colocation id="colocation_apache_group_p80" from="apache_group_p80"
> to="apache_group_p80" score="INFINITY"/>
>    <rsc_location id="gateway:connected" rsc="apache_group_p80">
>     <rule id="gateway:connected:rule" score="-INFINITY" boolean_op="or">
>      <expression id="gateway:connected:expr:undefined" attribute="pingd"
> operation="not_defined"/>
>      <expression id="gateway:connected:expr:zero" attribute="pingd"
> operation="lte" value="0"/>
>     </rule>
>    </rsc_location>
>   </constraints>
>  </configuration>
>  <status/>
> </cib>
> _______________________________________________
> Linux-HA mailing list
> Linux-HA at lists.linux-ha.org
> http://lists.linux-ha.org/mailman/listinfo/linux-ha
> See also: http://linux-ha.org/ReportingProblems
> 


More information about the Linux-HA mailing list