[Linux-HA] How to allow resources to ping-pong forever?
Dominik Klein
dk at in-telegence.net
Mon Mar 3 03:56:11 MST 2008
Alex Spengler wrote:
> Hi,
>
> I'm stuck in setting up my cluster.
> What I want to achieve is
> - run apache on whatever node together with cluster IP which is
> 172.23.100.200.
> - if apache fails -> switch over to other node
> - if gateway 172.23.100.1 is not reachable -> switch over to other node
>
> AND allow unlimited number of switchovers!
> This is the problem, it does switch over but only once or twice and then
> it's stuck .. any ideas?
*unlimited* is pretty much impossible I think.
But you may start with this:
Set a #uname score of 5050 for one node, 5000 for the other node.
Set resource_failure_stickiness to -100.
Use a multiplier of 100 for pingd, and use pingd as the score_attribute (something like:
<rsc_location id="rsc-loc-syslog" rsc="syslog">
<rule id="syslog-connected" score_attribute="pingd">
<expression id="syslog-connected-rule-1"
attribute="pingd" operation="defined"/>
</rule>
</rsc_location>
)
Then you will end up with:
Startup
node1: 5050 + 100 pingd
node2: 5000 + 100 pingd
decision: start on node1
Now, whatever fails will reduce the score by 100, causing a failover.
With a start at 5000, you can have 50 failovers. Enlarge as needed.
Regards
Dominik
> thanks in advance
> Alex
>
> *Here my config:*
> *Node1 - big-sSTATSfe1*
> eth0: 172.23.100.26
> eth1: 192.168.0.1
>
> *Node2 - big-sSTATSfe2*
> eth0: 172.23.100.22
> eth1: 192.168.0.2
>
> *ha.cf:*
> use_logd yes
> node big-sSTATSfe1 big-sSTATSfe2
> deadtime 5
> deadping 5
> initdead 60
> warntime 3
> crm true
> ucast eth0 172.23.100.22 # 172.23.100.26 on the second node
> ucast eth1 192.168.0.2 # 192.168.0.1 on the second node
> ping 172.23.100.1
>
> *cib.xml*
> <cib admin_epoch="1" epoch="1" num_updates="1" generated="true"
> have_quorum="true" ignore_dtd="false" num_peers="2"
> dc_uuid="68cd29ed-c7fe-44d9-9fe8-a1258e5b1d0f">
> <configuration>
> <crm_config>
> <cluster_property_set id="cib-bootstrap-options">
> <attributes>
> <nvpair id="cib-bootstrap-options-symmetric-cluster"
> name="symmetric-cluster" value="true"/>
> <nvpair id="cib-bootstrap-options-no-quorum-policy"
> name="no-quorum-policy" value="ignore"/>
> <nvpair id="cib-bootstrap-options-default-resource-stickiness"
> name="default-resource-stickiness" value="0"/>
> <nvpair id="cib-bootstrap-options-default-resource-failure-stickiness"
> name="default-resource-failure-stickiness" value="-100"/>
> <nvpair id="cib-bootstrap-options-stonith-enabled"
> name="stonith-enabled" value="true"/>
> <nvpair id="cib-bootstrap-options-stonith-action" name="stonith-action"
> value="reboot"/>
> <nvpair id="cib-bootstrap-options-remove-after-stop"
> name="remove-after-stop" value="false"/>
> <nvpair id="cib-bootstrap-options-short-resource-names"
> name="short-resource-names" value="true"/>
> <nvpair id="cib-bootstrap-options-transition-idle-timeout"
> name="transition-idle-timeout" value="1min"/>
> <nvpair id="cib-bootstrap-options-default-action-timeout"
> name="default-action-timeout" value="10s"/>
> <nvpair id="cib-bootstrap-options-is-managed-default"
> name="is-managed-default" value="true"/>
> </attributes>
> </cluster_property_set>
> </crm_config>
> <nodes/>
> <resources>
> <group id="apache_group_p80" ordered="true" collocated="true">
> <primitive class="ocf" provider="heartbeat" type="IPaddr"
> id="IPaddr_p80">
> <instance_attributes id="IPaddr_1_inst_attr">
> <attributes>
> <nvpair id="IPaddr_p80_attr_0" name="ip" value="172.23.100.200"/>
> <nvpair id="IPaddr_p80_attr_1" name="netmask" value="255.255.255.0"/>
> <nvpair id="IPaddr_p80_attr_2" name="nic" value="eth0"/>
> <nvpair id="IPaddr_p80_attr_3" name="broadcast" value="172.23.100.255"/>
> </attributes>
> </instance_attributes>
> <operations>
> <op id="IPaddr_p80_mon" name="monitor" interval="2s" timeout="3s"/>
> </operations>
> </primitive>
> <primitive id="apache_p80" class="lsb" type="apache"
> provider="heartbeat">
> <instance_attributes id="inatt_apache_p80">
> <attributes>
> <nvpair name="configfile" value="/etc/httpd/conf/httpd.conf"
> id="nvpb1_apache_p80"/>
> <nvpair name="statusurl" value="http://172.23.100.200:80/server-status"
> id="nvpb2_apache_p80"/>
> </attributes>
> </instance_attributes>
> <operations>
> <op id="apache_p80:start" name="start" timeout="10s"/>
> <op id="apache_p80:stop" name="stop" timeout="10s"/>
> <op id="apache_p80:monitor" name="monitor" interval="2s"
> timeout="5s"/>
> </operations>
> </primitive>
> </group>
> <clone id="pingd">
> <instance_attributes id="pingd">
> <attributes>
> <nvpair id="pingd-clone_max" name="clone_max" value="2"/>
> <nvpair id="pingd-clone_node_max" name="clone_node_max" value="1"/>
> </attributes>
> </instance_attributes>
> <primitive id="gateway" class="ocf" type="pingd" provider="heartbeat">
> <operations>
> <op id="gateway:child-monitor" name="monitor" interval="5s"
> timeout="5s" prereq="nothing"/>
> <op id="gateway:child-start" name="start" prereq="nothing"/>
> </operations>
> <instance_attributes id="pingd_inst_attrs">
> <attributes>
> <nvpair id="pingd-dampen" name="dampen" value="5s"/>
> <nvpair id="pingd-multiplier" name="multiplier" value="100"/>
> </attributes>
> </instance_attributes>
> </primitive>
> </clone>
> </resources>
> <constraints>
> <rsc_colocation id="colocation_apache_group_p80" from="apache_group_p80"
> to="apache_group_p80" score="INFINITY"/>
> <rsc_location id="gateway:connected" rsc="apache_group_p80">
> <rule id="gateway:connected:rule" score="-INFINITY" boolean_op="or">
> <expression id="gateway:connected:expr:undefined" attribute="pingd"
> operation="not_defined"/>
> <expression id="gateway:connected:expr:zero" attribute="pingd"
> operation="lte" value="0"/>
> </rule>
> </rsc_location>
> </constraints>
> </configuration>
> <status/>
> </cib>
> _______________________________________________
> Linux-HA mailing list
> Linux-HA at lists.linux-ha.org
> http://lists.linux-ha.org/mailman/listinfo/linux-ha
> See also: http://linux-ha.org/ReportingProblems
>
More information about the Linux-HA
mailing list