Various additions, primarily related to providing backup services. Right now the main lacking component is "world manager" integration. This can be done

gce3 [2002-06-20 23:32:40]
Various additions, primarily related to providing backup services.  Right now the main lacking component is "world manager" integration.  This can be done
manually or lifted from CWM code but discussion is needed regarding CWM
design.
Filename
server/zone/MessageDefinitions.java
server/zone/PeerCommunicator.java
server/zone/ZoneServer.java
server/zone/authority/NetworkAuthority.java
server/zone/metrics/NetworkMetrics.java
server/zone/metrics/ServerTracker.java
server/zone/schemes/DefaultNetworkAuthority.java
diff --git a/server/zone/MessageDefinitions.java b/server/zone/MessageDefinitions.java
index 0657f08..1060eb8 100644
--- a/server/zone/MessageDefinitions.java
+++ b/server/zone/MessageDefinitions.java
@@ -16,12 +16,14 @@ public interface MessageDefinitions {
     public static String OFFER_SECONDARY_BACKUP   = "offer_secondary_backup";
     public static String REJECT_ZONE_BACKUP       = "reject_zone_backup";
     public static String ACCEPT_ZONE_BACKUP       = "accept_zone_backup";
-
-
+    public static String SEND_ALIVE_UPDATE        = "send_alive_update";
+    public static String REQUEST_ZONE_SERVER_CANDIDATE = "rzsc";
+    public static String OFFER_ZONE_SERVICE       = "offer_zone_service";

     // Message body key definitions
     public static String ZONE_LIST                = "zonelist";
     public static String SET_PREVIOUS_NEIGHBOR    = "setprevneighbor";
     public static String SET_NEXT_NEIGHBOR        = "setnextneighbor";
+    public static String CHIME_SERVER             = "chimeserver";

 } // end MessageDefinitions Stringerface
diff --git a/server/zone/PeerCommunicator.java b/server/zone/PeerCommunicator.java
index 768e0a9..773ce73 100644
--- a/server/zone/PeerCommunicator.java
+++ b/server/zone/PeerCommunicator.java
@@ -23,6 +23,15 @@ public class PeerCommunicator implements MessageDefinitions {



+    public static void sendBroadcast(String message) {
+	DirectoryInterface.publish(new DIType(message),
+				   new DIMessageBody(message));
+    }
+
+
+
+
+
     public static void initiateZoneTransfer
 	(Zone[] list, NetworkNode zoneServer, NetworkNode newPrevNeighbor,
 	 NetworkNode newNextNeighbor) {
diff --git a/server/zone/ZoneServer.java b/server/zone/ZoneServer.java
index 0bf1035..7d195a2 100644
--- a/server/zone/ZoneServer.java
+++ b/server/zone/ZoneServer.java
@@ -88,6 +88,14 @@ public class ZoneServer implements DIEventReceiver, MessageDefinitions {
 	serverThread = null;
 	runThread = true;

+	metrics = new NetworkMetrics();
+
+	// When we first start up we do NOT perform any zone services.  We
+	// enable service ONLY if
+	//     a) requested to by another known zone server
+	//     b) we can not find any other zone servers on the network
+	metrics.disableZoneService();
+
 	zoneLayout = new ZoneSettings();
 	dataManager = new DataManager();
 	zoneManager = new ZoneManager(zoneLayout, dataManager);
@@ -96,8 +104,7 @@ public class ZoneServer implements DIEventReceiver, MessageDefinitions {

 	zoneControl = new DefaultZoneAuthority(zoneManager, di);
 	backupControl = new DefaultBackupAuthority(zoneManager,dataManager,di);
-	metrics = null;
-	networkControl = new DefaultNetworkAuthority(metrics);
+	networkControl = new DefaultNetworkAuthority(metrics, zoneLayout);

     }

@@ -133,6 +140,7 @@ public class ZoneServer implements DIEventReceiver, MessageDefinitions {
      **/
     public void stopZoneServer() {
 	runThread = false;
+	metrics.disableZoneService();
     }


@@ -147,10 +155,19 @@ public class ZoneServer implements DIEventReceiver, MessageDefinitions {
      * servers, accepting extra zone responsibility, or offsetting current
      * zone responsibility.  This is the zone server's "main method" for
      * network balancing, and runs in its own thread.
+     *
+     * If we're providing zone services (i.e. the zone server is "enabled"),
+     * we also send an "alive" update periodically to announce our presence
+     * to the network
      **/
     private void runMainThread() {
 	while (runThread) {
-	    networkControl.monitorNetwork();
+
+	    if (metrics.zoneServiceEnabled()) {
+		networkControl.monitorNetwork();
+		PeerCommunicator.sendBroadcast(SEND_ALIVE_UPDATE);
+	    }
+
 	    try {new Thread().sleep(1000);} catch (Exception e) {}
 	}
     }
@@ -161,14 +178,36 @@ public class ZoneServer implements DIEventReceiver, MessageDefinitions {

     public void receiveMessage(DIMessage msg) {

-	String msgBody = msg.getBody().getData();
+	String msgBody = msg.getBody().toString();
 	Hashtable data = StringParser.parseKeyValueString(msgBody);
 	NetworkNode source = new NetworkNode(msg.getSender().toString(), null);

-
+
+	/**
+	 *  Locally handled stuff
+	 **/
 	if (msg.getType().equals(STOP_ZONE_SERVICE))
 	    stopZoneServer();
+	else if (msg.getType().equals(SEND_ALIVE_UPDATE))
+	    metrics.getServerList().setLiveServer(msg.getSender().toString());
+
+
+	/**
+	 * Handled by the network control authority
+	 **/
+	else if (msg.getType().equals(REQUEST_ZONE_SERVER_CANDIDATE))
+	    ; // MUST HAVE SOME WAY TO SELECT FROM LIST OF CHIME SERVERS
+	// UNDER OUR DOMAIN AND RETURN ONE OF THOSE CHIME SERVERS TO THE
+	// REQUESTING ZS
+	else if (msg.getType().equals(OFFER_ZONE_SERVICE)) {
+	    NetworkNode cs = (NetworkNode) retrieveObject(data, CHIME_SERVER);
+	    networkControl.handleZoneServiceOffer(cs);
+	}
+

+	/**
+         *  Handled by zone control authority
+         **/
 	else if (msg.getType().equals(TRANSFER_ZONE)) {
 	    Zone[] list = (Zone[]) retrieveObject(data, ZONE_LIST);
 	    NetworkNode pn=(NetworkNode) retrieveObject(data,
@@ -186,7 +225,10 @@ public class ZoneServer implements DIEventReceiver, MessageDefinitions {
 	    zoneControl.handleZoneTransferAcceptance(list, source);
 	}

-
+
+	/**
+         *  Handled by backup control authority
+	 **/
 	else if (msg.getType().equals(REQUEST_PRIMARY_BACKUP)) {
 	    Zone[] list = (Zone[]) retrieveObject(data, ZONE_LIST);
 	    backupControl.handlePrimaryBackupRequest(list, source);
@@ -219,6 +261,7 @@ public class ZoneServer implements DIEventReceiver, MessageDefinitions {



+    /** Nothing needs to be done here --> interface requirement **/
     public void receiveEvent(DIEvent event) {
     }

@@ -226,6 +269,7 @@ public class ZoneServer implements DIEventReceiver, MessageDefinitions {



+    /** Nothing needs to be done here --> interface requirement **/
     public void receiveResult(DIHost result) {
     }

@@ -234,21 +278,6 @@ public class ZoneServer implements DIEventReceiver, MessageDefinitions {


     /**
-     * Searches across the network for current CHIME servers that are
-     * eligible to become zone servers, selects one such server,
-     * promotes it to a zone server, and assigns zone responsibility to
-     * the new server.  This method assumes that the need for such a server
-     * has already been identified elsewhere and that the local server has
-     * responsibility for fulfilling that need.
-     **/
-    private void addNewZoneServer() {
-    }
-
-
-
-
-
-    /**
      * Given a serialized object stored in a hashtable, retrieves that
      * object, deserializes it, and returns the re-instantiated object
      *
@@ -256,13 +285,18 @@ public class ZoneServer implements DIEventReceiver, MessageDefinitions {
      * @param serialized - hash key for the desired object
      **/
     private static Object retrieveObject(Hashtable h, String serialized) {
-	serialized = (String) h.get(serialized);
-	ByteArrayInputStream stream;
-	stream = new ByteArrayInputStream(serialized.getBytes());
-	ObjectInputStream in = new ObjectInputStream(stream);
-	Object o = in.readObject();
-	in.close();
-	return o;
+	try {
+	    serialized = (String) h.get(serialized);
+	    ByteArrayInputStream stream;
+	    stream = new ByteArrayInputStream(serialized.getBytes());
+	    ObjectInputStream in = new ObjectInputStream(stream);
+	    Object o = in.readObject();
+	    in.close();
+	    return o;
+	}
+	catch (Exception e) {
+	    return null;
+	}
     }


@@ -270,3 +304,4 @@ public class ZoneServer implements DIEventReceiver, MessageDefinitions {


 } // end ZoneServer class
+
diff --git a/server/zone/authority/NetworkAuthority.java b/server/zone/authority/NetworkAuthority.java
index d5dcbd4..bf7de48 100644
--- a/server/zone/authority/NetworkAuthority.java
+++ b/server/zone/authority/NetworkAuthority.java
@@ -51,10 +51,15 @@ public interface NetworkAuthority {
     public void handleDroppedChimeServer(NetworkNode chimeServer);


-
+    /**
+     * Given a CHIME server that is offering (available) to provide zone
+     * services, react appropriately.
+     **/
+    public void handleZoneServiceOffer(NetworkNode chimeServer);

 } // end NetworkAuthority interface




+
diff --git a/server/zone/metrics/NetworkMetrics.java b/server/zone/metrics/NetworkMetrics.java
index 77c0ebd..2ba50cb 100644
--- a/server/zone/metrics/NetworkMetrics.java
+++ b/server/zone/metrics/NetworkMetrics.java
@@ -13,11 +13,6 @@ import java.util.Hashtable;
  * are useful for determining when network reorganization may be appropriate
  * due to excessively high or low usage, mis-configuration, etc.
  *
- * This class maintains no knowledge about what its data means.  It is up
- * to other classes to manipulate and interpret data correctly, as well as
- * to know what types of data are actually available.  Note that all
- * metric data is numeric.
- *
  * @author Gregory Estren (gce3@columbia.edu)
  * @version 1.0
  **/
@@ -25,7 +20,10 @@ import java.util.Hashtable;
 public class NetworkMetrics {

     private Hashtable t;
+    private ServerTracker serverList;

+    private boolean zoneServiceEnabled = false;
+

     /**
      * Instantiate a new NetworkMetrics object with no metrics initially
@@ -33,8 +31,25 @@ public class NetworkMetrics {
      **/
     public NetworkMetrics() {
 	t = new Hashtable();
+	serverList = new ServerTracker();
+    }
+
+
+    public boolean zoneServiceEnabled() {
+	return zoneServiceEnabled;
+    }
+
+    public void disableZoneService() {
+	zoneServiceEnabled = false;
+    }
+
+    public void enableZoneService() {
+	zoneServiceEnabled = true;
     }

+    public ServerTracker getServerList() {
+	return serverList;
+    }


     /**
@@ -64,13 +79,13 @@ public class NetworkMetrics {
      * Retrieve the specified metric.
      *
      * @param name - metric variable name
-     * @return numeric value for that metric, or 0 if no metric by the
+     * @return numeric value for that metric, or -1 if no metric by the
      *         specified name exists
      **/
     public int getMetric(String name) {
 	Integer i = (Integer) t.get(name);
 	if (i == null)
-	    return 0;
+	    return -1;
 	else
 	    return i.intValue();
     }
@@ -83,10 +98,3 @@ public class NetworkMetrics {



-
-
-
-
-
-
-
diff --git a/server/zone/metrics/ServerTracker.java b/server/zone/metrics/ServerTracker.java
new file mode 100644
index 0000000..654ded6
--- /dev/null
+++ b/server/zone/metrics/ServerTracker.java
@@ -0,0 +1,59 @@
+
+package psl.chime4.server.zone.metrics;
+
+import java.util.*;
+
+
+public class ServerTracker {
+
+
+    private Hashtable liveServers;
+
+    // number of seconds since a last "alive" status has been received for
+    // a server to no longer be considered alive
+    private static int TIME_THRESHOLD = 10;
+
+    public ServerTracker() {
+	liveServers = new Hashtable();
+    }
+
+
+    /**
+     * When an "I'm alive" message is received from a server,
+     * this method should be called to update its live status.
+     **/
+    public void setLiveServer(String uniqueid) {
+	liveServers.put(uniqueid, new Date());
+    }
+
+
+
+    public int getLiveServerCount() {
+	purgeServerList();
+	return liveServers.size();
+    }
+
+
+
+    public boolean isServerAlive(String uniqueid) {
+	purgeServerList();
+	return liveServers.contains(uniqueid);
+    }
+
+
+    private void purgeServerList() {
+	Date now = new Date();
+
+	for (Enumeration e = liveServers.keys(); e.hasMoreElements();) {
+	    String id = (String) e.nextElement();
+	    Date updateTime = (Date) liveServers.get(id);
+
+	    if ((now.getTime() - updateTime.getTime())/1000 > TIME_THRESHOLD)
+		liveServers.remove(id);
+	}
+    }
+
+
+
+
+} // end ServerTracker class
diff --git a/server/zone/schemes/DefaultNetworkAuthority.java b/server/zone/schemes/DefaultNetworkAuthority.java
index acd5f7f..3ef6c94 100644
--- a/server/zone/schemes/DefaultNetworkAuthority.java
+++ b/server/zone/schemes/DefaultNetworkAuthority.java
@@ -2,28 +2,133 @@
 package psl.chime4.server.zone.schemes;

 import psl.chime4.server.auth.NetworkNode;
+import psl.chime4.server.zone.*;
 import psl.chime4.server.zone.authority.NetworkAuthority;
-import psl.chime4.server.zone.metrics.NetworkMetrics;
+import psl.chime4.server.zone.metrics.*;


-public class DefaultNetworkAuthority implements NetworkAuthority {
-
+public class DefaultNetworkAuthority implements NetworkAuthority,
+                                                MessageDefinitions {

     private NetworkMetrics metrics;
+    private ZoneSettings zoneLayout;
+
+    private NetworkNode freeServer = null;
+
+    private int peerCheck = 0;
+    private boolean requestingBackup = false;


-    public DefaultNetworkAuthority(NetworkMetrics m) {
+    public DefaultNetworkAuthority(NetworkMetrics m, ZoneSettings zs) {
 	this.metrics = m;
+	this.zoneLayout = zs;
     }



+
+
     public void monitorNetwork() {
+
+	// Check if we are the only host on the network and should therefore
+	// promote ourselves to a zone server
+	if (metrics.zoneServiceEnabled() == false) {
+	    peerCheck++;
+	    if ((metrics.getServerList().getLiveServerCount()==0) &&
+		(peerCheck >= 10))
+		startProactiveZoneService();
+	}
+
+
+	else {   // if we are currently providing zone services
+
+	    ensureBackupAvailable();
+
+	}
     }




+
+    /**
+     * Upgrade the local server to a zone server, assuming that there are
+     * no other zone servers on the network.
+     **/
+    private void startProactiveZoneService() {
+	Zone z1 = new Zone();
+	Zone z2 = new Zone();
+	Zone z3 = new Zone();
+
+	zoneLayout.addPrimaryZone(z1, true);
+	zoneLayout.addPrimaryZone(z2, true);
+	zoneLayout.addPrimaryZone(z3, true);
+
+	zoneLayout.setNextNeighbor(null);
+	zoneLayout.setPreviousNeighbor(null);
+
+	metrics.enableZoneService();
+    }
+
+
+
+
+
+    /**
+     * Check that this server is currently being covered by active (alive)
+     * primary and secondary backup servers.  If not, request as appropriate.
+     **/
+    private void ensureBackupAvailable() {
+	Zone[] primaryZones = zoneLayout.getPrimaryZones();
+
+	if (primaryZones.length == 0)   // not doing primary coverage
+	    return;
+
+	// We may assume that all zones under our control share the same
+	// backup servers.  So we need only look at the info for the first
+	// element in the list.
+	NetworkNode backup1 = primaryZones[0].getFirstBackupServer();
+	NetworkNode backup2 = primaryZones[0].getSecondBackupServer();
+
+
+	// See if we need to request new backup services (only request one
+	// at a time)
+	ServerTracker sl = metrics.getServerList();
+	if ((backup1 == null) ||
+	    (sl.isServerAlive(backup1.getIPAddress()) == false)) {
+
+	    if (requestingBackup == false) {
+		NetworkNode backup = getFreeZoneServer();
+		if (backup != null) {
+		    PeerCommunicator.requestPrimaryBackup(primaryZones,backup);
+		    requestingBackup = true;
+		}
+	    }
+	}
+
+
+	else if ((backup2 == null) ||
+		 (sl.isServerAlive(backup2.getIPAddress()) == false)) {
+
+	    if (requestingBackup == false) {
+		NetworkNode backup = getFreeZoneServer();
+		if (backup != null) {
+		    PeerCommunicator.requestSecondaryBackup(primaryZones,
+							    backup);
+		    requestingBackup = true;
+		}
+	    }
+	}
+
+	else {
+	    requestingBackup = false;
+	}
+    }
+
+
+
+
+
     /**
      * Given a CHIME server that wants to be placed under this server's
      * zone responsibility, this method handles that request.  Via
@@ -71,8 +176,48 @@ public class DefaultNetworkAuthority implements NetworkAuthority {



-} // end DefaultNetworkAuthority class
+
+    /**
+     * Returns a link to a zone server which is currently unoccupied (not
+     * performing any zone duties).  If no such server exists, then this
+     * method sends out a request to the network for a new server to
+     * be instantiated (from a CHIME server) and returns null.  It will
+     * return null until that server has been upgraded and is ready to go.
+     **/
+    private NetworkNode getFreeZoneServer() {
+
+	if (freeServer != null) {
+	    NetworkNode ret = freeServer;
+	    freeServer = null;  // we assume that the method's caller will
+	                        // utilize the server and make it no longer
+	                        // free
+	    return ret;
+
+	}
+	else {
+	    PeerCommunicator.sendBroadcast(REQUEST_ZONE_SERVER_CANDIDATE);
+	    return null;
+	}
+    }




+
+    /**
+     * If we're looking for a free zone server, then upgrade this chime
+     * server.  Otherwise, ignore the zone service offer, since we already
+     * have someone else we can use.
+     **/
+    public void handleZoneServiceOffer(NetworkNode chimeServer) {
+	if (freeServer == null) {
+	    PeerCommunicator.sendMessage(START_ZONE_SERVICE, chimeServer);
+	    freeServer = chimeServer;
+	}
+    }
+
+
+
+
+} // end DefaultNetworkAuthority class
+