/** Command-line implementation of LocalCommunityFinder. Copyright 2014 Konstantin Voevodski Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ import java.util.*; import java.io.*; public class localCommunityFinder { public static void main(String[] args) throws IOException, InterruptedException{ // Arguments are network file, starting vertex, sMin, sMax, forceVertex, outputFile. String networkFile, sv, outputFile; int sMin, sMax; boolean forceStartingVertexInCluster; try{ networkFile = args[0]; sv = args[1]; sMin = Integer.parseInt(args[2]); sMax = Integer.parseInt(args[3]); forceStartingVertexInCluster = Boolean.parseBoolean(args[4]); outputFile = args[5]; } catch(Exception ex){ System.out.println("invalid arguments, see usage"); return; } try{ initializeNetwork(networkFile,sv); } catch(Exception ex){ System.out.println("invalid network file"); return; } if(!isValidNetwork()){ System.out.println("invalid network file"); return; } if(!containsStartingVertex()){ System.out.println("the network does not contain the starting vertex"); return; } HashSet cluster; try{ HashSet cluster1 = runNibble(sMin,sMax,forceStartingVertexInCluster); HashSet cluster2 = runPageRankNibble(sMin,sMax,forceStartingVertexInCluster); cluster = new HashSet(); if(cluster1.size() == 0) cluster = cluster2; else if(cluster2.size() == 0) cluster = cluster1; else{ // If both are non-empty. double phi1 = computeConductance(cluster1); double phi2 = computeConductance(cluster2); if(phi1 < phi2) cluster = cluster1; else cluster = cluster2; } } catch(Exception ex){ System.out.println("error finding cluster"); return; } if(cluster.size() == 0){ System.out.println("no cluster can be found that fits your constraints"); return; } String StartingVertex = getStartingVertex(); BufferedWriter writer = new BufferedWriter(new FileWriter(outputFile)); if(cluster.contains(StartingVertex)){ writer.write(StartingVertex); writer.newLine(); cluster.remove(StartingVertex); } for(String node : cluster){ writer.write(node); writer.newLine(); } writer.close(); int size = cluster.size(); double phi = computeConductance(cluster); System.out.println("cluster statistics:"); System.out.println("size: " + size); System.out.println("conductance: " + phi); System.out.println("the nodes in the cluster have been written to " + outputFile); } // Members of this class. private static Hashtable> network; private static Hashtable degreeTable; private static double totalVolume; private static String startingVertex; private static boolean isWeighted; private static double freedom = 100.0; // The methods of this class follow. private static void initializeNetwork(String inputFile, String startingNode) throws Exception{ isWeighted = false; network = new Hashtable>(); BufferedReader reader = new BufferedReader(new FileReader(inputFile)); String temp; while((temp = reader.readLine()) != null){ String[] data = temp.split("\t"); // Each line should have exactly 2 or 3 entries. if(data.length < 2 || data.length > 3) continue; String vertex1 = data[0]; String vertex2 = data[1]; Double similarity; if(vertex1.length() == 0 || vertex2.length() == 0) throw new InvalidFileFormatException("invalid file format"); if(data.length > 2){ try{ similarity = Double.parseDouble(data[2]); isWeighted = true; } catch(Exception ex){ throw new InvalidFileFormatException("invalid file format"); } } else similarity = 1.0; if(similarity > 0.0){ addEdge(vertex1,vertex2,similarity,network); addEdge(vertex2,vertex1,similarity,network); if(startingVertex == null && vertex1.equalsIgnoreCase(startingNode)) startingVertex = vertex1; if(startingVertex == null && vertex2.equalsIgnoreCase(startingNode)) startingVertex = vertex2; } } reader.close(); makeDegreeTable(); } private static void addEdge(String node1, String node2, Double similarity, Hashtable> adjacencyList) { HashSet adjacentNodes; if(!adjacencyList.containsKey(node1)) adjacentNodes = new HashSet(); else adjacentNodes = adjacencyList.get(node1); adjacentNodes.add(new Edge(node2,similarity)); adjacencyList.put(node1,adjacentNodes); } private static void makeDegreeTable(){ degreeTable = new Hashtable(); totalVolume = 0; Enumeration myEnum = network.keys(); while(myEnum.hasMoreElements()){ String node = myEnum.nextElement(); double degree = 0; HashSet adjacentNodes = network.get(node); for(Edge e : adjacentNodes) degree += e.getSimilarity(); totalVolume += degree; degreeTable.put(node,degree); } } private static boolean isValidNetwork(){ return totalVolume > 0; } private static boolean containsStartingVertex(){ return startingVertex != null; } private static String getStartingVertex(){ return startingVertex; } private static HashSet runNibble(int sMin, int sMax, boolean forceStartingVertexInCluster){ double dAve = totalVolume / network.size(), tLast = Math.max(100,network.size() * 0.05); double epsilon = 1 / (sMax * dAve * freedom); HashSet cluster, bestCluster = new HashSet(); double bestPhi = 1, Phi; Vector nodeList; Hashtable support = new Hashtable(); support.put(startingVertex,1.0); int iter = 0; for(int x = 0; x < tLast; x++){ iter++; if(support.size() == 0) break; if(forceStartingVertexInCluster) nodeList = sortConstrainedDistribution(support,startingVertex); else nodeList = sortDistribution(support); cluster = checkConductance(nodeList,totalVolume,sMin,sMax); Phi = computeConductance(cluster); if(Phi <= bestPhi){ bestPhi = Phi; bestCluster = cluster; } support = updateDistribution(support); support = roundDistribution(support,epsilon); } return bestCluster; } private static double computeConductance(HashSet nodes){ double insideVolume = 0, edgesAcross = 0; for(String node: nodes){ HashSet adjacentNodes = network.get(node); insideVolume += degreeTable.get(node); for(Edge e : adjacentNodes){ if(!nodes.contains(e.getNode())) edgesAcross+= e.getSimilarity(); } } return(edgesAcross/Math.min(insideVolume, totalVolume - insideVolume)); } private static Vector sortDistribution(Hashtable support){ DistributionEntry[] distribution = new DistributionEntry[support.size()]; Enumeration myEnum = support.keys(); String node; double p, d; int counter = 0; while(myEnum.hasMoreElements()){ node = (String) myEnum.nextElement(); p = (Double) support.get(node); d = (Double) degreeTable.get(node); distribution[counter] = new DistributionEntry(node,p,d); counter++; } Arrays.sort(distribution); Vector output = new Vector(); for(int x = 0; x < distribution.length; x++) output.add(distribution[x].getVertex()); return output; } private static Vector sortConstrainedDistribution(Hashtable support, String startingVertex){ DistributionEntry[] distribution = new DistributionEntry[support.size()]; Enumeration myEnum = support.keys(); String node; double p, d; int counter = 0; while(myEnum.hasMoreElements()){ node = (String) myEnum.nextElement(); p = (Double) support.get(node); d = (Double) degreeTable.get(node); distribution[counter] = new DistributionEntry(node,p,d); counter++; } Arrays.sort(distribution); Vector output = new Vector(); output.add(startingVertex); for(int x = 0; x < distribution.length; x++){ node = distribution[x].getVertex(); if(!node.equals(startingVertex)) output.add(node); } return output; } private static HashSet checkConductance(Vector support, double totalVolume, int sMin, int sMax){ double inside, outside, smallestConductance = 1, edgesAcross = 0, insideVolume = 0, phi; HashSet previous = new HashSet(), bestCluster = new HashSet(); for(int x = 0; x < support.size(); x++){ String node = (String) support.get(x); inside = 0; outside = 0; HashSet adjacentNodes = network.get(node); for(Edge e : adjacentNodes){ if(previous.contains(e.getNode())) inside+=e.getSimilarity(); else outside+=e.getSimilarity(); } previous.add(node); edgesAcross = edgesAcross - inside + outside; insideVolume += degreeTable.get(node); phi = edgesAcross / Math.min(insideVolume,totalVolume - insideVolume); if(previous.size() >= sMin && previous.size()<= sMax && phi <= smallestConductance){ smallestConductance = phi; bestCluster = (HashSet) previous.clone(); } } return bestCluster; } private static Hashtable updateDistribution(Hashtable support){ Hashtable newDistribution = new Hashtable(); Enumeration myEnum = support.keys(); String node; Double prob, increment; HashSet adjacentNodes; while(myEnum.hasMoreElements()){ node = (String) myEnum.nextElement(); prob = (Double) support.get(node); augmentDistribution(newDistribution,node,prob*0.5); adjacentNodes = network.get(node); double sum = 0; for(Edge e : adjacentNodes) sum += e.getSimilarity(); for(Edge e : adjacentNodes) augmentDistribution(newDistribution,e.getNode(),e.getSimilarity()/sum * prob * 0.5); } return newDistribution; } private static void augmentDistribution(Hashtable distribution, String node, Double probability){ Double oldProbability; if(!distribution.containsKey(node)) oldProbability = 0.0; else oldProbability = (Double) distribution.get(node); distribution.put(node,oldProbability + probability); } private static Hashtable roundDistribution(Hashtable support, double epsilon){ Hashtable output = new Hashtable(); Enumeration myEnum = support.keys(); String node; Double prob, degree; while(myEnum.hasMoreElements()){ node = (String) myEnum.nextElement(); prob = (Double) support.get(node); degree = (Double) degreeTable.get(node); if(degree > 0 && prob/degree >= epsilon) output.put(node,prob); } return output; } private static HashSet runPageRankNibble(int sMin, int sMax, boolean forceStartingVertexInCluster){ double alpha = 0.02, dAve = totalVolume / network.size(); double epsilon = 1 / (sMax * dAve * freedom); Hashtable p = approximatePr(epsilon,alpha); Vector nodeList = sortDistribution(p); return checkConductance(nodeList,totalVolume,sMin,sMax); } private static Hashtable approximatePr(double epsilon, double alpha){ Hashtable r = new Hashtable(); r.put(startingVertex,1.0); Hashtable p = new Hashtable(); HashSet queue = new HashSet(); if((Double) r.get(startingVertex) >= epsilon * (Double) degreeTable.get(startingVertex)) queue.add(startingVertex); while(queue.size() > 0) push(queue,r,p,epsilon,alpha); return p; } // Move alpha of the probability at the node from r to p, apply lazy random walk to the rest: // half of 1-alpha stays, the rest goes to the neighbors. private static void push(HashSet queue, Hashtable r, Hashtable p, double epsilon, double alpha){ Iterator i = queue.iterator(); String node = (String) i.next(); queue.remove(node); double prob = (Double) r.get(node); augmentDistribution(p,node,alpha * prob); double newProb = (1 - alpha) * prob / 2.0; r.put(node,newProb); if(newProb >= epsilon * (Double) degreeTable.get(node)) queue.add(node); HashSet adjacentNodes = network.get(node); double sum = 0; // Sum weights of adjacent edges. for(Edge e : adjacentNodes) sum += e.getSimilarity(); for(Edge e : adjacentNodes){ String node2 = e.getNode(); augmentDistribution(r,node2,e.getSimilarity()/sum * newProb); if((Double) r.get(node2) >= epsilon * (Double) degreeTable.get(node2)) queue.add(node2); } } private static class DistributionEntry implements Comparable{ private String v; private Double p; private Double d; public DistributionEntry(String v, Double p, Double d){ this.v = v; this.p = p; this.d = d; } public String getVertex(){ return v; } public double getProbability(){ return p; } public double getDegree(){ return d; } public int compareTo(Object o) throws ClassCastException{ double p2 = ((DistributionEntry) o).getProbability(); double d2 = ((DistributionEntry) o).getDegree(); if(p/d < p2/d2) return 1; else if(p/d == p2/d2) return 0; else return -1; } } private static class Edge{ String node; Double similarity; public Edge(String node, Double similarity){ this.node = node; this.similarity = similarity; } public String getNode(){ return node; } public Double getSimilarity(){ return similarity; } public boolean equals(Edge e2){ return node.equals(e2.getNode()); } public int hashCode(){ return node.hashCode(); } } private static class InvalidFileFormatException extends Exception{ InvalidFileFormatException() {} InvalidFileFormatException(String msg) { super(msg); } } }