diff -Naur webgraph-3.2.1/src/it/unimi/dsi/webgraph/algo/ConnectedComponents.java webgraph-3.2.1_aplf/src/it/unimi/dsi/webgraph/algo/ConnectedComponents.java --- webgraph-3.2.1/src/it/unimi/dsi/webgraph/algo/ConnectedComponents.java 2013-06-10 11:56:31.000000000 +0100 +++ webgraph-3.2.1_aplf/src/it/unimi/dsi/webgraph/algo/ConnectedComponents.java 2013-10-29 16:12:39.424596221 +0000 @@ -2,6 +2,7 @@ /* * Copyright (C) 2011-2013 Sebastiano Vigna + * Copyright (C) 2013 A P Francisco * * This program is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License as published by the Free @@ -24,6 +25,8 @@ import it.unimi.dsi.fastutil.io.BinIO; import it.unimi.dsi.logging.ProgressLogger; import it.unimi.dsi.webgraph.ImmutableGraph; +import it.unimi.dsi.webgraph.LazyIntIterator; +import it.unimi.dsi.webgraph.NodeIterator; import java.io.IOException; import java.util.concurrent.TimeUnit; @@ -42,25 +45,16 @@ import com.martiansoftware.jsap.UnflaggedOption; /** - * Computes the connected components of a symmetric (a.k.a. undirected) graph - * using a {@linkplain ParallelBreadthFirstVisit parallel breadth-first visit}. + * Computes the connected components of a graph. * *
The {@link #compute(ImmutableGraph, int, ProgressLogger)} method of this class will return an - * instance that contains the data computed by visiting the graph (using an instance of - * {@link ParallelBreadthFirstVisit}). Note that it is your responsibility to pass a symmetric graph - * to {@link #compute(ImmutableGraph, int, ProgressLogger)}. Otherwise, results will be - * unpredictable. + * instance that contains the data computed by visiting the graph (loaded offline). * *
After getting an instance, it is possible to run the {@link #computeSizes()} and * {@link #sortBySize(int[])} methods to obtain further information. This scheme has been devised to * exploit the available memory as much as possible—after the components have been computed, * the returned instance keeps no track of the graph, and the related memory can be freed by the * garbage collector. - * - *
This class uses an instance of {@link ParallelBreadthFirstVisit} to ensure a high degree of
- * parallelism (see its documentation for memory requirements).
*/
public class ConnectedComponents {
@@ -81,20 +75,41 @@
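- * Computes the diameter of a symmetric graph.
+ * Computes the connected components of a graph, ignoring the direction of its arcs.
*
- * @param symGraph a symmetric graph.
+ * @param g a graph.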
- * @param threads the requested number of threads (0 for {@link Runtime#availableProcessors()}).
* @param pl a progress logger, or <code>null</code>.
* @return an instance of this class containing the computed components.
*/
- public static ConnectedComponents compute( final ImmutableGraph symGraph, final int threads, final ProgressLogger pl ) {
- ParallelBreadthFirstVisit visit = new ParallelBreadthFirstVisit( symGraph, threads, false, pl );
- visit.visitAll();
- final AtomicIntegerArray visited = visit.marker;
- final int numberOfComponents = visit.round + 1;
- visit = null;
- final int[] component = new int[ visited.length() ];
+	public static ConnectedComponents compute( final ImmutableGraph g, final ProgressLogger pl ) {
+		final int n = g.numNodes();
+		final DisjointSet set = new DisjointSet( n );
+		LOGGER.info("Computing CCs...");
+		// The progress logger is documented as nullable, so every use of it is guarded.
+		if ( pl != null ) {
+			pl.start("Processing " + n + " nodes...");
+			pl.expectedUpdates = n;
+			pl.itemsName = "nodes";
+		}
+		// Single sequential scan: each arc merges the sets of its two endpoints, whatever its direction.
+		final NodeIterator nIter = g.nodeIterator();
+		while (nIter.hasNext()) {
+			final int u = nIter.nextInt();
+			final LazyIntIterator eIter = nIter.successors();
+			for ( int v; ( v = eIter.nextInt() ) != -1; ) set.unionSet( u, v );
+			if ( pl != null ) pl.update();
+		}
+		if ( pl != null ) pl.done();
+
+		// Components are numbered in order of discovery; -1 marks a root that has no number yet.
+		int ccid = 0;
+		final int[] component = new int[ n ];
for ( int i = component.length; i-- != 0; )
- component[ i ] = visited.get( i );
- return new ConnectedComponents( numberOfComponents, component );
+			component[ i ] = -1;
+		for ( int i = component.length; i-- != 0; ) {
+			final int root = set.findSet( i );
+			if ( component[ root ] == -1 ) component[ root ] = ccid++;
+			component[ i ] = component[ root ];
+		}
+
+		return new ConnectedComponents( ccid, component );
}
/**
@@ -142,9 +157,6 @@
new Switch( "renumber", 'r', "renumber", "Renumber components in decreasing-size order." ),
new FlaggedOption( "logInterval", JSAP.LONG_PARSER, Long.toString( ProgressLogger.DEFAULT_LOG_INTERVAL ), JSAP.NOT_REQUIRED, 'l', "log-interval",
"The minimum time interval between activity logs in milliseconds." ),
- new Switch( "mapped", 'm', "mapped", "Do not load the graph in main memory, but rather memory-map it." ),
- new FlaggedOption( "threads", JSAP.INTSIZE_PARSER, "0", JSAP.NOT_REQUIRED, 'T', "threads",
- "The number of threads to be used. If 0, the number will be estimated automatically." ),
new UnflaggedOption( "basename", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.REQUIRED, JSAP.NOT_GREEDY, "The basename of the graph." ),
new UnflaggedOption( "resultsBasename", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, JSAP.NOT_GREEDY, "The basename of the resulting files." ),
}
@@ -155,11 +167,9 @@
final String basename = jsapResult.getString( "basename" );
final String resultsBasename = jsapResult.getString( "resultsBasename", basename );
- final int threads = jsapResult.getInt( "threads" );
ProgressLogger pl = new ProgressLogger( LOGGER, jsapResult.getLong( "logInterval" ), TimeUnit.MILLISECONDS );
- final ConnectedComponents components = ConnectedComponents.compute( jsapResult.userSpecified( "mapped" ) ?
- ImmutableGraph.loadMapped( basename ) : ImmutableGraph.load( basename ), threads, pl );
+ final ConnectedComponents components = ConnectedComponents.compute( ImmutableGraph.loadOffline( basename ), pl );
if ( jsapResult.getBoolean( "sizes" ) || jsapResult.getBoolean( "renumber" ) ) {
final int size[] = components.computeSizes();
diff -Naur webgraph-3.2.1/src/it/unimi/dsi/webgraph/algo/DisjointSet.java webgraph-3.2.1_aplf/src/it/unimi/dsi/webgraph/algo/DisjointSet.java
--- webgraph-3.2.1/src/it/unimi/dsi/webgraph/algo/DisjointSet.java 1970-01-01 01:00:00.000000000 +0100
+++ webgraph-3.2.1_aplf/src/it/unimi/dsi/webgraph/algo/DisjointSet.java 2013-10-29 16:12:39.424596221 +0000
@@ -0,0 +1,69 @@
+package it.unimi.dsi.webgraph.algo;
+
+/*-
+ * Copyright (c) 2013, A P Francisco
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/** Union-find (disjoint-set forest) over the integers <code>0..n</code>, using path halving and union by size. */
+public class DisjointSet {
+	/** Number of elements, i.e., the constructor argument plus one. */
+	private final int size;
+	/** Parent pointers; an element is a root if and only if it is its own parent. */
+	private final int[] pi;
+	/** For a root, the number of elements in its set; not maintained for non-roots. */
+	private final int[] rank;
+	/** Creates <code>n + 1</code> singleton sets, one for each integer from 0 to <code>n</code> inclusive. */
+	public DisjointSet(int n) {
+		size = n + 1;
+		pi = new int[size];
+		rank = new int[size];
+		for (int i = 0; i < size; i++) {
+			rank[i] = 1;
+			pi[i] = i;
+		}
+	}
+
+	/** Returns the root of the set containing <code>i</code> (halving the path walked), or -1 if <code>i</code> is out of range. */
+	public int findSet(int i) {
+		if (i < 0 || i >= size)
+			return -1;
+		for (; i != pi[i]; i = pi[i])
+			pi[i] = pi[pi[i]]; // re-attach i to its grandparent
+		return i;
+	}
+
+	/** Returns whether <code>i</code> and <code>j</code> have the same root; two out-of-range arguments compare equal. */
+	public boolean sameSet(int i, int j) {
+		return findSet(i) == findSet(j);
+	}
+
+	/** Merges the sets containing <code>i</code> and <code>j</code>; does nothing if either argument is out of range. */
+	public void unionSet(int i, int j) {
+		final int iRoot = findSet(i);
+		final int jRoot = findSet(j);
+		if (iRoot < 0 || jRoot < 0 || iRoot == jRoot)
+			return;
+		// The root of the larger set becomes the parent; on ties jRoot does.
+		final int winner = rank[iRoot] > rank[jRoot] ? iRoot : jRoot;
+		final int loser = winner == iRoot ? jRoot : iRoot;
+		pi[loser] = winner;
+		rank[winner] += rank[loser];
+	}
+
+	/** Returns the size counter stored for <code>i</code>, which is the size of its set only when <code>i</code> is a root. */
+	public int getRank(int i) {
+		return rank[i];
+	}
+}
diff -Naur webgraph-3.2.1/test/it/unimi/dsi/webgraph/algo/ConnectedComponentsTest.java webgraph-3.2.1_aplf/test/it/unimi/dsi/webgraph/algo/ConnectedComponentsTest.java
--- webgraph-3.2.1/test/it/unimi/dsi/webgraph/algo/ConnectedComponentsTest.java 2013-10-29 16:12:51.431596484 +0000
+++ webgraph-3.2.1_aplf/test/it/unimi/dsi/webgraph/algo/ConnectedComponentsTest.java 2013-10-04 14:26:42.000000000 +0100
@@ -38,7 +38,7 @@
stronglyConnectedComponents.sortBySize( size2 );
for( int t = 0; t < 3; t++ ) {
- ConnectedComponents connectedComponents = ConnectedComponents.compute( g, t, new ProgressLogger() );
+ ConnectedComponents connectedComponents = ConnectedComponents.compute( g, new ProgressLogger() );
int[] size = connectedComponents.computeSizes();
connectedComponents.sortBySize( size );
for( int i = g.numNodes(); i-- != 0; )