This is a continuation of this post , the code is now based on Structuring Depths — Haskell's first search algorithms to make the depth of the first search , king and Launchbury in the 1990s. This document proposes to generate and shorten the strategy, but uses a volatile array with a state monad (with some grammar with which I suspect it has been deprecated since then). The authors hint that the set can be used to remember visited nodes as the cost of an additional O (log n). I tried to implement using a set (now we have better machines than in the 1990s!), To use the modern State Monad syntax and use Vectors, not arrays (since I read that this is usually better).
As before, my code works on small data sets, but does not return on the 5m boundary graph, which I need to analyze, and I'm looking for clues about the weakness acting on the scale. I know that the code works conveniently in memory, so it doesn't problem, but did I accidentally slip to O (n2)? (On the contrary, the official implementation of this article in the Data.Graph library (in which I also borrowed some code recently) uses a mutable array, but it crashes in a large data set with ... Stack Overflow !!!)
So now I have a Vector data store with an IntSet state that is not completed, and an array with the ST status of ST Monad Array, which is crashing! Should Haskell be able to do better than that?
import Data.Vector (Vector)
import qualified Data.IntSet as IS
import qualified Data.Vector as V
import qualified Data.ByteString.Char8 as BS
import Control.Monad.State
type Vertex = Int
type Table a = Vector a
type Graph = Table [Vertex]
type Edge = (Vertex, Vertex)
data Tree a = Node a (Forest a) deriving (Show,Eq)
type Forest a = [Tree a]
-- ghc -O2 -threaded --make
-- +RTS -Nx
generate :: Graph -> Vertex -> Tree Vertex
generate g v = Node v $ map (generate g) (g V.! v)
chop :: Forest Vertex -> State IS.IntSet (Forest Vertex)
chop [] = return []
chop (Node x ts:us) = do
visited <- contains x
if visited then
chop us
else do
include x
x1 <- chop ts
x2 <- chop us
return (Node x x1:x2)
prune :: Forest Vertex -> State IS.IntSet (Forest Vertex)
prune vs = chop vs
main = do
--edges <- V.fromList `fmap` getEdges "testdata.txt"
edges <- V.fromList `fmap` getEdges "SCC.txt"
let
-- calculate size of five largest SCC
maxIndex = fst $ V.last edges
gr = buildG maxIndex edges
sccRes = scc gr
big5 = take 5 sccRes
big5' = map (\l -> length $ postorder l) big5
putStrLn $ show $ big5'
contains :: Vertex -> State IS.IntSet Bool
contains v = state $ \visited -> (v `IS.member` visited, visited)
include :: Vertex -> State IS.IntSet ()
include v = state $ \visited -> ((), IS.insert v visited)
getEdges :: String -> IO [Edge]
getEdges path = do
lines <- (map BS.words . BS.lines) `fmap` BS.readFile path
let pairs = (map . map) (maybe (error "can't read Int") fst . BS.readInt) lines
return [(a, b) | [a, b] <- pairs]
vertices :: Graph -> [Vertex]
vertices gr = [1.. (V.length gr - 1)]
edges :: Graph -> [Edge]
edges g = [(u,v) | u <- vertices g, v <- g V.! u]
-- accumulate :: (a -> b -> a) -> Vector a-> Vector (Int, b)--> Vector a
-- accumulating function f
-- initial vector (of length m)
-- vector of index/value pairs (of length n)
buildG :: Int -> Table Edge -> Graph
buildG maxIndex edges = graph' where
graph = V.replicate (maxIndex + 1) []
--graph' = V.accumulate (\existing new -> new:existing) graph edges
-- flip f takes its (first) two arguments in the reverse order of f
graph' = V.accumulate (flip (:)) graph edges
mapT :: Ord a => (Vertex -> a -> b) -> Table a -> Table b
mapT = V.imap
outDegree :: Graph -> Table Int
outDegree g = mapT numEdges g
where numEdges v es = length es
indegree :: Graph -> Table Int
indegree g = outDegree $ transposeG g
transposeG :: Graph -> Graph
transposeG g = buildG (V.length g - 1) (reverseE g)
reverseE :: Graph -> Table Edge
reverseE g = V.fromList [(w, v) | (v,w) <- edges g]
-- --------------------------------------------------------------
postorder :: Tree a -> [a]
postorder (Node a ts) = postorderF ts ++ [a]
postorderF :: Forest a -> [a]
postorderF ts = concat (map postorder ts)
postOrd :: Graph -> [Vertex]
postOrd g = postorderF (dff g)
dfs :: Graph -> [Vertex] -> Forest Vertex
dfs g vs = map (generate g) vs
dfs' :: Graph -> [Vertex] -> Forest Vertex
dfs' g vs = fst $ runState (prune d) $ IS.fromList []
where d = dfs g vs
dff :: Graph -> Forest Vertex
dff g = dfs' g $ reverse (vertices g)
scc :: Graph -> Forest Vertex
scc g = dfs' g $ reverse $ postOrd (transposeG g)