Haskell: optimising a graph processing algorithm

This is a follow-up to an earlier post. The code is now based on "Structuring Depth-First Search Algorithms in Haskell", the paper by King and Launchbury from the 1990s. The paper proposes a generate-and-prune strategy, but uses a mutable array with a state monad (with some syntax that I suspect has since been deprecated). The authors hint that a set could be used to remember the visited nodes, at the cost of an additional O(log n) factor. I tried to implement it using a set (we have better machines now than in the 1990s!), to use the modern State monad syntax, and to use Vectors rather than arrays (since I read that this is usually better).

As before, my code works on small data sets, but it fails to return on the 5-million-edge graph I need to analyse, and I'm looking for hints about the weakness that shows up at scale. I do know that the code fits comfortably in memory, so that is not the problem, but have I inadvertently slipped to O(n^2)? (By contrast, the official implementation of this paper in the Data.Graph library (from which I have also recently borrowed some code) uses a mutable array, but fails on the large data set with a ... stack overflow!)

So now I have a Vector data store with IntSet state that does not complete, and an Array data store with ST monad array state that crashes! Shouldn't Haskell be able to do better than this?

import Control.Monad.State
import qualified Data.ByteString.Char8 as BS
import qualified Data.IntSet as IS
import Data.List (sortBy)
import Data.Vector (Vector)
import qualified Data.Vector as V

-- | A vertex is identified by its integer index into a 'Table'.
type Vertex   = Int
-- | A table is a 'Vector'; the code in this file indexes it by 'Vertex'.
type Table a  = Vector a
-- | A graph maps each vertex to the list of vertices it has edges to.
type Graph    = Table [Vertex]
-- | A directed edge as a (source, target) pair.
type Edge     = (Vertex, Vertex)
-- | A rose tree: a label together with a forest of subtrees.
data Tree a   = Node a (Forest a) deriving (Show,Eq)
-- | A list of trees.
type Forest a = [Tree a]
-- ghc -O2 -threaded --make
-- +RTS -Nx
-- | Unfold the tree of everything reachable from a vertex by following the
-- adjacency lists. No visited check is made here, so the tree is infinite
-- when the graph has a cycle; it is meant to be consumed lazily and pruned.
generate :: Graph -> Vertex -> Tree Vertex
generate g v = Node v [generate g w | w <- g V.! v]

-- | Prune a forest depth-first against the set of visited vertices held in
-- the state. A tree whose root has already been visited is dropped together
-- with all of its subtrees; otherwise the root is recorded as visited, its
-- children are pruned first, and then its remaining siblings.
chop :: Forest Vertex -> State IS.IntSet (Forest Vertex)
chop forest = case forest of
    [] -> return []
    Node root children : siblings -> do
        seen <- contains root
        if seen
            then chop siblings
            else do
                include root
                keptChildren <- chop children
                keptSiblings <- chop siblings
                return (Node root keptChildren : keptSiblings)

-- | Remove repeated vertices from a forest; this is simply 'chop'.
prune :: Forest Vertex -> State IS.IntSet (Forest Vertex)
prune = chop

-- | Read the edge list from @SCC.txt@, build the graph, and print the sizes
-- of its five largest strongly connected components, largest first.
main :: IO ()
main = do
    --edges <- V.fromList `fmap` getEdges "testdata.txt"
    edges <- V.fromList `fmap` getEdges "SCC.txt"
    let
        -- Size the table by the largest vertex seen at either end of any
        -- edge. transposeG indexes the table by edge targets, so sizing it
        -- by the source of the last edge alone can leave a target out of
        -- bounds (and fails outright on an empty edge list).
        maxIndex
            | V.null edges = 0
            | otherwise    = V.maximum (V.map (uncurry max) edges)
        gr = buildG maxIndex edges
        sccRes = scc gr
        -- Rank every component by size before keeping five of them: the
        -- forest returned by scc is not ordered by component size.
        sizes = map (length . postorder) sccRes
        big5' = take 5 (sortBy (flip compare) sizes)
    print big5'

-- | Report whether a vertex is already in the visited set held in the
-- state. The state itself is left unchanged.
contains :: Vertex -> State IS.IntSet Bool
contains v = gets (v `IS.member`)

-- | Mark a vertex as visited by inserting it into the set held in the state.
--
-- The update uses the strict 'modify'' so the new set is evaluated before it
-- is stored, rather than being kept in the state as an unevaluated
-- @IS.insert@ application.
include :: Vertex -> State IS.IntSet ()
include v = modify' (IS.insert v)


-- | Read a whitespace-separated edge list from a file, one edge per line.
-- Lines that do not consist of exactly two tokens are skipped; a token that
-- does not start with an integer raises an error when its value is used.
getEdges :: String -> IO [Edge]
getEdges path = do
    contents <- BS.readFile path
    let parseInt = maybe (error "can't read Int") fst . BS.readInt
        rows     = map (map parseInt . BS.words) (BS.lines contents)
    return [(a, b) | [a, b] <- rows]

-- | The vertices of a graph in ascending order, starting at 1; index 0 of
-- the table is not listed.
vertices :: Graph -> [Vertex]
vertices gr = enumFromTo 1 lastVertex
    where lastVertex = V.length gr - 1

-- | All edges of a graph as (source, target) pairs, grouped by source in
-- the order given by 'vertices'.
edges :: Graph -> [Edge]
edges g = concatMap outgoing (vertices g)
    where outgoing u = map ((,) u) (g V.! u)

-- | Build a graph with vertices @0 .. maxIndex@ from a table of edges.
--
-- Every vertex starts with an empty adjacency list; 'V.accumulate' then
-- visits each (source, target) pair and conses the target onto the list
-- stored at the source index.
buildG :: Int -> Table Edge -> Graph
buildG maxIndex edges = V.accumulate prepend noEdges edges
    where
        noEdges = V.replicate (maxIndex + 1) []
        -- Same as @flip (:)@: existing list first, new target second.
        prepend targets target = target : targets

-- | Map a function over a table, passing it the index of each entry along
-- with the entry itself.
--
-- The element type needs no 'Ord' instance; 'V.imap' places no constraint
-- on it, so none is demanded here.
mapT :: (Vertex -> a -> b) -> Table a -> Table b
mapT = V.imap

-- | For every vertex, the length of its adjacency list.
outDegree :: Graph -> Table Int
outDegree = mapT (\_ targets -> length targets)

-- | For every vertex, its out-degree in the transposed graph.
indegree :: Graph -> Table Int
indegree = outDegree . transposeG

-- | The graph with every edge reversed, over the same index range.
transposeG :: Graph -> Graph
transposeG g = buildG lastIndex (reverseE g)
    where lastIndex = V.length g - 1

-- | The edges of a graph with source and target swapped, as a table.
reverseE :: Graph -> Table Edge
reverseE g = V.fromList (map flipEdge (edges g))
    where flipEdge (v, w) = (w, v)

-- --------------------------------------------------------------

-- | The labels of a tree in postorder: each subtree from left to right,
-- then the root.
postorder :: Tree a -> [a]
postorder t = postorderAcc t []

-- | The labels of a forest in postorder, tree by tree.
postorderF :: Forest a -> [a]
postorderF ts = postorderAccF ts []

-- | Prepend the postorder of a tree to an already-built tail.
--
-- Threading the tail through avoids the left-nested @++ [a]@ appends, which
-- re-copy the accumulated prefix at every level and make deep trees cost
-- quadratic time; here each label is consed exactly once.
postorderAcc :: Tree a -> [a] -> [a]
postorderAcc (Node a ts) rest = postorderAccF ts (a : rest)

-- | Prepend the postorder of a forest to an already-built tail.
postorderAccF :: Forest a -> [a] -> [a]
postorderAccF ts rest = foldr postorderAcc rest ts

-- | The vertices of a graph in the postorder of its depth-first forest.
postOrd :: Graph -> [Vertex]
postOrd = postorderF . dff

-- | The unpruned forest obtained by unfolding a tree with 'generate' from
-- each of the given start vertices, in order.
dfs :: Graph -> [Vertex] -> Forest Vertex
dfs g = map (generate g)

-- | Depth-first forest from the given start vertices: the forest produced
-- by 'dfs' is pruned starting from an empty visited set, and the final set
-- is discarded.
dfs' :: Graph -> [Vertex] -> Forest Vertex
dfs' g vs = evalState (prune (dfs g vs)) IS.empty

-- | Depth-first forest of the whole graph, trying the start vertices from
-- the highest index down.
dff :: Graph -> Forest Vertex
dff g = dfs' g descending
    where descending = reverse (vertices g)

-- | Depth-first forest of the graph, with start vertices taken in reverse
-- postorder of the transposed graph. Each tree of the result is reported by
-- the caller as one strongly connected component.
scc :: Graph -> Forest Vertex
scc g = dfs' g visitOrder
    where visitOrder = reverse (postOrd (transposeG g))
+4
1

Some possible small improvements. Change:

type Edge = (Vertex, Vertex)

data Edge = Edge {-# UNPACK #-} !Vertex {-# UNPACK #-} !Vertex

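This reduces the memory used per edge from 7 words to 3 and improves cache locality. Lower memory pressure almost always improves runtime as well. As @jberryman mentioned, you could also use an unboxed vector for Table Edge (in which case the custom data type above is not needed).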

generate :: Graph -> Vertex -> Tree Vertex
generate g v = Node v $ map (generate g) (g V.! v)

, , .!.

contains :: Vertex -> State IS.IntSet Bool
contains v = state $ \visited -> (v `IS.member` visited, visited)

This can be written with get and put $!.

include :: Vertex -> State IS.IntSet ()
include v = state $ \visited -> ((), IS.insert v visited)

This can be written with modify'.

. /. , , .

+1

Source: https://habr.com/ru/post/1545720/


All Articles