Why does adding INLINE slow down my program?

I was looking into writing a foldl that works on infinite lists, for situations where you cannot get guarded recursion but where, depending on the first argument, the second argument may not need to be used.

For example, multiplication usually needs both arguments, so guarded recursion does not work; but if the first argument is 0, you can short-circuit.

So, I wrote the following function:

-- | Left fold that continues only while the predicate @p@ holds for the
-- accumulator. The accumulator is returned as soon as the list is exhausted
-- or @p@ rejects it, so the rest of the list is never inspected in that case.
foldlp :: (b -> a -> b) -> (b -> Bool) -> b -> [a] -> b
foldlp f p = go where
    -- End of input: the accumulator is the result (p is not consulted here).
    go b [] = b
    go b (x : xs) 
        | p b = go (f b x) xs  -- predicate holds: fold in x and keep going
        | otherwise = b        -- predicate fails: stop, xs is left untouched

And I tested it with my short-circuiting multiplication function:

 -- | Multiplication that short-circuits on a zero left operand: when the
 -- first argument is 0 the second argument is never examined.
 mult :: Integer -> Integer -> Integer
 mult x y
   | x == 0 = 0
   | otherwise = x * y

 -- Benchmark driver. <test_function> is a placeholder, not Haskell syntax:
 -- substitute each of the expressions listed with the results below.
 main :: IO ()
 main = print . <test_function>

The results obtained with -prof -fprof-auto -O2 and +RTS -p were as follows:

foldlp mult (/= 0) 1 $ replicate (10 ^ 7) 1
total time = 0.40 secs
total alloc = 480,049,336 bytes

foldlp mult (`seq` True) 1 $ replicate (10 ^ 7) 1
total time = 0.37 secs
total alloc = 480,049,336 bytes

foldl' mult 1 $ replicate (10 ^ 7) 1
total time = 0.37 secs
total alloc = 480,049,352 bytes

foldl mult 1 $ replicate (10 ^ 7) 1
total time = 0.74 secs
total alloc = 880,049,352 bytes

foldr mult 1 $ replicate (10 ^ 7) 1
total time = 0.87 secs
total alloc = 880,049,336 bytes

This was very promising, as my custom function allows flexible kinds of strictness and also works with infinite lists.

The first example terminates as soon as it hits a 0, as does foldr, but foldr is much slower.

It also avoids problems such as thunks inside tuples: ((1 + 2) + 3, (10 + 20) + 30) is technically already in WHNF, which defeats foldl'.

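You can recover foldl as flip foldlp (const True) and foldl' as flip foldlp (`seq` True). Doing so appears to recover the performance characteristics of the original, more limited functions.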

As a side note, I think foldlp would be a worthwhile addition to Foldable.

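However, my actual question is this: when I added {-# INLINE foldlp #-}, the function's performance dropped significantly, giving me: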

foldlp mult (/= 0) 1 $ replicate (10 ^ 7) 1
total time = 0.67 secs
total alloc = 800,049,336 bytes

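So my question is: why does this happen? I thought the downside of inlining was code bloat, not a significant loss of run-time performance and increased memory usage.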

+4
1

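According to the GHC documentation, the INLINE pragma prevents some other compiler optimisations, so that rewrite rules can still take effect.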

So my guess is that, by using INLINE, you lose an optimisation that GHC would otherwise apply to make your code faster.

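After some digging in the Core (compiling with -ddump-simpl), I found the optimisation that GHC performs. Here is the Core for foldlp with and without inlining: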

Inline:

-- GHC Core: in this version foldlp is a lambda over all four value
-- arguments (eta_B2, eta1_B1, eta2_X3, eta3_X5).
foldlp =
  \ (@ b_a10N)
    (@ a_a10O)
    (eta_B2 :: b_a10N -> a_a10O -> b_a10N)
    (eta1_B1 :: b_a10N -> Bool)
    (eta2_X3 :: b_a10N)
    (eta3_X5 :: [a_a10O]) ->
    letrec {
      go_s1Ao [Occ=LoopBreaker] :: b_a10N -> [a_a10O] -> b_a10N
      [LclId, Arity=2, Str=DmdType <L,U><S,1*U>]
      go_s1Ao =
        \ (b1_avT :: b_a10N) (ds_d1xQ :: [a_a10O]) ->
        -- The actual definition of go is removed for brevity;
        -- it is the same in both cases.
          }; } in
    -- The local loop is applied to the last two arguments right here.
    go_s1Ao eta2_X3 eta3_X5

Non-inline:

-- GHC Core: in this version foldlp is a lambda over only the two function
-- arguments (f_avQ, p_avR).
foldlp =
  \ (@ b_a10N)
    (@ a_a10O)
    (f_avQ :: b_a10N -> a_a10O -> b_a10N)
    (p_avR :: b_a10N -> Bool) ->
    letrec {
      go_s1Am [Occ=LoopBreaker] :: b_a10N -> [a_a10O] -> b_a10N
      [LclId, Arity=2, Str=DmdType <L,U><S,1*U>]
      go_s1Am =
        \ (b1_avT :: b_a10N) (ds_d1xQ :: [a_a10O]) ->
        -- The actual definition of go is removed for brevity;
        -- it is the same in both cases.
          }; } in
    -- The local loop itself is the result; it is not applied here.
    go_s1Am

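The relevant difference is in the very last line. Without INLINE, the optimiser removes the step of having to call foldlp in order to call go: foldlp simply becomes a function of two arguments that returns a function of two arguments. With INLINE this optimisation is not performed, and the Core looks exactly like the code you wrote.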

I verified this by writing three variants of foldlp:

module Main where

-- | Left fold that continues only while the predicate @p@ holds for the
-- accumulator. The accumulator is returned as soon as the list is exhausted
-- or @p@ rejects it. This variant carries no INLINE pragma.
foldlp :: (b -> a -> b) -> (b -> Bool) -> b -> [a] -> b
foldlp f p = go where
      -- End of input: the accumulator is the result (p is not consulted here).
      go b [] = b
      go b (x : xs)
        | p b = go (f b x) xs  -- predicate holds: fold in x and keep going
        | otherwise = b        -- predicate fails: stop, xs is left untouched

-- Same equations as foldlp; the only difference is the INLINE pragma below.
{-# INLINE foldlpInline #-}
-- | Left fold that continues only while the predicate @p@ holds for the
-- accumulator, returning the accumulator when the list ends or @p@ fails.
foldlpInline :: (b -> a -> b) -> (b -> Bool) -> b -> [a] -> b
foldlpInline f p = go where
      -- End of input: the accumulator is the result (p is not consulted here).
      go b [] = b
      go b (x : xs)
        | p b = go (f b x) xs  -- predicate holds: fold in x and keep going
        | otherwise = b        -- predicate fails: stop, xs is left untouched


-- Hand-specialised fold: the step function 'mult' and the predicate (/= 0)
-- are written directly into the recursion instead of being passed in.
-- NOTE(review): foldlp' calls itself, so GHC presumably has to pick it as a
-- loop breaker and the INLINE pragma may have no effect; confirm with
-- -ddump-simpl.
{-# INLINE foldlp' #-} -- So that the code is not optimized
-- | Multiply the accumulator by each list element, left to right, stopping
-- as soon as the accumulator becomes 0 or the list ends.
foldlp' :: Integer -> [Integer] -> Integer
foldlp' b [] = b
foldlp' b (x : xs)
        | (/= 0) b = foldlp' (mult b x) xs  -- non-zero: multiply and continue
        | otherwise = b                     -- zero: the rest cannot change it

-- | Multiplication that short-circuits on a zero left operand: when the
-- first argument is 0 the second argument is never examined.
mult :: Integer -> Integer -> Integer
mult x y
  | x == 0 = 0
  | otherwise = x * y

-- Benchmark driver: exactly one of the three definitions below is enabled
-- at a time; swap the comment markers to measure another variant.
main :: IO ()
--main = print $ foldlp mult (/= 0) 1 $ replicate (10 ^ 7) 1
--main = print $ foldlpInline mult (/= 0) 1 $ replicate (10 ^ 7) 1
main = print $ foldlp' 1 $ replicate (10 ^ 7) 1

Results:

First variant (normal, not inlined):

./test  0,42s user 0,01s system 96% cpu 0,446 total

Second variant (inlined):

./test  0,83s user 0,02s system 98% cpu 0,862 total

Third variant (what the compiler produces for the non-inlined version):

./test  0,42s user 0,01s system 99% cpu 0,432 total
+4
source

Source: https://habr.com/ru/post/1657462/


All Articles