Go ZIP Compression

I am trying to create a zip archive from a large number of small and medium-sized files. I want to be able to do this concurrently, since compression is CPU-intensive and I am running on a multi-core server. Also, I do not want to hold the entire archive in memory, since it can be large.

My question is: do I need to compress each file myself and then manually combine everything together with the zip headers, checksums, etc.?

Any help would be greatly appreciated.

+4
source share
2 answers

I don't think you can combine zip headers.

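What you could do is run the zip.Writer sequentially in its own goroutine, then start a separate goroutine for each file you want to read, and send the opened files to the goroutine that is zipping them.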

IO, , , , , .

. , , ,

  • , , - ,
  • defer, , .

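Since defer is LIFO, the order in which deferred calls run can be confusing when several of them are stacked together.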

package main

import (
    "archive/zip"
    "io"
    "os"
    "sync"
)

// ZipWriter creates "out.zip" in the current working directory and starts a
// single goroutine that appends every file received on files to the archive.
//
// Entries are written strictly one after another: the io.Writer returned by
// zip.Writer.Create is only valid until the next Create or Close call, so the
// archive itself cannot be written concurrently. Each received file is stored
// under its Name() exactly as given and is closed once it has been copied.
//
// When files is closed the goroutine finalizes the archive, closes the output
// file and marks the returned WaitGroup as done. Callers must Wait on it
// before exiting, otherwise the archive may be incomplete.
//
// Every error is treated as fatal and panics, like the rest of this example.
func ZipWriter(files chan *os.File) *sync.WaitGroup {
    out, err := os.Create("out.zip")
    if err != nil {
        panic(err)
    }
    var wg sync.WaitGroup
    wg.Add(1)
    zw := zip.NewWriter(out)
    go func() {
        // Signal completion only after the archive and the output file have
        // both been closed below.
        defer wg.Done()

        // Loop until the channel is closed.
        for in := range files {
            entry, err := zw.Create(in.Name())
            if err != nil {
                panic(err)
            }
            // A failed copy would leave a truncated entry in the archive, so
            // it must not be ignored.
            if _, err := io.Copy(entry, in); err != nil {
                panic(err)
            }
            if err := in.Close(); err != nil {
                panic(err)
            }
        }

        // The zip writer must be closed *before* the output file: Close
        // writes the central directory to it.
        if err := zw.Close(); err != nil {
            panic(err)
        }
        // A Close error on a freshly written file can mean the data never
        // reached the disk, so it is checked rather than dropped by a defer.
        if err := out.Close(); err != nil {
            panic(err)
        }
    }()
    return &wg
}

// main zips every path given on the command line into out.zip.
//
// Files are opened by a small, fixed pool of goroutines and handed to the
// single ZipWriter goroutine over an unbuffered channel. Bounding the pool
// matters: an opener blocks on the send until the writer is ready, so one
// goroutine per argument would keep every input file open at the same time
// and could exhaust the process's file descriptors on large inputs.
func main() {
    files := make(chan *os.File)
    wait := ZipWriter(files)

    // Upper bound on the number of input files opened ahead of the writer.
    const maxOpeners = 8

    names := make(chan string)
    var wg sync.WaitGroup
    wg.Add(maxOpeners)
    for i := 0; i < maxOpeners; i++ {
        // Open files in parallel and send them to the zip writer.
        go func() {
            defer wg.Done()
            for name := range names {
                f, err := os.Open(name)
                if err != nil {
                    panic(err)
                }
                files <- f
            }
        }()
    }

    // os.Args[0] is the program name; everything after it is a file to add.
    for _, name := range os.Args[1:] {
        names <- name
    }
    close(names)

    wg.Wait()
    // Once all files have been sent, we can close the channel.
    close(files)
    // This causes ZipWriter to leave its loop, finalize the archive and close
    // the output file, after which its WaitGroup is released:
    wait.Wait()
}

Usage: go run example.go /path/to/*.log.

, :

  • .
  • zip.Writer .
  • goroutine, .
  • , .
  • goroutine, 3.
  • goroutine , .
  • goroutine, .
  • , zip ( ).
  • , zip ( ), zip-writer .
  • zip-, .
  • , , sync.WaitGroup, , . ( , sync.WaitGroup .)
  • zip-, , main .

, zip- " " - . , . - .

+3

archive/zip, :

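  • Entries are written through the io.Writer returned by zip.Writer.Create or CreateHeader.
  • Each call to Create/CreateHeader finishes the previous entry, so only one entry can be written at a time.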

, , Create, goroutines .

, , , :

  • goroutines compress/flate CRC32 . . .
  • , Zip , .
  • , .
  • . .

parallelism 1 , , , .

- .

+2

Source: https://habr.com/ru/post/1536056/


All Articles