Exercise: Compression Utility
Difficulty - Intermediate
Learning Objectives
- Master gzip compression and decompression
- Work with tar archives
- Practice efficient IO with readers/writers
- Implement archive creation and extraction
- Handle compressed file formats
Problem Statement
Create a compress package for file compression operations.
Implementation
1package compress
2
3import (
4 "archive/tar"
5 "compress/gzip"
6 "fmt"
7 "io"
8 "os"
9 "path/filepath"
10)
11
// GzipFile compresses the file at src into a gzip file at dst.
//
// dst is created, or truncated if it already exists. The data is streamed
// with io.Copy, so the source file is never held in memory as a whole.
//
// The returned error covers opening src, creating dst, copying, and closing
// both the gzip writer and dst. Closing the gzip writer is what flushes the
// remaining compressed data and writes the gzip footer, so a failure there
// means dst is incomplete and must be reported.
func GzipFile(src, dst string) (err error) {
	srcFile, err := os.Open(src)
	if err != nil {
		return err
	}
	defer srcFile.Close()

	dstFile, err := os.Create(dst)
	if err != nil {
		return err
	}
	// Report a failed Close of the destination unless an earlier error
	// is already being returned.
	defer func() {
		if cerr := dstFile.Close(); err == nil {
			err = cerr
		}
	}()

	gzipWriter := gzip.NewWriter(dstFile)
	if _, err = io.Copy(gzipWriter, srcFile); err != nil {
		_ = gzipWriter.Close() // best effort; the copy error is the one reported
		return err
	}
	// Close explicitly rather than with defer so the flush error is returned.
	return gzipWriter.Close()
}
32
// GunzipFile decompresses the gzip file at src into dst.
//
// The gzip header of src is validated before dst is created, so an input
// that is not gzip returns an error without touching dst. Once the header
// is accepted, dst is created (or truncated if it already exists) and the
// decompressed data is streamed into it with io.Copy.
//
// The returned error covers opening src, reading the gzip stream, creating
// dst, copying, and closing dst. The size of the decompressed output is not
// limited, so callers handling untrusted archives should enforce their own
// limit.
func GunzipFile(src, dst string) (err error) {
	srcFile, err := os.Open(src)
	if err != nil {
		return err
	}
	defer srcFile.Close()

	gzipReader, err := gzip.NewReader(srcFile)
	if err != nil {
		return err
	}
	defer gzipReader.Close()

	dstFile, err := os.Create(dst)
	if err != nil {
		return err
	}
	// Report a failed Close of the destination unless an earlier error
	// is already being returned.
	defer func() {
		if cerr := dstFile.Close(); err == nil {
			err = cerr
		}
	}()

	_, err = io.Copy(dstFile, gzipReader)
	return err
}
56
// CreateTarGz writes a gzip-compressed tar archive of srcDir to dstFile.
//
// Every entry found by filepath.Walk is stored under its path relative to
// srcDir, using forward slashes as the tar format expects; srcDir itself is
// stored as ".". Metadata comes from tar.FileInfoHeader. Only regular files
// have their contents copied. Symbolic links are stored as links (with the
// target read via os.Readlink) rather than followed.
//
// If dstFile lies inside srcDir, the archive file itself is skipped so that
// it does not try to include its own partially written contents.
//
// The returned error covers creating dstFile, walking srcDir, writing any
// entry, and closing the tar writer, gzip writer and output file. The tar
// and gzip Close calls write the archive trailer and flush compressed data,
// so their errors indicate a truncated archive.
func CreateTarGz(srcDir, dstFile string) (err error) {
	outFile, err := os.Create(dstFile)
	if err != nil {
		return err
	}
	// Report a failed Close of the output unless an earlier error is
	// already being returned.
	defer func() {
		if cerr := outFile.Close(); err == nil {
			err = cerr
		}
	}()

	// Used to recognise the archive itself while walking srcDir.
	outInfo, err := outFile.Stat()
	if err != nil {
		return err
	}

	gzipWriter := gzip.NewWriter(outFile)
	tarWriter := tar.NewWriter(gzipWriter)

	err = filepath.Walk(srcDir, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			return err
		}
		if os.SameFile(info, outInfo) {
			return nil
		}

		// filepath.Walk does not follow symlinks, so info describes the
		// link itself; the header needs the link target.
		var link string
		if info.Mode()&os.ModeSymlink != 0 {
			if link, err = os.Readlink(path); err != nil {
				return err
			}
		}

		header, err := tar.FileInfoHeader(info, link)
		if err != nil {
			return err
		}

		// FileInfoHeader only fills in the base name; store the path
		// relative to srcDir to keep the directory structure.
		relPath, err := filepath.Rel(srcDir, path)
		if err != nil {
			return err
		}
		header.Name = filepath.ToSlash(relPath)

		if err := tarWriter.WriteHeader(header); err != nil {
			return err
		}

		// Directories, symlinks and other special files have no body.
		if !info.Mode().IsRegular() {
			return nil
		}

		file, err := os.Open(path)
		if err != nil {
			return err
		}
		defer file.Close()

		_, err = io.Copy(tarWriter, file)
		return err
	})
	if err != nil {
		return err
	}

	// Close innermost first: tar writes its trailer into gzip, then gzip
	// flushes into the file.
	if err := tarWriter.Close(); err != nil {
		return err
	}
	return gzipWriter.Close()
}
106
// ExtractTarGz extracts the gzip-compressed tar archive srcFile into dstDir.
//
// Directory entries are created with mode 0755. Regular-file entries are
// written with the permission bits recorded in the archive, and any missing
// parent directories are created first, so archives without explicit
// directory entries still extract. Entries of any other type (symlinks,
// devices, ...) are skipped.
//
// Every entry name is checked against dstDir before anything is written. An
// entry whose path would resolve outside dstDir (for example "../x") aborts
// the extraction with an error. The total size of the extracted data is not
// limited.
//
// The returned error covers opening and reading the archive, the path check,
// and creating, writing and closing each output file.
func ExtractTarGz(srcFile, dstDir string) error {
	file, err := os.Open(srcFile)
	if err != nil {
		return err
	}
	defer file.Close()

	gzipReader, err := gzip.NewReader(file)
	if err != nil {
		return err
	}
	defer gzipReader.Close()

	tarReader := tar.NewReader(gzipReader)

	for {
		header, err := tarReader.Next()
		if err == io.EOF {
			break
		}
		if err != nil {
			return err
		}

		target := filepath.Join(dstDir, header.Name)

		// Reject entries that escape dstDir. filepath.Join has already
		// cleaned the path, so an escaping target is exactly one whose
		// path relative to dstDir is ".." or starts with "../".
		rel, err := filepath.Rel(dstDir, target)
		if err != nil || rel == ".." ||
			(len(rel) > 2 && rel[:3] == ".."+string(filepath.Separator)) {
			return fmt.Errorf("tar entry %q escapes destination directory", header.Name)
		}

		switch header.Typeflag {
		case tar.TypeDir:
			if err := os.MkdirAll(target, 0755); err != nil {
				return err
			}
		case tar.TypeReg:
			perm := header.FileInfo().Mode().Perm()
			if err := extractRegularFile(target, perm, tarReader); err != nil {
				return err
			}
		}
	}

	return nil
}

// extractRegularFile copies r into a file at target created with perm,
// creating missing parent directories and reporting a failed Close.
func extractRegularFile(target string, perm os.FileMode, r io.Reader) (err error) {
	if err := os.MkdirAll(filepath.Dir(target), 0755); err != nil {
		return err
	}

	outFile, err := os.OpenFile(target, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, perm)
	if err != nil {
		return err
	}
	defer func() {
		if cerr := outFile.Close(); err == nil {
			err = cerr
		}
	}()

	_, err = io.Copy(outFile, r)
	return err
}
Solution
Click to see the complete solution
The implementation demonstrates the core building blocks of a file compression utility; harden it as described under Best Practices before using it on untrusted input. Key concepts:
Core Operations
1. Gzip Compression:
- Streaming compression using compress/gzip
- Memory-efficient with io.Copy
- Proper resource cleanup with defer
2. Tar Archiving:
- Combines multiple files into single archive
- Preserves file metadata
- Directory structure maintained
3. Combined Tar.gz:
- Industry-standard format
- Efficient for distributing directories
- Used by Docker, package managers, etc.
Best Practices
1. Always Close Resources:
1defer srcFile.Close()
2defer dstFile.Close()
3defer gzipWriter.Close() // Flush compressed data
2. Error Handling:
1// Check errors at each step
2if err != nil {
3 return fmt.Errorf("failed to create archive: %w", err)
4}
3. Preserve Metadata:
1header, err := tar.FileInfoHeader(info, "")
2header.Name = relPath // Maintain directory structure
4. Security Considerations:
1// Prevent path traversal attacks when extracting
2target := filepath.Join(dstDir, header.Name)
3if !strings.HasPrefix(target, filepath.Clean(dstDir)) {
4 return fmt.Errorf("invalid path: %s", header.Name)
5}
Additional Features
Progress Tracking:
// ProgressReader wraps an io.Reader and reports how many bytes have been
// read through it so far.
type ProgressReader struct {
	io.Reader
	// Total is passed unchanged as the second argument of Callback; it is
	// not updated by Read.
	Total int64
	// Current is the number of bytes read through this reader so far.
	Current int64
	// Callback, if non-nil, is called after every Read with Current and
	// Total.
	Callback func(int64, int64)
}

// Read reads from the wrapped Reader, adds the number of bytes read to
// Current, and then invokes Callback if one is set. It returns the wrapped
// reader's result unchanged, so ProgressReader satisfies io.Reader.
func (pr *ProgressReader) Read(p []byte) (int, error) {
	n, err := pr.Reader.Read(p)
	pr.Current += int64(n)
	if pr.Callback != nil {
		pr.Callback(pr.Current, pr.Total)
	}
	return n, err
}
Compression Level:
1gzipWriter, err := gzip.NewWriterLevel(outFile, gzip.BestCompression)
2// Options: NoCompression, BestSpeed, BestCompression, DefaultCompression
Key Takeaways
- Streaming IO: Use io.Copy for efficient data transfer
- Layered Compression: Combine tar with gzip
- Resource Management: Always close readers/writers, and check the error returned when closing a writer, since that is when buffered data is flushed
- File Permissions: Preserve permissions when archiving
- Error Handling: Check errors at every IO operation
- Security: Validate paths when extracting to prevent attacks
Related Topics
- Compression - Main compression tutorial
- File I/O - File operations