Production-Ready Error Handling
Exercise Overview
Build a robust error handling system for a microservice that handles payment processing. You'll implement structured errors, retry mechanisms, circuit breakers, and comprehensive error observability.
Learning Objectives
- Implement structured error types with error codes and metadata
- Build retry mechanisms with exponential backoff
- Create circuit breakers for fault tolerance
- Add error observability and monitoring
- Handle error context propagation across service boundaries
Initial Code
1package main
2
3import (
4 "context"
5 "encoding/json"
6 "fmt"
7 "log"
8 "math/rand"
9 "net/http"
10 "time"
11)
12
// TODO: Implement structured error types

// PaymentError is the structured error returned by the payment service.
// In the starter code it is an empty placeholder.
type PaymentError struct {
	// Add error fields
}

// TODO: Implement error codes

// ErrorCode is a machine-readable identifier for a category of payment
// failure.
type ErrorCode string

// Error codes the payment service can report.
const (
	ErrInvalidAmount      ErrorCode = "INVALID_AMOUNT"
	ErrInsufficientFunds  ErrorCode = "INSUFFICIENT_FUNDS"
	ErrCardDeclined       ErrorCode = "CARD_DECLINED"
	ErrServiceUnavailable ErrorCode = "SERVICE_UNAVAILABLE"
)

// TODO: Implement error interface methods

// Error makes *PaymentError satisfy the error interface. It must be a method
// (not a free function) so that &PaymentError{} can be returned as an error.
// The placeholder returns an empty string.
func (e *PaymentError) Error() string {
	return "" // Implement
}

// Code reports the error's ErrorCode. The placeholder returns an empty code.
//
// NOTE: Go rejects a struct that has a field and a method with the same name.
// If PaymentError gains an exported field named Code, rename or remove this
// accessor.
func (e *PaymentError) Code() ErrorCode {
	return "" // Implement
}
36
// TODO: Implement retry mechanism with exponential backoff

// RetryConfig holds the tunables for WithRetry.
type RetryConfig struct {
	MaxRetries int           // number of retries after the first attempt
	BaseDelay  time.Duration // delay before the first retry
	MaxDelay   time.Duration // upper bound for a single delay
}

// WithRetry is meant to call fn, retrying failed calls with exponential
// backoff until it succeeds, the retries are used up, or ctx is done.
//
// The starter version is a placeholder: it does not call fn and always
// returns nil.
func WithRetry(ctx context.Context, config RetryConfig, fn func() error) error {
	// Implement retry logic with exponential backoff
	return nil
}
48
// TODO: Implement circuit breaker

// CircuitState is the state of a CircuitBreaker.
type CircuitState int

// Circuit breaker states. The zero value is StateClosed.
const (
	StateClosed CircuitState = iota
	StateOpen
	StateHalfOpen
)

// CircuitBreaker holds the bookkeeping for the circuit breaker pattern.
type CircuitBreaker struct {
	state       CircuitState
	failures    int
	threshold   int
	timeout     time.Duration
	lastFailure time.Time
}

// Call is meant to run fn under circuit breaker protection. It is a method
// on *CircuitBreaker so it can read and update the breaker's state.
//
// The starter version is a placeholder: it does not call fn and always
// returns nil.
func (cb *CircuitBreaker) Call(fn func() error) error {
	// Implement circuit breaker logic
	return nil
}
70
// TODO: Implement error observability

// ErrorMetrics collects error statistics. In the starter code it is an empty
// placeholder.
type ErrorMetrics struct {
	// Add metrics fields
}

// RecordError is meant to update the metrics for err and log it. It is a
// method on *ErrorMetrics so it can reach the counters it updates.
//
// The starter version is a placeholder that does nothing.
func (em *ErrorMetrics) RecordError(err error) {
	// Implement error recording
}
79
80// Mock payment service that randomly fails
81type PaymentService struct {
82 metrics *ErrorMetrics
83 cb *CircuitBreaker
84}
85
86func ProcessPayment(ctx context.Context, amount float64, cardNumber string) error {
87 // TODO: Add input validation
88 // TODO: Add error context
89 // TODO: Implement retry logic
90 // TODO: Add circuit breaker protection
91 // TODO: Record metrics
92
93 // Simulate payment processing
94 if rand.Float32() < 0.3 { // 30% failure rate
95 return &PaymentError{
96 // Create appropriate error
97 }
98 }
99
100 return nil
101}
102
103func main() {
104 paymentService := &PaymentService{
105 metrics: &ErrorMetrics{},
106 cb: &CircuitBreaker{
107 threshold: 5,
108 timeout: time.Minute,
109 },
110 }
111
112 // TODO: Implement HTTP handler with proper error handling
113 http.HandleFunc("/payment", func(w http.ResponseWriter, r *http.Request) {
114 // Implement payment endpoint with error handling
115 })
116
117 fmt.Println("Payment service starting on :8080")
118 log.Fatal(http.ListenAndServe(":8080", nil))
119}
Tasks
Task 1: Structured Error Types
Implement the PaymentError struct with proper error codes and metadata:
1type PaymentError struct {
2 Code ErrorCode `json:"code"`
3 Message string `json:"message"`
4 Details map[string]interface{} `json:"details,omitempty"`
5 Timestamp time.Time `json:"timestamp"`
6 RequestID string `json:"request_id,omitempty"`
7}
Task 2: Error Interface Methods
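Implement the Error() method and an accessor for the error code, plus the additional helper methods listed below. All of them are methods on *PaymentError. Note that Go does not allow a struct field and a method on the same type to share a name, so if PaymentError keeps the exported Code field from Task 1, the accessor needs a different name (for example ErrorCode()), or callers can simply read the field: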
1func Is(target error) bool
2func Unwrap() error
3func WithDetail(key string, value interface{}) *PaymentError
4func WithRequestID(id string) *PaymentError
Task 3: Retry Mechanism
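Implement exponential backoff with jitter. The snippet below relies on two helpers that you also need to write: calculateBackoff(attempt, config), which returns the delay to wait before the given retry attempt, and isRetryable(err), which reports whether a failed call is worth retrying: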
1func WithRetry(ctx context.Context, config RetryConfig, fn func() error) error {
2 var lastErr error
3
4 for attempt := 0; attempt <= config.MaxRetries; attempt++ {
5 if attempt > 0 {
6 delay := calculateBackoff(attempt, config)
7 select {
8 case <-time.After(delay):
9 case <-ctx.Done():
10 return ctx.Err()
11 }
12 }
13
14 if err := fn(); err == nil {
15 return nil
16 } else {
17 lastErr = err
18 // Check if error is retryable
19 if !isRetryable(err) {
20 return err
21 }
22 }
23 }
24
25 return lastErr
26}
Task 4: Circuit Breaker
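Implement the circuit breaker pattern with state transitions. Call is a method on *CircuitBreaker; besides the fields in the starter code it needs a mutex field (a sync.Mutex named mutex) and two helpers, onFailure and onSuccess, that update the failure count and the state after each call: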
1func Call(fn func() error) error {
2 cb.mutex.Lock()
3 defer cb.mutex.Unlock()
4
5 switch cb.state {
6 case StateOpen:
7 if time.Since(cb.lastFailure) > cb.timeout {
8 cb.state = StateHalfOpen
9 } else {
10 return errors.New("circuit breaker is open")
11 }
12 case StateHalfOpen:
13 // Allow one call through
14 }
15
16 err := fn()
17 if err != nil {
18 cb.onFailure()
19 } else {
20 cb.onSuccess()
21 }
22
23 return err
24}
Task 5: Error Observability
Implement error metrics and structured logging:
1type ErrorMetrics struct {
2 TotalErrors int64 `json:"total_errors"`
3 ErrorsByCode map[ErrorCode]int `json:"errors_by_code"`
4 ErrorsByType map[string]int `json:"errors_by_type"`
5 RetryAttempts int64 `json:"retry_attempts"`
6 CircuitBreakerTrips int64 `json:"circuit_breaker_trips"`
7}
8
9func RecordError(err error) {
10 em.TotalErrors++
11
12 if paymentErr, ok := err.(*PaymentError); ok {
13 em.ErrorsByCode[paymentErr.Code()]++
14 }
15
16 em.ErrorsByType[reflect.TypeOf(err).Name()]++
17
18 // Log structured error
19 logData := map[string]interface{}{
20 "error": err.Error(),
21 "timestamp": time.Now(),
22 "total_errors": em.TotalErrors,
23 }
24
25 jsonData, _ := json.Marshal(logData)
26 log.Printf("ERROR: %s", string(jsonData))
27}
Solution Approach
Click to see detailed solution
Complete Implementation:
1package main
2
3import (
4 "context"
5 "encoding/json"
6 "errors"
7 "fmt"
8 "log"
9 "math"
10 "math/rand"
11 "net/http"
12 "reflect"
13 "sync"
14 "time"
15)
16
// ErrorCode is a stable, machine-readable identifier for a category of
// payment failure. It is serialized as-is in JSON output.
type ErrorCode string

// Error codes the payment service can report.
const (
	ErrInvalidAmount      ErrorCode = "INVALID_AMOUNT"
	ErrInsufficientFunds  ErrorCode = "INSUFFICIENT_FUNDS"
	ErrCardDeclined       ErrorCode = "CARD_DECLINED"
	ErrServiceUnavailable ErrorCode = "SERVICE_UNAVAILABLE"
)

// PaymentError is the structured error returned by the payment service.
// It carries a code, a message, optional metadata and an optional wrapped
// cause; the cause is not part of the JSON representation.
type PaymentError struct {
	Code      ErrorCode              `json:"code"`
	Message   string                 `json:"message"`
	Details   map[string]interface{} `json:"details,omitempty"`
	Timestamp time.Time              `json:"timestamp"`
	RequestID string                 `json:"request_id,omitempty"`
	wrapped   error                  `json:"-"`
}

// Error implements the error interface. The result has the form
// "[CODE] message", followed by ": cause" when a cause is wrapped.
func (e *PaymentError) Error() string {
	if e.wrapped != nil {
		return fmt.Sprintf("[%s] %s: %v", e.Code, e.Message, e.wrapped)
	}
	return fmt.Sprintf("[%s] %s", e.Code, e.Message)
}

// ErrorCode returns the error's code.
//
// The accessor cannot be called Code: Go rejects a type that has both a
// field and a method with the same name, and the exported Code field is
// what the JSON encoding and the struct literals rely on.
func (e *PaymentError) ErrorCode() ErrorCode {
	return e.Code
}

// Is reports whether target is a non-nil *PaymentError with the same code
// as e. It lets errors.Is match payment errors by code.
func (e *PaymentError) Is(target error) bool {
	t, ok := target.(*PaymentError)
	if !ok || t == nil {
		// A typed nil target has no code to compare against.
		return false
	}
	return e.Code == t.Code
}

// Unwrap returns the wrapped cause, or nil if there is none.
func (e *PaymentError) Unwrap() error {
	return e.wrapped
}

// WithDetail stores value under key in e.Details, allocating the map on
// first use, and returns e so calls can be chained. It modifies e in place.
func (e *PaymentError) WithDetail(key string, value interface{}) *PaymentError {
	if e.Details == nil {
		e.Details = make(map[string]interface{})
	}
	e.Details[key] = value
	return e
}

// WithRequestID sets e.RequestID and returns e so calls can be chained.
// It modifies e in place.
func (e *PaymentError) WithRequestID(id string) *PaymentError {
	e.RequestID = id
	return e
}

// NewPaymentError creates a PaymentError with the given code and message,
// stamped with the current time. wrapped is the underlying cause and may be
// nil.
func NewPaymentError(code ErrorCode, message string, wrapped error) *PaymentError {
	return &PaymentError{
		Code:      code,
		Message:   message,
		Timestamp: time.Now(),
		wrapped:   wrapped,
	}
}
78
// RetryConfig holds the tunables for WithRetry.
type RetryConfig struct {
	MaxRetries int           // number of retries after the first attempt
	BaseDelay  time.Duration // delay before the first retry
	MaxDelay   time.Duration // upper bound for a single delay; <= 0 means no bound
	Multiplier float64       // growth factor per retry; <= 0 falls back to 2
	Jitter     float64       // random extra delay, as a fraction of the delay (0.1 = up to +10%)
}

// calculateBackoff returns the delay to wait before retry number attempt
// (1 for the first retry; smaller values are treated as 1).
//
// The delay is BaseDelay * Multiplier^(attempt-1), plus a random share of up
// to Jitter times that value. The result never exceeds MaxDelay when
// MaxDelay is positive and is never negative.
func calculateBackoff(attempt int, config RetryConfig) time.Duration {
	if attempt < 1 {
		attempt = 1
	}

	// A zero-value Multiplier would make every delay after the first retry
	// zero; use doubling instead.
	multiplier := config.Multiplier
	if multiplier <= 0 {
		multiplier = 2
	}

	limit := config.MaxDelay
	if limit <= 0 {
		limit = time.Duration(math.MaxInt64)
	}

	delay := float64(config.BaseDelay) * math.Pow(multiplier, float64(attempt-1))

	// Add jitter
	if config.Jitter > 0 {
		delay += delay * config.Jitter * rand.Float64()
	}

	// Apply the cap after the jitter so MaxDelay is a real upper bound; this
	// also keeps huge or infinite values out of the integer conversion.
	if delay >= float64(limit) {
		return limit
	}
	if !(delay > 0) { // negative or NaN
		return 0
	}
	return time.Duration(delay)
}
99
100func isRetryable(err error) bool {
101 if paymentErr, ok := err.(*PaymentError); ok {
102 switch paymentErr.Code {
103 case ErrServiceUnavailable, ErrCardDeclined:
104 return true
105 case ErrInvalidAmount, ErrInsufficientFunds:
106 return false
107 }
108 }
109 return true
110}
111
112func WithRetry(ctx context.Context, config RetryConfig, fn func() error) error {
113 var lastErr error
114
115 for attempt := 0; attempt <= config.MaxRetries; attempt++ {
116 if attempt > 0 {
117 delay := calculateBackoff(attempt, config)
118 select {
119 case <-time.After(delay):
120 case <-ctx.Done():
121 return ctx.Err()
122 }
123 }
124
125 if err := fn(); err == nil {
126 return nil
127 } else {
128 lastErr = err
129 if !isRetryable(err) {
130 return err
131 }
132 }
133 }
134
135 return lastErr
136}
137
// CircuitState is the state of a CircuitBreaker.
type CircuitState int

// Circuit breaker states. The zero value is StateClosed.
const (
	StateClosed   CircuitState = iota // calls pass through
	StateOpen                         // calls are rejected
	StateHalfOpen                     // one trial call is in flight
)

// ErrCircuitOpen is returned by Call when the breaker rejects a call
// without running it.
var ErrCircuitOpen = errors.New("circuit breaker is open")

// CircuitBreaker stops calling a failing dependency once threshold
// consecutive failures have been seen, and tries again after timeout.
// It is safe for concurrent use and must not be copied after first use.
type CircuitBreaker struct {
	mutex       sync.Mutex
	state       CircuitState
	failures    int
	threshold   int
	timeout     time.Duration
	lastFailure time.Time
}

// Call runs fn under circuit breaker protection and returns fn's error.
//
// If the breaker is open and timeout has not passed since the last failure,
// or a half-open trial call is already in flight, fn is not called and
// ErrCircuitOpen is returned. A panic in fn counts as a failure and is
// re-raised.
//
// The mutex is not held while fn runs, so a slow call does not block other
// callers and fn may itself use the breaker.
func (cb *CircuitBreaker) Call(fn func() error) (err error) {
	if !cb.admit() {
		return ErrCircuitOpen
	}

	finished := false
	defer func() {
		cb.mutex.Lock()
		defer cb.mutex.Unlock()
		if finished && err == nil {
			cb.onSuccess()
		} else {
			cb.onFailure()
		}
	}()

	err = fn()
	finished = true
	return err
}

// admit decides whether a call may run, moving an open breaker to
// half-open once the timeout has passed.
func (cb *CircuitBreaker) admit() bool {
	cb.mutex.Lock()
	defer cb.mutex.Unlock()

	switch cb.state {
	case StateOpen:
		if time.Since(cb.lastFailure) <= cb.timeout {
			return false
		}
		// This caller becomes the single trial call.
		cb.state = StateHalfOpen
		return true
	case StateHalfOpen:
		// The state only stays half-open while the trial call is running.
		return false
	default:
		return true
	}
}

// onFailure records a failed call. The breaker opens when a half-open trial
// fails or when the failure count reaches the threshold. The caller must
// hold cb.mutex.
func (cb *CircuitBreaker) onFailure() {
	cb.failures++
	cb.lastFailure = time.Now()

	if cb.state == StateHalfOpen || cb.failures >= cb.threshold {
		cb.state = StateOpen
	}
}

// onSuccess records a successful call: the failure count is reset and a
// half-open breaker closes immediately. The caller must hold cb.mutex.
func (cb *CircuitBreaker) onSuccess() {
	cb.failures = 0
	if cb.state == StateHalfOpen {
		cb.state = StateClosed
	}
}
200
201type ErrorMetrics struct {
202 mutex sync.Mutex
203 TotalErrors int64 `json:"total_errors"`
204 ErrorsByCode map[ErrorCode]int `json:"errors_by_code"`
205 ErrorsByType map[string]int `json:"errors_by_type"`
206 RetryAttempts int64 `json:"retry_attempts"`
207 CircuitBreakerTrips int64 `json:"circuit_breaker_trips"`
208}
209
210func RecordError(err error) {
211 em.mutex.Lock()
212 defer em.mutex.Unlock()
213
214 em.TotalErrors++
215
216 if paymentErr, ok := err.(*PaymentError); ok {
217 if em.ErrorsByCode == nil {
218 em.ErrorsByCode = make(map[ErrorCode]int)
219 }
220 em.ErrorsByCode[paymentErr.Code()]++
221 }
222
223 errorType := reflect.TypeOf(err).Name()
224 if em.ErrorsByType == nil {
225 em.ErrorsByType = make(map[string]int)
226 }
227 em.ErrorsByType[errorType]++
228
229 // Log structured error
230 logData := map[string]interface{}{
231 "error": err.Error(),
232 "timestamp": time.Now(),
233 "total_errors": em.TotalErrors,
234 "error_type": errorType,
235 }
236
237 if paymentErr, ok := err.(*PaymentError); ok {
238 logData["error_code"] = paymentErr.Code()
239 logData["request_id"] = paymentErr.RequestID
240 if paymentErr.Details != nil {
241 logData["details"] = paymentErr.Details
242 }
243 }
244
245 jsonData, _ := json.Marshal(logData)
246 log.Printf("ERROR: %s", string(jsonData))
247}
248
249func RecordRetryAttempt() {
250 em.mutex.Lock()
251 defer em.mutex.Unlock()
252 em.RetryAttempts++
253}
254
255func RecordCircuitBreakerTrip() {
256 em.mutex.Lock()
257 defer em.mutex.Unlock()
258 em.CircuitBreakerTrips++
259}
260
261type PaymentService struct {
262 metrics *ErrorMetrics
263 cb *CircuitBreaker
264}
265
266func ProcessPayment(ctx context.Context, amount float64, cardNumber string) error {
267 // Input validation
268 if amount <= 0 {
269 err := NewPaymentError(ErrInvalidAmount,
270 fmt.Sprintf("Invalid payment amount: %.2f", amount), nil)
271 ps.metrics.RecordError(err)
272 return err
273 }
274
275 if len(cardNumber) < 13 || len(cardNumber) > 19 {
276 err := NewPaymentError(ErrCardDeclined,
277 "Invalid card number", nil)
278 ps.metrics.RecordError(err)
279 return err
280 }
281
282 // Create error context
283 requestID := fmt.Sprintf("req_%d", time.Now().UnixNano())
284 ctx = context.WithValue(ctx, "request_id", requestID)
285
286 // Process payment with retry
287 retryConfig := RetryConfig{
288 MaxRetries: 3,
289 BaseDelay: 100 * time.Millisecond,
290 MaxDelay: 5 * time.Second,
291 Multiplier: 2.0,
292 Jitter: 0.1,
293 }
294
295 return WithRetry(ctx, retryConfig, func() error {
296 return ps.cb.Call(func() error {
297 return ps.processPaymentInternal(ctx, amount, cardNumber, requestID)
298 })
299 })
300}
301
302func processPaymentInternal(ctx context.Context, amount float64, cardNumber string, requestID string) error {
303 // Simulate payment processing with random failures
304 if rand.Float32() < 0.3 { // 30% failure rate
305 var errorCode ErrorCode
306 var message string
307
308 randVal := rand.Float32()
309 switch {
310 case randVal < 0.2:
311 errorCode = ErrInsufficientFunds
312 message = "Insufficient funds"
313 case randVal < 0.5:
314 errorCode = ErrCardDeclined
315 message = "Card declined by issuer"
316 default:
317 errorCode = ErrServiceUnavailable
318 message = "Payment service temporarily unavailable"
319 }
320
321 err := NewPaymentError(errorCode, message, nil).
322 WithRequestID(requestID).
323 WithDetail("amount", amount).
324 WithDetail("card_last4", cardNumber[len(cardNumber)-4:])
325
326 ps.metrics.RecordError(err)
327 return err
328 }
329
330 log.Printf("Payment processed successfully: amount=%.2f, request_id=%s", amount, requestID)
331 return nil
332}
333
334func main() {
335 paymentService := &PaymentService{
336 metrics: &ErrorMetrics{},
337 cb: &CircuitBreaker{
338 threshold: 5,
339 timeout: time.Minute,
340 },
341 }
342
343 http.HandleFunc("/payment", func(w http.ResponseWriter, r *http.Request) {
344 ctx := r.Context()
345
346 // Parse request
347 type PaymentRequest struct {
348 Amount float64 `json:"amount"`
349 CardNumber string `json:"card_number"`
350 }
351
352 var req PaymentRequest
353 if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
354 http.Error(w, "Invalid request body", http.StatusBadRequest)
355 return
356 }
357
358 // Process payment
359 err := paymentService.ProcessPayment(ctx, req.Amount, req.CardNumber)
360 if err != nil {
361 if paymentErr, ok := err.(*PaymentError); ok {
362 w.Header().Set("Content-Type", "application/json")
363 w.WriteHeader(http.StatusBadRequest)
364 json.NewEncoder(w).Encode(paymentErr)
365 } else {
366 http.Error(w, "Internal server error", http.StatusInternalServerError)
367 }
368 return
369 }
370
371 w.Header().Set("Content-Type", "application/json")
372 json.NewEncoder(w).Encode(map[string]string{
373 "status": "success",
374 "message": "Payment processed successfully",
375 })
376 })
377
378 fmt.Println("Payment service starting on :8080")
379 log.Fatal(http.ListenAndServe(":8080", nil))
380}
Testing Your Solution
Test your implementation with these scenarios:
# Test successful payment
curl -X POST http://localhost:8080/payment \
  -H "Content-Type: application/json" \
  -d '{"amount": 100.00, "card_number": "4111111111111111"}'

# Test invalid amount
curl -X POST http://localhost:8080/payment \
  -H "Content-Type: application/json" \
  -d '{"amount": -50.00, "card_number": "4111111111111111"}'

# Test invalid card
curl -X POST http://localhost:8080/payment \
  -H "Content-Type: application/json" \
  -d '{"amount": 100.00, "card_number": "123"}'
Verify that:
- Errors are properly structured with codes and metadata
- Retry mechanism works for retryable errors
- Circuit breaker opens after threshold failures
- Error metrics are tracked and logged
- Request IDs are propagated through error context
Extension Challenges
- Add distributed tracing - Implement OpenTelemetry integration
- Implement error rate limiting - Throttle errors during outages
- Add error aggregation - Group similar errors for analysis
- Implement graceful degradation - Fallback behaviors for different error types
- Add error recovery patterns - Automatic recovery procedures
Key Takeaways
- Structured errors provide better debugging and monitoring
- Retry with backoff improves resilience against transient failures
- Circuit breakers prevent cascading failures
- Error observability is crucial for production systems
- Context propagation helps track errors across service boundaries