Exercise: Regular Expression Processor
Difficulty - Intermediate
Learning Objectives
- Master the regexp package for pattern matching
- Learn to compile and cache regular expressions
- Practice named capture groups
- Implement common validation patterns
- Build text processing utilities
Problem Statement
Create a regexutil package with common regular expression utilities and validators.
Function Signatures
1package regexutil
2
3// Validator functions
4func ValidateEmail(email string) bool
5func ValidatePhone(phone string) bool
6func ValidateURL(url string) bool
7func ValidateIPv4(ip string) bool
8func ValidateHexColor(color string) bool
9func ValidatePassword(password string) (bool, []string)
10
11// Extraction functions
12func ExtractEmails(text string) []string
13func ExtractURLs(text string) []string
14func ExtractPhoneNumbers(text string) []string
15func ExtractHashtags(text string) []string
16func ExtractMentions(text string) []string
17
18// Replacement functions
19func MaskCreditCard(text string) string
20func MaskEmail(text string) string
21func RedactSSN(text string) string
22
23// Parsing functions
24func ParseLogEntry(entry string) (map[string]string, error)
25func ParseKeyValue(text string) map[string]string
Solution
Click to see the complete solution
1package regexutil
2
import (
	"fmt"
	"regexp"
	"strings"
)
7
// Validation patterns. Each one is anchored with ^ and $, so the entire input
// must match.
var (
	emailRe    = regexp.MustCompile(`^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$`)
	phoneRe    = regexp.MustCompile(`^\+?1?\d{10,14}$`)
	urlRe      = regexp.MustCompile(`^https?://[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}(/.*)?$`)
	ipv4Re     = regexp.MustCompile(`^(\d{1,3}\.){3}\d{1,3}$`)
	hexColorRe = regexp.MustCompile(`^#[0-9A-Fa-f]{6}$`)
)

// Extraction patterns. These are unanchored so they can locate matches
// anywhere inside a larger text.
var (
	emailExtractRe = regexp.MustCompile(`[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}`)
	urlExtractRe   = regexp.MustCompile(`https?://[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}[^\s]*`)
	phoneExtractRe = regexp.MustCompile(`\+?1?\d{10,14}`)
	hashtagRe      = regexp.MustCompile(`#\w+`)
	mentionRe      = regexp.MustCompile(`@\w+`)
)

// Masking patterns. Both use \b word boundaries so that digit runs embedded
// in longer tokens are left alone.
var (
	creditCardRe = regexp.MustCompile(`\b\d{4}[\s-]?\d{4}[\s-]?\d{4}[\s-]?\d{4}\b`)
	ssnRe        = regexp.MustCompile(`\b\d{3}-\d{2}-\d{4}\b`)
)

// Parsing patterns.
//
// NOTE(review): logEntryRe captures only a "timestamp" group and requires the
// entry to end immediately after "]:". That looks truncated (level/message
// groups may be missing) - confirm against the intended log format.
var (
	logEntryRe = regexp.MustCompile(`^\[(?P<timestamp>[\d\-: ]+)\]:$`)
	keyValueRe = regexp.MustCompile(`(\w+)=([^\s]+)`)
)
31
32// ValidateEmail checks if email is valid
33func ValidateEmail(email string) bool {
34 return emailRe.MatchString(email)
35}
36
37// ValidatePhone checks if phone number is valid
38func ValidatePhone(phone string) bool {
39 cleaned := strings.ReplaceAll(strings.ReplaceAll(phone, "-", ""), " ", "")
40 return phoneRe.MatchString(cleaned)
41}
42
43// ValidateURL checks if URL is valid
44func ValidateURL(url string) bool {
45 return urlRe.MatchString(url)
46}
47
48// ValidateIPv4 checks if IP address is valid
49func ValidateIPv4(ip string) bool {
50 if !ipv4Re.MatchString(ip) {
51 return false
52 }
53 parts := strings.Split(ip, ".")
54 for _, part := range parts {
55 var num int
56 if _, err := fmt.Sscanf(part, "%d", &num); err != nil || num < 0 || num > 255 {
57 return false
58 }
59 }
60 return true
61}
62
63// ValidateHexColor checks if hex color is valid
64func ValidateHexColor(color string) bool {
65 return hexColorRe.MatchString(color)
66}
67
// passwordRules lists the character-class requirements checked by
// ValidatePassword, in reporting order. The patterns are compiled once at
// package initialisation instead of on every call.
var passwordRules = []struct {
	re      *regexp.Regexp
	message string
}{
	{regexp.MustCompile(`[A-Z]`), "Password must contain uppercase letter"},
	{regexp.MustCompile(`[a-z]`), "Password must contain lowercase letter"},
	{regexp.MustCompile(`\d`), "Password must contain digit"},
	{regexp.MustCompile(`[!@#$%^&*(),.?":{}|<>]`), "Password must contain special character"},
}

// ValidatePassword checks password strength.
//
// A password passes when it is at least 8 characters long and contains an
// uppercase letter, a lowercase letter, a digit and one of the special
// characters !@#$%^&*(),.?":{}|<>.
//
// It returns true and a nil slice when every rule is satisfied. Otherwise it
// returns false and one message per violated rule, in the order length,
// uppercase, lowercase, digit, special.
func ValidatePassword(password string) (bool, []string) {
	var problems []string

	// Count runes rather than bytes so that multi-byte characters are not
	// counted more than once towards the minimum length.
	length := 0
	for range password {
		length++
	}
	if length < 8 {
		problems = append(problems, "Password must be at least 8 characters")
	}

	for _, rule := range passwordRules {
		if !rule.re.MatchString(password) {
			problems = append(problems, rule.message)
		}
	}

	return len(problems) == 0, problems
}
98
99// ExtractEmails finds all emails in text
100func ExtractEmails(text string) []string {
101 return emailExtractRe.FindAllString(text, -1)
102}
103
104// ExtractURLs finds all URLs in text
105func ExtractURLs(text string) []string {
106 return urlExtractRe.FindAllString(text, -1)
107}
108
109// ExtractPhoneNumbers finds all phone numbers in text
110func ExtractPhoneNumbers(text string) []string {
111 return phoneExtractRe.FindAllString(text, -1)
112}
113
114// ExtractHashtags finds all hashtags in text
115func ExtractHashtags(text string) []string {
116 return hashtagRe.FindAllString(text, -1)
117}
118
119// ExtractMentions finds all @mentions in text
120func ExtractMentions(text string) []string {
121 return mentionRe.FindAllString(text, -1)
122}
123
124// MaskCreditCard masks credit card numbers
125func MaskCreditCard(text string) string {
126 return creditCardRe.ReplaceAllStringFunc(text, func(cc string) string {
127 cleaned := strings.ReplaceAll(strings.ReplaceAll(cc, " ", ""), "-", "")
128 if len(cleaned) >= 16 {
129 return "****-****-****-" + cleaned[len(cleaned)-4:]
130 }
131 return cc
132 })
133}
134
135// MaskEmail masks email addresses
136func MaskEmail(text string) string {
137 return emailExtractRe.ReplaceAllStringFunc(text, func(email string) string {
138 parts := strings.Split(email, "@")
139 if len(parts) != 2 {
140 return email
141 }
142 username := parts[0]
143 if len(username) > 2 {
144 username = username[:2] + "***"
145 }
146 return username + "@" + parts[1]
147 })
148}
149
150// RedactSSN redacts social security numbers
151func RedactSSN(text string) string {
152 return ssnRe.ReplaceAllString(text, "***-**-****")
153}
154
155// ParseLogEntry parses a log entry into components
156func ParseLogEntry(entry string) {
157 matches := logEntryRe.FindStringSubmatch(entry)
158 if matches == nil {
159 return nil, fmt.Errorf("invalid log format")
160 }
161
162 result := make(map[string]string)
163 names := logEntryRe.SubexpNames()
164 for i, name := range names {
165 if i != 0 && name != "" {
166 result[name] = matches[i]
167 }
168 }
169 return result, nil
170}
171
172// ParseKeyValue parses key=value pairs from text
173func ParseKeyValue(text string) map[string]string {
174 result := make(map[string]string)
175 matches := keyValueRe.FindAllStringSubmatch(text, -1)
176 for _, match := range matches {
177 if len(match) == 3 {
178 result[match[1]] = match[2]
179 }
180 }
181 return result
182}
Key Takeaways
- Compile Once: Use package-level variables for frequently used patterns
- Named Groups: Use `(?P<name>...)` for structured extraction
- Raw Strings: Use backticks for regex patterns to avoid escaping
- Validation: Regex is great for format validation
- Performance: Reusing a compiled regex is much faster than recompiling the pattern on every call
Related Topics
- Regular Expressions - Main regexp tutorial
- Text Processing - Text manipulation
- String Operations - String basics