Regular Expression Processor

Exercise: Regular Expression Processor

Difficulty - Intermediate

Learning Objectives

  • Master the regexp package for pattern matching
  • Learn to compile and cache regular expressions
  • Practice named capture groups
  • Implement common validation patterns
  • Build text processing utilities

Problem Statement

Create a regexutil package with common regular expression utilities and validators.

Function Signatures

 1package regexutil
 2
 3// Validator functions
 4func ValidateEmail(email string) bool
 5func ValidatePhone(phone string) bool
 6func ValidateURL(url string) bool
 7func ValidateIPv4(ip string) bool
 8func ValidateHexColor(color string) bool
 9func ValidatePassword(password string) (bool, []string)
10
11// Extraction functions
12func ExtractEmails(text string) []string
13func ExtractURLs(text string) []string
14func ExtractPhoneNumbers(text string) []string
15func ExtractHashtags(text string) []string
16func ExtractMentions(text string) []string
17
18// Replacement functions
19func MaskCreditCard(text string) string
20func MaskEmail(text string) string
21func RedactSSN(text string) string
22
23// Parsing functions
24func ParseLogEntry(entry string) (map[string]string, error)
25func ParseKeyValue(text string) map[string]string

Solution

Click to see the complete solution
  1package regexutil
  2
import (
	"fmt"
	"regexp"
	"strings"
)
  7
  8// Compiled regex patterns
  9var (
 10	emailRe    = regexp.MustCompile(`^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$`)
 11	phoneRe    = regexp.MustCompile(`^\+?1?\d{10,14}$`)
 12	urlRe      = regexp.MustCompile(`^https?://[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}(/.*)?$`)
 13	ipv4Re     = regexp.MustCompile(`^(\d{1,3}\.){3}\d{1,3}$`)
 14	hexColorRe = regexp.MustCompile(`^#[0-9A-Fa-f]{6}$`)
 15
 16	// Extraction patterns
 17	emailExtractRe       = regexp.MustCompile(`[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}`)
 18	urlExtractRe         = regexp.MustCompile(`https?://[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}[^\s]*`)
 19	phoneExtractRe       = regexp.MustCompile(`\+?1?\d{10,14}`)
 20	hashtagRe            = regexp.MustCompile(`#\w+`)
 21	mentionRe            = regexp.MustCompile(`@\w+`)
 22
 23	// Masking patterns
 24	creditCardRe = regexp.MustCompile(`\b\d{4}[\s-]?\d{4}[\s-]?\d{4}[\s-]?\d{4}\b`)
 25	ssnRe        = regexp.MustCompile(`\b\d{3}-\d{2}-\d{4}\b`)
 26
 27	// Parsing patterns
 28	logEntryRe = regexp.MustCompile(`^\[(?P<timestamp>[\d\-: ]+)\]:$`)
 29	keyValueRe = regexp.MustCompile(`(\w+)=([^\s]+)`)
 30)
 31
 32// ValidateEmail checks if email is valid
 33func ValidateEmail(email string) bool {
 34	return emailRe.MatchString(email)
 35}
 36
 37// ValidatePhone checks if phone number is valid
 38func ValidatePhone(phone string) bool {
 39	cleaned := strings.ReplaceAll(strings.ReplaceAll(phone, "-", ""), " ", "")
 40	return phoneRe.MatchString(cleaned)
 41}
 42
 43// ValidateURL checks if URL is valid
 44func ValidateURL(url string) bool {
 45	return urlRe.MatchString(url)
 46}
 47
 48// ValidateIPv4 checks if IP address is valid
 49func ValidateIPv4(ip string) bool {
 50	if !ipv4Re.MatchString(ip) {
 51		return false
 52	}
 53	parts := strings.Split(ip, ".")
 54	for _, part := range parts {
 55		var num int
 56		if _, err := fmt.Sscanf(part, "%d", &num); err != nil || num < 0 || num > 255 {
 57			return false
 58		}
 59	}
 60	return true
 61}
 62
 63// ValidateHexColor checks if hex color is valid
 64func ValidateHexColor(color string) bool {
 65	return hexColorRe.MatchString(color)
 66}
 67
 68// ValidatePassword validates password strength
 69func ValidatePassword(password string) {
 70	var errors []string
 71
 72	if len(password) < 8 {
 73		errors = append(errors, "Password must be at least 8 characters")
 74	}
 75
 76	hasUpper := regexp.MustCompile(`[A-Z]`).MatchString(password)
 77	if !hasUpper {
 78		errors = append(errors, "Password must contain uppercase letter")
 79	}
 80
 81	hasLower := regexp.MustCompile(`[a-z]`).MatchString(password)
 82	if !hasLower {
 83		errors = append(errors, "Password must contain lowercase letter")
 84	}
 85
 86	hasDigit := regexp.MustCompile(`\d`).MatchString(password)
 87	if !hasDigit {
 88		errors = append(errors, "Password must contain digit")
 89	}
 90
 91	hasSpecial := regexp.MustCompile(`[!@#$%^&*(),.?":{}|<>]`).MatchString(password)
 92	if !hasSpecial {
 93		errors = append(errors, "Password must contain special character")
 94	}
 95
 96	return len(errors) == 0, errors
 97}
 98
 99// ExtractEmails finds all emails in text
100func ExtractEmails(text string) []string {
101	return emailExtractRe.FindAllString(text, -1)
102}
103
104// ExtractURLs finds all URLs in text
105func ExtractURLs(text string) []string {
106	return urlExtractRe.FindAllString(text, -1)
107}
108
109// ExtractPhoneNumbers finds all phone numbers in text
110func ExtractPhoneNumbers(text string) []string {
111	return phoneExtractRe.FindAllString(text, -1)
112}
113
114// ExtractHashtags finds all hashtags in text
115func ExtractHashtags(text string) []string {
116	return hashtagRe.FindAllString(text, -1)
117}
118
119// ExtractMentions finds all @mentions in text
120func ExtractMentions(text string) []string {
121	return mentionRe.FindAllString(text, -1)
122}
123
124// MaskCreditCard masks credit card numbers
125func MaskCreditCard(text string) string {
126	return creditCardRe.ReplaceAllStringFunc(text, func(cc string) string {
127		cleaned := strings.ReplaceAll(strings.ReplaceAll(cc, " ", ""), "-", "")
128		if len(cleaned) >= 16 {
129			return "****-****-****-" + cleaned[len(cleaned)-4:]
130		}
131		return cc
132	})
133}
134
135// MaskEmail masks email addresses
136func MaskEmail(text string) string {
137	return emailExtractRe.ReplaceAllStringFunc(text, func(email string) string {
138		parts := strings.Split(email, "@")
139		if len(parts) != 2 {
140			return email
141		}
142		username := parts[0]
143		if len(username) > 2 {
144			username = username[:2] + "***"
145		}
146		return username + "@" + parts[1]
147	})
148}
149
150// RedactSSN redacts social security numbers
151func RedactSSN(text string) string {
152	return ssnRe.ReplaceAllString(text, "***-**-****")
153}
154
155// ParseLogEntry parses a log entry into components
156func ParseLogEntry(entry string) {
157	matches := logEntryRe.FindStringSubmatch(entry)
158	if matches == nil {
159		return nil, fmt.Errorf("invalid log format")
160	}
161
162	result := make(map[string]string)
163	names := logEntryRe.SubexpNames()
164	for i, name := range names {
165		if i != 0 && name != "" {
166			result[name] = matches[i]
167		}
168	}
169	return result, nil
170}
171
172// ParseKeyValue parses key=value pairs from text
173func ParseKeyValue(text string) map[string]string {
174	result := make(map[string]string)
175	matches := keyValueRe.FindAllStringSubmatch(text, -1)
176	for _, match := range matches {
177		if len(match) == 3 {
178			result[match[1]] = match[2]
179		}
180	}
181	return result
182}

Key Takeaways

  1. Compile Once: Use package-level variables for frequently used patterns
  2. Named Groups: Use (?P<name>...) for structured extraction
  3. Raw Strings: Use backtick-quoted raw strings for regex patterns so backslashes do not need to be escaped twice
  4. Validation: Regex is great for format validation
  5. Performance: A regex compiled once and reused is much faster than recompiling the pattern on every call