# SQL Examples and Usage Patterns

This document provides comprehensive SQL examples for the Distributed Database system.

## Current Status

The SQL interface is currently a **placeholder** and is not yet implemented. This document shows the intended SQL syntax and provides workarounds using the current key-value interface.

## Intended SQL Interface (Future Release)

### DDL (Data Definition Language)

#### CREATE TABLE

```sql
-- Simple table creation
CREATE TABLE users (
  id SERIAL PRIMARY KEY,
  email VARCHAR(255) UNIQUE NOT NULL,
  name VARCHAR(255),
  age INT,
  created_at TIMESTAMP DEFAULT NOW()
);

-- Table with constraints
CREATE TABLE orders (
  id BIGSERIAL PRIMARY KEY,
  user_id INT NOT NULL,
  product_id INT NOT NULL,
  quantity INT CHECK (quantity > 0),
  total DECIMAL(10, 2),
  status VARCHAR(20) DEFAULT 'pending',
  created_at TIMESTAMP DEFAULT NOW(),
  FOREIGN KEY (user_id) REFERENCES users(id),
  INDEX idx_user_id (user_id),
  INDEX idx_status (status)
);

-- Partitioned table (for very large tables)
CREATE TABLE events (
  id BIGSERIAL,
  user_id INT,
  event_type VARCHAR(50),
  timestamp TIMESTAMP,
  data JSON
) PARTITION BY RANGE (timestamp);
```

#### ALTER TABLE

```sql
-- Add column
ALTER TABLE users ADD COLUMN phone VARCHAR(20);

-- Drop column
ALTER TABLE users DROP COLUMN phone;

-- Add index
ALTER TABLE users ADD INDEX idx_email (email);

-- Add constraint
ALTER TABLE orders ADD CONSTRAINT chk_total CHECK (total > 0);
```

#### DROP TABLE

```sql
DROP TABLE users CASCADE;  -- CASCADE drops dependent objects
DROP TABLE IF EXISTS temp_table;
```

### DML (Data Manipulation Language)

#### INSERT

```sql
-- Single row insert
INSERT INTO users (email, name, age)
VALUES ('alice@example.com', 'Alice', 30);

-- Multiple row insert
INSERT INTO users (email, name, age) VALUES
('bob@example.com', 'Bob', 28),
('charlie@example.com', 'Charlie', 35),
('diana@example.com', 'Diana', 27);

-- Insert from select
INSERT INTO users_backup
SELECT * FROM users WHERE created_at < '2024-01-01';

-- Insert with defaults
INSERT INTO orders (user_id, product_id, quantity, total)
VALUES (1, 123, 2, 99.99);
-- status defaults to 'pending', created_at defaults to NOW()
```

#### SELECT

```sql
-- Simple select
SELECT id, email, name FROM users;

-- Select with WHERE
SELECT id, email, name FROM users
WHERE age > 25;

-- Select with AND/OR
SELECT id, email, name, age FROM users
WHERE age > 25 AND (name LIKE 'A%' OR age < 30);

-- Select with JOIN
SELECT u.name, o.id, o.total
FROM users u
JOIN orders o ON u.id = o.user_id
WHERE u.country = 'US';

-- Select with GROUP BY and aggregation
SELECT
  user_id,
  COUNT(*) as order_count,
  SUM(total) as total_spent,
  AVG(total) as avg_order,
  MAX(total) as largest_order
FROM orders
GROUP BY user_id
HAVING COUNT(*) > 5
ORDER BY total_spent DESC;

-- Select with DISTINCT
SELECT DISTINCT country FROM users;

-- Select with LIMIT and OFFSET
SELECT * FROM users
ORDER BY created_at DESC
LIMIT 10 OFFSET 20;

-- Select with subquery
SELECT u.id, u.name
FROM users u
WHERE u.id IN (
  SELECT user_id FROM orders WHERE total > 1000
);

-- Select with window functions
-- Per-user running context: total spend per user and the ordinal of each
-- order within that user's history, ordered by the orders.created_at column.
SELECT
  user_id,
  created_at,
  total,
  SUM(total) OVER (PARTITION BY user_id) as user_total,
  ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY created_at) as order_num
FROM orders;
```

#### UPDATE

```sql
-- Simple update
UPDATE users SET name = 'Alice Smith' WHERE id = 1;

-- Update multiple columns
UPDATE users
SET age = 31, updated_at = NOW()
WHERE email = 'alice@example.com';

-- Update with expression
UPDATE orders
SET total = total * 1.1  -- 10% increase
WHERE created_at > '2024-10-01';

-- Update from join
UPDATE orders o
SET status = 'shipped'
FROM shipments s
WHERE o.id = s.order_id AND s.shipped = true;

-- Update with subquery
UPDATE users
SET status = 'premium'
WHERE id IN (
  SELECT user_id FROM orders
  GROUP BY user_id
  HAVING SUM(total) > 5000
);
```

#### DELETE

```sql
-- Simple delete
DELETE FROM users WHERE id = 1;

-- Delete with condition
DELETE FROM orders WHERE status = 'cancelled';

-- Delete with subquery
DELETE FROM users
WHERE id NOT IN (
  SELECT user_id FROM orders
);

-- Delete with JOIN
DELETE FROM orders o
USING users u
WHERE o.user_id = u.id
AND u.deleted_at IS NOT NULL;
```

### Query Patterns

#### E-Commerce: Product Search

```sql
-- Find products in category with good ratings
-- An inner join is intentional: the avg_rating filter below can only be
-- satisfied by products that have a ratings row, so a LEFT JOIN would be
-- silently reduced to an inner join by the WHERE clause anyway.
SELECT
  p.id,
  p.name,
  p.price,
  r.avg_rating,
  r.review_count
FROM products p
JOIN product_ratings r ON p.id = r.product_id
WHERE p.category = 'electronics'
  AND p.price BETWEEN 100 AND 1000
  AND r.avg_rating >= 4.0
  AND p.in_stock = true
ORDER BY r.avg_rating DESC, p.price ASC
LIMIT 20;
```

#### User Analytics: Monthly Active Users

```sql
-- Count users active in the last 30 days, grouped by the calendar month of
-- their last login and broken down by subscription tier.
-- A 30-day window can straddle a month boundary, so up to two rows are returned.
SELECT
  DATE_TRUNC('month', last_login) as login_month,
  COUNT(*) as users_active,
  COUNT(CASE WHEN subscription = 'premium' THEN 1 END) as premium_users,
  COUNT(CASE WHEN subscription = 'free' THEN 1 END) as free_users
FROM users
WHERE last_login >= NOW() - INTERVAL '30' DAY
GROUP BY DATE_TRUNC('month', last_login)
ORDER BY login_month DESC;
```

#### Financial: Transaction Reconciliation

```sql
-- Reconcile accounts with journal entries
-- Lists every account whose stored balance differs from the sum of its
-- journal entries dated 2024-01-01 or later.
-- SUM() over an account with no matching journal rows is NULL, not 0, so it
-- is wrapped in COALESCE; otherwise "balance != NULL" evaluates to UNKNOWN and
-- accounts with a balance but no entries would be dropped from the report.
-- NOTE(review): the join uses a.id while the output uses a.account_id;
-- confirm both columns exist on accounts.
SELECT
  a.account_id,
  a.account_name,
  a.balance as account_balance,
  COALESCE(SUM(j.amount), 0) as journal_total,
  a.balance - COALESCE(SUM(j.amount), 0) as difference
FROM accounts a
LEFT JOIN journal_entries j ON a.id = j.account_id
  AND j.date >= '2024-01-01'
GROUP BY a.account_id, a.account_name, a.balance
HAVING a.balance != COALESCE(SUM(j.amount), 0)
ORDER BY difference DESC;
```

#### Social Network: Friend Recommendations

```sql
-- Suggest friends based on mutual connections
--   f1: current user -> one of their friends (the mutual connection)
--   f2: that friend  -> the candidate (a friend-of-friend)
-- A candidate is suggested when it is not the current user and is not
-- already a direct friend; candidates are ranked by how many of the current
-- user's friends link to them.
SELECT
  f2.friend_id,
  u.name,
  u.email,
  COUNT(*) as mutual_friends,
  MAX(f2.created_at) as latest_mutual
FROM friends f1
JOIN friends f2 ON f1.friend_id = f2.user_id
JOIN users u ON f2.friend_id = u.id
WHERE f1.user_id = 123  -- Current user
  AND f2.friend_id <> 123  -- Never suggest the user to themselves
  AND NOT EXISTS (
    -- Skip candidates who are already direct friends
    SELECT 1 FROM friends f3
    WHERE f3.user_id = 123 AND f3.friend_id = f2.friend_id
  )
GROUP BY f2.friend_id, u.name, u.email
ORDER BY mutual_friends DESC
LIMIT 10;
```

#### Time Series: Metrics Aggregation

```sql
-- Aggregate system metrics hourly
-- One row per (hour, server) for samples from the last 24 hours;
-- sample_count shows how many raw samples back each average.
SELECT
  DATE_TRUNC('hour', timestamp) as hour,
  server_id,
  AVG(cpu_percent) as avg_cpu,
  MAX(cpu_percent) as max_cpu,
  AVG(memory_mb) as avg_memory,
  MAX(memory_mb) as max_memory,
  AVG(disk_io_bytes_per_sec) as avg_disk_io,
  COUNT(*) as sample_count
FROM system_metrics
WHERE timestamp >= NOW() - INTERVAL '24' HOUR
GROUP BY DATE_TRUNC('hour', timestamp), server_id
ORDER BY hour DESC, server_id;
```

## Working Around Current Limitations

Until full SQL is implemented, use these patterns with the key-value interface.

### Current Key-Value Interface

```bash
# Connect to database
./bin/dbclient --addr=localhost:9001

# Database commands available:
# SET key value
# GET key
# DELETE key
# SCAN start_key end_key
# EXISTS key
# KEYS pattern
# BEGIN / COMMIT / ROLLBACK (transactions)
```

### Pattern 1: Simulating Tables with Prefixes

**SQL Equivalent:**
```sql
INSERT INTO users (id, email, name) VALUES (1, 'alice@example.com', 'Alice');
SELECT * FROM users WHERE id = 1;
```

**Key-Value Implementation:**
```bash
# INSERT simulation
db> SET user:1:id "1"
OK
db> SET user:1:email "alice@example.com"
OK
db> SET user:1:name "Alice"
OK

# SELECT simulation
db> GET user:1:id
"1"
db> GET user:1:email
"alice@example.com"
db> GET user:1:name
"Alice"
```

### Pattern 2: Simulating Indexes with Reverse Keys

**SQL Equivalent:**
```sql
CREATE INDEX idx_email ON users(email);
SELECT * FROM users WHERE email = 'alice@example.com';
```

**Key-Value Implementation:**
```bash
# Create reverse index
db> SET email:alice@example.com:user_id "1"
OK
db> SET user:1:email "alice@example.com"
OK

# Query using index
db> GET email:alice@example.com:user_id
"1"

# Then fetch full record
db> GET user:1:name
"Alice"
```

### Pattern 3: Simulating Relationships with Foreign Keys

**SQL Equivalent:**
```sql
INSERT INTO orders (user_id, product_id) VALUES (1, 123);
SELECT * FROM orders WHERE user_id = 1;
```

**Key-Value Implementation:**
```bash
# Create order
db> SET order:1001:user_id "1"
OK
db> SET order:1001:product_id "123"
OK
db> SET order:1001:total "99.99"
OK

# Index by user_id for lookups
db> SET user:1:order:1001 "1"
OK

# Query orders for user
db> SCAN user:1:order: user:1:order~
user:1:order:1001 = 1

# Get full order details
db> GET order:1001:user_id
"1"
db> GET order:1001:product_id
"123"
db> GET order:1001:total
"99.99"
```

### Pattern 4: Simulating DISTINCT with Sets

**SQL Equivalent:**
```sql
SELECT DISTINCT country FROM users;
```

**Key-Value Implementation:**
```bash
# Store countries as separate keys
db> SET country:US:count "100"
OK
db> SET country:UK:count "50"
OK
db> SET country:CA:count "25"
OK

# List all countries
db> KEYS country:*
country:US:count
country:UK:count
country:CA:count

# Or use a set-like structure
db> SET countries "US,UK,CA"
OK
```

### Pattern 5: Simulating COUNT with Counters

**SQL Equivalent:**
```sql
SELECT COUNT(*) FROM users;
SELECT COUNT(*) FROM orders WHERE user_id = 1;
```

**Key-Value Implementation:**
```bash
# Global counter
db> SET stats:users:count "1000"
OK

# Per-user counter
db> SET user:1:order_count "5"
OK

# Update counter (read-modify-write).
# GET followed by SET is two separate commands, so this sequence is not
# atomic by itself; in application code, use CAS/compare-and-swap to guard it.
db> GET user:1:order_count
"5"
# [increment to 6 in application]
db> SET user:1:order_count "6"
OK
```

### Pattern 6: Simulating Sorting with Numeric Prefixes

**SQL Equivalent:**
```sql
SELECT * FROM products ORDER BY rating DESC;
```

**Key-Value Implementation:**
```bash
# Store with an inverted, fixed-width sort key: 9999 - (rating * 100).
# Assuming SCAN returns keys in ascending order, the highest rating must get
# the smallest key for the scan to yield descending ratings.
db> SET product:9019:product_id "1"  # 9999 - 980 (rating 9.8)
OK
db> SET product:9049:product_id "2"  # 9999 - 950 (rating 9.5)
OK
db> SET product:9099:product_id "3"  # 9999 - 900 (rating 9.0)
OK

# Scan in key order (ascending key = descending rating)
db> SCAN product: product~
product:9019:product_id = 1  # Highest rating
product:9049:product_id = 2
product:9099:product_id = 3  # Lowest rating
```

### Pattern 7: Simulating Transactions for Complex Operations

**SQL Equivalent:**
```sql
BEGIN;
  UPDATE accounts SET balance = balance - 100 WHERE id = 1;
  UPDATE accounts SET balance = balance + 100 WHERE id = 2;
  INSERT INTO transactions (from_id, to_id, amount) VALUES (1, 2, 100);
COMMIT;
```

**Key-Value Implementation:**
```bash
# Money transfer using 2PC transaction
db> BEGIN
Transaction started: tx-1234567890

# Deduct from source account
db> SET account:1:balance "900"
OK (buffered)

# Add to destination account
db> SET account:2:balance "1100"
OK (buffered)

# Record transaction
db> SET txn:1001:from "1"
OK (buffered)
db> SET txn:1001:to "2"
OK (buffered)
db> SET txn:1001:amount "100"
OK (buffered)
db> SET txn:1001:status "completed"
OK (buffered)

# Commit all changes atomically
db> COMMIT
Transaction committed in 8.2ms

# If any step failed, entire transaction would roll back
```

### Pattern 8: Time-Based Queries with Timestamp Prefixes

**SQL Equivalent:**
```sql
SELECT * FROM logs WHERE timestamp > '2024-10-25' AND timestamp < '2024-10-26';
```

**Key-Value Implementation:**
```bash
# Store with timestamp prefix (ISO 8601 format sorts naturally)
db> SET log:2024-10-25T14:30:45:msg "User login"
OK
db> SET log:2024-10-25T14:35:12:msg "User logout"
OK
db> SET log:2024-10-25T15:00:00:msg "Report generated"
OK

# Range scan for time period.
# The end bound is the start of the NEXT day: every key carries a ":msg"
# suffix, so "log:2024-10-25T23:59:59:msg" sorts after
# "log:2024-10-25T23:59:59" and would be missed by a scan ending there.
db> SCAN log:2024-10-25T00:00:00 log:2024-10-26T00:00:00
log:2024-10-25T14:30:45:msg = "User login"
log:2024-10-25T14:35:12:msg = "User logout"
log:2024-10-25T15:00:00:msg = "Report generated"
```

## Application-Level Patterns

### Full-Text Search Simulation

```go
// Application-level full-text search
package main

import (
    "context"
    "strings"

    "github.com/distributed-db/pkg/client"
)

// searchProducts returns the keys of all entries in the "product:" key range
// whose value contains every whitespace-separated term of query. Matching is
// a case-insensitive substring test, so an empty query matches every entry.
//
// Every entry in the range is read and tested, so the cost grows with the
// number of stored products rather than with the number of matches.
//
// The signature has no error return; if the scan cannot be started the
// function returns nil.
// NOTE(review): assumes the second value returned by db.Scan is an error.
func searchProducts(db *client.Client, query string) []string {
    // Split query into lowercase terms
    terms := strings.Fields(strings.ToLower(query))

    // Get all products
    iter, err := db.Scan(context.Background(), []byte("product:"), []byte("product:~"))
    if err != nil {
        // Do not touch iter (and do not defer Close on it) when Scan failed.
        return nil
    }
    defer iter.Close()

    var results []string
    for ; iter.Valid(); iter.Next() {
        // Lowercase the value once per entry instead of once per term.
        value := strings.ToLower(string(iter.Value()))
        if containsAllTerms(value, terms) {
            results = append(results, string(iter.Key()))
        }
    }

    return results
}

// containsAllTerms reports whether text contains every term as a substring.
// It returns true when terms is empty.
func containsAllTerms(text string, terms []string) bool {
    for _, term := range terms {
        if !strings.Contains(text, term) {
            return false
        }
    }
    return true
}
```

### Atomic Counter Pattern

```go
// Counter increment via read-modify-write (not atomic without CAS support)
package main

import (
    "context"
    "strconv"

    "github.com/distributed-db/pkg/client"
)

// incrementCounter reads the decimal integer stored under key, adds one,
// writes the result back and returns the new value. A missing key (nil value)
// is treated as 0, so the first call stores and returns 1.
//
// The read and the write are two separate calls, so concurrent callers can
// read the same value and lose an update. A truly atomic version needs a
// compare-and-swap or versioned write from the client, retried on conflict;
// no such call is shown in this document.
//
// An error is returned, and nothing is written, when the read fails or the
// stored value is not a valid base-10 int64. This prevents a transient read
// failure or a corrupted value from silently resetting the counter to 1.
// NOTE(review): assumes the second value returned by db.Get is an error.
func incrementCounter(db *client.Client, key string) (int64, error) {
    ctx := context.Background()

    // Read current value
    currentBytes, err := db.Get(ctx, []byte(key))
    if err != nil {
        return 0, err
    }

    current := int64(0)
    if currentBytes != nil {
        current, err = strconv.ParseInt(string(currentBytes), 10, 64)
        if err != nil {
            return 0, err
        }
    }

    // Increment
    next := current + 1

    // Unconditional write; this is where a CAS/versioned put would go.
    // NOTE(review): if db.Put returns an error, check and return it here.
    db.Put(ctx, []byte(key), []byte(strconv.FormatInt(next, 10)))

    return next, nil
}
```

### Rate Limiting Pattern

```go
// Rate limiter using time-windowed counters
package main

import (
    "context"
    "strconv"
    "time"

    "github.com/distributed-db/pkg/client"
)

// isRateLimited reports whether userID has already made limit requests in the
// current one-minute window. When the user is under the limit, the window's
// counter is incremented and false is returned.
//
// The counter lives under "ratelimit:<userID>:<window start, RFC 3339>", so
// each minute uses a fresh key.
//
// Caveats:
//   - The read and the write are separate calls, so concurrent requests for
//     the same user can each read the same count and exceed the limit.
//   - A failed read or an unparsable stored value is treated as a count of 0,
//     i.e. the limiter fails open.
//   - Keys from past windows are never removed here.
func isRateLimited(db *client.Client, userID string, limit int) bool {
    ctx := context.Background()
    now := time.Now()
    window := now.Truncate(time.Minute) // 1-minute window

    key := "ratelimit:" + userID + ":" + window.Format(time.RFC3339)

    countBytes, _ := db.Get(ctx, []byte(key))
    count := 0
    if countBytes != nil {
        // Atoi yields 0 on a parse error, which keeps the fail-open behaviour.
        count, _ = strconv.Atoi(string(countBytes))
    }

    if count >= limit {
        return true // Rate limited
    }

    // Increment counter
    newCount := count + 1
    db.Put(ctx, []byte(key), []byte(strconv.Itoa(newCount)))

    // Set expiration (remove key after 2 minutes)
    // Note: Expiration not yet implemented, needs application cleanup

    return false
}
```

## Performance Characteristics

### Query Performance Expectations

| Query Type | Current Perf | After SQL | Notes |
|------------|--------------|-----------|-------|
| Point lookup | <100μs | <1ms | Key-value optimal |
| Range scan | 100μs/key | 1-10ms | Linear in result size |
| Join | N/A | 50-100ms | Cross-shard only in v2.0 |
| Aggregation | N/A | 10-1000ms | Depends on data size |
| Full-text search | N/A | 100-5000ms | Application-level or v2.0 |
| Sorting | N/A | 10-1000ms | Requires full scan |

## Migration Path to SQL

When SQL support is released:

```go
// Pre-SQL (current)
db.Put(ctx, []byte("user:1:name"), []byte("Alice"))
db.Put(ctx, []byte("user:1:email"), []byte("alice@example.com"))

// Post-SQL (future)
db.Execute(ctx, `
  INSERT INTO users (id, name, email)
  VALUES (1, 'Alice', 'alice@example.com')
`)
```

---

**Note**: These patterns demonstrate best practices for working with the current key-value interface. Once SQL support is released, these patterns will be replaced by standard SQL queries.
