Python Indexing Practice Problems & Exercises

Practice: Indexing and Query Optimization

11 problems4 Easy4 Medium3 Hard⏱ 55–75 min

#1Create a Single-Column IndexEasy

CREATE INDEXindexperformance

Create an index on a city column and verify its existence in the schema catalog.

Solution:

def create_city_index(conn):
    conn.execute("CREATE INDEX idx_customers_city ON customers(city)")
    conn.commit()

def list_indexes(conn, table):
    cur = conn.execute("""
        SELECT name FROM sqlite_master
        WHERE type = 'index'
          AND tbl_name = ?
          AND name NOT LIKE 'sqlite_autoindex%'
        ORDER BY name
    """, (table,))
    return [row[0] for row in cur.fetchall()]

Starter Code

import sqlite3

conn = sqlite3.connect(':memory:')
conn.execute("""
CREATE TABLE customers (
  id INTEGER PRIMARY KEY,
  email TEXT UNIQUE,
  name TEXT,
  city TEXT
)
""")
# Insert 10,000 rows
import random, string
random.seed(42)
rows = [
  (i, f'user{i}@example.com', f'User {i}',
   random.choice(['NYC', 'LA', 'Chicago', 'Houston', 'Phoenix']))
  for i in range(10000)
]
conn.executemany("INSERT INTO customers VALUES (?,?,?,?)", rows)
conn.commit()

# TODO: Create an index on the city column.
# Then verify the index exists using sqlite_master.
# Return a list of index names on the customers table.

def create_city_index(conn):
  pass

def list_indexes(conn, table):
  pass

create_city_index(conn)
print(list_indexes(conn, 'customers'))

Expected Output

['idx_customers_city']

Hints

Hint 1: CREATE INDEX idx_customers_city ON customers(city) creates the index.

Hint 2: SELECT name FROM sqlite_master WHERE type='index' AND tbl_name=? lists indexes.

Hint 3: Exclude auto-created indexes (those that start with 'sqlite_autoindex') if needed.

#2EXPLAIN QUERY PLAN — Index vs Full ScanEasy

EXPLAIN QUERY PLANfull scanindex scan

Use EXPLAIN QUERY PLAN to observe the difference between a full table scan and an index seek.

Solution:

def check_query_plans(conn):
    def get_plan(conn):
        rows = conn.execute(
            "EXPLAIN QUERY PLAN SELECT * FROM orders WHERE customer_id = 42"
        ).fetchall()
        return ' '.join(str(col) for row in rows for col in row).upper()

    before = get_plan(conn)
    conn.execute("CREATE INDEX idx_orders_customer ON orders(customer_id)")
    conn.commit()
    after = get_plan(conn)
    return before, after

Starter Code

import sqlite3

conn = sqlite3.connect(':memory:')
conn.execute("CREATE TABLE orders (id INTEGER PRIMARY KEY, customer_id INTEGER, total REAL)")
# Insert sample data
conn.executemany("INSERT INTO orders VALUES (?,?,?)",
  [(i, i % 100, float(i * 10)) for i in range(5000)])
conn.commit()

# TODO:
# 1. Run EXPLAIN QUERY PLAN on a query filtering by customer_id WITHOUT an index.
#    Check if the plan says 'SCAN' (full table scan).
# 2. Create an index on customer_id.
# 3. Run EXPLAIN QUERY PLAN again.
#    Check if the plan now says 'SEARCH' (index scan).
# Return (plan_before, plan_after) as strings.

def check_query_plans(conn):
  pass

before, after = check_query_plans(conn)
print('Before index:', 'SCAN' in before.upper())
print('After index:', 'SEARCH' in after.upper() or 'INDEX' in after.upper())

Expected Output

Before index: True
After index: True

Hints

Hint 1: EXPLAIN QUERY PLAN SELECT ... returns rows describing the query plan.

Hint 2: Join all detail columns into a string to check for keywords like SCAN or SEARCH.

Hint 3: SQLite uses 'SCAN' for full table scans and 'SEARCH' for index seeks.

#3Unique Index and Constraint EnforcementEasy

UNIQUE INDEXIntegrityErrorconstraint

Create a unique index on an email column and verify it rejects duplicate values.

Solution:

def test_unique_email_index(conn):
    conn.execute("CREATE UNIQUE INDEX idx_users_email ON users(email)")
    conn.execute("INSERT INTO users VALUES (1, 'alice', '[email protected]')")
    conn.execute("INSERT INTO users VALUES (2, 'bob', '[email protected]')")
    conn.commit()
    try:
        conn.execute("INSERT INTO users VALUES (3, 'alice2', '[email protected]')")
        conn.commit()
        return False  # Should not reach here
    except sqlite3.IntegrityError:
        return True

Starter Code

import sqlite3

conn = sqlite3.connect(':memory:')
conn.execute("""
CREATE TABLE users (
  id INTEGER PRIMARY KEY,
  username TEXT,
  email TEXT
)
""")
conn.commit()

# TODO: Create a unique index on the email column.
# Insert two users with different emails (should succeed).
# Try inserting a duplicate email (should raise IntegrityError).
# Return True if the duplicate was correctly rejected.

def test_unique_email_index(conn):
  pass

print(test_unique_email_index(conn))

Expected Output

True

Hints

Hint 1: CREATE UNIQUE INDEX idx_users_email ON users(email) enforces uniqueness.

Hint 2: sqlite3.IntegrityError is raised when a UNIQUE constraint is violated.

Hint 3: Catch the exception to verify the constraint is working.

#4DROP INDEX and Re-IndexEasy

DROP INDEXREINDEXindex maintenance

Drop a specific index and verify the remaining index set with sqlite_master queries.

Solution:

def manage_indexes(conn):
    def get_indexes():
        cur = conn.execute("""
            SELECT name FROM sqlite_master
            WHERE type = 'index' AND tbl_name = 'products'
              AND name NOT LIKE 'sqlite_autoindex%'
        """)
        return [row[0] for row in cur.fetchall()]

    before = get_indexes()
    conn.execute("DROP INDEX idx_price")
    conn.commit()
    after = get_indexes()
    return before, after

Starter Code

import sqlite3

conn = sqlite3.connect(':memory:')
conn.execute("CREATE TABLE products (id INTEGER PRIMARY KEY, sku TEXT, price REAL)")
conn.execute("CREATE INDEX idx_sku ON products(sku)")
conn.execute("CREATE INDEX idx_price ON products(price)")
conn.executemany("INSERT INTO products VALUES (?,?,?)",
  [(i, f'SKU-{i:04d}', float(i * 2)) for i in range(1000)])
conn.commit()

# TODO:
# 1. List current indexes (excluding auto-indexes)
# 2. Drop idx_price
# 3. List indexes again
# Return (before_list, after_list)

def manage_indexes(conn):
  pass

before, after = manage_indexes(conn)
print('Before:', sorted(before))
print('After:', sorted(after))

Expected Output

Before: ['idx_price', 'idx_sku']
After: ['idx_sku']

Hints

Hint 1: DROP INDEX idx_price removes the index without affecting table data.

Hint 2: Query sqlite_master WHERE type='index' AND tbl_name='products' to list indexes.

Hint 3: REINDEX products rebuilds all indexes on the table (useful after bulk loads).

#5Composite Index Column OrderMedium

composite indexcolumn orderleftmost prefix

Design a composite index with the correct column order to support an equality+range query efficiently.

Solution:

def create_optimal_composite_index(conn):
    conn.execute("CREATE INDEX idx_logs_composite ON logs(service, level, ts)")
    conn.commit()

def verify_plan(conn):
    rows = conn.execute("""
        EXPLAIN QUERY PLAN
        SELECT * FROM logs
        WHERE service = 'api' AND level = 'ERROR' AND ts > 1700040000
    """).fetchall()
    return ' '.join(str(col) for row in rows for col in row)

Starter Code

import sqlite3
import time

conn = sqlite3.connect(':memory:')
conn.execute("""
CREATE TABLE logs (
  id INTEGER PRIMARY KEY,
  service TEXT,
  level TEXT,
  ts INTEGER,
  message TEXT
)
""")
import random
random.seed(1)
services = ['api', 'worker', 'scheduler']
levels = ['DEBUG', 'INFO', 'WARN', 'ERROR']
conn.executemany("INSERT INTO logs VALUES (?,?,?,?,?)", [
  (i, random.choice(services), random.choice(levels), 1700000000 + i, f'msg {i}')
  for i in range(50000)
])
conn.commit()

# TODO: Create a composite index that makes this query fast:
#   SELECT * FROM logs WHERE service = ? AND level = ? AND ts > ?
# The correct column order matters for the leftmost-prefix rule.
# Return the EXPLAIN QUERY PLAN output to verify index use.

def create_optimal_composite_index(conn):
  pass

def verify_plan(conn):
  pass

create_optimal_composite_index(conn)
plan = verify_plan(conn)
print('Uses index:', 'SCAN TABLE' not in plan.upper() or 'USING INDEX' in plan.upper())

Expected Output

Uses index: True

Hints

Hint 1: Put equality columns first in the composite index: (service, level, ts).

Hint 2: The leftmost prefix rule means the index helps WHERE service=? AND level=? AND ts>?.

Hint 3: EXPLAIN QUERY PLAN should show SEARCH ... USING INDEX, not SCAN TABLE.

#6Covering Index — Avoid Table LookupMedium

covering indexindex-only scanINCLUDE

Create a covering index that allows the query engine to satisfy a SELECT entirely from the index.

Solution:

def create_covering_index(conn):
    conn.execute("""
        CREATE INDEX idx_orders_covering
        ON orders(customer_id, status, total, created_at)
    """)
    conn.commit()

Starter Code

import sqlite3

conn = sqlite3.connect(':memory:')
conn.execute("""
CREATE TABLE orders (
  id INTEGER PRIMARY KEY,
  customer_id INTEGER,
  status TEXT,
  total REAL,
  created_at INTEGER
)
""")
import random
random.seed(2)
conn.executemany("INSERT INTO orders VALUES (?,?,?,?,?)", [
  (i, random.randint(1, 1000),
   random.choice(['pending', 'shipped', 'delivered', 'cancelled']),
   random.uniform(10, 500),
   1700000000 + i)
  for i in range(100000)
])
conn.commit()

# The query: SELECT total, created_at FROM orders WHERE customer_id = ? AND status = 'shipped'
# TODO: Create a covering index that includes all needed columns so
# SQLite can satisfy the query entirely from the index (no table lookup).
# Verify with EXPLAIN QUERY PLAN.

def create_covering_index(conn):
  pass

create_covering_index(conn)
plan_rows = conn.execute("""
  EXPLAIN QUERY PLAN
  SELECT total, created_at FROM orders
  WHERE customer_id = 42 AND status = 'shipped'
""").fetchall()
plan = ' '.join(str(c) for row in plan_rows for c in row)
print('Covering index active:', 'COVERING INDEX' in plan.upper() or 'USING INDEX' in plan.upper())

Expected Output

Covering index active: True

Hints

Hint 1: A covering index includes all columns referenced in WHERE, SELECT, and ORDER BY.

Hint 2: CREATE INDEX idx ON orders(customer_id, status, total, created_at) covers the whole query.

Hint 3: SQLite shows 'USING COVERING INDEX' in the query plan when no table row fetch is needed.

#7Partial Index — Index a Subset of RowsMedium

partial indexWHERE clausefiltered index

Create a partial (filtered) index that only indexes active tasks, saving storage and improving query speed.

Solution:

def create_partial_index(conn):
    conn.execute("""
        CREATE INDEX idx_active_tasks_priority
        ON tasks(priority)
        WHERE status IN ('pending', 'in_progress')
    """)
    conn.commit()

Starter Code

import sqlite3

conn = sqlite3.connect(':memory:')
conn.execute("""
CREATE TABLE tasks (
  id INTEGER PRIMARY KEY,
  title TEXT,
  status TEXT,  -- 'pending', 'in_progress', 'done'
  priority INTEGER,
  assigned_to INTEGER
)
""")
import random
random.seed(3)
statuses = ['pending'] * 1 + ['in_progress'] * 1 + ['done'] * 8  # 80% done
conn.executemany("INSERT INTO tasks VALUES (?,?,?,?,?)", [
  (i, f'Task {i}', random.choice(statuses), random.randint(1, 5), random.randint(1, 50))
  for i in range(100000)
])
conn.commit()

# Most queries filter active tasks: WHERE status IN ('pending', 'in_progress')
# Only ~20% of rows are active — a partial index saves space and speeds queries.
# TODO: Create a partial index only on active tasks.
# Then verify it's used when filtering for active tasks.

def create_partial_index(conn):
  pass

create_partial_index(conn)
plan_rows = conn.execute("""
  EXPLAIN QUERY PLAN
  SELECT * FROM tasks
  WHERE status = 'pending' AND priority >= 3
""").fetchall()
plan = ' '.join(str(c) for row in plan_rows for c in row)
print('Partial index used:', 'INDEX' in plan.upper())

Expected Output

Partial index used: True

Hints

Hint 1: CREATE INDEX ... ON tasks(priority) WHERE status IN ('pending', 'in_progress') is a partial index.

Hint 2: The WHERE clause on the index means only matching rows are indexed.

Hint 3: SQLite will use this index when the query's WHERE clause matches the index predicate.

#8Benchmark: Index vs No IndexMedium

benchmarktimeitindex performancemeasurement

Empirically measure the query time speedup from adding an index on a large table.

Solution:

import time

def benchmark_index(n_rows=100000):
    conn = sqlite3.connect(':memory:')
    conn.execute("CREATE TABLE events (id INTEGER PRIMARY KEY, user_id INTEGER, action TEXT)")
    import random
    random.seed(42)
    conn.executemany("INSERT INTO events VALUES (?,?,?)", [
        (i, random.randint(1, 10000), random.choice(['click', 'view', 'buy']))
        for i in range(n_rows)
    ])
    conn.commit()

    def time_query(conn, iterations=5):
        times = []
        for _ in range(iterations):
            t0 = time.perf_counter()
            conn.execute(
                "SELECT COUNT(*) FROM events WHERE user_id = 42 AND action = 'buy'"
            ).fetchone()
            times.append(time.perf_counter() - t0)
        return min(times) * 1000

    without_ms = time_query(conn)
    conn.execute("CREATE INDEX idx_events_user ON events(user_id, action)")
    conn.commit()
    with_ms = time_query(conn)
    conn.close()
    return without_ms, with_ms

Starter Code

import sqlite3
import time

# TODO: Create a table with 100,000 rows.
# Measure query time WITHOUT an index.
# Add an index.
# Measure query time WITH the index.
# Return (time_without_index, time_with_index) in milliseconds.
# The indexed query should be significantly faster.

def benchmark_index(n_rows=100000):
  pass

without_ms, with_ms = benchmark_index()
print(f'Without index: {without_ms:.2f}ms')
print(f'With index: {with_ms:.2f}ms')
print(f'Speedup: {without_ms / with_ms:.1f}x faster')

Expected Output

Without index: X.XXms
With index: X.XXms
Speedup: X.Xx faster

Hints

Hint 1: Use time.perf_counter() before and after the query for high-resolution timing.

Hint 2: Run the same SELECT WHERE query with and without the index.

Hint 3: Run each measurement multiple times and take the minimum to reduce noise.

#9Index-Aware Query RewritingHard

query rewritefunction on columnindex killer

Rewrite queries that apply functions to indexed columns, restoring index usability.

Solution:

def find_user_by_email(conn, email):
    # Use COLLATE NOCASE — SQLite can still use a NOCASE index, or store as lower
    cur = conn.execute(
        "SELECT * FROM users WHERE email = ? COLLATE NOCASE",
        (email,)
    )
    return cur.fetchall()

def find_january_signups(conn):
    cur = conn.execute("""
        SELECT * FROM users
        WHERE signup_date >= '2024-01-01' AND signup_date < '2024-02-01'
        ORDER BY signup_date
    """)
    return cur.fetchall()

Starter Code

import sqlite3

conn = sqlite3.connect(':memory:')
conn.execute("""
CREATE TABLE users (
  id INTEGER PRIMARY KEY,
  email TEXT,
  signup_date TEXT  -- stored as 'YYYY-MM-DD'
)
""")
conn.execute("CREATE INDEX idx_email ON users(email)")
conn.execute("CREATE INDEX idx_signup ON users(signup_date)")
conn.executemany("INSERT INTO users VALUES (?,?,?)", [
  (1, '[email protected]', '2024-01-15'),
  (2, '[email protected]', '2024-02-20'),
  (3, '[email protected]', '2024-03-10'),
  (4, '[email protected]', '2024-01-05'),
])
conn.commit()

# The following queries are "index killers" — they prevent index use.
# Bad: WHERE LOWER(email) = ? (function on indexed column)
# Bad: WHERE strftime('%Y', signup_date) = '2024' (function on indexed column)

# TODO: Rewrite both queries to use the index:
# 1. Find user with email '[email protected]' (case-insensitive, index-friendly)
# 2. Find users who signed up in January 2024 (range query, index-friendly)

def find_user_by_email(conn, email):
  pass

def find_january_signups(conn):
  pass

print(find_user_by_email(conn, '[email protected]'))
print(find_january_signups(conn))

Expected Output

[(1, '[email protected]', '2024-01-15')]
[(4, '[email protected]', '2024-01-05'), (1, '[email protected]', '2024-01-15')]

Hints

Hint 1: Store emails in lowercase at insert time, so WHERE email = lower(?) can use the index.

Hint 2: For date ranges: WHERE signup_date >= '2024-01-01' AND signup_date < '2024-02-01'.

Hint 3: Using functions on the indexed column (LOWER(email)) prevents the index from being used.

#10Index Usage AnalyzerHard

query analyzerEXPLAINindex coveragetooling

Build a query plan analyzer that classifies each step as an index seek or a full table scan.

Solution:

def analyze_query(conn, sql, params=()):
    plan_rows = conn.execute(f"EXPLAIN QUERY PLAN {sql}", params).fetchall()
    details = [str(row[-1]) for row in plan_rows]

    scan_count = sum(1 for d in details if 'SCAN' in d.upper() and 'INDEX' not in d.upper())
    index_count = sum(1 for d in details if 'SEARCH' in d.upper() or 'USING INDEX' in d.upper() or 'COVERING INDEX' in d.upper())

    return {
        'uses_index': index_count > 0,
        'scan_count': scan_count,
        'index_count': index_count,
        'plan_details': details,
    }

Starter Code

import sqlite3

# TODO: Build a query analyzer that:
# 1. Runs EXPLAIN QUERY PLAN on a given SQL query
# 2. Reports whether each step uses an index or a full scan
# 3. Returns a dict with:
#    - 'uses_index': bool
#    - 'scan_count': number of full table scans
#    - 'index_count': number of index seeks
#    - 'plan_details': list of detail strings

def analyze_query(conn, sql, params=()):
  pass

conn = sqlite3.connect(':memory:')
conn.execute("CREATE TABLE t (id INTEGER PRIMARY KEY, x INTEGER, y TEXT)")
conn.executemany("INSERT INTO t VALUES (?,?,?)", [(i, i*2, f'val{i}') for i in range(10000)])
conn.commit()

# Without index
result1 = analyze_query(conn, "SELECT * FROM t WHERE x = 100")
print('No index - uses_index:', result1['uses_index'])

# With index
conn.execute("CREATE INDEX idx_x ON t(x)")
result2 = analyze_query(conn, "SELECT * FROM t WHERE x = 100")
print('With index - uses_index:', result2['uses_index'])

Expected Output

No index - uses_index: False
With index - uses_index: True

Hints

Hint 1: EXPLAIN QUERY PLAN returns rows with columns: id, parent, notused, detail.

Hint 2: The 'detail' column contains strings like 'SCAN TABLE t' or 'SEARCH t USING INDEX'.

Hint 3: Check each detail string for keywords 'SCAN' vs 'SEARCH' or 'INDEX'.

#11Auto Index AdvisorHard

index advisorworkload analysisDDL generation

Build an index advisor that analyzes a query workload and recommends CREATE INDEX statements for unindexed hot columns.

Solution:

import re
from collections import Counter

def index_advisor(conn, queries, top_n=3):
    # Extract column names from WHERE clauses
    col_pattern = re.compile(r'\bWHERE\b.*?\b(\w+)\s*[=><]', re.IGNORECASE)
    and_pattern = re.compile(r'\bAND\b\s+(\w+)\s*[=><]', re.IGNORECASE)

    col_counts = Counter()
    for q in queries:
        for m in col_pattern.finditer(q):
            col_counts[m.group(1).lower()] += 1
        for m in and_pattern.finditer(q):
            col_counts[m.group(1).lower()] += 1

    # Get already-indexed columns
    indexed_cols = set()
    rows = conn.execute("""
        SELECT sql FROM sqlite_master
        WHERE type = 'index' AND tbl_name = 'orders'
          AND sql IS NOT NULL
    """).fetchall()
    for (sql,) in rows:
        m = re.search(r'\((\w+)', sql)
        if m:
            indexed_cols.add(m.group(1).lower())

    # Skip primary key and already indexed
    skip = indexed_cols | {'id'}

    recommendations = []
    for col, _ in col_counts.most_common():
        if col in skip:
            continue
        recommendations.append(f"CREATE INDEX idx_orders_{col} ON orders({col});")
        if len(recommendations) >= top_n:
            break

    return recommendations

Starter Code

import sqlite3
import re
from collections import Counter

# TODO: Build a basic index advisor that:
# 1. Analyzes a list of SQL queries (looking for WHERE column = ?)
# 2. Counts how often each column appears in WHERE clauses
# 3. Checks which columns already have indexes
# 4. Recommends CREATE INDEX statements for the top N unindexed columns

def index_advisor(conn, queries, top_n=3):
  pass

conn = sqlite3.connect(':memory:')
conn.execute("CREATE TABLE orders (id INTEGER PRIMARY KEY, customer_id INTEGER, status TEXT, region TEXT, amount REAL)")
conn.execute("CREATE INDEX idx_customer ON orders(customer_id)")
conn.commit()

workload = [
  "SELECT * FROM orders WHERE customer_id = ?",
  "SELECT * FROM orders WHERE status = ? AND customer_id = ?",
  "SELECT * FROM orders WHERE region = ?",
  "SELECT * FROM orders WHERE status = ?",
  "SELECT * FROM orders WHERE region = ? AND status = ?",
  "SELECT * FROM orders WHERE amount > ?",
  "SELECT * FROM orders WHERE status = ?",
]

recommendations = index_advisor(conn, workload, top_n=3)
for rec in recommendations:
  print(rec)

Expected Output

CREATE INDEX idx_orders_status ON orders(status);
CREATE INDEX idx_orders_region ON orders(region);
CREATE INDEX idx_orders_amount ON orders(amount);

Hints

Hint 1: Use a regex like r'WHERE\s+(\w+)\s*=' to extract column names from queries.

Hint 2: Count column frequencies with collections.Counter.

Hint 3: Check existing indexes via sqlite_master, then recommend for unindexed high-frequency columns.