Python JSON Serialization Practice Problems & Exercises
Practice: JSON Serialization
← Back to lesson
Serialize a Python dict to JSON and deserialize it back, verifying the round-trip is lossless.
Solution
import json
# Sample payload covering the JSON primitives used in this exercise:
# string, number, boolean, array and null.
data = {
    'name': 'Alice',
    'age': 30,
    'active': True,
    'scores': [95, 87, 92],
    'address': None,
}

# Serialize to a JSON string, then parse that string back into Python objects.
# Every value in ``data`` has a direct JSON equivalent (str keys, int, bool,
# list, None), so the round-trip reproduces an equal dict.
json_str = json.dumps(data)
restored = json.loads(json_str)

print(f"type of json_str: {type(json_str).__name__}")
print(f"round-trip equal: {restored == data}")
print(f"name: {restored['name']}")
print(f"active: {restored['active']}")
print(f"address is None: {restored['address'] is None}")
Expected Output
type of json_str: str
round-trip equal: True
name: Alice
active: True
address is None: True
Hints
Hint 1: Use json.dumps(data) to serialize a Python dict to a JSON string.
Hint 2: Use json.loads(json_str) to parse a JSON string back to a Python dict.
Hint 3: JSON null becomes Python None; JSON true/false become Python True/False.
Produce both a pretty-printed and a compact (no-spaces) JSON string from the same dict.
Solution
import json
config = {
    'host': 'localhost',
    'port': 5432,
    'database': 'mydb',
    'options': {'timeout': 30, 'pool_size': 5},
}

# pretty:  indent=2 puts each key on its own line with 2-space indentation.
# compact: separators=(',', ':') drops the space that the default
#          separators (', ', ': ') put after each comma and colon.
pretty = json.dumps(config, indent=2)
compact = json.dumps(config, separators=(',', ':'))

print("pretty (first 3 lines):")
for line in pretty.splitlines()[:3]:
    print(line)
print(f"compact has spaces: {' ' in compact}")
print(f"compact starts with: {compact[:20]}")
Expected Output
pretty (first 3 lines):
{
  "host": "localhost",
  "port": 5432,
compact has spaces: False
compact starts with: {"host":"localhost",
Hints
Hint 1: Pass indent=2 to json.dumps() for human-readable output with 2-space indentation.
Hint 2: Pass separators=(',', ':') to json.dumps() to strip all whitespace from separators.
Hint 3: The default separators are (', ', ': ') which include spaces.
Write a default= handler that serializes datetime, date, and set types that json.dumps cannot handle natively.
Solution
import json
from datetime import datetime, date
data = {
'created_at': datetime(2024, 3, 15, 10, 30, 0),
'birth_date': date(1990, 6, 1),
'tags': {'python', 'api', 'json'},
'count': 42
}
def json_default(obj):
    """Fallback serializer for ``json.dumps(..., default=json_default)``.

    ``json.dumps`` calls this only for values it cannot encode natively.

    Args:
        obj: The value the encoder could not serialize.

    Returns:
        An ISO 8601 string for ``datetime``/``date`` values, or a sorted
        list for ``set`` values.

    Raises:
        TypeError: If ``obj`` is any other type, so unsupported values fail
            loudly instead of being silently encoded as ``null``.
    """
    # datetime subclasses date and both expose .isoformat(), so one check
    # covers the two types.
    if isinstance(obj, (datetime, date)):
        return obj.isoformat()
    # Sets are unordered; sorting makes the output deterministic.
    if isinstance(obj, set):
        return sorted(obj)
    raise TypeError(
        f"Object of type {type(obj).__name__} is not JSON serializable"
    )
result = json.dumps(data, default=json_default, sort_keys=True)
parsed = json.loads(result)
print(f"created_at: {parsed['created_at']}")
print(f"birth_date: {parsed['birth_date']}")
print(f"tags sorted: {parsed['tags']}")
print(f"count: {parsed['count']}")
Expected Output
created_at: 2024-03-15T10:30:00
birth_date: 1990-06-01
tags sorted: ['api', 'json', 'python']
count: 42
Hints
Hint 1: Check isinstance(obj, (datetime, date)) first — datetime is a subclass of date and both provide .isoformat(), so this single combined check covers both types.
Hint 2: Return obj.isoformat() for datetime and date objects.
Hint 3: Return sorted(obj) for sets — sorted() already returns a list, and sorting makes the output deterministic.
Use json.load() to parse JSON directly from a file-like object (StringIO) instead of a string.
Solution
import json
from io import StringIO
# Simulate reading JSON from a file or network stream using a file-like object.
json_source = StringIO('{"service": "payments", "version": 3, "enabled": true}')

# json.load() takes the file-like object itself (anything with a .read()
# method); json.loads() would need the text passed in as a string instead.
config = json.load(json_source)

print(f"service: {config['service']}")
print(f"version: {config['version']}")
print(f"enabled: {config['enabled']}")
print(f"type of version: {type(config['version']).__name__}")
Expected Output
service: payments
version: 3
enabled: True
type of version: int
Hints
Hint 1: json.load(fp) reads from any file-like object with a .read() method.
Hint 2: StringIO wraps a string as a readable file-like object — use it in tests instead of real files.
Hint 3: json.load(fp) is a thin convenience wrapper around json.loads(fp.read()) — it still reads the whole document into memory, so it is not a streaming parser.
Subclass json.JSONEncoder to handle Decimal, UUID, and datetime types automatically.
Solution
import json
from decimal import Decimal
from uuid import UUID
from datetime import datetime
class AppJSONEncoder(json.JSONEncoder):
    """Custom encoder that handles:

    - Decimal: serialized as float (``float(obj)``)
    - UUID: serialized as string (``str(obj)``)
    - datetime: serialized as ISO format string
    - All other types: fall back to the parent class ``default()``
    """

    def default(self, obj):
        """Convert ``obj`` into something the base encoder can serialize.

        The encoder calls this only for values it cannot encode natively.

        Raises:
            TypeError: Raised by ``JSONEncoder.default`` for any type not
                handled here.
        """
        if isinstance(obj, Decimal):
            # Emitted as a JSON number; this trades exact decimal precision
            # for a native numeric type.
            return float(obj)
        if isinstance(obj, UUID):
            return str(obj)
        if isinstance(obj, datetime):
            return obj.isoformat()
        return super().default(obj)
data = {
'id': UUID('12345678-1234-5678-1234-567812345678'),
'price': Decimal('19.99'),
'created': datetime(2024, 1, 15, 12, 0, 0),
'name': 'Widget'
}
result = json.dumps(data, cls=AppJSONEncoder)
parsed = json.loads(result)
print(f"id type: {type(parsed['id']).__name__}")
print(f"id: {parsed['id']}")
print(f"price: {parsed['price']}")
print(f"created: {parsed['created']}")
print(f"name: {parsed['name']}")
Expected Output
id type: str
id: 12345678-1234-5678-1234-567812345678
price: 19.99
created: 2024-01-15T12:00:00
name: Widget
Hints
Hint 1: Override the default(self, obj) method in your JSONEncoder subclass.
Hint 2: Use isinstance() checks in the same order as the docstring — Decimal, UUID, datetime.
Hint 3: Call super().default(obj) at the end to raise TypeError for unhandled types.
Serialize a nested dataclass structure (with a date field) to JSON using dataclasses.asdict().
Solution
import json
from dataclasses import dataclass, field, asdict
from typing import List
from datetime import date
@dataclass
class Address:
    """Postal address; nested inside ``User`` in this exercise."""

    street: str
    city: str
    country: str
@dataclass
class User:
    """User record with a nested ``Address`` and a ``date`` field.

    ``joined`` is a ``datetime.date``, which ``json.dumps`` cannot encode
    natively, so serializing a ``User`` needs a custom ``default=`` handler.
    """

    id: int
    name: str
    email: str
    joined: date
    address: Address
    # default_factory gives every instance its own list rather than one
    # shared mutable default.
    roles: List[str] = field(default_factory=list)
def serialize_user(user: User) -> str:
    """Serialize a ``User`` dataclass (including nested dataclasses) to JSON.

    Args:
        user: The dataclass instance to serialize.

    Returns:
        A JSON string. Nested dataclasses become nested objects and ``date``
        values become ISO format strings.

    Raises:
        TypeError: If the structure contains a value that is neither JSON
            native nor a ``date``.
    """
    def _encode_date(value):
        # asdict() leaves date objects untouched, so json.dumps hands them
        # to this fallback.
        if isinstance(value, date):
            return value.isoformat()
        raise TypeError(
            f"Object of type {type(value).__name__} is not JSON serializable"
        )

    # asdict() recursively converts the dataclass and any nested dataclasses
    # into plain dicts and lists.
    return json.dumps(asdict(user), default=_encode_date)
user = User(
id=1,
name='Bob',
email='[email protected]',
joined=date(2023, 5, 20),
address=Address('123 Main St', 'Portland', 'US'),
roles=['admin', 'editor']
)
json_str = serialize_user(user)
data = json.loads(json_str)
print(f"name: {data['name']}")
print(f"joined: {data['joined']}")
print(f"city: {data['address']['city']}")
print(f"roles: {data['roles']}")
Expected Output
name: Bob
joined: 2023-05-20
city: Portland
roles: ['admin', 'editor']
Hints
Hint 1: Call dataclasses.asdict(user) to recursively convert the dataclass (and nested dataclasses) to a plain dict.
Hint 2: The date field becomes a date object inside the dict — handle it with a default= function.
Hint 3: Pass both the converted dict and default= to json.dumps().
Build a JSONEncoder that supports a __json__() protocol, allowing any object to declare its own serialization.
Solution
import json
class JSONSerializable:
    """Mixin that adds a __json__() method returning a serializable dict.

    Subclasses must override ``__json__``; the base implementation only
    signals that no serialization has been defined.
    """

    def __json__(self):
        """Return a JSON-serializable representation of this object.

        Raises:
            NotImplementedError: Always, until a subclass overrides it.
        """
        raise NotImplementedError
class Money:
    """An amount in a given currency that serializes itself via __json__()."""

    def __init__(self, amount, currency):
        self.amount = amount
        self.currency = currency

    def __json__(self):
        """Return a dict with the amount as a string and the currency as given."""
        # str() keeps the amount textual, so no float conversion takes place.
        return {'amount': str(self.amount), 'currency': self.currency}
class Product:
    """A catalogue item that serializes itself via __json__()."""

    def __init__(self, sku, name, price, tags):
        self.sku = sku
        self.name = name
        self.price = price  # Money instance
        self.tags = tags  # list of strings

    def __json__(self):
        """Return a dict of this product's fields.

        The price is expanded eagerly by calling its own ``__json__()``, so
        the returned dict contains the price's dict, not the price object.
        """
        return {
            'sku': self.sku,
            'name': self.name,
            'price': self.price.__json__(),
            'tags': self.tags
        }
class ProtocolEncoder(json.JSONEncoder):
    """JSONEncoder that lets objects declare their own serialization.

    Any object exposing a callable ``__json__()`` is serialized as whatever
    that method returns; everything else falls back to the parent
    ``default()``.
    """

    def default(self, obj):
        """Return ``obj.__json__()`` if available, else defer to the parent.

        The returned value is encoded again by the encoder, so ``__json__()``
        may itself return objects that implement the protocol.

        Raises:
            TypeError: Raised by ``JSONEncoder.default`` when ``obj`` has no
                callable ``__json__``.
        """
        to_json = getattr(obj, '__json__', None)
        if callable(to_json):
            return to_json()
        return super().default(obj)
product = Product('SKU-001', 'Notebook', Money('12.99', 'USD'), ['stationery', 'paper'])
result = json.dumps(product, cls=ProtocolEncoder)
data = json.loads(result)
print(f"sku: {data['sku']}")
print(f"price amount: {data['price']['amount']}")
print(f"price currency: {data['price']['currency']}")
print(f"tags: {data['tags']}")
Expected Output
sku: SKU-001
price amount: 12.99
price currency: USD
tags: ['stationery', 'paper']
Hints
Hint 1: In ProtocolEncoder.default(), check hasattr(obj, '__json__') before calling it.
Hint 2: If __json__ exists, return obj.__json__() — this will be re-processed by the encoder.
Hint 3: Call super().default(obj) at the end to handle truly unserializable objects with TypeError.
Implement a lightweight JSON schema validator that checks types and required fields without external libraries.
Solution
import json
# A lightweight JSON schema validator — no external libraries needed.
# Schema format: {'type': 'object', 'required': [...], 'properties': {field: {'type': ...}}}
# Supported types: 'string', 'number', 'boolean', 'array', 'object', 'null'
# Maps each supported schema type name to the Python type(s) that satisfy it.
TYPE_MAP = {
    'string': str,
    'number': (int, float),
    'boolean': bool,
    'array': list,
    'object': dict,
    'null': type(None),
}


def _matches_type(value, type_name):
    """Return True if ``value`` satisfies the schema type ``type_name``."""
    # bool is a subclass of int in Python, but JSON true/false are not
    # numbers, so reject them explicitly for 'number'.
    if type_name == 'number' and isinstance(value, bool):
        return False
    return isinstance(value, TYPE_MAP[type_name])


def validate_json(data, schema):
    """Validate data against schema. Return list of error strings (empty = valid).

    Checks, in order:
      1. Top-level type matches schema['type']
      2. All 'required' fields are present
      3. Each property in schema['properties'] has the correct type
         (if present in data)

    Args:
        data: The parsed JSON value to validate.
        schema: Dict with optional 'type', 'required' and 'properties' keys.

    Returns:
        A list of human-readable error strings; an empty list means valid.
        An unknown type name in the schema is reported as an error rather
        than raised.
    """
    errors = []

    top_type = schema.get('type')
    if top_type is not None:
        if top_type not in TYPE_MAP:
            return [f"schema error: unknown type '{top_type}'"]
        if not _matches_type(data, top_type):
            # Field-level checks are meaningless once the container itself
            # has the wrong type.
            return [f"expected {top_type}, got {type(data).__name__}"]

    if not isinstance(data, dict):
        # 'required' and 'properties' only apply to objects.
        return errors

    for name in schema.get('required', []):
        if name not in data:
            errors.append(f"missing required field: '{name}'")

    for name, rule in schema.get('properties', {}).items():
        if name not in data:
            continue
        wanted = rule.get('type')
        if wanted is None:
            continue
        if wanted not in TYPE_MAP:
            errors.append(f"schema error: unknown type '{wanted}' for field '{name}'")
        elif not _matches_type(data[name], wanted):
            errors.append(
                f"field '{name}': expected {wanted}, "
                f"got {type(data[name]).__name__}"
            )

    return errors
user_schema = {
'type': 'object',
'required': ['id', 'name', 'email'],
'properties': {
'id': {'type': 'number'},
'name': {'type': 'string'},
'email': {'type': 'string'},
'age': {'type': 'number'},
'active': {'type': 'boolean'},
}
}
valid = {'id': 1, 'name': 'Alice', 'email': '[email protected]', 'active': True}
print(f"valid errors: {validate_json(valid, user_schema)}")
missing_email = {'id': 2, 'name': 'Bob'}
errs = validate_json(missing_email, user_schema)
print(f"missing field error: {'email' in str(errs)}")
wrong_type = {'id': 'not-a-number', 'name': 'Carol', 'email': '[email protected]'}
errs2 = validate_json(wrong_type, user_schema)
print(f"type error: {'id' in str(errs2)}")
Expected Output
valid errors: []
missing field error: True
type error: True
Hints
Hint 1: First check the top-level type: isinstance(data, TYPE_MAP[schema['type']]).
Hint 2: Then check each field in schema.get('required', []) — add an error if it's missing from data.
Hint 3: Then for each key in schema.get('properties', {}), if that key exists in data, check its type.
Build a streaming NDJSON parser that processes one JSON object per line and raises useful errors on bad input.
Solution
import json
from io import StringIO
def stream_json_objects(file_like):
    """Parse a Newline-Delimited JSON (NDJSON) stream line by line.

    Each line is a complete JSON document; empty or whitespace-only lines are
    skipped. This is a generator, so lines are read and parsed lazily as the
    caller iterates.

    Args:
        file_like: An iterable of text lines, such as an open text file or a
            ``StringIO``.

    Yields:
        The parsed value of each non-blank line.

    Raises:
        ValueError: If a line is not valid JSON. The message includes the
            1-based line number within the stream.
    """
    for line_number, line in enumerate(file_like, start=1):
        text = line.strip()
        if not text:
            continue
        try:
            parsed = json.loads(text)
        except json.JSONDecodeError as exc:
            # exc.msg omits the decoder's own "line 1 column N" position,
            # which is relative to this single line and would be misleading.
            raise ValueError(
                f"Invalid JSON on line {line_number}: {exc.msg}"
            ) from exc
        # Yield outside the try so exceptions raised by the consumer are
        # never mistaken for parse errors.
        yield parsed
# Simulate an NDJSON file (one JSON object per line)
ndjson_data = """
{"id": 1, "event": "login", "user": "alice"}
{"id": 2, "event": "purchase", "user": "bob", "amount": 49.99}
{"id": 3, "event": "logout", "user": "alice"}
""".strip()
stream = StringIO(ndjson_data)
events = list(stream_json_objects(stream))
print(f"parsed {len(events)} events")
print(f"first event: {events[0]['event']}")
print(f"second amount: {events[1]['amount']}")
print(f"third user: {events[2]['user']}")
# Test error on invalid line: the second of the three lines is not valid JSON.
# The newlines are written as \n escapes so the literal stays on one line.
bad_stream = StringIO('{"valid": true}\n{broken json}\n{"also": "fine"}')
try:
    list(stream_json_objects(bad_stream))
except ValueError as e:
    print(f"parse error caught: {'line' in str(e).lower()}")
Expected Output
parsed 3 events
first event: login
second amount: 49.99
third user: alice
parse error caught: True
Hints
Hint 1: Iterate over the file-like object line by line using enumerate(file_like, start=1).
Hint 2: Skip lines where line.strip() is empty.
Hint 3: Wrap json.loads(line) in a try/except json.JSONDecodeError and re-raise as ValueError with the line number.
Implement a pre-serialization circular reference detector that traverses dicts and lists by identity.
Solution
import json
def has_circular_reference(obj, seen=None):
    """Detect circular references in a Python structure before serializing.

    Only mutable containers (dict, list) are tracked, by identity using id().

    Args:
        obj: The value to inspect.
        seen: ids of the containers on the current traversal path. Callers
            normally omit it; it is threaded through the recursion.

    Returns:
        True if a container is reachable from itself, False otherwise. A
        container that is merely referenced from two places is not circular.
    """
    if seen is None:
        seen = set()

    if not isinstance(obj, (dict, list)):
        return False

    marker = id(obj)
    if marker in seen:
        # This container is already on the current path: a cycle.
        return True

    seen.add(marker)
    try:
        children = obj.values() if isinstance(obj, dict) else obj
        return any(has_circular_reference(child, seen) for child in children)
    finally:
        # Backtrack so a container shared by sibling branches is not
        # misreported as a cycle.
        seen.discard(marker)
# No circular reference
safe = {'a': 1, 'b': [2, 3], 'c': {'d': 4}}
print(f"safe has circular: {has_circular_reference(safe)}")
# Direct circular: list points to itself
circular_list = [1, 2, 3]
circular_list.append(circular_list)
print(f"circular_list detected: {has_circular_reference(circular_list)}")
# Indirect circular: a -> b -> a
a = {}
b = {'ref': a}
a['ref'] = b
print(f"indirect circular detected: {has_circular_reference(a)}")
# Safe nested (same value object reused, not same container)
shared_val = 42
safe2 = {'x': shared_val, 'y': shared_val}
print(f"shared value (not circular): {has_circular_reference(safe2)}")
Expected Output
safe has circular: False
circular_list detected: True
indirect circular detected: True
shared value (not circular): False
Hints
Hint 1: Track containers by id() in the 'seen' set — only mutable containers (dict, list) can be circular.
Hint 2: When entering a container, add id(obj) to seen; remove it when leaving (backtrack).
Hint 3: For dicts, recurse on values; for lists, recurse on elements; return False for all other types.
Build a production API response encoder that cleanly handles datetime, Decimal, Enum, UUID, and set types in a single JSONEncoder subclass.
Solution
import json
from datetime import datetime, timezone
from decimal import Decimal
from enum import Enum
from uuid import UUID
class OrderStatus(Enum):
    """Order lifecycle states; each value is the string used on the wire."""

    PENDING = 'pending'
    PAID = 'paid'
    SHIPPED = 'shipped'
    DELIVERED = 'delivered'
class APIResponseEncoder(json.JSONEncoder):
    """Production-grade encoder for API responses.

    Handles:
    - datetime: ISO format string (includes the offset when timezone-aware)
    - Decimal: string representation (preserves precision, avoids float errors)
    - Enum: its ``.value``
    - UUID: ``str()``
    - set/frozenset: sorted list
    - objects with ``__dict__``: serialized as their ``__dict__``
    """

    def default(self, obj):
        """Convert ``obj`` into something the base encoder can serialize.

        Returned containers and values are encoded again, so nested
        unsupported values (for example a Decimal inside an object's
        ``__dict__``) are handled too.

        Raises:
            TypeError: Raised by ``JSONEncoder.default`` for values that
                match none of the supported cases.
        """
        if isinstance(obj, datetime):
            return obj.isoformat()
        if isinstance(obj, Decimal):
            return str(obj)
        # Enum members have a __dict__, so this check must come before the
        # generic __dict__ fallback below.
        if isinstance(obj, Enum):
            return obj.value
        if isinstance(obj, UUID):
            return str(obj)
        if isinstance(obj, (set, frozenset)):
            # Sets are unordered; sorting makes the output deterministic.
            return sorted(obj)
        if hasattr(obj, '__dict__'):
            return vars(obj)
        return super().default(obj)
order = {
'order_id': UUID('aaaabbbb-cccc-dddd-eeee-ffffaaaabbbb'),
'status': OrderStatus.PAID,
'total': Decimal('149.95'),
'tax': Decimal('12.50'),
'created_at': datetime(2024, 6, 1, 14, 30, 0, tzinfo=timezone.utc),
'items': [
{'sku': 'A1', 'qty': 2, 'price': Decimal('49.99')},
{'sku': 'B2', 'qty': 1, 'price': Decimal('50.00')},
],
'tags': frozenset(['express', 'gift']),
}
result = json.dumps(order, cls=APIResponseEncoder)
data = json.loads(result)
print(f"status: {data['status']}")
print(f"total: {data['total']}")
print(f"created_at ends with Z or +00:00: {data['created_at'].endswith(('+00:00', 'Z'))}")
print(f"order_id type: {type(data['order_id']).__name__}")
print(f"tags sorted: {sorted(data['tags'])}")
print(f"item price type: {type(data['items'][0]['price']).__name__}")
Expected Output
status: paid
total: 149.95
created_at ends with Z or +00:00: True
order_id type: str
tags sorted: ['express', 'gift']
item price type: str
Hints
Hint 1: Check types in order: datetime (before date), Decimal, Enum, UUID, set/frozenset, then objects with __dict__.
Hint 2: For Decimal, use str(obj) to preserve '149.95' exactly — float(obj) would give floating-point imprecision.
Hint 3: For Enum, return obj.value; for UUID, return str(obj); for set/frozenset, return sorted(obj) — sorted() already returns a list.
