Skip to content

Best Practices

Production-ready patterns and recommendations for using rsylla effectively.

Connection Management

Reuse Sessions

# GOOD: Create session once
session = await Session.connect(["node:9042"])

async def get_user(user_id):
    return await session.execute("SELECT ...", {"id": user_id})

# BAD: Create new session per query
async def get_user_bad(user_id):
    session = await Session.connect(["node:9042"])  # Slow!
    return await session.execute("SELECT ...", {"id": user_id})

Configure for Production

session = await (
    SessionBuilder()
    .known_nodes(["node1:9042", "node2:9042", "node3:9042"])
    .pool_size(20)
    .connection_timeout(10000)
    .compression("lz4")
    .tcp_nodelay(True)
    .user("app", "password")
    .build()
)

Query Optimization

Use Prepared Statements

# GOOD: Prepare once, execute many
stmt = await session.prepare("INSERT INTO users ...")
for user in users:
    await session.execute_prepared(stmt, user)

# BAD: Parse query every time
for user in users:
    await session.execute("INSERT INTO users ...", user)

Select Only Needed Columns

# GOOD
result = await session.execute("SELECT id, name FROM users WHERE id = ?")

# BAD
result = await session.execute("SELECT * FROM users WHERE id = ?")

Set Appropriate Consistency

# Fast reads
read_query = Query("SELECT ...").with_consistency("LOCAL_ONE")

# Durable writes
write_query = Query("INSERT ...").with_consistency("LOCAL_QUORUM")

Data Modeling

Design for Queries

# GOOD: Table designed for query pattern
CREATE TABLE posts_by_user (
    user_id int,
    created_at timestamp,
    post_id uuid,
    title text,
    PRIMARY KEY (user_id, created_at)
) WITH CLUSTERING ORDER BY (created_at DESC)

# Query: "Get latest posts for user"
SELECT * FROM posts_by_user WHERE user_id = ? LIMIT 10

# BAD: Requires ALLOW FILTERING
SELECT * FROM posts WHERE user_id = ? ALLOW FILTERING

Avoid Large Partitions

# GOOD: Partition by user + date
PRIMARY KEY ((user_id, event_date), event_time)

# BAD: Unbounded partition
PRIMARY KEY (user_id, event_time)  # Grows forever!

Batch Operations

Batch Same Partition

# GOOD: Same partition key
batch = Batch("logged")
batch.append_statement("INSERT INTO user_data (user_id, ...) VALUES (?, ...)")
batch.append_statement("UPDATE user_stats SET ... WHERE user_id = ?")
await session.batch(batch, [...])

# BAD: Different partitions
for user_id in range(1000):  # 1000 partitions!
    batch.append_statement("INSERT INTO users ...")

Use Unlogged When Possible

# If atomicity not required, unlogged is faster
batch = Batch("unlogged")

Error Handling

Implement Retry Logic

async def execute_with_retry(session, query, values, retries=3):
    for attempt in range(retries):
        try:
            return await session.execute(query, values)
        except ScyllaError as e:
            if attempt == retries - 1:
                raise
            await asyncio.sleep(2 ** attempt)

Handle Specific Errors

try:
    await session.execute("...")
except ScyllaError as e:
    logger.error(f"Query failed: {e}")
    # Handle gracefully

Performance

Enable Compression

session = await (
    SessionBuilder()
    .compression("lz4")  # Or "snappy"
    .build()
)

Use Paging for Large Results

query = Query("SELECT * FROM large_table").with_page_size(1000)
result = await session.query(query)

for row in result:  # Handles paging automatically
    process(row)

Mark Idempotent Queries

# Enables safe retries
query = Query("SELECT ...").set_idempotent(True)

Anti-Patterns to Avoid

Never Use ALLOW FILTERING

# BAD
await session.execute("SELECT * FROM users WHERE email = ? ALLOW FILTERING")

# GOOD: Create proper table
CREATE TABLE users_by_email (email text PRIMARY KEY, user_id int)

Avoid SELECT COUNT(*)

# BAD: Scans entire table
result = await session.execute("SELECT COUNT(*) FROM users")

# GOOD: Use counters
UPDATE user_count SET count = count + 1 WHERE type = 'total'

Don't Use IN with Many Values

# BAD
SELECT * FROM users WHERE id IN (1, 2, 3, ..., 1000)

# GOOD: Individual queries or batches
for user_id in ids:
    result = await session.execute("SELECT * FROM users WHERE id = ?", {"id": user_id})

Production Checklist

Configuration

  • [ ] Use multiple known nodes
  • [ ] Set appropriate pool size
  • [ ] Enable compression
  • [ ] Configure timeouts
  • [ ] Enable TCP nodelay

Queries

  • [ ] Use prepared statements
  • [ ] Set appropriate consistency levels
  • [ ] Mark idempotent queries
  • [ ] Use paging for large results

Error Handling

  • [ ] Implement retry logic
  • [ ] Log errors appropriately
  • [ ] Handle connection failures

Monitoring

  • [ ] Track query latencies
  • [ ] Monitor error rates
  • [ ] Alert on failures