Backend DevelopmentDatabasesORMs
Learn SQLAlchemy
A comprehensive SQLAlchemy Guide
Welcome to the comprehensive SQLAlchemy course. SQLAlchemy is Python's most popular SQL toolkit and ORM.
Table of contents
-
Getting Started
-
Chapter I
-
Chapter II
-
Chapter III
-
Chapter IV
-
Chapter V
-
Appendix
What is SQLAlchemy?
SQLAlchemy is a Python SQL toolkit and Object-Relational Mapper that provides flexibility and power for database operations.
Key Components
- Core: SQL expression language and Engine
- ORM: Object Relational Mapper built on Core
- Alembic: Database migration tool
Philosophy
SQLAlchemy treats SQL as a relational algebra engine, not just a string-based query language.
Why SQLAlchemy?
1. Database Agnostic
Write code once, run on PostgreSQL, MySQL, SQLite, etc.
2. Pythonic
Pythonic API that feels natural in Python.
3. Flexible
Use as little or as much of the ORM as you need.
4. Production Ready
Used by major companies worldwide.
5. Well Documented
Excellent documentation and community.
Installation and Setup
Installation
pip install sqlalchemy
pip install alembic
pip install psycopg2-binary # PostgreSQL
pip install pymysql # MySQL
pip install pysqlite3 # SQLiteBasic Setup
from sqlalchemy import create_engine
# SQLite
engine = create_engine("sqlite:///mydb.db")
# PostgreSQL
engine = create_engine(
"postgresql://user:password@localhost:5432/mydb"
)
# MySQL
engine = create_engine(
"mysql+pymysql://user:password@localhost:3306/mydb"
)
# With connection pool
engine = create_engine(
"postgresql://user:password@localhost/mydb",
pool_size=5,
max_overflow=10,
pool_pre_ping=True
)Engine and Connection
Engine
from sqlalchemy import create_engine
engine = create_engine("sqlite:///mydb.db", echo=True)
# echo=True prints SQL to stdoutConnection
# Using engine
with engine.connect() as connection:
result = connection.execute(text("SELECT * FROM users"))
for row in result:
print(row)Working with Transactions
with engine.connect() as connection:
with connection.begin():
connection.execute(text("INSERT INTO users VALUES (:name)"), {"name": "John"})
connection.execute(text("INSERT INTO users VALUES (:name)"), {"name": "Jane"})Tables and Metadata
Defining Tables
from sqlalchemy import Table, Column, Integer, String, MetaData
metadata = MetaData()
users = Table(
"users",
metadata,
Column("id", Integer, primary_key=True),
Column("name", String(100), nullable=False),
Column("email", String(100), unique=True),
Column("age", Integer),
Column("created_at", DateTime, default=datetime.utcnow)
)Column Types
from sqlalchemy import (
Integer, String, Boolean, Float, DateTime,
Date, Time, LargeBinary, JSON, Enum, ARRAY,
Text, Numeric, Interval
)
Column("id", Integer, primary_key=True)
Column("name", String(50))
Column("active", Boolean, default=True)
Column("price", Numeric(10, 2))
Column("data", JSON)
Column("tags", ARRAY(String))Constraints
from sqlalchemy import PrimaryKeyConstraint, UniqueConstraint, CheckConstraint
users = Table(
"users",
metadata,
Column("id", Integer),
Column("email", String(100)),
Column("age", Integer),
PrimaryKeyConstraint("id"),
UniqueConstraint("email"),
CheckConstraint("age >= 0")
)Indexes
Column("email", String(100), index=True)
# Or explicit
from sqlalchemy import Index
Index("idx_name_email", "name", "email")Models and Declarative
Declarative Base
from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column
from sqlalchemy import String, Integer, Boolean, DateTime
from datetime import datetime
class Base(DeclarativeBase):
pass
class User(Base):
__tablename__ = "users"
id: Mapped[int] = mapped_column(primary_key=True)
name: Mapped[str] = mapped_column(String(100))
email: Mapped[str] = mapped_column(String(100), unique=True)
age: Mapped[int] = mapped_column(default=0)
is_active: Mapped[bool] = mapped_column(Boolean, default=True)
created_at: Mapped[datetime] = mapped_column(
DateTime, default=datetime.utcnow
)Creating Tables
# Create all tables
Base.metadata.create_all(engine)
# Drop all tables
Base.metadata.drop_all(engine)
# Create specific table
users_table.create(engine, checkfirst=True)Type Annotations
from sqlalchemy.orm import Mapped, mapped_column
from sqlalchemy import String, Integer
# Python 3.9+ with generic types
from __future__ import annotationsSessions
Creating Sessions
from sqlalchemy.orm import Session, sessionmaker
# Create session factory
SessionLocal = sessionmaker(bind=engine)
# Create session
with SessionLocal() as session:
# use session
pass
# Or manually
session = SessionLocal()
try:
# use session
session.commit()
except:
session.rollback()
finally:
session.close()Session Operations
session.add(user) # Add single
session.add_all([user1, user2]) # Add multiple
session.commit() # Commit transaction
session.rollback() # Rollback
session.close() # Close session
session.flush() # Flush to DBQuerying with Session
session.query(User).all() # All users
session.query(User).filter(User.id == 1) # Filter
session.query(User).first() # First result
session.query(User).one() # Exactly one
session.query(User).one_or_none() # One or noneCRUD Operations
Create
# Create new user
user = User(name="John", email="john@example.com")
session.add(user)
session.commit()
# Multiple
users = [User(name="John"), User(name="Jane")]
session.add_all(users)
session.commit()
# Get generated ID
session.add(user)
session.flush() # Flush to get ID without commit
print(user.id)Read
# Get by primary key
user = session.get(User, 1)
# Query
user = session.query(User).filter(User.id == 1).first()
user = session.query(User).filter_by(id=1).first()Update
user = session.get(User, 1)
user.name = "Updated Name"
session.commit()
# Bulk update (SQLAlchemy 1.4+)
from sqlalchemy import update
stmt = update(User).where(User.id == 1).values(name="New Name")
session.execute(stmt)
session.commit()Delete
user = session.get(User, 1)
session.delete(user)
session.commit()
# Bulk delete
from sqlalchemy import delete
stmt = delete(User).where(User.is_active == False)
session.execute(stmt)
session.commit()Querying
Basic Queries
# All users
session.query(User).all()
# First result
session.query(User).first()
# Single result
session.query(User).filter(User.id == 1).one()
# Count
session.query(User).count()
session.query(func.count(User.id)).scalar()Selecting Columns
from sqlalchemy import select
stmt = select(User.name, User.email)
result = session.execute(stmt)
for name, email in result:
print(name, email)
# Using scalars
stmt = select(User.name).where(User.id == 1)
name = session.execute(stmt).scalar_one()Ordering
session.query(User).order_by(User.name)
session.query(User).order_by(User.name.desc())
session.query(User).order_by(User.name.asc())
session.query(User).order_by(User.created_at.desc(), User.name)Limiting
session.query(User).limit(10).all()
session.query(User).offset(20).limit(10).all()
session.query(User).slice(0, 10).all()Filtering
Filter Operators
# Equality
session.query(User).filter(User.id == 1)
session.query(User).filter(User.id == None) # NULL check
# Comparison
session.query(User).filter(User.age > 18)
session.query(User).filter(User.age >= 18)
session.query(User).filter(User.age < 65)
session.query(User).filter(User.age <= 65)
session.query(User).filter(User.age.between(18, 65))
# IN
session.query(User).filter(User.id.in_([1, 2, 3]))
session.query(User).filter(~User.id.in_([4, 5, 6]))
# LIKE
session.query(User).filter(User.name.like('%John%'))
session.query(User).filter(User.name.ilike('%john%')) # Case insensitive
# String functions
session.query(User).filter(User.name.startswith('J'))
session.query(User).filter(User.name.endswith('n'))
session.query(User).filter(User.name.contains('ohn'))Boolean Operations
# AND
session.query(User).filter(and_(User.age > 18, User.is_active == True))
session.query(User).filter(User.age > 18, User.is_active == True) # Implicit
session.query(User).filter(and_(
User.age > 18,
or_(User.is_active == True, User.name == "Admin")
))
# OR
session.query(User).filter(or_(User.id == 1, User.id == 2))
# NOT
session.query(User).filter(~User.is_active)
session.query(User).filter(User.is_active == False)NULL Handling
session.query(User).filter(User.phone.is_(None))
session.query(User).filter(User.phone.isnot(None))
session.query(User).filter(User.phone.is_distinct_from(None))Text Expressions
from sqlalchemy import text
session.query(User).filter(text("id < 10"))
session.query(User).filter(text("name LIKE :name")).params(name="%John%")Joins and Relationships
Defining Relationships
from sqlalchemy.orm import relationship, Mapped, mapped_column
from sqlalchemy import String, Integer, ForeignKey
from datetime import datetime
class User(Base):
__tablename__ = "users"
id: Mapped[int] = mapped_column(primary_key=True)
name: Mapped[str] = mapped_column(String(100))
# One-to-many relationship
orders: Mapped[List["Order"]] = relationship(
"Order", back_populates="user", cascade="all, delete-orphan"
)
class Order(Base):
__tablename__ = "orders"
id: Mapped[int] = mapped_column(primary_key=True)
user_id: Mapped[int] = mapped_column(ForeignKey("users.id"))
total: Mapped[float] = mapped_column(default=0)
user: Mapped["User"] = relationship("User", back_populates="orders")Types of Relationships
# One-to-One
class User(Base):
__tablename__ = "users"
profile: Mapped["Profile"] = relationship(
"Profile", back_populates="user", uselist=False
)
# Many-to-Many
tags = Table(
"article_tags", Base.metadata,
Column("article_id", ForeignKey("articles.id"), primary_key=True),
Column("tag_id", ForeignKey("tags.id"), primary_key=True)
)
class Article(Base):
__tablename__ = "articles"
tags: Mapped[List["Tag"]] = relationship("Tag", secondary=tags)
class Tag(Base):
__tablename__ = "tags"Querying with Joins
# Simple join
for user, order in session.query(User, Order).filter(User.id == Order.user_id):
print(user.name, order.total)
# Using relationship
session.query(User).join(Order).filter(Order.total > 100)
# Explicit join
session.query(User).join(Order, User.id == Order.user_id)
# Outer join
session.query(User).outerjoin(Order)Eager Loading
from sqlalchemy.orm import selectinload, joinedload, subqueryload
# joinedload - uses LEFT OUTER JOIN
session.query(User).options(joinedload(User.orders)).all()
# selectinload - uses separate IN query
session.query(User).options(selectinload(User.orders)).all()
# subqueryload - uses subquery
session.query(User).options(subqueryload(User.orders)).all()ORM Mappings
Back Populates
class Parent(Base):
__tablename__ = "parent"
id: Mapped[int] = mapped_column(primary_key=True)
children: Mapped[List["Child"]] = relationship("Child", back_populates="parent")
class Child(Base):
__tablename__ = "child"
id: Mapped[int] = mapped_column(primary_key=True)
parent_id: Mapped[int] = mapped_column(ForeignKey("parent.id"))
parent: Mapped["Parent"] = relationship("Child", back_populates="parent")Back Reference
class User(Base):
__tablename__ = "users"
orders: Mapped[List["Order"]] = relationship(
back_populates="user",
cascade="all, delete-orphan"
)
class Order(Base):
__tablename__ = "orders"
user_id: Mapped[int] = mapped_column(ForeignKey("users.id"))
user: Mapped["User"] = relationship(back_populates="orders")Lazy Loading
# Default is 'select' (lazy)
orders: Mapped[List["Order"]] = relationship("Order", lazy="select")
# Options
orders: Mapped[List["Order"]] = relationship("Order", lazy="joined") # Eager
orders: Mapped[List["Order"]] = relationship("Order", lazy="subquery")
orders: Mapped[List["Order"]] = relationship("Order", lazy="selectin")
orders: Mapped[List["Order"]] = relationship("Order", lazy="raise")
orders: Mapped[List["Order"]] = relationship("Order", lazy="raise_on_default")
orders: Mapped[List["Order"]] = relationship("Order", lazy="noload") # NeverCascade Options
orders: Mapped[List["Order"]] = relationship(
"Order",
cascade="all, delete-orphan", # Common setting
# cascade="save-update, merge" # Only these
)Inheritance
Single Table Inheritance
class Employee(Base):
__tablename__ = "employees"
id: Mapped[int] = mapped_column(primary_key=True)
name: Mapped[str] = mapped_column()
employee_type: Mapped[str] = mapped_column() # Discriminator
__mapper_args__ = {
"polymorphic_on": "employee_type",
"polymorphic_identity": "employee"
}
class Manager(Employee):
__mapper_args__ = {"polymorphic_identity": "manager"}
department_size: Mapped[int] = mapped_column()
class Engineer(Employee):
__mapper_args__ = {"polymorphic_identity": "engineer"}
programming_language: Mapped[str] = mapped_column()Concrete Table Inheritance
class Employee(Base):
__tablename__ = "employees"
id: Mapped[int] = mapped_column(primary_key=True)
name: Mapped[str] = mapped_column()
class Manager(Employee):
__tablename__ = "managers"
__mapper_args__ = {"concrete": True}
id: Mapped[int] = mapped_column(ForeignKey("employees.id"), primary_key=True)
department_size: Mapped[int] = mapped_column()Async Support
Async Engine
from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession, async_sessionmaker
engine = create_async_engine("sqlite+aiosqlite:///mydb.db")
async_session = async_sessionmaker(
engine,
class_=AsyncSession,
expire_on_commit=False
)
# Or with PostgreSQL
engine = create_async_engine(
"postgresql+asyncpg://user:pass@localhost/db"
)Async Session
async with async_session() as session:
async with session.begin():
user = User(name="John", email="john@example.com")
session.add(user)
# Or
session = async_session()
try:
result = await session.execute(select(User))
users = result.scalars().all()
finally:
await session.close()Async Queries
async with async_session() as session:
# Query
result = await session.execute(select(User).where(User.id == 1))
user = result.scalar_one_or_none()
# Async iteration
result = await session.stream(select(User))
async for user in result.scalars():
print(user.name)Transactions
Basic Transaction
with engine.connect() as conn:
with conn.begin():
conn.execute(text("INSERT INTO users VALUES (:name)"), {"name": "John"})Session Transaction
# Each commit is its own transaction
session.add(user)
session.commit() # Commits transaction
# Multiple operations in one transaction
try:
session.add(user1)
session.add(user2)
session.flush() # Execute without commit
session.add(order)
session.commit()
except:
session.rollback()
raiseSavepoints
session.begin_nested() # Creates savepoint
session.rollback_to_savepoint() # Rollback to savepoint
session.commit() # Commits savepoint transactionRaw SQL
text()
from sqlalchemy import text
result = session.execute(text("SELECT * FROM users WHERE id = :id"), {"id": 1})
for row in result:
print(row)Executing Raw SQL
# Select
result = session.execute(text("SELECT * FROM users"))
for row in result:
print(row)
# Insert
session.execute(text("INSERT INTO users (name) VALUES (:name)"), {"name": "John"})
# Update
session.execute(
text("UPDATE users SET name = :name WHERE id = :id"),
{"name": "Jane", "id": 1}
)
# Delete
session.execute(text("DELETE FROM users WHERE id = :id"), {"id": 1})SELECT with Core
from sqlalchemy import select
stmt = select(users).where(users.c.id == 1)
result = session.execute(stmt)Migrations with Alembic
Installation
pip install alembic
alembic init alembicConfiguration
# alembic.ini
sqlalchemy.url = postgresql://user:pass@localhost/mydb
# alembic/env.py
from yourapp.database import Base
target_metadata = Base.metadataCreating Migrations
alembic revision --autogenerate -m "Add users table"
alembic upgrade head
alembic downgrade -1
alembic historyMigration Example
# revisions/abc123.py
from alembic import op
import sqlalchemy as sa
revision = 'abc123'
down_revision = 'previous_revision'
alembic_version.table_name = 'alembic_version'
def upgrade():
op.create_table(
'users',
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('name', sa.String(100), nullable=False),
sa.PrimaryKeyConstraint('id')
)
def downgrade():
op.drop_table('users')Performance
Query Logging
engine = create_engine("sqlite:///mydb.db", echo=True)Bulk Operations
# Bulk insert
session.bulk_insert_mappings(User, [
{"name": "John", "email": "john@example.com"},
{"name": "Jane", "email": "jane@example.com"}
])
# Bulk update
session.bulk_update_mappings(User, [
{"id": 1, "name": "Updated"},
{"id": 2, "name": "Updated 2"}
])
# Core insert
from sqlalchemy import insert
stmt = insert(users).values([
{"name": "John"},
{"name": "Jane"}
])
session.execute(stmt)Batch Inserts
from sqlalchemy.dialects.postgresql import insert
stmt = insert(users).values([
{"name": "John", "email": "john@example.com"},
{"name": "Jane", "email": "jane@example.com"}
])
stmt = stmt.on_conflict_do_nothing(index_elements=["email"])
session.execute(stmt)N+1 Prevention
# Always use eager loading for known relationships
session.query(User).options(selectinload(User.orders))
# Or configure in relationship
class User(Base):
orders: Mapped[List["Order"]] = relationship(lazy="selectin")Caching
Query Caching
from sqlalchemy.orm import query
# Query cache is built-in for compiled queries
result = session.query(User).filter(User.id == 1).all()Second Level Cache
# Using dogpile.cache
from sqlalchemy.ext.baked import baked
bakery = baked(session)
bakery += lambda s: s.query(User)
result = bakery.one()Testing
In-Memory SQLite
import pytest
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
@pytest.fixture
def engine():
engine = create_engine("sqlite:///:memory:")
Base.metadata.create_all(engine)
return engine
@pytest.fixture
def session(engine):
Session = sessionmaker(bind=engine)
session = Session()
yield session
session.close()
def test_create_user(session):
user = User(name="John", email="john@example.com")
session.add(user)
session.commit()
assert user.id is not None
assert session.query(User).count() == 1Fixtures with pytest-asyncio
import pytest
from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession, async_sessionmaker
@pytest.fixture
async def async_session():
engine = create_async_engine("sqlite+aiosqlite:///:memory:")
async with engine.begin() as conn:
await conn.run_sync(Base.metadata.create_all)
async_session = async_sessionmaker(
engine, class_=AsyncSession, expire_on_commit=False
)
async with async_session() as session:
yield sessionPatterns and Best Practices
Repository Pattern
class UserRepository:
def __init__(self, session):
self.session = session
def get_by_id(self, id):
return self.session.get(User, id)
def get_by_email(self, email):
return self.session.query(User).filter(User.email == email).first()
def get_all(self):
return self.session.query(User).all()
def add(self, user):
self.session.add(user)
self.session.commit()
return user
def delete(self, user):
self.session.delete(user)
self.session.commit()Unit of Work
class UnitOfWork:
def __init__(self):
self.session = None
def __enter__(self):
self.session = Session()
self.users = UserRepository(self.session)
return self
def __exit__(self, *args):
self.session.close()
def commit(self):
self.session.commit()
def rollback(self):
self.session.rollback()
# Usage
with UnitOfWork() as uow:
uow.users.add(User(name="John"))
uow.commit()Common Patterns
Soft Delete
from sqlalchemy import event
class SoftDeleteMixin:
deleted_at: Mapped[datetime] = mapped_column(nullable=True)
@classmethod
def get_active(cls, session):
return session.query(cls).filter(cls.deleted_at.is_(None))
class User(Base, SoftDeleteMixin):
__tablename__ = "users"
id: Mapped[int] = mapped_column(primary_key=True)
name: Mapped[str] = mapped_column()Timestamp Mixin
class TimestampMixin:
created_at: Mapped[datetime] = mapped_column(default=datetime.utcnow)
updated_at: Mapped[datetime] = mapped_column(
default=datetime.utcnow, onupdate=datetime.utcnow
)Pagination
def paginate(query, page=1, per_page=20):
return query.offset((page - 1) * per_page).limit(per_page)
users = paginate(session.query(User).filter(User.is_active), page=2)Next Steps
Now that you know SQLAlchemy fundamentals:
- Learn about more advanced ORM features
- Explore async patterns for web frameworks
- Study performance optimization
- Learn Alembic for production migrations
- Explore integration with FastAPI, Django, Flask