PostgreSQL Reference for Developers
Queries, joins, indexes, transactions, and the things that actually matter when building on Postgres
๐PostgreSQL Reference for Developers
Queries, joins, indexes, transactions, and the things that actually matter when building on Postgres.
๐Data Definition: Tables
-- Create a table
CREATE TABLE users (
id BIGSERIAL PRIMARY KEY, -- auto-incrementing integer PK
email TEXT NOT NULL UNIQUE, -- enforced at DB level
name TEXT NOT NULL,
age INTEGER CHECK (age >= 0),
role TEXT NOT NULL DEFAULT 'user',
created_at TIMESTAMPTZ NOT NULL DEFAULT now(),
deleted_at TIMESTAMPTZ -- NULL = not deleted (soft delete)
);
-- Prefer BIGSERIAL or BIGINT GENERATED ALWAYS AS IDENTITY over SERIAL
id BIGINT GENERATED ALWAYS AS IDENTITY PRIMARY KEY
-- Or use UUID as primary key (requires pgcrypto or gen_random_uuid())
id UUID PRIMARY KEY DEFAULT gen_random_uuid()
-- Alter an existing table
ALTER TABLE users ADD COLUMN bio TEXT;
ALTER TABLE users ALTER COLUMN name SET NOT NULL;
ALTER TABLE users DROP COLUMN bio;
ALTER TABLE users RENAME COLUMN name TO full_name;
ALTER TABLE users RENAME TO accounts;
-- Drop
DROP TABLE users;
DROP TABLE IF EXISTS users CASCADE; -- CASCADE drops dependent views/FKs๐Foreign Keys
CREATE TABLE posts (
id BIGSERIAL PRIMARY KEY,
user_id BIGINT NOT NULL REFERENCES users(id) ON DELETE CASCADE,
title TEXT NOT NULL,
body TEXT,
published BOOLEAN NOT NULL DEFAULT false,
created_at TIMESTAMPTZ NOT NULL DEFAULT now()
);
-- ON DELETE options:
-- CASCADE โ delete child rows when parent is deleted
-- SET NULL โ set FK column to NULL when parent is deleted
-- SET DEFAULT โ set FK column to its default value
-- RESTRICT โ prevent parent deletion if children exist (default)
-- NO ACTION โ like RESTRICT but deferred until end of transaction
-- Always index foreign key columns (Postgres does NOT do this automatically)
CREATE INDEX ON posts(user_id);Postgres does not automatically create indexes on foreign key columns. Every FK column that you JOIN or filter on needs a manual
CREATE INDEX. Missing FK indexes cause sequential scans and are one of the most common Postgres performance mistakes.
๐INSERT, UPDATE, DELETE
-- Insert a single row
INSERT INTO users (email, name) VALUES ('alice@example.com', 'Alice');
-- Insert and return the generated id
INSERT INTO users (email, name) VALUES ('bob@example.com', 'Bob') RETURNING id;
-- Insert multiple rows
INSERT INTO users (email, name) VALUES
('carol@example.com', 'Carol'),
('dave@example.com', 'Dave');
-- Upsert: insert or update on conflict
INSERT INTO users (email, name)
VALUES ('alice@example.com', 'Alice Updated')
ON CONFLICT (email)
DO UPDATE SET name = EXCLUDED.name, updated_at = now();
-- Ignore on conflict (do nothing)
INSERT INTO users (email, name) VALUES ('alice@example.com', 'Alice')
ON CONFLICT DO NOTHING;
-- Update
UPDATE users SET name = 'Alice Smith', role = 'admin' WHERE id = 1;
-- Update and return modified rows
UPDATE users SET role = 'admin' WHERE email = 'alice@example.com' RETURNING *;
-- Delete
DELETE FROM users WHERE id = 1;
DELETE FROM users WHERE deleted_at < now() - INTERVAL '30 days' RETURNING id;๐SELECT Fundamentals
-- Basic select
SELECT id, name, email FROM users;
SELECT * FROM users; -- avoid * in production queries
-- Filter
SELECT * FROM users WHERE role = 'admin' AND age > 18;
SELECT * FROM users WHERE id IN (1, 2, 3);
SELECT * FROM users WHERE id NOT IN (SELECT user_id FROM banned);
SELECT * FROM users WHERE deleted_at IS NULL; -- NULL check
SELECT * FROM users WHERE deleted_at IS NOT NULL;
-- LIKE and ILIKE (case-insensitive)
SELECT * FROM users WHERE name LIKE 'A%'; -- starts with A
SELECT * FROM users WHERE email ILIKE '%@gmail.com';
-- Sort
SELECT * FROM users ORDER BY created_at DESC;
SELECT * FROM users ORDER BY role ASC, name ASC;
SELECT * FROM users ORDER BY name NULLS LAST; -- push NULLs to the end
-- Limit and offset (pagination)
SELECT * FROM users ORDER BY id LIMIT 20 OFFSET 40; -- page 3 of 20OFFSET pagination is slow on large tables.
OFFSET 10000 LIMIT 20requires scanning 10020 rows to return 20. Use keyset (cursor) pagination instead:WHERE id > :last_id ORDER BY id LIMIT 20. Much faster and consistent under inserts.
๐Aggregates and GROUP BY
-- Aggregate functions
SELECT COUNT(*) FROM users;
SELECT COUNT(*) FROM users WHERE role = 'admin';
SELECT AVG(age), MIN(age), MAX(age), SUM(age) FROM users;
-- GROUP BY
SELECT role, COUNT(*) AS total
FROM users
GROUP BY role
ORDER BY total DESC;
-- HAVING: filter on aggregated values (WHERE runs before aggregation, HAVING after)
SELECT user_id, COUNT(*) AS post_count
FROM posts
GROUP BY user_id
HAVING COUNT(*) > 5
ORDER BY post_count DESC;
-- Count distinct values
SELECT COUNT(DISTINCT user_id) FROM posts;
-- Conditional aggregation
SELECT
COUNT(*) AS total,
COUNT(*) FILTER (WHERE role = 'admin') AS admins,
COUNT(*) FILTER (WHERE deleted_at IS NOT NULL) AS deleted
FROM users;๐Subqueries and CTEs
-- Subquery in WHERE
SELECT * FROM posts
WHERE user_id IN (
SELECT id FROM users WHERE role = 'admin'
);
-- EXISTS: often faster than IN for large subqueries
SELECT * FROM users u
WHERE EXISTS (
SELECT 1 FROM posts p WHERE p.user_id = u.id
);
-- CTE (Common Table Expression): named subquery, run once, referenced multiple times
WITH active_users AS (
SELECT id, name FROM users WHERE deleted_at IS NULL
),
prolific AS (
SELECT user_id, COUNT(*) AS post_count
FROM posts
GROUP BY user_id
HAVING COUNT(*) > 10
)
SELECT u.name, p.post_count
FROM active_users u
JOIN prolific p ON p.user_id = u.id
ORDER BY p.post_count DESC;
-- Recursive CTE: for trees and hierarchies
WITH RECURSIVE org_tree AS (
SELECT id, name, manager_id, 0 AS depth
FROM employees WHERE manager_id IS NULL -- base case: root
UNION ALL
SELECT e.id, e.name, e.manager_id, t.depth + 1
FROM employees e
JOIN org_tree t ON e.manager_id = t.id -- recursive case
)
SELECT * FROM org_tree ORDER BY depth, name;๐JOIN Types
| Join Type | Returns | When to Use |
|---|---|---|
INNER JOIN | Rows where the condition matches in both tables | You only want rows that have a matching partner |
LEFT JOIN | All rows from left table; NULLs where no match in right | Optional relationship โ keep left rows even if no match |
RIGHT JOIN | All rows from right table; NULLs where no match in left | Rarely used; just flip the tables and use LEFT JOIN |
FULL OUTER JOIN | All rows from both tables; NULLs where no match | Find rows that exist in one table but not the other |
CROSS JOIN | Cartesian product โ every combination | Generating test data; matrix comparisons |
-- INNER JOIN: only posts that have a matching user
SELECT p.title, u.name
FROM posts p
JOIN users u ON u.id = p.user_id; -- JOIN = INNER JOIN
-- LEFT JOIN: all users, even those with no posts
SELECT u.name, COUNT(p.id) AS post_count
FROM users u
LEFT JOIN posts p ON p.user_id = u.id
GROUP BY u.id, u.name
ORDER BY post_count DESC;
-- LEFT JOIN to find rows with NO match (anti-join pattern)
SELECT u.* FROM users u
LEFT JOIN posts p ON p.user_id = u.id
WHERE p.id IS NULL; -- users who have never posted
-- Multiple joins
SELECT p.title, u.name AS author, c.body AS comment
FROM posts p
JOIN users u ON u.id = p.user_id
LEFT JOIN comments c ON c.post_id = p.id
WHERE p.published = true
ORDER BY p.created_at DESC;
-- Self-join: join a table to itself (e.g., employee โ manager)
SELECT e.name AS employee, m.name AS manager
FROM employees e
LEFT JOIN employees m ON m.id = e.manager_id;JOIN order doesn't change results, but it hints to the planner. Put the smaller/more-filtered table first for readability. Postgres's query planner will reorder joins for efficiency regardless โ but explicit
JOINinstead of comma-separatedFROMmakes intent clear and avoids accidental cartesian products.
๐UNION, INTERSECT, EXCEPT
-- UNION: combine results, remove duplicates
SELECT email FROM users
UNION
SELECT email FROM pending_users;
-- UNION ALL: combine results, keep duplicates (faster โ no dedup step)
SELECT id, name, 'user' AS source FROM users
UNION ALL
SELECT id, name, 'admin' AS source FROM admins;
-- INTERSECT: rows in both result sets
SELECT email FROM newsletter_subscribers
INTERSECT
SELECT email FROM paying_customers;
-- EXCEPT: rows in first set but not second
SELECT email FROM users
EXCEPT
SELECT email FROM unsubscribed;๐Index Basics
Indexes speed up reads by creating a separate data structure the planner can use instead of scanning the whole table. They cost write overhead and storage. Add them where you filter, sort, or join โ not everywhere.
-- Basic B-tree index (default, best for equality and range queries)
CREATE INDEX idx_users_email ON users(email);
CREATE INDEX ON posts(user_id); -- auto-named
CREATE INDEX ON posts(created_at DESC);
-- Unique index (enforces uniqueness, equivalent to UNIQUE constraint)
CREATE UNIQUE INDEX ON users(email);
-- Composite index: column order matters
-- Useful when you filter on (status, created_at) together
-- Also usable for queries on just `status` (leftmost prefix rule)
CREATE INDEX ON posts(status, created_at DESC);
-- Partial index: only indexes rows matching a condition
-- Smaller, faster for queries that always include that condition
CREATE INDEX ON users(email) WHERE deleted_at IS NULL;
CREATE INDEX ON orders(created_at) WHERE status = 'pending';
-- Concurrent index creation: doesn't lock the table (use in production)
CREATE INDEX CONCURRENTLY idx_posts_title ON posts(title);
-- Drop
DROP INDEX idx_users_email;
DROP INDEX CONCURRENTLY idx_posts_title; -- non-blocking drop
-- List indexes on a table
\d users -- psql: shows table structure and indexes๐Index Types
| Type | Best For | Notes |
|---|---|---|
B-tree | Equality, range, sorting, LIKE 'prefix%' | Default. Use for almost everything. |
Hash | Equality only (=) | Slightly faster than B-tree for pure equality, but B-tree is usually fine. |
GIN | Arrays, JSONB, full-text search, @>, ? | Required for indexing JSONB keys/values and array containment. |
GiST | Geometric types, full-text, ranges | Used by PostGIS. Also for tsvector full-text search. |
BRIN | Very large tables with naturally ordered data (time-series, logs) | Tiny. Fast to build. Poor selectivity. Good for append-only partitioned tables. |
-- GIN index for JSONB (required for @>, ?, ?|, ?& operators)
CREATE INDEX ON events USING gin(data);
-- GIN index for full-text search
CREATE INDEX ON articles USING gin(to_tsvector('english', title || ' ' || body));
-- BRIN index for a large log table (created_at is always increasing)
CREATE INDEX ON logs USING brin(created_at);๐EXPLAIN: Reading the Query Plan
-- Show the query plan (no execution)
EXPLAIN SELECT * FROM users WHERE email = 'alice@example.com';
-- Show plan WITH actual execution stats (run the query)
EXPLAIN (ANALYZE, BUFFERS) SELECT * FROM users WHERE email = 'alice@example.com';
-- Key things to look for in EXPLAIN output:
-- Seq Scan โ full table scan. BAD on large tables. Missing index?
-- Index Scan โ uses index to find rows, fetches from heap. GOOD.
-- Index Only Scan โ all data in index, no heap access. BEST.
-- Bitmap Scan โ index scan for many rows, batch heap access. OK.
-- Nested Loop โ for each row in outer, scan inner. Fast if inner is small.
-- Hash Join โ build hash table from smaller side. Fast for large joins.
-- Merge Join โ merge two sorted inputs. Fast when both sides are sorted.
-- cost=X..Y โ estimated startup cost .. total cost (arbitrary planner units)
-- rows=N โ estimated row count (inaccurate = stale statistics โ ANALYZE)
-- actual time=X..Y rows=N loops=N โ real execution data (with ANALYZE)
-- Update statistics if estimates are way off
ANALYZE users;
ANALYZE; -- analyze all tablesUse explain.dalibo.com. Paste your
EXPLAIN (ANALYZE, BUFFERS, FORMAT JSON)output into explain.dalibo.com for a visual tree with highlighted bottlenecks. Far easier to read than raw text output for complex plans.
๐Transactions
A transaction groups statements into an all-or-nothing unit. Either all statements succeed and commit, or any failure rolls everything back. Postgres transactions are fully ACID compliant.
-- Basic transaction
BEGIN;
UPDATE accounts SET balance = balance - 100 WHERE id = 1;
UPDATE accounts SET balance = balance + 100 WHERE id = 2;
COMMIT; -- both updates committed atomically
-- Rollback on error
BEGIN;
UPDATE accounts SET balance = balance - 1000 WHERE id = 1;
-- something goes wrong in application code...
ROLLBACK; -- undoes the UPDATE, balance unchanged
-- Savepoints: partial rollback within a transaction
BEGIN;
INSERT INTO orders (...) VALUES (...);
SAVEPOINT after_order;
INSERT INTO order_items (...) VALUES (...);
-- if items insert fails:
ROLLBACK TO SAVEPOINT after_order;
-- order still exists, items rolled back
COMMIT;๐Isolation Levels
Isolation levels control what concurrent transactions can see of each other's changes. Higher isolation = fewer anomalies but more contention.
| Level | Dirty Read | Non-repeatable Read | Phantom Read | Use Case |
|---|---|---|---|---|
READ COMMITTED | No | Yes | Yes | Default. Fine for most apps. |
REPEATABLE READ | No | No | No* | Reports, consistent snapshots. |
SERIALIZABLE | No | No | No | Financial ops, strict correctness. |
-- Set isolation level for a transaction
BEGIN TRANSACTION ISOLATION LEVEL REPEATABLE READ;
SELECT SUM(balance) FROM accounts; -- consistent snapshot for this tx
-- other transactions can commit changes, but we don't see them
COMMIT;๐Locking
-- SELECT FOR UPDATE: lock rows for update, block other writers
-- Use when you read a row and intend to update it (prevents lost updates)
BEGIN;
SELECT balance FROM accounts WHERE id = 1 FOR UPDATE;
-- now we own a lock: other transactions block on this row
UPDATE accounts SET balance = balance - 50 WHERE id = 1;
COMMIT;
-- SELECT FOR UPDATE SKIP LOCKED: skip rows locked by others
-- Pattern: job queue โ workers grab available jobs without blocking each other
BEGIN;
SELECT * FROM jobs
WHERE status = 'pending'
ORDER BY created_at
LIMIT 1
FOR UPDATE SKIP LOCKED;
-- process the job...
UPDATE jobs SET status = 'done' WHERE id = :job_id;
COMMIT;
-- Advisory locks: application-level locks (no table/row needed)
SELECT pg_try_advisory_lock(12345); -- returns true if acquired, false if not
SELECT pg_advisory_unlock(12345);Deadlocks happen when two transactions each hold a lock the other needs. Postgres detects them and kills one transaction. Prevent them by always acquiring locks in the same order across transactions, and keeping transactions short.
๐VACUUM and Autovacuum
-- Why VACUUM exists:
-- Postgres uses MVCC: UPDATE and DELETE don't remove old row versions.
-- They mark them dead. Dead rows pile up ("table bloat").
-- VACUUM reclaims dead row space and updates the visibility map.
-- Manual vacuum (autovacuum usually handles this)
VACUUM users;
VACUUM ANALYZE users; -- vacuum + update statistics
VACUUM FULL users; -- rewrite table, reclaim OS space โ locks table, use sparingly
-- Check autovacuum health
SELECT relname, n_dead_tup, n_live_tup, last_autovacuum, last_autoanalyze
FROM pg_stat_user_tables
ORDER BY n_dead_tup DESC;๐Data Types
| Type | Use For | Notes |
|---|---|---|
TEXT | Variable-length strings | Preferred over VARCHAR(n) โ no performance difference, less hassle. |
VARCHAR(n) | Strings with length limit | Only use if you need DB-level length enforcement. |
INTEGER / INT | 32-bit integer | Max ~2.1 billion. Use BIGINT if IDs may exceed that. |
BIGINT | 64-bit integer | Preferred for IDs on tables that will grow. |
NUMERIC(p,s) | Exact decimals (money) | No floating-point errors. Use for currency. Slower than float. |
FLOAT8 / DOUBLE PRECISION | Approximate decimals | Fast. Imprecise. Not for money. |
BOOLEAN | true/false | Accepts true/false, 't'/'f', 'yes'/'no', 1/0. |
TIMESTAMPTZ | Timestamps | Always use TIMESTAMPTZ (with time zone) โ stores UTC internally. |
DATE | Calendar date only | No time component. |
INTERVAL | Duration | INTERVAL '3 days', INTERVAL '1 hour 30 minutes'. |
UUID | Universally unique IDs | Use gen_random_uuid() (built-in since Pg 13). |
JSONB | JSON data | Binary JSON โ indexable, queryable. Prefer over JSON. |
ARRAY | Arrays of any type | TEXT[], INTEGER[]. Indexable with GIN. |
ENUM | Fixed set of string values | Enforced at DB level. Adding values requires ALTER TYPE. |
Always use TIMESTAMPTZ, never TIMESTAMP.
TIMESTAMP(without time zone) stores no timezone info. When you insert a value, Postgres strips the offset. You lose the ability to reason about time correctly across timezones.TIMESTAMPTZalways stores UTC and converts on display.
๐JSONB
-- Store and query semi-structured data
CREATE TABLE events (
id BIGSERIAL PRIMARY KEY,
data JSONB NOT NULL
);
-- Insert JSON
INSERT INTO events (data) VALUES
('{"type": "click", "user_id": 42, "tags": ["mobile", "nav"]}');
-- Access operators
data->'user_id' -- returns JSON value: 42
data->>'user_id' -- returns TEXT value: '42' (use for WHERE comparisons)
data->'address'->'city' -- nested access
data#>'{address,city}' -- path access (array of keys)
data#>>'{address,city}' -- path access returning TEXT
-- Query
SELECT * FROM events WHERE data->>'type' = 'click';
SELECT * FROM events WHERE (data->>'user_id')::INT = 42;
SELECT * FROM events WHERE data @> '{"type": "click"}'; -- containment (use GIN index)
SELECT * FROM events WHERE data ? 'user_id'; -- key exists
SELECT * FROM events WHERE data->'tags' @> '["mobile"]'; -- array contains value
-- Update a key in JSONB
UPDATE events SET data = data || '{"processed": true}' WHERE id = 1;
UPDATE events SET data = jsonb_set(data, '{user_id}', '99') WHERE id = 1;
-- Index for fast JSONB queries
CREATE INDEX ON events USING gin(data);๐ENUMs and Custom Types
-- Create an ENUM type
CREATE TYPE order_status AS ENUM ('pending', 'processing', 'shipped', 'delivered', 'cancelled');
CREATE TABLE orders (
id BIGSERIAL PRIMARY KEY,
status order_status NOT NULL DEFAULT 'pending'
);
-- Add a value to an existing ENUM (can only add, not remove)
ALTER TYPE order_status ADD VALUE 'refunded' AFTER 'delivered';๐Window Functions
Window functions perform a calculation across a set of rows related to the current row โ without collapsing them into a single group like GROUP BY would.
-- ROW_NUMBER: rank each row within a partition
SELECT
name,
department,
salary,
ROW_NUMBER() OVER (PARTITION BY department ORDER BY salary DESC) AS rank
FROM employees;
-- RANK and DENSE_RANK: same as ROW_NUMBER but ties get same rank
-- RANK skips numbers after a tie (1,1,3), DENSE_RANK doesn't (1,1,2)
RANK() OVER (ORDER BY score DESC)
DENSE_RANK() OVER (ORDER BY score DESC)
-- LAG / LEAD: access previous / next row's value
SELECT
date,
revenue,
LAG(revenue) OVER (ORDER BY date) AS prev_revenue,
revenue - LAG(revenue) OVER (ORDER BY date) AS change
FROM daily_revenue;
-- Running total with SUM OVER
SELECT
date,
amount,
SUM(amount) OVER (ORDER BY date) AS running_total
FROM transactions;
-- Top N per group (e.g., top 3 posts per user)
SELECT * FROM (
SELECT *,
ROW_NUMBER() OVER (PARTITION BY user_id ORDER BY created_at DESC) AS rn
FROM posts
) sub
WHERE rn <= 3;๐Full-Text Search
-- tsvector: processed text for searching
-- tsquery: a search query
-- Basic full-text search
SELECT title FROM articles
WHERE to_tsvector('english', title || ' ' || body) @@ to_tsquery('english', 'postgres & index');
-- Ranking results by relevance
SELECT title,
ts_rank(to_tsvector('english', body), to_tsquery('postgres')) AS rank
FROM articles
WHERE to_tsvector('english', body) @@ to_tsquery('postgres')
ORDER BY rank DESC;
-- Store tsvector in a column for performance (update with trigger or generated column)
ALTER TABLE articles ADD COLUMN search_vector TSVECTOR
GENERATED ALWAYS AS (to_tsvector('english', coalesce(title, '') || ' ' || coalesce(body, ''))) STORED;
CREATE INDEX ON articles USING gin(search_vector);
-- plainto_tsquery: user input (no operators, just words)
WHERE search_vector @@ plainto_tsquery('english', user_input)
-- websearch_to_tsquery: supports "phrases", -exclusions (Pg 11+)
WHERE search_vector @@ websearch_to_tsquery('english', '"exact phrase" -exclude')๐Generated Columns and Constraints
-- Generated (computed) column: value is always derived from other columns
ALTER TABLE products
ADD COLUMN price_with_tax NUMERIC
GENERATED ALWAYS AS (price * 1.20) STORED;
-- Table-level constraints
CREATE TABLE bookings (
id BIGSERIAL PRIMARY KEY,
start_date DATE NOT NULL,
end_date DATE NOT NULL,
CONSTRAINT valid_dates CHECK (end_date > start_date),
CONSTRAINT unique_booking UNIQUE (user_id, start_date)
);
-- Exclusion constraint: no overlapping date ranges (requires btree_gist extension)
CREATE EXTENSION IF NOT EXISTS btree_gist;
ALTER TABLE bookings
ADD CONSTRAINT no_overlap
EXCLUDE USING gist (room_id WITH =, daterange(start_date, end_date) WITH &&);๐psql CLI
# Connect
psql -U username -d dbname
psql -U username -h hostname -p 5432 -d dbname
psql "postgresql://user:password@host:5432/dbname"
# Meta-commands (no semicolon needed)
\l -- list databases
\c dbname -- connect to database
\dt -- list tables in current schema
\dt *.* -- list tables in all schemas
\d tablename -- describe table (columns, indexes, constraints)
\di -- list indexes
\df -- list functions
\dv -- list views
\dn -- list schemas
\du -- list users/roles
\timing -- toggle query timing
\x -- toggle expanded output (useful for wide tables)
\e -- open query in $EDITOR
\i file.sql -- run SQL file
\copy -- client-side COPY (works over remote connections)
\q -- quit
# Run a query from the shell
psql -U postgres -d mydb -c "SELECT COUNT(*) FROM users;"
# Run a SQL file
psql -U postgres -d mydb -f schema.sql๐Users, Roles, and Permissions
-- Create a role (roles can login, groups cannot โ but the distinction is just flags)
CREATE ROLE app_user LOGIN PASSWORD 'securepassword';
CREATE ROLE readonly NOLOGIN; -- group role
-- Grant connect and usage
GRANT CONNECT ON DATABASE mydb TO app_user;
GRANT USAGE ON SCHEMA public TO app_user;
-- Grant table permissions
GRANT SELECT, INSERT, UPDATE, DELETE ON ALL TABLES IN SCHEMA public TO app_user;
GRANT USAGE, SELECT ON ALL SEQUENCES IN SCHEMA public TO app_user;
-- Grant on future tables automatically
ALTER DEFAULT PRIVILEGES IN SCHEMA public
GRANT SELECT, INSERT, UPDATE, DELETE ON TABLES TO app_user;
-- Read-only role pattern
GRANT USAGE ON SCHEMA public TO readonly;
GRANT SELECT ON ALL TABLES IN SCHEMA public TO readonly;
GRANT readonly TO reporting_user; -- assign group role to a user
-- Revoke
REVOKE INSERT, UPDATE, DELETE ON ALL TABLES IN SCHEMA public FROM readonly;๐Backup and Restore
# pg_dump: logical backup (SQL or custom format)
pg_dump -U postgres mydb > mydb.sql # plain SQL
pg_dump -U postgres -Fc mydb > mydb.dump # custom format (preferred)
pg_dump -U postgres -Fc -t users mydb > users.dump # single table
pg_dump -U postgres -Fc --schema-only mydb > schema.dump # schema only
pg_dump -U postgres -Fc --data-only mydb > data.dump # data only
# Restore custom format
pg_restore -U postgres -d mydb mydb.dump
pg_restore -U postgres -d mydb --clean mydb.dump # drop objects first
pg_restore -U postgres -d mydb -j 4 mydb.dump # parallel restore (4 workers)
# Restore plain SQL
psql -U postgres -d mydb < mydb.sql
# pg_dumpall: dump all databases + roles + tablespaces
pg_dumpall -U postgres > all.sql
# COPY: fast bulk import/export
# Server-side (must be superuser, reads/writes server filesystem)
COPY users TO '/tmp/users.csv' CSV HEADER;
COPY users FROM '/tmp/users.csv' CSV HEADER;
# Client-side (works over remote connections)
\copy users TO 'users.csv' CSV HEADER
\copy users FROM 'users.csv' CSV HEADER๐Useful Diagnostic Queries
-- Show running queries
SELECT pid, now() - pg_stat_activity.query_start AS duration,
query, state
FROM pg_stat_activity
WHERE state != 'idle'
ORDER BY duration DESC;
-- Kill a query
SELECT pg_cancel_backend(pid); -- graceful cancel
SELECT pg_terminate_backend(pid); -- force kill
-- Show locks and what's blocking what
SELECT blocked.pid, blocked.query,
blocking.pid AS blocking_pid, blocking.query AS blocking_query
FROM pg_stat_activity blocked
JOIN pg_stat_activity blocking
ON blocking.pid = ANY(pg_blocking_pids(blocked.pid));
-- Slowest queries (requires pg_stat_statements extension)
CREATE EXTENSION IF NOT EXISTS pg_stat_statements;
SELECT query, round(mean_exec_time::numeric, 2) AS avg_ms,
calls, round(total_exec_time::numeric, 2) AS total_ms
FROM pg_stat_statements
ORDER BY mean_exec_time DESC
LIMIT 20;
-- Table sizes
SELECT relname AS table,
pg_size_pretty(pg_total_relation_size(relid)) AS total_size,
pg_size_pretty(pg_relation_size(relid)) AS table_size,
pg_size_pretty(pg_total_relation_size(relid) - pg_relation_size(relid)) AS index_size
FROM pg_stat_user_tables
ORDER BY pg_total_relation_size(relid) DESC;
-- Unused indexes (wasting write overhead)
SELECT indexrelname, idx_scan, pg_size_pretty(pg_relation_size(indexrelid)) AS size
FROM pg_stat_user_indexes
WHERE idx_scan = 0
ORDER BY pg_relation_size(indexrelid) DESC;pg_stat_statements is essential. Enable it in
postgresql.confwithshared_preload_libraries = 'pg_stat_statements'and restart. It tracks query statistics across all executions โ the single most useful tool for finding slow queries in production.