ClickHouse System Monitoring Guide
A comprehensive guide for monitoring ClickHouse deployments.
System Health Monitoring
Memory Usage
-- Current memory usage by query
SELECT
    query_id,
    user,
    formatReadableSize(memory_usage) as mem_used,
    formatReadableSize(peak_memory_usage) as peak_mem,
    query
FROM system.processes
ORDER BY memory_usage DESC;
-- Historical memory usage patterns
SELECT
    query_id,
    formatReadableSize(memory_usage) as mem_used,
    formatReadableSize(peak_memory_usage) as peak_mem,
    query_duration_ms,
    query
FROM system.query_log
WHERE type = 'QueryFinish'
  AND event_time >= now() - INTERVAL 1 DAY
ORDER BY memory_usage DESC
LIMIT 10;
CPU Usage
-- Current CPU usage by query
SELECT
    query_id,
    user,
    elapsed,
    read_rows,
    formatReadableSize(read_bytes) as read,
    total_rows_approx,
    query
FROM system.processes
ORDER BY elapsed DESC;
-- CPU usage patterns
SELECT
    query,
    count() as execution_count,
    avg(query_duration_ms) as avg_duration_ms,
    max(query_duration_ms) as max_duration_ms,
    sum(read_rows) as total_rows_read,
    sum(result_rows) as total_rows_returned
FROM system.query_log
WHERE type = 'QueryFinish'
  AND event_time >= now() - INTERVAL 1 DAY
GROUP BY query
ORDER BY avg_duration_ms DESC
LIMIT 10;
Disk Usage
-- Database sizes
SELECT
    name as database,
    formatReadableSize(sum(bytes_on_disk)) as disk_usage,
    sum(rows) as total_rows,
    count() as total_tables
FROM system.tables
GROUP BY database
ORDER BY sum(bytes_on_disk) DESC;
-- Table sizes
SELECT
    database,
    table,
    formatReadableSize(bytes_on_disk) as disk_usage,
    rows as total_rows,
    formatReadableSize(bytes_on_disk / rows) as avg_row_size
FROM system.tables
WHERE database NOT IN ('system')
ORDER BY bytes_on_disk DESC
LIMIT 10;
-- Part sizes and distribution
SELECT
    database,
    table,
    partition,
    formatReadableSize(sum(bytes_on_disk)) as part_size,
    count() as part_count,
    sum(rows) as rows
FROM system.parts
WHERE active
GROUP BY database, table, partition
ORDER BY sum(bytes_on_disk) DESC
LIMIT 10;
Query Performance Monitoring
Slow Queries
-- Recent slow queries
SELECT
    query_id,
    user,
    query_duration_ms,
    read_rows,
    formatReadableSize(read_bytes) as data_read,
    result_rows,
    formatReadableSize(memory_usage) as memory,
    query
FROM system.query_log
WHERE type = 'QueryFinish'
  AND query_duration_ms > 1000  -- Queries taking more than 1 second
  AND event_time >= now() - INTERVAL 1 DAY
  AND LOWER(query) NOT LIKE '%system%'  -- Filter out system queries
ORDER BY query_duration_ms DESC
LIMIT 10;
-- Query patterns by duration
SELECT
    user,
    count() as query_count,
    avg(query_duration_ms) as avg_duration_ms,
    max(query_duration_ms) as max_duration_ms
FROM system.query_log
WHERE type = 'QueryFinish'
  AND event_time >= now() - INTERVAL 1 DAY
GROUP BY user
ORDER BY avg_duration_ms DESC;
Query Errors
-- Recent query errors
SELECT
    user,
    query_id,
    event_time,
    query_duration_ms,
    exception,
    query
FROM system.query_log
WHERE type = 'ExceptionWhileProcessing'
  AND event_time >= now() - INTERVAL 1 DAY
ORDER BY event_time DESC;
-- Error patterns
SELECT
    exception,
    count() as error_count,
    any(query) as sample_query
FROM system.query_log
WHERE type = 'ExceptionWhileProcessing'
  AND event_time >= now() - INTERVAL 1 DAY
GROUP BY exception
ORDER BY error_count DESC;
System Configuration
Settings Check
-- Current system settings
SELECT
    name,
    value,
    changed,
    description
FROM system.settings
WHERE changed = 1
ORDER BY name;
-- User settings
SELECT
    *
FROM system.users
FORMAT Vertical;
Replication Status
-- Replication delays
SELECT
    database,
    table,
    is_leader,
    total_replicas,
    active_replicas,
    formatReadableSize(queue_size) as queue_size,
    absolute_delay
FROM system.replicas
ORDER BY absolute_delay DESC;
-- Replication queue
SELECT
    database,
    table,
    count() as queue_entries,
    max(create_time) as latest_entry
FROM system.replication_queue
GROUP BY database, table
ORDER BY queue_entries DESC;