# Select all time series with metric name
http_requests_total
# With exact label match
http_requests_total{job="api", method="GET"}
# Label matching operators
http_requests_total{status!="200"}        # Not equal
http_requests_total{method=~"GET|POST"}   # Regex match
http_requests_total{path!~"/admin.*"}     # Negative regex
# Multiple labels
http_requests_total{job="api", status="200", method="GET"}
# All metrics for a job
{job="api"}
Range Vector Selectors
# Last 5 minutes
http_requests_total[5m]
# Last 1 hour
http_requests_total{job="api"}[1h]
# Time units: s (seconds), m (minutes), h (hours), d (days), w (weeks), y (years)
http_requests_total[30s]
http_requests_total[2h]
http_requests_total[7d]
Offset Modifier
# 5 minutes ago
http_requests_total offset 5m
# Compare to 1 hour ago
http_requests_total - (http_requests_total offset 1h)
# Range vector with offset
rate(http_requests_total[5m] offset 1h)
@ Modifier (Prometheus 2.25+)
# At specific timestamp
http_requests_total @ 1609459200
# Combine with offset
http_requests_total @ 1609459200 offset 5m
# Range vector at specific time
rate(http_requests_total[5m] @ 1609459200)
# Equal, not equal
http_requests_total == 100
http_requests_total != 0
# Greater than, less than
http_requests_total > 1000
http_requests_total < 100
# Greater or equal, less or equal
http_requests_total >= 500
http_requests_total <= 50
# bool modifier: keep every series and return the comparison result instead of filtering
http_requests_total > bool 1000   # Returns 1 or 0
# One-to-one matching (default)
method:requests:rate5m{job="api"} / method:requests:total{job="api"}
# Ignoring labels
method:requests:rate5m / ignoring(method) method:requests:total
# On specific labels
method:requests:rate5m / on(job, instance) method:requests:total
# Group left/right (many-to-one, one-to-many)
method:requests:rate5m / on(job) group_left(instance) method:requests:total
Aggregation Operators
Basic Aggregations
# Sum
sum(http_requests_total)
# Average
avg(http_requests_total)
# Minimum and maximum
min(http_requests_total)
max(http_requests_total)
# Count
count(http_requests_total)
# Standard deviation and variance
stddev(http_requests_total)
stdvar(http_requests_total)
# Quantiles
quantile(0.95, http_request_duration_seconds)
Grouping
# Sum by labels
sum by (job, instance) (http_requests_total)
sum(http_requests_total) by (job, instance)   # Alternative syntax
# Sum without labels
sum without (method, status) (http_requests_total)
# Count unique label values
count(count by (instance) (up))
# Top K
topk(5, http_requests_total)
bottomk(3, http_requests_total)
Examples
# Total requests per second by job
sum by (job) (rate(http_requests_total[5m]))
# Average CPU usage per node
avg by (instance) (rate(node_cpu_seconds_total{mode!="idle"}[5m]))
# Memory usage percentage
100 * (1 - avg by (instance) (node_memory_MemAvailable_bytes) / avg by (instance) (node_memory_MemTotal_bytes))
# 95th percentile response time
histogram_quantile(0.95, sum by (le) (rate(http_request_duration_seconds_bucket[5m])))
Functions
Rate & Increase
# Rate: per-second average increase
rate(http_requests_total[5m])
# Irate: instant rate (last 2 points)
irate(http_requests_total[5m])
# Increase: total increase over time range
increase(http_requests_total[1h])
# Delta: difference between first and last value
delta(cpu_temp_celsius[1h])
# Idelta: difference between last 2 samples
idelta(cpu_temp_celsius[5m])
Comparison
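| Function   | Best For         | Use Case                   |
|------------|------------------|----------------------------|
| rate()     | Counters         | Request rates, error rates |
| irate()    | Volatile metrics | Short-term spikes          |
| increase() | Total count      | Total requests in period   |
| delta()    | Gauges           | Temperature changes        |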
Time Functions
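# Current time (Unix timestamp)
time()
# Day of week (0=Sunday, 6=Saturday)
day_of_week()
# Day of month
day_of_month()
# Hour of day
hour()
# Minute
minute()
# Month
month()
# Year
year()
# Examples
# Alert only during business hours (time functions are evaluated in UTC).
# hour() and day_of_week() return a vector with no labels, so "and on()" is
# required; a plain "and" would match on ALERTS' full label set and return nothing.
ALERTS{severity="critical"} and on() (hour() >= 9 and hour() <= 17) and on() (day_of_week() > 0 and day_of_week() < 6)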
Aggregation Over Time
# Average over time
avg_over_time(http_requests_total[5m])
# Max/min over time
max_over_time(http_requests_total[1h])
min_over_time(http_requests_total[1h])
# Sum over time
sum_over_time(http_requests_total[5m])
# Count over time
count_over_time(http_requests_total[5m])
# Quantile over time
quantile_over_time(0.95, http_request_duration_seconds[5m])
# Standard deviation over time
stddev_over_time(http_requests_total[5m])
# Last value (most recent)
last_over_time(http_requests_total[5m])
# First value (oldest)
# NOTE: first_over_time appears to be an experimental function available only in
# recent Prometheus releases (behind --enable-feature=promql-experimental-functions);
# verify against your server version.
first_over_time(http_requests_total[5m])
Change Functions
# Predict value using linear regression
predict_linear(node_filesystem_free_bytes[1h], 3600 * 4)   # 4 hours ahead
# Derivative (rate of change per second)
deriv(node_cpu_seconds_total[5m])
# Changes: number of times value changed
changes(http_requests_total[1h])
# Resets: number of counter resets
resets(http_requests_total[1h])
Math Functions
# Absolute value
abs(delta(cpu_temp_celsius[5m]))
# Ceiling/floor
ceil(http_request_duration_seconds)
floor(http_request_duration_seconds)
# Round
round(http_request_duration_seconds, 0.1)   # Round to 0.1
# Exponential and logarithm
exp(http_requests_total)
ln(http_requests_total)
log2(http_requests_total)
log10(http_requests_total)
# Square root
sqrt(http_requests_total)
# Clamp (limit to range)
clamp(http_requests_total, 0, 1000)
clamp_min(http_requests_total, 0)
clamp_max(http_requests_total, 1000)
Label Functions
# Replace label values
label_replace(http_requests_total, "new_label", "$1", "instance", "(.+):.*")
# Join labels into new label
label_join(http_requests_total, "endpoint", "/", "job", "instance")
# Check if metric absent
absent(http_requests_total{job="api"})
# Check if metric absent over time
absent_over_time(http_requests_total[5m])
Histogram Functions
# Calculate quantile from histogram
histogram_quantile(0.95, sum by (le) (rate(http_request_duration_seconds_bucket[5m])))
# Multiple quantiles
histogram_quantile(0.50, sum by (le) (rate(http_request_duration_seconds_bucket[5m])))
histogram_quantile(0.90, sum by (le) (rate(http_request_duration_seconds_bucket[5m])))
histogram_quantile(0.99, sum by (le) (rate(http_request_duration_seconds_bucket[5m])))
Common Queries
CPU Metrics
# CPU usage percentage
100 - (avg by (instance) (rate(node_cpu_seconds_total{mode="idle"}[5m])) * 100)
# CPU usage by mode
sum by (mode) (rate(node_cpu_seconds_total[5m]))
# Per-core CPU usage
rate(node_cpu_seconds_total{mode!="idle"}[5m]) * 100
# CPU load average
node_load1
node_load5
node_load15
# Disk usage percentage
100 - ((node_filesystem_avail_bytes{mountpoint="/"} / node_filesystem_size_bytes{mountpoint="/"}) * 100)
# Disk space remaining
node_filesystem_avail_bytes{mountpoint="/"} / 1024 / 1024 / 1024   # GB
# Disk I/O rate
rate(node_disk_read_bytes_total[5m])
rate(node_disk_written_bytes_total[5m])
# IOPS
rate(node_disk_reads_completed_total[5m])
rate(node_disk_writes_completed_total[5m])
# Predict free space one week ahead (a result <= 0 means the disk is projected to be full by then)
predict_linear(node_filesystem_free_bytes{mountpoint="/"}[1h], 3600 * 24 * 7)   # 1 week
# Request rate
sum by (job) (rate(http_requests_total[5m]))
# Request rate by status code
sum by (status) (rate(http_requests_total[5m]))
# Error rate (4xx + 5xx)
sum(rate(http_requests_total{status=~"4..|5.."}[5m])) / sum(rate(http_requests_total[5m])) * 100
# 5xx error rate
sum(rate(http_requests_total{status=~"5.."}[5m])) / sum(rate(http_requests_total[5m])) * 100
# Average response time
avg(rate(http_request_duration_seconds_sum[5m]) / rate(http_request_duration_seconds_count[5m]))
# 95th percentile response time
histogram_quantile(0.95, sum by (le) (rate(http_request_duration_seconds_bucket[5m])))
# Requests per second by endpoint
sum by (path) (rate(http_requests_total[5m]))
Service Availability
# Service uptime percentage
avg_over_time(up{job="api"}[1h]) * 100
# Number of instances up
count(up{job="api"} == 1)
# Number of instances down
count(up{job="api"} == 0)
# Alert if service down
up{job="api"} == 0
# SLO: 99.9% availability
(1 - (sum(rate(http_requests_total{status=~"5.."}[30d])) / sum(rate(http_requests_total[30d])))) * 100 > 99.9
# WRONG: rate() on gauge
rate(node_memory_MemAvailable_bytes[5m])
# CORRECT: Use for counters only
rate(http_requests_total[5m])
# WRONG: aggregation without by/without
sum(rate(http_requests_total[5m]))   # Loses labels
# CORRECT: Specify grouping
sum by (job, instance) (rate(http_requests_total[5m]))
# WRONG: comparing instant vectors
# (valid PromQL, but it compares series one-to-one and returns a result per
# matching series rather than a single overall answer)
http_requests_total > http_requests_total offset 1h
# CORRECT: Use scalar or aggregation
sum(http_requests_total) > sum(http_requests_total offset 1h)