2020-07-28 23:09:34 +05:30
|
|
|
# frozen_string_literal: true
|
|
|
|
|
|
|
|
require 'spec_helper'
|
|
|
|
|
|
|
|
RSpec.describe Gitlab::UsageData::Topology do
|
|
|
|
include UsageDataHelpers
|
|
|
|
|
|
|
|
describe '#topology_usage_data' do
|
2020-11-24 15:15:51 +05:30
|
|
|
# Shared setup for every example in this describe block.

# The payload under test; examples inspect its :topology entry.
subject { topology.topology_usage_data }

let(:topology) { described_class.new }

# Client yielded by the stubbed with_prometheus_client in the
# 'can reach a ready Prometheus client' context; examples attach `query`
# expectations to it through expect_prometheus_client_to.
let(:prometheus_client) { Gitlab::PrometheusClient.new('http://localhost:9090') }

# Value returned by the stubbed with_prometheus_client in the
# 'can not reach a ready Prometheus client' context.
let(:fallback) { {} }

before do
  # this pins down time shifts when benchmarking durations
  # (every example below expects duration_s: 0)
  allow(Process).to receive(:clock_gettime).and_return(0)
end
|
|
|
|
|
2020-11-24 15:15:51 +05:30
|
|
|
shared_examples 'query topology data from Prometheus' do
|
2020-07-28 23:09:34 +05:30
|
|
|
# Happy path: every query helper is used with its default fixture response,
# which describes two nodes (instance1 and instance2).
context 'tracking node metrics' do
  it 'contains node level metrics for each instance' do
    expect_prometheus_client_to(
      receive_app_request_volume_query,
      receive_query_apdex_ratio_query,
      receive_node_memory_query,
      receive_node_memory_utilization_query,
      receive_node_cpu_count_query,
      receive_node_cpu_utilization_query,
      receive_node_uname_info_query,
      receive_node_service_memory_rss_query,
      receive_node_service_memory_uss_query,
      receive_node_service_memory_pss_query,
      receive_node_service_process_count_query,
      receive_node_service_app_server_workers_query
    )

    # No query came back empty, so `failures` is expected to be empty and
    # each node carries every metric its fixtures provide.
    expect(subject[:topology]).to eq({
      duration_s: 0,
      application_requests_per_hour: 36,
      query_apdex_weekly_average: 0.996,
      failures: [],
      nodes: [
        {
          node_memory_total_bytes: 512,
          node_memory_utilization: 0.45,
          node_cpus: 8,
          node_cpu_utilization: 0.1,
          node_uname_info: {
            machine: 'x86_64',
            sysname: 'Linux',
            release: '4.19.76-linuxkit'
          },
          node_services: [
            {
              name: 'web',
              process_count: 10,
              process_memory_rss: 300,
              process_memory_uss: 301,
              process_memory_pss: 302,
              server: 'puma'
            },
            {
              name: 'sidekiq',
              process_count: 5,
              process_memory_rss: 303
            }
          ]
        },
        {
          node_memory_total_bytes: 1024,
          node_memory_utilization: 0.25,
          node_cpus: 16,
          node_cpu_utilization: 0.2,
          node_uname_info: {
            machine: 'x86_64',
            sysname: 'Linux',
            release: '4.15.0-101-generic'
          },
          node_services: [
            {
              name: 'sidekiq',
              process_count: 15,
              process_memory_rss: 400,
              process_memory_pss: 401
            },
            {
              name: 'redis',
              process_count: 1,
              process_memory_rss: 402
            },
            {
              name: 'registry',
              process_count: 1
            },
            {
              name: 'web',
              server: 'unicorn'
            }
          ]
        }
      ]
    })
  end
end
|
|
|
|
|
|
|
|
# Several queries (request volume, apdex, node memory, memory utilization,
# service RSS/USS, app server workers) return an empty array; the remaining
# ones keep their default fixtures.
context 'and some node memory metrics are missing' do
  it 'removes the respective entries and includes the failures' do
    expect_prometheus_client_to(
      receive_app_request_volume_query(result: []),
      receive_query_apdex_ratio_query(result: []),
      receive_node_memory_query(result: []),
      receive_node_memory_utilization_query(result: []),
      receive_node_cpu_count_query,
      receive_node_cpu_utilization_query,
      receive_node_uname_info_query,
      receive_node_service_memory_rss_query(result: []),
      receive_node_service_memory_uss_query(result: []),
      receive_node_service_memory_pss_query,
      receive_node_service_process_count_query,
      receive_node_service_app_server_workers_query(result: [])
    )

    # Each empty query is expected to show up as an 'empty_result' failure and
    # its keys are expected to be absent from the payload. The instance2 node
    # (16 CPUs) comes first here, matching the order of the CPU count fixture.
    expect(subject[:topology]).to eq({
      duration_s: 0,
      failures: [
        { 'app_requests' => 'empty_result' },
        { 'query_apdex' => 'empty_result' },
        { 'node_memory' => 'empty_result' },
        { 'node_memory_utilization' => 'empty_result' },
        { 'service_rss' => 'empty_result' },
        { 'service_uss' => 'empty_result' },
        { 'service_workers' => 'empty_result' }
      ],
      nodes: [
        {
          node_cpus: 16,
          node_cpu_utilization: 0.2,
          node_uname_info: {
            machine: 'x86_64',
            release: '4.15.0-101-generic',
            sysname: 'Linux'
          },
          node_services: [
            {
              name: 'sidekiq',
              process_count: 15,
              process_memory_pss: 401
            },
            {
              name: 'redis',
              process_count: 1
            },
            {
              name: 'registry',
              process_count: 1
            }
          ]
        },
        {
          node_cpus: 8,
          node_cpu_utilization: 0.1,
          node_uname_info: {
            machine: 'x86_64',
            release: '4.19.76-linuxkit',
            sysname: 'Linux'
          },
          node_services: [
            {
              name: 'web',
              process_count: 10,
              process_memory_pss: 302
            },
            {
              name: 'sidekiq',
              process_count: 5
            }
          ]
        }
      ]
    })
  end
end
|
|
|
|
|
|
|
|
# The fixtures below name one machine in several ways (localhost, 127.0.0.1,
# 0.0.0.0, [::1], [::]) and with different ports; the example expects all of
# them to end up in a single node entry.
context 'and services run on the same node but report different instance values' do
  let(:node_memory_response) do
    [
      {
        'metric' => { 'instance' => 'localhost:9100' },
        'value' => [1000, '512']
      }
    ]
  end

  let(:node_memory_utilization_response) do
    [
      {
        'metric' => { 'instance' => 'localhost:9100' },
        'value' => [1000, '0.35']
      }
    ]
  end

  let(:node_uname_info_response) do
    [
      {
        "metric" => {
          "__name__" => "node_uname_info",
          "domainname" => "(none)",
          "instance" => "127.0.0.1:9100",
          "job" => "node_exporter",
          "machine" => "x86_64",
          "nodename" => "127.0.0.1",
          "release" => "4.19.76-linuxkit",
          "sysname" => "Linux"
        },
        "value" => [1592463033.359, "1"]
      }
    ]
  end

  # The services in this response should all be mapped to localhost i.e. the same node
  let(:service_memory_response) do
    [
      {
        'metric' => { 'instance' => 'localhost:8080', 'job' => 'gitlab-rails' },
        'value' => [1000, '10']
      },
      {
        'metric' => { 'instance' => '127.0.0.1:8090', 'job' => 'gitlab-sidekiq' },
        'value' => [1000, '11']
      },
      {
        'metric' => { 'instance' => '0.0.0.0:9090', 'job' => 'prometheus' },
        'value' => [1000, '12']
      },
      {
        'metric' => { 'instance' => '[::1]:1234', 'job' => 'redis' },
        'value' => [1000, '13']
      },
      {
        'metric' => { 'instance' => '[::]:1234', 'job' => 'postgres' },
        'value' => [1000, '14']
      }
    ]
  end

  it 'normalizes equivalent instance values and maps them to the same node' do
    expect_prometheus_client_to(
      receive_app_request_volume_query(result: []),
      receive_query_apdex_ratio_query(result: []),
      receive_node_memory_query(result: node_memory_response),
      receive_node_memory_utilization_query(result: node_memory_utilization_response),
      receive_node_cpu_count_query(result: []),
      receive_node_cpu_utilization_query(result: []),
      receive_node_uname_info_query(result: node_uname_info_response),
      receive_node_service_memory_rss_query(result: service_memory_response),
      receive_node_service_memory_uss_query(result: []),
      receive_node_service_memory_pss_query(result: []),
      receive_node_service_process_count_query(result: []),
      receive_node_service_app_server_workers_query(result: [])
    )

    # Exactly one node is expected, holding the memory, uname and all five
    # service entries; the queries stubbed with [] are listed as failures.
    expect(subject[:topology]).to eq({
      duration_s: 0,
      failures: [
        { 'app_requests' => 'empty_result' },
        { 'query_apdex' => 'empty_result' },
        { 'node_cpus' => 'empty_result' },
        { 'node_cpu_utilization' => 'empty_result' },
        { 'service_uss' => 'empty_result' },
        { 'service_pss' => 'empty_result' },
        { 'service_process_count' => 'empty_result' },
        { 'service_workers' => 'empty_result' }
      ],
      nodes: [
        {
          node_memory_total_bytes: 512,
          node_memory_utilization: 0.35,
          node_uname_info: {
            machine: 'x86_64',
            sysname: 'Linux',
            release: '4.19.76-linuxkit'
          },
          node_services: [
            {
              name: 'web',
              process_memory_rss: 10
            },
            {
              name: 'sidekiq',
              process_memory_rss: 11
            },
            {
              name: 'prometheus',
              process_memory_rss: 12
            },
            {
              name: 'redis',
              process_memory_rss: 13
            },
            {
              name: 'postgres',
              process_memory_rss: 14
            }
          ]
        }
      ]
    })
  end
end
|
|
|
|
|
|
|
|
# Only the service RSS query returns data (its default fixture); every
# node-level query and every other service query returns an empty array.
context 'and node metrics are missing but service metrics exist' do
  it 'still reports service metrics' do
    expect_prometheus_client_to(
      receive_app_request_volume_query(result: []),
      receive_query_apdex_ratio_query(result: []),
      receive_node_memory_query(result: []),
      receive_node_memory_utilization_query(result: []),
      receive_node_cpu_count_query(result: []),
      receive_node_cpu_utilization_query(result: []),
      receive_node_uname_info_query(result: []),
      receive_node_service_memory_rss_query,
      receive_node_service_memory_uss_query(result: []),
      receive_node_service_memory_pss_query(result: []),
      receive_node_service_process_count_query(result: []),
      receive_node_service_app_server_workers_query(result: [])
    )

    # Nodes are still expected to be reported, but with nothing except the
    # node_services built from the RSS fixture.
    expect(subject[:topology]).to eq({
      duration_s: 0,
      failures: [
        { 'app_requests' => 'empty_result' },
        { 'query_apdex' => 'empty_result' },
        { 'node_memory' => 'empty_result' },
        { 'node_memory_utilization' => 'empty_result' },
        { 'node_cpus' => 'empty_result' },
        { 'node_cpu_utilization' => 'empty_result' },
        { 'node_uname_info' => 'empty_result' },
        { 'service_uss' => 'empty_result' },
        { 'service_pss' => 'empty_result' },
        { 'service_process_count' => 'empty_result' },
        { 'service_workers' => 'empty_result' }
      ],
      nodes: [
        {
          node_services: [
            {
              name: 'web',
              process_memory_rss: 300
            },
            {
              name: 'sidekiq',
              process_memory_rss: 303
            }
          ]
        },
        {
          node_services: [
            {
              name: 'sidekiq',
              process_memory_rss: 400
            },
            {
              name: 'redis',
              process_memory_rss: 402
            }
          ]
        }
      ]
    })
  end
end
|
|
|
|
|
2020-10-24 23:57:45 +05:30
|
|
|
# The process count query reports two jobs whose names are not among the
# services used elsewhere in this spec; all other queries return [].
context 'and unknown services are encountered' do
  let(:unknown_service_process_count_response) do
    [
      {
        'metric' => { 'instance' => 'instance2:9000', 'job' => 'unknown-service-A' },
        'value' => [1000, '42']
      },
      {
        'metric' => { 'instance' => 'instance2:9001', 'job' => 'unknown-service-B' },
        'value' => [1000, '42']
      }
    ]
  end

  it 'filters out unknown service data and reports the unknown services as a failure' do
    expect_prometheus_client_to(
      receive_app_request_volume_query(result: []),
      receive_query_apdex_ratio_query(result: []),
      receive_node_memory_query(result: []),
      receive_node_memory_utilization_query(result: []),
      receive_node_cpu_count_query(result: []),
      receive_node_cpu_utilization_query(result: []),
      receive_node_uname_info_query(result: []),
      receive_node_service_memory_rss_query(result: []),
      receive_node_service_memory_uss_query(result: []),
      receive_node_service_memory_pss_query(result: []),
      receive_node_service_process_count_query(result: unknown_service_process_count_response),
      receive_node_service_app_server_workers_query(result: [])
    )

    # `include` rather than `eq`: only the two 'service_unknown' entries are
    # asserted; the 'empty_result' failures from the other queries are ignored.
    expect(subject.dig(:topology, :failures)).to include(
      { 'service_unknown' => 'unknown-service-A' },
      { 'service_unknown' => 'unknown-service-B' }
    )
  end
end
|
|
|
|
|
2020-07-28 23:09:34 +05:30
|
|
|
# Covers `query` raising: a non-timeout error is expected to be recorded per
# query, while a timeout error is expected to cancel the queries after it.
context 'and an error is raised when querying Prometheus' do
  context 'without timeout failures' do
    it 'returns empty result and executes subsequent queries as usual' do
      # Every call to `query` raises, whatever its arguments.
      expect_prometheus_client_to(
        receive(:query).at_least(:once).and_raise(Gitlab::PrometheusClient::UnexpectedResponseError)
      )

      # One failure per query, each carrying the error class name.
      expect(subject[:topology]).to eq({
        duration_s: 0,
        failures: [
          { 'app_requests' => 'Gitlab::PrometheusClient::UnexpectedResponseError' },
          { 'query_apdex' => 'Gitlab::PrometheusClient::UnexpectedResponseError' },
          { 'node_memory' => 'Gitlab::PrometheusClient::UnexpectedResponseError' },
          { 'node_memory_utilization' => 'Gitlab::PrometheusClient::UnexpectedResponseError' },
          { 'node_cpus' => 'Gitlab::PrometheusClient::UnexpectedResponseError' },
          { 'node_cpu_utilization' => 'Gitlab::PrometheusClient::UnexpectedResponseError' },
          { 'node_uname_info' => 'Gitlab::PrometheusClient::UnexpectedResponseError' },
          { 'service_rss' => 'Gitlab::PrometheusClient::UnexpectedResponseError' },
          { 'service_uss' => 'Gitlab::PrometheusClient::UnexpectedResponseError' },
          { 'service_pss' => 'Gitlab::PrometheusClient::UnexpectedResponseError' },
          { 'service_process_count' => 'Gitlab::PrometheusClient::UnexpectedResponseError' },
          { 'service_workers' => 'Gitlab::PrometheusClient::UnexpectedResponseError' }
        ],
        nodes: []
      })
    end
  end

  context 'with timeout failures' do
    # Parameterized over the error classes listed in described_class::TIMEOUT_ERRORS.
    where(:exception) do
      described_class::TIMEOUT_ERRORS
    end

    with_them do
      it 'returns empty result and cancelled subsequent queries' do
        expect_prometheus_client_to(
          receive(:query).and_raise(exception)
        )

        # Only the first query records the raised error; every later query is
        # expected to be reported as 'timeout_cancellation'.
        expect(subject[:topology]).to eq({
          duration_s: 0,
          failures: [
            { 'app_requests' => exception.to_s },
            { 'query_apdex' => 'timeout_cancellation' },
            { 'node_memory' => 'timeout_cancellation' },
            { 'node_memory_utilization' => 'timeout_cancellation' },
            { 'node_cpus' => 'timeout_cancellation' },
            { 'node_cpu_utilization' => 'timeout_cancellation' },
            { 'node_uname_info' => 'timeout_cancellation' },
            { 'service_rss' => 'timeout_cancellation' },
            { 'service_uss' => 'timeout_cancellation' },
            { 'service_pss' => 'timeout_cancellation' },
            { 'service_process_count' => 'timeout_cancellation' },
            { 'service_workers' => 'timeout_cancellation' }
          ],
          nodes: []
        })
      end
    end
  end
end
|
|
|
|
end
|
|
|
|
|
2020-11-24 15:15:51 +05:30
|
|
|
# Shared expectation: the topology payload holds only the duration and an
# empty failures list (no :nodes key at all).
shared_examples 'returns empty result with no failures' do
  it do
    expect(subject[:topology]).to eq({
      duration_s: 0,
      failures: []
    })
  end
end
|
|
|
|
|
2020-11-24 15:15:51 +05:30
|
|
|
# with_prometheus_client is stubbed to yield the spec's client, so the shared
# examples run against the `query` expectations they set up on it.
context 'can reach a ready Prometheus client' do
  before do
    expect(topology).to receive(:with_prometheus_client).and_yield(prometheus_client)
  end

  it_behaves_like 'query topology data from Prometheus'
end
|
|
|
|
|
|
|
|
# with_prometheus_client is stubbed to return `fallback` without yielding,
# so no client is handed to the code under test.
context 'can not reach a ready Prometheus client' do
  before do
    expect(topology).to receive(:with_prometheus_client).and_return(fallback)
  end

  it_behaves_like 'returns empty result with no failures'
end
|
|
|
|
|
2020-07-28 23:09:34 +05:30
|
|
|
# An error raised by with_prometheus_client itself (outside any single query)
# is expected to be reported under the generic 'other' failure key.
context 'when top-level function raises error' do
  it 'returns empty result with generic failure' do
    expect(topology).to receive(:with_prometheus_client).and_raise(RuntimeError)

    expect(subject[:topology]).to eq({
      duration_s: 0,
      failures: [
        { 'other' => 'RuntimeError' }
      ]
    })
  end
end
|
|
|
|
end
|
|
|
|
|
2020-11-24 15:15:51 +05:30
|
|
|
# Builds a `ready?` message expectation for the Prometheus client.
#
# @param result [Object, nil] value `ready?` should return; nil means `true`.
#   An explicit `false` is passed through unchanged.
# @param raise_error [Object, nil] when not nil, `ready?` raises this instead
#   and `result` is ignored.
# @return the configured `receive(:ready?)` expectation
def receive_ready_check_query(result: nil, raise_error: nil)
  return receive(:ready?).and_raise(raise_error) unless raise_error.nil?

  # `result || true` would turn an explicit false into true, hence the nil check.
  receive(:ready?).and_return(result.nil? ? true : result)
end
|
|
|
|
|
2020-07-28 23:09:34 +05:30
|
|
|
# Builds the `query` expectation matching the application request rate query.
#
# @param result [Array<Hash>, nil] canned response. When nil, one workhorse
#   http_requests sample with value '0.01' is returned. Pass `[]` to simulate
#   an empty response (an empty array is truthy, so it is not replaced).
# @return the configured `receive(:query)` expectation
def receive_app_request_volume_query(result: nil)
  receive(:query)
    .with(/gitlab_usage_ping:ops:rate/)
    .and_return(result || [
      {
        'metric' => { 'component' => 'http_requests', 'service' => 'workhorse' },
        'value' => [1000, '0.01']
      }
    ])
end
|
|
|
|
|
2020-11-24 15:15:51 +05:30
|
|
|
# Builds the `query` expectation matching the SQL duration apdex query.
#
# @param result [Array<Hash>, nil] canned response. When nil, a single
#   label-less sample with value '0.996' is returned. Pass `[]` to simulate
#   an empty response.
# @return the configured `receive(:query)` expectation
def receive_query_apdex_ratio_query(result: nil)
  receive(:query)
    .with(/gitlab_usage_ping:sql_duration_apdex:ratio_rate5m/)
    .and_return(result || [
      {
        'metric' => {},
        'value' => [1000, '0.996']
      }
    ])
end
|
|
|
|
|
2020-07-28 23:09:34 +05:30
|
|
|
# Builds the `query` expectation for the node total memory query. The query
# must be called with a matching query string plus a Hash argument.
#
# @param result [Array<Hash>, nil] canned response. When nil, two samples are
#   returned: instance1 with '512' and instance2 with '1024'. Pass `[]` to
#   simulate an empty response.
# @return the configured `receive(:query)` expectation
def receive_node_memory_query(result: nil)
  receive(:query)
    .with(/node_memory_total_bytes/, an_instance_of(Hash))
    .and_return(result || [
      {
        'metric' => { 'instance' => 'instance1:8080' },
        'value' => [1000, '512']
      },
      {
        'metric' => { 'instance' => 'instance2:8090' },
        'value' => [1000, '1024']
      }
    ])
end
|
|
|
|
|
2020-10-24 23:57:45 +05:30
|
|
|
# Builds the `query` expectation for the node memory utilization query. The
# query must be called with a matching query string plus a Hash argument.
#
# @param result [Array<Hash>, nil] canned response. When nil, two samples are
#   returned: instance1 with '0.45' and instance2 with '0.25'. Pass `[]` to
#   simulate an empty response.
# @return the configured `receive(:query)` expectation
def receive_node_memory_utilization_query(result: nil)
  receive(:query)
    .with(/node_memory_utilization/, an_instance_of(Hash))
    .and_return(result || [
      {
        'metric' => { 'instance' => 'instance1:8080' },
        'value' => [1000, '0.45']
      },
      {
        'metric' => { 'instance' => 'instance2:8090' },
        'value' => [1000, '0.25']
      }
    ])
end
|
|
|
|
|
2020-07-28 23:09:34 +05:30
|
|
|
# Builds the `query` expectation for the node CPU count query. The query must
# be called with a matching query string plus a Hash argument.
#
# @param result [Array<Hash>, nil] canned response. When nil, two samples are
#   returned: instance2 with '16' and instance1 with '8'. Pass `[]` to
#   simulate an empty response.
# @return the configured `receive(:query)` expectation
def receive_node_cpu_count_query(result: nil)
  receive(:query)
    .with(/node_cpus/, an_instance_of(Hash))
    .and_return(result || [
      # instance2 is listed before instance1 here, unlike the memory fixtures.
      {
        'metric' => { 'instance' => 'instance2:8090' },
        'value' => [1000, '16']
      },
      {
        'metric' => { 'instance' => 'instance1:8080' },
        'value' => [1000, '8']
      }
    ])
end
|
|
|
|
|
2020-10-24 23:57:45 +05:30
|
|
|
# Builds the `query` expectation for the node CPU utilization query. The query
# must be called with a matching query string plus a Hash argument.
#
# @param result [Array<Hash>, nil] canned response. When nil, two samples are
#   returned: instance2 with '0.2' and instance1 with '0.1'. Pass `[]` to
#   simulate an empty response.
# @return the configured `receive(:query)` expectation
def receive_node_cpu_utilization_query(result: nil)
  receive(:query)
    .with(/node_cpu_utilization/, an_instance_of(Hash))
    .and_return(result || [
      # Same node order as the CPU count fixture: instance2 first.
      {
        'metric' => { 'instance' => 'instance2:8090' },
        'value' => [1000, '0.2']
      },
      {
        'metric' => { 'instance' => 'instance1:8080' },
        'value' => [1000, '0.1']
      }
    ])
end
|
|
|
|
|
2020-07-28 23:09:34 +05:30
|
|
|
# Builds the `query` expectation for the node uname info query. Unlike the
# other helpers it matches the exact string 'node_uname_info' and no second
# argument.
#
# @param result [Array<Hash>, nil] canned response. When nil, two
#   node_exporter samples are returned: instance1 (release 4.19.76-linuxkit)
#   and instance2 (release 4.15.0-101-generic). Pass `[]` to simulate an
#   empty response.
# @return the configured `receive(:query)` expectation
def receive_node_uname_info_query(result: nil)
  receive(:query)
    .with('node_uname_info')
    .and_return(result || [
      {
        "metric" => {
          "__name__" => "node_uname_info",
          "domainname" => "(none)",
          "instance" => "instance1:9100",
          "job" => "node_exporter",
          "machine" => "x86_64",
          "nodename" => "instance1",
          "release" => "4.19.76-linuxkit",
          "sysname" => "Linux"
        },
        "value" => [1592463033.359, "1"]
      },
      {
        "metric" => {
          "__name__" => "node_uname_info",
          "domainname" => "(none)",
          "instance" => "instance2:9100",
          "job" => "node_exporter",
          "machine" => "x86_64",
          "nodename" => "instance2",
          "release" => "4.15.0-101-generic",
          "sysname" => "Linux"
        },
        "value" => [1592463033.359, "1"]
      }
    ])
end
|
|
|
|
|
|
|
|
# Builds the `query` expectation for the per-service resident memory (RSS)
# query. The query must be called with a matching query string plus a Hash
# argument.
#
# @param result [Array<Hash>, nil] canned response. When nil, four samples are
#   returned: gitlab-rails and gitlab-sidekiq on instance1, gitlab-sidekiq and
#   redis on instance2. Pass `[]` to simulate an empty response.
# @return the configured `receive(:query)` expectation
def receive_node_service_memory_rss_query(result: nil)
  receive(:query)
    .with(/process_resident_memory_bytes/, an_instance_of(Hash))
    .and_return(result || [
      {
        'metric' => { 'instance' => 'instance1:8080', 'job' => 'gitlab-rails' },
        'value' => [1000, '300']
      },
      {
        'metric' => { 'instance' => 'instance1:8090', 'job' => 'gitlab-sidekiq' },
        'value' => [1000, '303']
      },
      # instance 2: runs a dedicated Sidekiq + Redis (which uses a different metric name)
      {
        'metric' => { 'instance' => 'instance2:8090', 'job' => 'gitlab-sidekiq' },
        'value' => [1000, '400']
      },
      {
        'metric' => { 'instance' => 'instance2:9121', 'job' => 'redis' },
        'value' => [1000, '402']
      }
    ])
end
|
|
|
|
|
|
|
|
# Builds the `query` expectation for the per-service unique memory (USS)
# query. The query must be called with a matching query string plus a Hash
# argument.
#
# @param result [Array<Hash>, nil] canned response. When nil, a single
#   gitlab-rails sample on instance1 with value '301' is returned. Pass `[]`
#   to simulate an empty response.
# @return the configured `receive(:query)` expectation
def receive_node_service_memory_uss_query(result: nil)
  receive(:query)
    .with(/process_unique_memory_bytes/, an_instance_of(Hash))
    .and_return(result || [
      {
        'metric' => { 'instance' => 'instance1:8080', 'job' => 'gitlab-rails' },
        'value' => [1000, '301']
      }
    ])
end
|
|
|
|
|
|
|
|
# Builds the `query` expectation for the per-service proportional memory (PSS)
# query. The query must be called with a matching query string plus a Hash
# argument.
#
# @param result [Array<Hash>, nil] canned response. When nil, two samples are
#   returned: gitlab-rails on instance1 ('302') and gitlab-sidekiq on
#   instance2 ('401'). Pass `[]` to simulate an empty response.
# @return the configured `receive(:query)` expectation
def receive_node_service_memory_pss_query(result: nil)
  receive(:query)
    .with(/process_proportional_memory_bytes/, an_instance_of(Hash))
    .and_return(result || [
      {
        'metric' => { 'instance' => 'instance1:8080', 'job' => 'gitlab-rails' },
        'value' => [1000, '302']
      },
      {
        'metric' => { 'instance' => 'instance2:8090', 'job' => 'gitlab-sidekiq' },
        'value' => [1000, '401']
      }
    ])
end
|
|
|
|
|
|
|
|
# Builds the `query` expectation for the per-service process count query. The
# query must be called with a matching query string plus a Hash argument.
#
# @param result [Array<Hash>, nil] canned response. When nil, five samples are
#   returned: gitlab-rails and gitlab-sidekiq on instance1; gitlab-sidekiq,
#   redis and registry on instance2. Pass `[]` to simulate an empty response.
# @return the configured `receive(:query)` expectation
def receive_node_service_process_count_query(result: nil)
  receive(:query)
    .with(/service_process:count/, an_instance_of(Hash))
    .and_return(result || [
      # instance 1
      {
        'metric' => { 'instance' => 'instance1:8080', 'job' => 'gitlab-rails' },
        'value' => [1000, '10']
      },
      {
        'metric' => { 'instance' => 'instance1:8090', 'job' => 'gitlab-sidekiq' },
        'value' => [1000, '5']
      },
      # instance 2
      {
        'metric' => { 'instance' => 'instance2:8090', 'job' => 'gitlab-sidekiq' },
        'value' => [1000, '15']
      },
      {
        'metric' => { 'instance' => 'instance2:9121', 'job' => 'redis' },
        'value' => [1000, '1']
      },
      {
        'metric' => { 'instance' => 'instance2:8080', 'job' => 'registry' },
        'value' => [1000, '1']
      }
    ])
end
|
|
|
|
|
|
|
|
# Builds the `query` expectation for the app server workers query. The query
# must be called with a matching query string plus a Hash argument.
#
# @param result [Array<Hash>, nil] canned response. When nil, two gitlab-rails
#   samples are returned: instance1 with server 'puma' and instance2 with
#   server 'unicorn'. Pass `[]` to simulate an empty response.
# @return the configured `receive(:query)` expectation
def receive_node_service_app_server_workers_query(result: nil)
  receive(:query)
    .with(/app_server_workers/, an_instance_of(Hash))
    .and_return(result || [
      # instance 1
      {
        'metric' => { 'instance' => 'instance1:8080', 'job' => 'gitlab-rails', 'server' => 'puma' },
        'value' => [1000, '2']
      },
      # instance 2
      {
        'metric' => { 'instance' => 'instance2:8080', 'job' => 'gitlab-rails', 'server' => 'unicorn' },
        'value' => [1000, '1']
      }
    ])
end
|
|
|
|
end
|