Add archive metrics reporting

This commit is contained in:
Eugene Burmakin 2026-01-09 00:27:32 +01:00
parent 6b60eff90d
commit f74828cc6b
16 changed files with 744 additions and 4 deletions

View file

@ -0,0 +1,22 @@
# frozen_string_literal: true
# Reports an archive's compression ratio (compressed bytes / original bytes)
# to Prometheus as a histogram. No-op when the exporter is disabled; any
# reporting error is logged and swallowed so metrics never break archiving.
class Metrics::Archives::CompressionRatio
  # @param original_size [Numeric] uncompressed size in bytes
  # @param compressed_size [Numeric] compressed size in bytes
  def initialize(original_size:, compressed_size:)
    # Guard against a zero/negative original size: dividing by 0.0 yields
    # NaN or Infinity, which would poison the histogram. Report nothing
    # instead (see the nil check in #call).
    @ratio = original_size.to_f.positive? ? compressed_size.to_f / original_size.to_f : nil
  end

  def call
    return unless DawarichSettings.prometheus_exporter_enabled?
    return if @ratio.nil? # no meaningful ratio to report

    metric_data = {
      type: 'histogram',
      name: 'dawarich_archive_compression_ratio',
      value: @ratio,
      buckets: [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
    }

    PrometheusExporter::Client.default.send_json(metric_data)
  rescue StandardError => e
    Rails.logger.error("Failed to send compression ratio metric: #{e.message}")
  end
end

View file

@ -0,0 +1,42 @@
# frozen_string_literal: true
# Emits Prometheus metrics when an archive's actual point count differs
# from the expected count: a labelled error counter plus a gauge holding
# the absolute size of the discrepancy.
class Metrics::Archives::CountMismatch
  def initialize(user_id:, year:, month:, expected:, actual:)
    @user_id = user_id
    @year = year
    @month = month
    @expected = expected
    @actual = actual
  end

  def call
    return unless DawarichSettings.prometheus_exporter_enabled?

    client = PrometheusExporter::Client.default
    # Counter for critical errors
    client.send_json(mismatch_counter_payload)
    # Gauge showing the difference
    client.send_json(difference_gauge_payload)
  rescue StandardError => e
    Rails.logger.error("Failed to send count mismatch metric: #{e.message}")
  end

  private

  # Counter payload labelled by period so mismatches can be traced to a month.
  def mismatch_counter_payload
    {
      type: 'counter',
      name: 'dawarich_archive_count_mismatches_total',
      value: 1,
      labels: {
        year: @year.to_s,
        month: @month.to_s
      }
    }
  end

  # Gauge payload carrying the per-user absolute count difference.
  def difference_gauge_payload
    {
      type: 'gauge',
      name: 'dawarich_archive_count_difference',
      value: (@expected - @actual).abs,
      labels: {
        user_id: @user_id.to_s
      }
    }
  end
end

View file

@ -0,0 +1,30 @@
# frozen_string_literal: true
# Reports archive lifecycle operations (archive/verify/clear/restore) to
# Prometheus as a counter labelled with the operation name and its status.
class Metrics::Archives::Operation
  # Known operation names. NOTE(review): currently unreferenced within this
  # class — presumably intended to validate `operation:`; confirm intent.
  OPERATIONS = %w[archive verify clear restore].freeze

  # @param operation [String] one of OPERATIONS (not enforced here)
  # @param status [String] 'success' or 'failure'
  # @param user_id [Integer, nil] accepted but not emitted as a label — TODO confirm
  # @param points_count [Integer] accepted but not emitted — TODO confirm
  def initialize(operation:, status:, user_id: nil, points_count: 0)
    @operation = operation
    @status = status # 'success' or 'failure'
    @user_id = user_id
    @points_count = points_count
  end

  # Sends the counter metric. No-op when the exporter is disabled; errors
  # are logged and swallowed so metrics never break the calling job.
  def call
    return unless DawarichSettings.prometheus_exporter_enabled?

    metric_data = {
      type: 'counter',
      name: 'dawarich_archive_operations_total',
      value: 1,
      labels: {
        operation: @operation,
        status: @status
      }
    }

    PrometheusExporter::Client.default.send_json(metric_data)
  rescue StandardError => e
    Rails.logger.error("Failed to send archive operation metric: #{e.message}")
  end
end

View file

@ -0,0 +1,25 @@
# frozen_string_literal: true
# Counts points entering ('added') or leaving ('removed') archives via a
# Prometheus counter labelled by the operation kind.
class Metrics::Archives::PointsArchived
  def initialize(count:, operation:)
    @count = count
    @operation = operation # 'added' or 'removed'
  end

  def call
    return unless DawarichSettings.prometheus_exporter_enabled?

    payload = {
      type: 'counter',
      name: 'dawarich_archive_points_total',
      value: @count,
      labels: { operation: @operation }
    }

    PrometheusExporter::Client.default.send_json(payload)
  rescue StandardError => e
    Rails.logger.error("Failed to send points archived metric: #{e.message}")
  end
end

View file

@ -0,0 +1,29 @@
# frozen_string_literal: true
# Reports archive file sizes (in bytes) to Prometheus as a histogram.
class Metrics::Archives::Size
  # Bucket boundaries: 1 MB, 10 MB, 50 MB, 100 MB, 500 MB, 1 GB.
  SIZE_BUCKETS = [
    1_000_000,
    10_000_000,
    50_000_000,
    100_000_000,
    500_000_000,
    1_000_000_000
  ].freeze

  def initialize(size_bytes:)
    @size_bytes = size_bytes
  end

  def call
    return unless DawarichSettings.prometheus_exporter_enabled?

    payload = {
      type: 'histogram',
      name: 'dawarich_archive_size_bytes',
      value: @size_bytes,
      buckets: SIZE_BUCKETS
    }

    PrometheusExporter::Client.default.send_json(payload)
  rescue StandardError => e
    Rails.logger.error("Failed to send archive size metric: #{e.message}")
  end
end

View file

@ -0,0 +1,42 @@
# frozen_string_literal: true
# Records archive-verification timing as a Prometheus histogram and, when a
# verification fails with a named check, increments a failure counter
# labelled with that check.
class Metrics::Archives::Verification
  def initialize(duration_seconds:, status:, check_name: nil)
    @duration_seconds = duration_seconds
    @status = status
    @check_name = check_name
  end

  def call
    return unless DawarichSettings.prometheus_exporter_enabled?

    client = PrometheusExporter::Client.default
    # Duration histogram
    client.send_json(duration_payload)
    # Failed check counter (if failure)
    client.send_json(failure_payload) if report_failure?
  rescue StandardError => e
    Rails.logger.error("Failed to send verification metric: #{e.message}")
  end

  private

  # Only emit the failure counter when the status is a failure AND a
  # specific check name was provided.
  def report_failure?
    @status == 'failure' && @check_name
  end

  def duration_payload
    {
      type: 'histogram',
      name: 'dawarich_archive_verification_duration_seconds',
      value: @duration_seconds,
      labels: {
        status: @status
      },
      buckets: [0.1, 0.5, 1, 2, 5, 10, 30, 60]
    }
  end

  def failure_payload
    {
      type: 'counter',
      name: 'dawarich_archive_verification_failures_total',
      value: 1,
      labels: {
        check: @check_name # e.g., 'count_mismatch', 'checksum_mismatch'
      }
    }
  end
end

View file

@ -79,6 +79,14 @@ module Points
lock_acquired = ActiveRecord::Base.with_advisory_lock(lock_key, timeout_seconds: 0) do
archive_month(user_id, year, month)
@stats[:processed] += 1
# Report successful archive operation
Metrics::Archives::Operation.new(
operation: 'archive',
status: 'success',
user_id: user_id
).call
true
end
@ -87,6 +95,13 @@ module Points
ExceptionReporter.call(e, "Failed to archive points for user #{user_id}, #{year}-#{month}")
@stats[:failed] += 1
# Report failed archive operation
Metrics::Archives::Operation.new(
operation: 'archive',
status: 'failure',
user_id: user_id
).call
end
def archive_month(user_id, year, month)
@ -109,6 +124,12 @@ module Points
mark_points_as_archived(point_ids, archive.id)
update_stats(point_ids.count)
log_archival_success(archive)
# Report points archived
Metrics::Archives::PointsArchived.new(
count: point_ids.count,
operation: 'added'
).call
end
def find_archivable_points(user_id, year, month)
@ -161,6 +182,15 @@ module Points
# Validate count: critical data integrity check
expected_count = point_ids.count
if actual_count != expected_count
# Report count mismatch to metrics
Metrics::Archives::CountMismatch.new(
user_id: user_id,
year: year,
month: month,
expected: expected_count,
actual: actual_count
).call
error_msg = "Archive count mismatch for user #{user_id} #{year}-#{format('%02d', month)}: " \
"expected #{expected_count} points, but only #{actual_count} were compressed"
Rails.logger.error(error_msg)
@ -198,6 +228,21 @@ module Points
key: "raw_data_archives/#{user_id}/#{year}/#{format('%02d', month)}/#{format('%03d', chunk_number)}.jsonl.gz"
)
# Report archive size
if archive.file.attached?
Metrics::Archives::Size.new(
size_bytes: archive.file.blob.byte_size
).call
# Report compression ratio (estimate original size from JSON)
# Rough estimate: each point as JSON ~100-200 bytes
estimated_original_size = actual_count * 150
Metrics::Archives::CompressionRatio.new(
original_size: estimated_original_size,
compressed_size: archive.file.blob.byte_size
).call
end
archive
end
@ -208,9 +253,11 @@ module Points
def verify_archive_immediately(archive, expected_point_ids)
# Lightweight verification immediately after archiving
# Ensures archive is valid before marking points as archived
start_time = Time.current
# 1. Verify file is attached
unless archive.file.attached?
report_verification_metric(start_time, 'failure', 'file_not_attached')
return { success: false, error: 'File not attached' }
end
@ -218,11 +265,13 @@ module Points
begin
compressed_content = archive.file.blob.download
rescue StandardError => e
report_verification_metric(start_time, 'failure', 'download_failed')
return { success: false, error: "File download failed: #{e.message}" }
end
# 3. Verify file size is reasonable
if compressed_content.bytesize.zero?
report_verification_metric(start_time, 'failure', 'empty_file')
return { success: false, error: 'File is empty' }
end
@ -239,11 +288,13 @@ module Points
gz.close
rescue StandardError => e
report_verification_metric(start_time, 'failure', 'decompression_failed')
return { success: false, error: "Decompression/parsing failed: #{e.message}" }
end
# 5. Verify point count matches
if archived_point_ids.count != expected_point_ids.count
report_verification_metric(start_time, 'failure', 'count_mismatch')
return {
success: false,
error: "Point count mismatch in archive: expected #{expected_point_ids.count}, found #{archived_point_ids.count}"
@ -254,12 +305,24 @@ module Points
archived_checksum = calculate_checksum(archived_point_ids)
expected_checksum = calculate_checksum(expected_point_ids)
if archived_checksum != expected_checksum
report_verification_metric(start_time, 'failure', 'checksum_mismatch')
return { success: false, error: 'Point IDs checksum mismatch in archive' }
end
Rails.logger.info("✓ Immediate verification passed for archive #{archive.id}")
report_verification_metric(start_time, 'success')
{ success: true }
end
# Emits a verification-duration metric via Metrics::Archives::Verification.
# @param start_time [Time] wall-clock time when verification began
# @param status [String] 'success' or 'failure'
# @param check_name [String, nil] which check failed, when status is 'failure'
def report_verification_metric(start_time, status, check_name = nil)
  duration = Time.current - start_time
  Metrics::Archives::Verification.new(
    duration_seconds: duration,
    status: status,
    check_name: check_name
  ).call
end
end
end
end

View file

@ -74,9 +74,30 @@ module Points
cleared_count = clear_points_in_batches(point_ids)
@stats[:cleared] += cleared_count
Rails.logger.info("✓ Cleared #{cleared_count} points for archive #{archive.id}")
# Report successful clear operation
Metrics::Archives::Operation.new(
operation: 'clear',
status: 'success',
user_id: archive.user_id,
points_count: cleared_count
).call
# Report points removed (cleared from database)
Metrics::Archives::PointsArchived.new(
count: cleared_count,
operation: 'removed'
).call
rescue StandardError => e
ExceptionReporter.call(e, "Failed to clear points for archive #{archive.id}")
Rails.logger.error("✗ Failed to clear archive #{archive.id}: #{e.message}")
# Report failed clear operation
Metrics::Archives::Operation.new(
operation: 'clear',
status: 'failure',
user_id: archive.user_id
).call
end
def clear_points_in_batches(point_ids)

View file

@ -9,12 +9,38 @@ module Points
raise "No archives found for user #{user_id}, #{year}-#{month}" if archives.empty?
Rails.logger.info("Restoring #{archives.count} archives to database...")
total_points = archives.sum(:point_count)
Point.transaction do
archives.each { restore_archive_to_db(_1) }
begin
Point.transaction do
archives.each { restore_archive_to_db(_1) }
end
Rails.logger.info("✓ Restored #{total_points} points")
# Report successful restore operation
Metrics::Archives::Operation.new(
operation: 'restore',
status: 'success',
user_id: user_id,
points_count: total_points
).call
# Report points restored (removed from archived state)
Metrics::Archives::PointsArchived.new(
count: total_points,
operation: 'removed'
).call
rescue StandardError => e
# Report failed restore operation
Metrics::Archives::Operation.new(
operation: 'restore',
status: 'failure',
user_id: user_id
).call
raise
end
Rails.logger.info("✓ Restored #{archives.sum(:point_count)} points")
end
def restore_to_memory(user_id, year, month)

View file

@ -40,6 +40,7 @@ module Points
def verify_archive(archive)
Rails.logger.info("Verifying archive #{archive.id} (#{archive.month_display}, chunk #{archive.chunk_number})...")
start_time = Time.current
verification_result = perform_verification(archive)
@ -47,6 +48,16 @@ module Points
archive.update!(verified_at: Time.current)
@stats[:verified] += 1
Rails.logger.info("✓ Archive #{archive.id} verified successfully")
# Report successful verification operation
Metrics::Archives::Operation.new(
operation: 'verify',
status: 'success',
user_id: archive.user_id
).call
# Report verification duration
report_verification_metric(start_time, 'success')
else
@stats[:failed] += 1
Rails.logger.error("✗ Archive #{archive.id} verification failed: #{verification_result[:error]}")
@ -54,11 +65,32 @@ module Points
StandardError.new(verification_result[:error]),
"Archive verification failed for archive #{archive.id}"
)
# Report failed verification operation
Metrics::Archives::Operation.new(
operation: 'verify',
status: 'failure',
user_id: archive.user_id
).call
# Report verification duration with check name
check_name = extract_check_name_from_error(verification_result[:error])
report_verification_metric(start_time, 'failure', check_name)
end
rescue StandardError => e
@stats[:failed] += 1
ExceptionReporter.call(e, "Failed to verify archive #{archive.id}")
Rails.logger.error("✗ Archive #{archive.id} verification error: #{e.message}")
# Report failed verification operation
Metrics::Archives::Operation.new(
operation: 'verify',
status: 'failure',
user_id: archive.user_id
).call
# Report verification duration
report_verification_metric(start_time, 'failure', 'exception')
end
def perform_verification(archive)
@ -194,6 +226,39 @@ module Points
# Order-independent fingerprint of a collection of point IDs: the IDs are
# sorted, comma-joined into a canonical string, then SHA-256 hashed.
def calculate_checksum(point_ids)
  canonical = point_ids.sort.join(',')
  Digest::SHA256.hexdigest(canonical)
end
# Emits a verification-duration metric via Metrics::Archives::Verification.
# @param start_time [Time] wall-clock time when verification began
# @param status [String] 'success' or 'failure'
# @param check_name [String, nil] which check failed, when status is 'failure'
def report_verification_metric(start_time, status, check_name = nil)
  duration = Time.current - start_time
  Metrics::Archives::Verification.new(
    duration_seconds: duration,
    status: status,
    check_name: check_name
  ).call
end
# Maps a human-readable verification error message to a short, machine
# friendly check name used as a metric label. Patterns are checked in
# order; unrecognised messages map to 'unknown'.
def extract_check_name_from_error(error_message)
  patterns = [
    [/File not attached/i, 'file_not_attached'],
    [/File download failed/i, 'download_failed'],
    [/File is empty/i, 'empty_file'],
    [/MD5 checksum mismatch/i, 'md5_checksum_mismatch'],
    [%r{Decompression/parsing failed}i, 'decompression_failed'],
    [/Point count mismatch/i, 'count_mismatch'],
    [/Point IDs checksum mismatch/i, 'checksum_mismatch'],
    [/Raw data mismatch/i, 'raw_data_mismatch']
  ]

  matched = patterns.find { |regex, _name| regex.match?(error_message) }
  matched ? matched.last : 'unknown'
end
end
end
end

View file

@ -0,0 +1,51 @@
# frozen_string_literal: true
require 'rails_helper'
require 'prometheus_exporter/client'
# Specs for Metrics::Archives::CompressionRatio: asserts the exact histogram
# payload sent to Prometheus and the no-op path when the exporter is disabled.
RSpec.describe Metrics::Archives::CompressionRatio do
  describe '#call' do
    subject(:compression_ratio) do
      described_class.new(
        original_size: original_size,
        compressed_size: compressed_size
      ).call
    end

    let(:original_size) { 10_000 }
    let(:compressed_size) { 3_000 }
    # 3_000 / 10_000
    let(:expected_ratio) { 0.3 }
    let(:prometheus_client) { instance_double(PrometheusExporter::Client) }

    before do
      allow(PrometheusExporter::Client).to receive(:default).and_return(prometheus_client)
      allow(prometheus_client).to receive(:send_json)
      allow(DawarichSettings).to receive(:prometheus_exporter_enabled?).and_return(true)
    end

    it 'sends compression ratio histogram metric to prometheus' do
      expect(prometheus_client).to receive(:send_json).with(
        {
          type: 'histogram',
          name: 'dawarich_archive_compression_ratio',
          value: expected_ratio,
          buckets: [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]
        }
      )

      compression_ratio
    end

    context 'when prometheus exporter is disabled' do
      before do
        allow(DawarichSettings).to receive(:prometheus_exporter_enabled?).and_return(false)
      end

      it 'does not send metric' do
        expect(prometheus_client).not_to receive(:send_json)

        compression_ratio
      end
    end
  end
end

View file

@ -0,0 +1,74 @@
# frozen_string_literal: true
require 'rails_helper'
require 'prometheus_exporter/client'
# Specs for Metrics::Archives::CountMismatch: asserts both the mismatch
# counter and the count-difference gauge payloads, plus the disabled path.
RSpec.describe Metrics::Archives::CountMismatch do
  describe '#call' do
    subject(:count_mismatch) do
      described_class.new(
        user_id: user_id,
        year: year,
        month: month,
        expected: expected,
        actual: actual
      ).call
    end

    let(:user_id) { 123 }
    let(:year) { 2025 }
    let(:month) { 1 }
    let(:expected) { 100 }
    let(:actual) { 95 }
    let(:prometheus_client) { instance_double(PrometheusExporter::Client) }

    before do
      allow(PrometheusExporter::Client).to receive(:default).and_return(prometheus_client)
      allow(prometheus_client).to receive(:send_json)
      allow(DawarichSettings).to receive(:prometheus_exporter_enabled?).and_return(true)
    end

    it 'sends count mismatch counter metric' do
      expect(prometheus_client).to receive(:send_json).with(
        {
          type: 'counter',
          name: 'dawarich_archive_count_mismatches_total',
          value: 1,
          labels: {
            year: year.to_s,
            month: month.to_s
          }
        }
      )

      count_mismatch
    end

    it 'sends count difference gauge metric' do
      # |expected - actual| == |100 - 95| == 5
      expect(prometheus_client).to receive(:send_json).with(
        {
          type: 'gauge',
          name: 'dawarich_archive_count_difference',
          value: 5,
          labels: {
            user_id: user_id.to_s
          }
        }
      )

      count_mismatch
    end

    context 'when prometheus exporter is disabled' do
      before do
        allow(DawarichSettings).to receive(:prometheus_exporter_enabled?).and_return(false)
      end

      it 'does not send metrics' do
        expect(prometheus_client).not_to receive(:send_json)

        count_mismatch
      end
    end
  end
end

View file

@ -0,0 +1,63 @@
# frozen_string_literal: true
require 'rails_helper'
require 'prometheus_exporter/client'
# Specs for Metrics::Archives::Operation: asserts the operation counter
# payload for success/failure labels and the disabled-exporter no-op.
RSpec.describe Metrics::Archives::Operation do
  describe '#call' do
    subject(:operation) { described_class.new(operation: operation_type, status: status, user_id: user_id).call }

    let(:user_id) { 123 }
    let(:operation_type) { 'archive' }
    let(:status) { 'success' }
    let(:prometheus_client) { instance_double(PrometheusExporter::Client) }

    before do
      allow(PrometheusExporter::Client).to receive(:default).and_return(prometheus_client)
      allow(prometheus_client).to receive(:send_json)
      allow(DawarichSettings).to receive(:prometheus_exporter_enabled?).and_return(true)
    end

    it 'sends operation metric to prometheus' do
      expect(prometheus_client).to receive(:send_json).with(
        {
          type: 'counter',
          name: 'dawarich_archive_operations_total',
          value: 1,
          labels: {
            operation: operation_type,
            status: status
          }
        }
      )

      operation
    end

    context 'when prometheus exporter is disabled' do
      before do
        allow(DawarichSettings).to receive(:prometheus_exporter_enabled?).and_return(false)
      end

      it 'does not send metric' do
        expect(prometheus_client).not_to receive(:send_json)

        operation
      end
    end

    context 'when operation fails' do
      let(:status) { 'failure' }

      it 'sends failure metric' do
        expect(prometheus_client).to receive(:send_json).with(
          hash_including(
            labels: hash_including(status: 'failure')
          )
        )

        operation
      end
    end
  end
end

View file

@ -0,0 +1,61 @@
# frozen_string_literal: true
require 'rails_helper'
require 'prometheus_exporter/client'
# Specs for Metrics::Archives::PointsArchived: asserts the points counter
# payload for both 'added' and 'removed' operations and the disabled path.
RSpec.describe Metrics::Archives::PointsArchived do
  describe '#call' do
    subject(:points_archived) { described_class.new(count: count, operation: operation).call }

    let(:count) { 250 }
    let(:operation) { 'added' }
    let(:prometheus_client) { instance_double(PrometheusExporter::Client) }

    before do
      allow(PrometheusExporter::Client).to receive(:default).and_return(prometheus_client)
      allow(prometheus_client).to receive(:send_json)
      allow(DawarichSettings).to receive(:prometheus_exporter_enabled?).and_return(true)
    end

    it 'sends points archived metric to prometheus' do
      expect(prometheus_client).to receive(:send_json).with(
        {
          type: 'counter',
          name: 'dawarich_archive_points_total',
          value: count,
          labels: {
            operation: operation
          }
        }
      )

      points_archived
    end

    context 'when operation is removed' do
      let(:operation) { 'removed' }

      it 'sends removed operation metric' do
        expect(prometheus_client).to receive(:send_json).with(
          hash_including(
            labels: { operation: 'removed' }
          )
        )

        points_archived
      end
    end

    context 'when prometheus exporter is disabled' do
      before do
        allow(DawarichSettings).to receive(:prometheus_exporter_enabled?).and_return(false)
      end

      it 'does not send metric' do
        expect(prometheus_client).not_to receive(:send_json)

        points_archived
      end
    end
  end
end

View file

@ -0,0 +1,51 @@
# frozen_string_literal: true
require 'rails_helper'
require 'prometheus_exporter/client'
# Specs for Metrics::Archives::Size: asserts the size histogram payload
# (including the fixed MB/GB bucket boundaries) and the disabled path.
RSpec.describe Metrics::Archives::Size do
  describe '#call' do
    subject(:size) { described_class.new(size_bytes: size_bytes).call }

    let(:size_bytes) { 5_000_000 }
    let(:prometheus_client) { instance_double(PrometheusExporter::Client) }

    before do
      allow(PrometheusExporter::Client).to receive(:default).and_return(prometheus_client)
      allow(prometheus_client).to receive(:send_json)
      allow(DawarichSettings).to receive(:prometheus_exporter_enabled?).and_return(true)
    end

    it 'sends archive size histogram metric to prometheus' do
      expect(prometheus_client).to receive(:send_json).with(
        {
          type: 'histogram',
          name: 'dawarich_archive_size_bytes',
          value: size_bytes,
          buckets: [
            1_000_000,
            10_000_000,
            50_000_000,
            100_000_000,
            500_000_000,
            1_000_000_000
          ]
        }
      )

      size
    end

    context 'when prometheus exporter is disabled' do
      before do
        allow(DawarichSettings).to receive(:prometheus_exporter_enabled?).and_return(false)
      end

      it 'does not send metric' do
        expect(prometheus_client).not_to receive(:send_json)

        size
      end
    end
  end
end

View file

@ -0,0 +1,75 @@
# frozen_string_literal: true
require 'rails_helper'
require 'prometheus_exporter/client'
# Specs for Metrics::Archives::Verification: asserts the duration histogram
# payload, the failure counter emitted only on failure with a check name,
# and the disabled-exporter no-op.
RSpec.describe Metrics::Archives::Verification do
  describe '#call' do
    subject(:verification) do
      described_class.new(
        duration_seconds: duration_seconds,
        status: status,
        check_name: check_name
      ).call
    end

    let(:duration_seconds) { 2.5 }
    let(:status) { 'success' }
    let(:check_name) { nil }
    let(:prometheus_client) { instance_double(PrometheusExporter::Client) }

    before do
      allow(PrometheusExporter::Client).to receive(:default).and_return(prometheus_client)
      allow(prometheus_client).to receive(:send_json)
      allow(DawarichSettings).to receive(:prometheus_exporter_enabled?).and_return(true)
    end

    it 'sends verification duration histogram metric' do
      expect(prometheus_client).to receive(:send_json).with(
        {
          type: 'histogram',
          name: 'dawarich_archive_verification_duration_seconds',
          value: duration_seconds,
          labels: {
            status: status
          },
          buckets: [0.1, 0.5, 1, 2, 5, 10, 30, 60]
        }
      )

      verification
    end

    context 'when verification fails with check name' do
      let(:status) { 'failure' }
      let(:check_name) { 'count_mismatch' }

      it 'sends verification failure counter metric' do
        expect(prometheus_client).to receive(:send_json).with(
          hash_including(
            type: 'counter',
            name: 'dawarich_archive_verification_failures_total',
            value: 1,
            labels: {
              check: check_name
            }
          )
        )

        verification
      end
    end

    context 'when prometheus exporter is disabled' do
      before do
        allow(DawarichSettings).to receive(:prometheus_exporter_enabled?).and_return(false)
      end

      it 'does not send metrics' do
        expect(prometheus_client).not_to receive(:send_json)

        verification
      end
    end
  end
end