Mirror of https://github.com/Freika/dawarich.git (synced 2026-01-11 09:41:40 -05:00)

Commit 9fb4f908ad (parent 393619051a)

Add actual verification of raw data archives after creation, and only clear raw_data for verified archives.

2 changed files with 63 additions and 6 deletions
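For context: the archive being verified is gzip-compressed JSONL, one {id, raw_data} object per line. The verification round-trip that the diff below implements can be sketched with nothing but the Ruby standard library. The sample records here are hypothetical stand-ins for Point#raw_data rows, and the writer side is an assumption about the archiver, which this commit does not touch.

require 'json'
require 'stringio'
require 'zlib'

# Hypothetical stand-ins for Point rows; real values come from the database.
records = {
  1 => { 'lon' => 13.4, 'lat' => 52.5 },
  2 => { 'lon' => 2.35, 'lat' => 48.85 }
}

# Archive side (assumed): one JSON object per line, gzip-compressed.
jsonl = records.map { |id, raw| JSON.generate('id' => id, 'raw_data' => raw) }.join("\n")
compressed = Zlib.gzip(jsonl)

# Verify side: decompress and rebuild the id => raw_data map, mirroring
# decompress_and_extract_data in the diff below.
archived = {}
Zlib::GzipReader.new(StringIO.new(compressed)).each_line do |line|
  row = JSON.parse(line)
  archived[row['id']] = row['raw_data']
end

raise 'round-trip mismatch' unless archived == records
puts "verified #{archived.size} records"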
@@ -87,13 +87,15 @@ module Points
           end
         end
 
-        # 5. Verify file can be decompressed and is valid JSONL
+        # 5. Verify file can be decompressed and is valid JSONL, extract data
         begin
-          point_ids = decompress_and_extract_point_ids(compressed_content)
+          archived_data = decompress_and_extract_data(compressed_content)
         rescue StandardError => e
           return { success: false, error: "Decompression/parsing failed: #{e.message}" }
         end
 
+        point_ids = archived_data.keys
+
         # 6. Verify point count matches
         if point_ids.count != archive.point_count
           return {
@@ -117,21 +119,58 @@ module Points
           }
         end
 
+        # 9. Verify archived raw_data matches current database raw_data
+        verification_result = verify_raw_data_matches(archived_data)
+        return verification_result unless verification_result[:success]
+
         { success: true }
       end
 
-      def decompress_and_extract_point_ids(compressed_content)
+      def decompress_and_extract_data(compressed_content)
         io = StringIO.new(compressed_content)
         gz = Zlib::GzipReader.new(io)
-        point_ids = []
+        archived_data = {}
 
         gz.each_line do |line|
           data = JSON.parse(line)
-          point_ids << data['id']
+          archived_data[data['id']] = data['raw_data']
         end
 
         gz.close
-        point_ids
+        archived_data
       end
 
+      def verify_raw_data_matches(archived_data)
+        # Sample verification: check random points to ensure archived data matches database
+        # For performance, we'll verify a sample rather than all points
+        sample_size = [archived_data.size, 100].min
+        point_ids_to_check = archived_data.keys.sample(sample_size)
+
+        mismatches = []
+
+        Point.where(id: point_ids_to_check).find_each do |point|
+          archived_raw_data = archived_data[point.id]
+          current_raw_data = point.raw_data
+
+          # Compare the raw_data (both should be hashes)
+          if archived_raw_data != current_raw_data
+            mismatches << {
+              point_id: point.id,
+              archived: archived_raw_data,
+              current: current_raw_data
+            }
+          end
+        end
+
+        if mismatches.any?
+          return {
+            success: false,
+            error: "Raw data mismatch detected in #{mismatches.count} point(s). " \
+                   "First mismatch: Point #{mismatches.first[:point_id]}"
+          }
+        end
+
+        { success: true }
+      end
+
       def calculate_checksum(point_ids)
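A note on the sampling strategy above before the spec changes: verify_raw_data_matches compares at most 100 randomly chosen points against the database rather than every archived point. Stripped of ActiveRecord, the idea reduces to this standalone sketch (plain hashes stand in for the Point.where(...) query; ids and values are made up):

# Hypothetical archived snapshot and current database state, keyed by point id.
archived = { 1 => { 'lon' => 13.4 }, 2 => { 'lon' => 2.35 }, 3 => { 'lon' => 0.1 } }
database = { 1 => { 'lon' => 13.4 }, 2 => { 'lon' => 999.0 }, 3 => { 'lon' => 0.1 } }

# Cap the sample at 100 ids so verification stays cheap for large archives.
sample_size = [archived.size, 100].min
ids_to_check = archived.keys.sample(sample_size)

# Collect ids whose archived raw_data no longer matches the database.
mismatches = ids_to_check.reject { |id| archived[id] == database[id] }

if mismatches.any?
  puts "Raw data mismatch detected in #{mismatches.count} point(s)"
else
  puts 'sampled points match'
end

One consequence worth noting: for archives under 100 points the sample covers every point, so the mismatch spec below is deterministic; above that threshold a mismatch in an unsampled point can go undetected.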
@@ -69,6 +69,24 @@ RSpec.describe Points::RawData::Verifier do
         verifier.verify_specific_archive(archive.id)
       end.not_to change { archive.reload.verified_at }
     end
+
+    it 'detects raw_data mismatch between archive and database' do
+      # Modify raw_data in database after archiving
+      points.first.update_column(:raw_data, { lon: 999.0, lat: 999.0 })
+
+      expect do
+        verifier.verify_specific_archive(archive.id)
+      end.not_to change { archive.reload.verified_at }
+    end
+
+    it 'verifies raw_data matches between archive and database' do
+      # Ensure data hasn't changed
+      expect(points.first.raw_data).to eq({ 'lon' => 13.4, 'lat' => 52.5 })
+
+      verifier.verify_specific_archive(archive.id)
+
+      expect(archive.reload.verified_at).to be_present
+    end
   end
 
   describe '#verify_month' do