Use secure file downloader for imports

Eugene Burmakin 2025-04-23 23:27:55 +02:00
parent 45a310319f
commit e433ed4d1c
12 changed files with 42 additions and 61 deletions
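Every importer below gets the same mechanical change: the streaming import.file.download { |file| ... } block is replaced with one call to the newly namespaced downloader, which returns the verified file content as a string. The shared call pattern, as it appears throughout the diff:

    file_content = Imports::SecureFileDownloader.new(import.file).download_with_verification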

View file

@@ -30,7 +30,7 @@ class Point < ApplicationRecord
   after_create :async_reverse_geocode
   after_create_commit :broadcast_coordinates
-  after_commit -> { Import::UpdatePointsCountJob.perform_later(import_id) }, on: :destroy, if: -> { import_id.present? }
+  # after_commit -> { Import::UpdatePointsCountJob.perform_later(import_id) }, on: :destroy, if: -> { import_id.present? }
 
   def self.without_raw_data
     select(column_names - ['raw_data'])

View file

@@ -12,8 +12,8 @@ class Geojson::ImportParser
   end
 
   def call
-    import.file.download do |file|
-      json = Oj.load(file)
+    file_content = Imports::SecureFileDownloader.new(import.file).download_with_verification
+    json = Oj.load(file_content)
 
     data = Geojson::Params.new(json).call
@@ -25,5 +25,4 @@ class Geojson::ImportParser
       broadcast_import_progress(import, index)
     end
   end
-  end
 end

View file

@@ -51,7 +51,7 @@ class GoogleMaps::PhoneTakeoutParser
     raw_signals = []
     raw_array = []
 
-    file_content = SecureFileDownloader.new(import.file).download_with_verification
+    file_content = Imports::SecureFileDownloader.new(import.file).download_with_verification
     json = Oj.load(file_content)

View file

@@ -23,7 +23,7 @@ class GoogleMaps::RecordsStorageImporter
   attr_reader :import, :user
 
   def process_file_in_batches
-    file_content = SecureFileDownloader.new(import.file).download_with_verification
+    file_content = Imports::SecureFileDownloader.new(import.file).download_with_verification
     locations = parse_file(file_content)
 
     process_locations_in_batches(locations) if locations.present?
   end

View file

@@ -61,19 +61,14 @@ class GoogleMaps::SemanticHistoryParser
   end
 
   def points_data
-    data = nil
-
-    import.file.download do |f|
-      json = Oj.load(f)
-
-      data = json['timelineObjects'].flat_map do |timeline_object|
-        parse_timeline_object(timeline_object)
-      end.compact
-    end
-
-    data
-  end
+    file_content = Imports::SecureFileDownloader.new(import.file).download_with_verification
+    json = Oj.load(file_content)
+
+    json['timelineObjects'].flat_map do |timeline_object|
+      parse_timeline_object(timeline_object)
+    end.compact
+  end
 
   def parse_timeline_object(timeline_object)
     if timeline_object['activitySegment'].present?
       parse_activity_segment(timeline_object['activitySegment'])

View file

@@ -13,8 +13,8 @@ class Gpx::TrackImporter
   end
 
   def call
-    import.file.download do |file|
-      json = Hash.from_xml(file)
+    file_content = Imports::SecureFileDownloader.new(import.file).download_with_verification
+    json = Hash.from_xml(file_content)
 
     tracks = json['gpx']['trk']
     tracks_arr = tracks.is_a?(Array) ? tracks : [tracks]
@@ -24,7 +24,6 @@ class Gpx::TrackImporter
     bulk_insert_points(points_data)
   end
-  end
 
   private

View file

@@ -1,6 +1,6 @@
 # frozen_string_literal: true
 
-class SecureFileDownloader
+class Imports::SecureFileDownloader
   DOWNLOAD_TIMEOUT = 300 # 5 minutes timeout
   MAX_RETRIES = 3

View file

@@ -11,8 +11,8 @@ class OwnTracks::Importer
   end
 
   def call
-    import.file.download do |file|
-      parsed_data = OwnTracks::RecParser.new(file).call
+    file_content = Imports::SecureFileDownloader.new(import.file).download_with_verification
+    parsed_data = OwnTracks::RecParser.new(file_content).call
 
     points_data = parsed_data.map do |point|
       OwnTracks::Params.new(point).call.merge(
@@ -25,7 +25,6 @@ class OwnTracks::Importer
     bulk_insert_points(points_data)
   end
-  end
 
   private

View file

@@ -11,12 +11,11 @@ class Photos::ImportParser
   end
 
   def call
-    import.file.download do |file|
-      json = Oj.load(file)
+    file_content = Imports::SecureFileDownloader.new(import.file).download_with_verification
+    json = Oj.load(file_content)
 
     json.each.with_index(1) { |point, index| create_point(point, index) }
   end
-  end
 
   def create_point(point, index)
     return 0 if point['latitude'].blank? || point['longitude'].blank? || point['timestamp'].blank?

View file

@@ -79,14 +79,4 @@ RSpec.describe Point, type: :model do
       end
     end
   end
-
-  describe 'callbacks' do
-    describe '#update_import_points_count' do
-      let(:point) { create(:point, import_id: 1) }
-
-      it 'updates the import points count' do
-        expect { point.destroy }.to have_enqueued_job(Import::UpdatePointsCountJob).with(1)
-      end
-    end
-  end
 end

View file

@@ -148,10 +148,10 @@ RSpec.describe GoogleMaps::RecordsStorageImporter do
       # Create a mock that will return a successful result
       # The internal retries are implemented inside SecureFileDownloader,
      # not in the RecordsStorageImporter
-      downloader = instance_double(SecureFileDownloader)
+      downloader = instance_double(Imports::SecureFileDownloader)
 
       # Create the downloader mock before it gets used
-      expect(SecureFileDownloader).to receive(:new).with(import.file).and_return(downloader)
+      expect(Imports::SecureFileDownloader).to receive(:new).with(import.file).and_return(downloader)
 
       # The SecureFileDownloader handles all the retries internally
       # From the perspective of the importer, it just gets the file content
@@ -164,10 +164,10 @@ RSpec.describe GoogleMaps::RecordsStorageImporter do
     it 'fails after max retries' do
       # The retry mechanism is in SecureFileDownloader, not RecordsStorageImporter
       # So we need to simulate that the method throws the error after internal retries
-      downloader = instance_double(SecureFileDownloader)
+      downloader = instance_double(Imports::SecureFileDownloader)
 
       # Create the downloader mock before it gets used - expect only one call from the importer
-      expect(SecureFileDownloader).to receive(:new).with(import.file).and_return(downloader)
+      expect(Imports::SecureFileDownloader).to receive(:new).with(import.file).and_return(downloader)
 
       # This should be called once, and the internal retries should have been attempted
       # After the max retries, it will still raise the Timeout::Error that bubbles up
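A plausible completion of this failure path, with the stub raising only after the downloader's internal retries are spent. The subject name `importer` is a guess at the surrounding spec; `process_file_in_batches` is the real method from the hunk above:

    # Hypothetical continuation: the downloader has already exhausted its
    # internal retries, so the importer sees a single call that raises.
    expect(downloader).to receive(:download_with_verification)
      .once
      .and_raise(Timeout::Error)

    expect { importer.process_file_in_batches }.to raise_error(Timeout::Error)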

View file

@@ -2,7 +2,7 @@
 require 'rails_helper'
 
-RSpec.describe SecureFileDownloader do
+RSpec.describe Imports::SecureFileDownloader do
   let(:file_content) { 'test content' }
   let(:file_size) { file_content.bytesize }
   let(:checksum) { Base64.strict_encode64(Digest::MD5.digest(file_content)) }
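These lets mirror how ActiveStorage itself computes blob checksums (a Base64-encoded MD5 digest), which is presumably what download_with_verification compares against. A quick illustration, assuming a Rails app with ActiveStorage configured:

    content  = 'test content'
    expected = Base64.strict_encode64(Digest::MD5.digest(content))

    # ActiveStorage computes the same value when the blob is uploaded.
    blob = ActiveStorage::Blob.create_and_upload!(
      io: StringIO.new(content), filename: 'test.txt'
    )
    blob.checksum == expected # => true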