Update exporting process to use minimal compression for speed/size balance

Eugene Burmakin 2025-06-26 00:31:21 +02:00
parent dd87f57971
commit 22a7d662c9
4 changed files with 157 additions and 72 deletions

@@ -159,43 +159,66 @@ class Users::ExportData
   end
 
   def export
-    # TODO: Implement
-    # 1. Export user settings
-    # 2. Export user points
-    # 4. Export user visits
-    # 8. Export user places
-    # 11. Zip all the files
+    timestamp = Time.current.strftime('%Y%m%d_%H%M%S')
+    export_directory = Rails.root.join('tmp', "#{user.email.gsub(/[^0-9A-Za-z._-]/, '_')}_#{timestamp}")
+    files_directory = export_directory.join('files')
+
     FileUtils.mkdir_p(files_directory)
 
     begin
-      data = {}
-
-      data[:settings] = user.safe_settings.settings
-      data[:areas] = Users::ExportData::Areas.new(user).call
-      data[:imports] = Users::ExportData::Imports.new(user, files_directory).call
-      data[:exports] = Users::ExportData::Exports.new(user, files_directory).call
-      data[:trips] = Users::ExportData::Trips.new(user).call
-      data[:stats] = Users::ExportData::Stats.new(user).call
-      data[:notifications] = Users::ExportData::Notifications.new(user).call
-      data[:points] = Users::ExportData::Points.new(user).call
-      data[:visits] = Users::ExportData::Visits.new(user).call
-      data[:places] = Users::ExportData::Places.new(user).call
+      # Temporarily disable SQL logging for better performance
+      old_logger = ActiveRecord::Base.logger
+      ActiveRecord::Base.logger = nil if Rails.env.production?
 
       json_file_path = export_directory.join('data.json')
-      File.write(json_file_path, data.to_json)
+
+      # Stream JSON writing instead of building in memory
+      File.open(json_file_path, 'w') do |file|
+        file.write('{"settings":')
+        file.write(user.safe_settings.settings.to_json)
+        file.write(',"areas":')
+        file.write(Users::ExportData::Areas.new(user).call.to_json)
+        file.write(',"imports":')
+        file.write(Users::ExportData::Imports.new(user, files_directory).call.to_json)
+        file.write(',"exports":')
+        file.write(Users::ExportData::Exports.new(user, files_directory).call.to_json)
+        file.write(',"trips":')
+        file.write(Users::ExportData::Trips.new(user).call.to_json)
+        file.write(',"stats":')
+        file.write(Users::ExportData::Stats.new(user).call.to_json)
+        file.write(',"notifications":')
+        file.write(Users::ExportData::Notifications.new(user).call.to_json)
+        file.write(',"points":')
+        file.write(Users::ExportData::Points.new(user).call.to_json)
+        file.write(',"visits":')
+        file.write(Users::ExportData::Visits.new(user).call.to_json)
+        file.write(',"places":')
+        file.write(Users::ExportData::Places.new(user).call.to_json)
+        file.write('}')
+      end
 
       zip_file_path = export_directory.join('export.zip')
-      create_zip_archive(zip_file_path)
+      create_zip_archive(export_directory, zip_file_path)
 
-      # Move the zip file to a final location (e.g., tmp root) before cleanup
-      final_zip_path = Rails.root.join('tmp', "#{user.email}_export_#{Time.current.strftime('%Y%m%d_%H%M%S')}.zip")
+      # Move the zip file to a safe location before cleanup
+      final_zip_path = Rails.root.join('tmp', "export_#{timestamp}.zip")
       FileUtils.mv(zip_file_path, final_zip_path)
 
       final_zip_path
     ensure
-      cleanup_temporary_files
+      # Restore logger
+      ActiveRecord::Base.logger = old_logger if old_logger
+      cleanup_temporary_files(export_directory) if export_directory&.exist?
    end
  end
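
Taken together, the new #export builds the temp directory tree, streams data.json to disk section by section, zips, moves the archive out, and cleans up. A minimal usage sketch; the constructor signature is an assumption, since this diff only shows #export:

    # Assumed entry point: Users::ExportData.new(user) is not shown in this diff.
    zip_path = Users::ExportData.new(user).export
    # => Pathname under tmp/, e.g. tmp/export_20250626_003121.zip
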
@@ -211,7 +234,8 @@ class Users::ExportData
     @files_directory ||= export_directory.join('files')
   end
 
-  def create_zip_archive(zip_file_path)
+  def create_zip_archive(export_directory, zip_file_path)
+    # Create zip archive with standard compression
     Zip::File.open(zip_file_path, Zip::File::CREATE) do |zipfile|
       Dir.glob(export_directory.join('**', '*')).each do |file|
         next if File.directory?(file) || file == zip_file_path.to_s
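
The commit title calls for minimal compression as a speed/size balance, while the comment added above says standard compression. With rubyzip the deflate level is a global setting; a sketch of the speed-leaning configuration, assuming Zip.default_compression is where this would be wired up (the diff does not show it):

    require 'zip'
    require 'zlib'

    # Zlib::BEST_SPEED is deflate level 1: fastest to produce, still much
    # smaller than storing files uncompressed. Where this line lives
    # (an initializer vs. around the export) is an assumption.
    Zip.default_compression = Zlib::BEST_SPEED
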
@@ -222,7 +246,7 @@ class Users::ExportData
     end
   end
 
-  def cleanup_temporary_files
+  def cleanup_temporary_files(export_directory)
     return unless File.directory?(export_directory)
 
     Rails.logger.info "Cleaning up temporary export directory: #{export_directory}"
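
The body of cleanup_temporary_files continues past the hunk; after the log line it presumably removes the directory tree. A sketch under that assumption:

    # Assumed continuation; the hunk cuts off after the log line.
    FileUtils.rm_rf(export_directory)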

@@ -1,5 +1,7 @@
 # frozen_string_literal: true
 
+require 'parallel'
+
 class Users::ExportData::Exports
   def initialize(user, files_directory)
     @user = user
@@ -7,9 +9,18 @@ class Users::ExportData::Exports
   end
 
   def call
-    user.exports.includes(:file_attachment).map do |export|
-      process_export(export)
-    end
+    exports_with_files = user.exports.includes(:file_attachment).to_a
+
+    # Only use parallel processing if we have multiple exports
+    if exports_with_files.size > 1
+      # Use fewer threads to avoid database connection issues
+      results = Parallel.map(exports_with_files, in_threads: 2) do |export|
+        process_export(export)
+      end
+      results
+    else
+      exports_with_files.map { |export| process_export(export) }
+    end
   end
 
   private
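
The in_threads: 2 cap keeps concurrent database use under the default Active Record pool size. If the thread count ever grows, a defensive variant is to have each worker check its connection out explicitly; a sketch using the standard connection_pool API, not what this commit ships:

    results = Parallel.map(exports_with_files, in_threads: 2) do |export|
      # Borrow a pooled connection for this thread and return it afterwards,
      # so correctness no longer depends on the thread count staying small.
      ActiveRecord::Base.connection_pool.with_connection do
        process_export(export)
      end
    end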

@@ -1,5 +1,7 @@
 # frozen_string_literal: true
 
+require 'parallel'
+
 class Users::ExportData::Imports
   def initialize(user, files_directory)
     @user = user
@@ -7,9 +9,18 @@ class Users::ExportData::Imports
   end
 
   def call
-    user.imports.includes(:file_attachment).map do |import|
-      process_import(import)
-    end
+    imports_with_files = user.imports.includes(:file_attachment).to_a
+
+    # Only use parallel processing if we have multiple imports
+    if imports_with_files.size > 1
+      # Use fewer threads to avoid database connection issues
+      results = Parallel.map(imports_with_files, in_threads: 2) do |import|
+        process_import(import)
+      end
+      results
+    else
+      imports_with_files.map { |import| process_import(import) }
+    end
   end
 
   private
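
process_import itself sits outside the hunk, but the includes(:file_attachment) preload implies each import's Active Storage file is copied into files_directory. A hypothetical sketch of that shape, not the commit's code:

    def process_import(import)
      data = import.as_json(except: %w[user_id])
      if import.file.attached?
        # Stream the blob to disk in chunks instead of loading it whole.
        target = files_directory.join("import_#{import.id}_#{import.file.filename}")
        File.open(target, 'wb') do |f|
          import.file.download { |chunk| f.write(chunk) }
        end
        data['file_name'] = File.basename(target)
      end
      data
    end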

@@ -6,59 +6,98 @@ class Users::ExportData::Points
   end
 
   def call
-    points_data = Point.where(user_id: user.id).order(id: :asc).as_json(except: %w[user_id])
-
-    return [] if points_data.empty?
-
-    # Get unique IDs for batch loading
-    import_ids = points_data.filter_map { |row| row['import_id'] }.uniq
-    country_ids = points_data.filter_map { |row| row['country_id'] }.uniq
-    visit_ids = points_data.filter_map { |row| row['visit_id'] }.uniq
-
-    # Load all imports in one query
-    imports_map = {}
-    if import_ids.any?
-      Import.where(id: import_ids).find_each do |import|
-        imports_map[import.id] = {
-          'name' => import.name,
-          'source' => import.source,
-          'created_at' => import.created_at.iso8601
-        }
-      end
-    end
-
-    # Load all countries in one query
-    countries_map = {}
-    if country_ids.any?
-      Country.where(id: country_ids).find_each do |country|
-        countries_map[country.id] = {
-          'name' => country.name,
-          'iso_a2' => country.iso_a2,
-          'iso_a3' => country.iso_a3
-        }
-      end
-    end
-
-    # Load all visits in one query
-    visits_map = {}
-    if visit_ids.any?
-      Visit.where(id: visit_ids).find_each do |visit|
-        visits_map[visit.id] = {
-          'name' => visit.name,
-          'started_at' => visit.started_at&.iso8601,
-          'ended_at' => visit.ended_at&.iso8601
-        }
-      end
-    end
-
-    # Build the final result
-    points_data.map do |row|
-      point_hash = row.except('import_id', 'country_id', 'visit_id', 'id').to_h
-
-      # Add relationship references
-      point_hash['import_reference'] = imports_map[row['import_id']]
-      point_hash['country_info'] = countries_map[row['country_id']]
-      point_hash['visit_reference'] = visits_map[row['visit_id']]
-
+    # Single optimized query with all joins to avoid N+1 queries
+    points_sql = <<-SQL
+      SELECT
+        p.battery_status, p.battery, p.timestamp, p.altitude, p.velocity, p.accuracy,
+        p.ping, p.tracker_id, p.topic, p.trigger, p.bssid, p.ssid, p.connection,
+        p.vertical_accuracy, p.mode, p.inrids, p.in_regions, p.raw_data, p.city, p.country,
+        p.geodata, p.reverse_geocoded_at, p.course, p.course_accuracy, p.external_track_id,
+        p.created_at, p.updated_at,
+        ST_X(p.lonlat::geometry) as longitude,
+        ST_Y(p.lonlat::geometry) as latitude,
+        -- Import reference
+        i.name as import_name,
+        i.source as import_source,
+        i.created_at as import_created_at,
+        -- Country info
+        c.name as country_name,
+        c.iso_a2 as country_iso_a2,
+        c.iso_a3 as country_iso_a3,
+        -- Visit reference
+        v.name as visit_name,
+        v.started_at as visit_started_at,
+        v.ended_at as visit_ended_at
+      FROM points p
+      LEFT JOIN imports i ON p.import_id = i.id
+      LEFT JOIN countries c ON p.country_id = c.id
+      LEFT JOIN visits v ON p.visit_id = v.id
+      WHERE p.user_id = $1
+      ORDER BY p.id
+    SQL
+
+    result = ActiveRecord::Base.connection.exec_query(points_sql, 'Points Export', [user.id])
+
+    Rails.logger.info "Processing #{result.count} points for export..."
+
+    # Process results efficiently
+    result.map do |row|
+      point_hash = {
+        'battery_status' => row['battery_status'],
+        'battery' => row['battery'],
+        'timestamp' => row['timestamp'],
+        'altitude' => row['altitude'],
+        'velocity' => row['velocity'],
+        'accuracy' => row['accuracy'],
+        'ping' => row['ping'],
+        'tracker_id' => row['tracker_id'],
+        'topic' => row['topic'],
+        'trigger' => row['trigger'],
+        'bssid' => row['bssid'],
+        'ssid' => row['ssid'],
+        'connection' => row['connection'],
+        'vertical_accuracy' => row['vertical_accuracy'],
+        'mode' => row['mode'],
+        'inrids' => row['inrids'],
+        'in_regions' => row['in_regions'],
+        'raw_data' => row['raw_data'],
+        'city' => row['city'],
+        'country' => row['country'],
+        'geodata' => row['geodata'],
+        'reverse_geocoded_at' => row['reverse_geocoded_at'],
+        'course' => row['course'],
+        'course_accuracy' => row['course_accuracy'],
+        'external_track_id' => row['external_track_id'],
+        'created_at' => row['created_at'],
+        'updated_at' => row['updated_at'],
+        'longitude' => row['longitude'],
+        'latitude' => row['latitude']
+      }
+
+      # Add relationship references only if they exist
+      if row['import_name']
+        point_hash['import_reference'] = {
+          'name' => row['import_name'],
+          'source' => row['import_source'],
+          'created_at' => row['import_created_at']
+        }
+      end
+
+      if row['country_name']
+        point_hash['country_info'] = {
+          'name' => row['country_name'],
+          'iso_a2' => row['country_iso_a2'],
+          'iso_a3' => row['country_iso_a3']
+        }
+      end
+
+      if row['visit_name']
+        point_hash['visit_reference'] = {
+          'name' => row['visit_name'],
+          'started_at' => row['visit_started_at'],
+          'ended_at' => row['visit_ended_at']
+        }
+      end
+
       point_hash
     end
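
The $1 placeholder keeps user.id out of the SQL string itself. Whether exec_query accepts raw Ruby values in the binds array depends on the Rails version and adapter; the long-form spelling with query attributes is the version-proof alternative. A defensive sketch, not what this commit ships:

    binds = [
      ActiveRecord::Relation::QueryAttribute.new(
        'user_id', user.id, ActiveModel::Type::Integer.new
      )
    ]
    result = ActiveRecord::Base.connection.exec_query(points_sql, 'Points Export', binds)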