Update exporting process to use minimal compression for speed/size balance

Eugene Burmakin 2025-06-26 00:31:21 +02:00
parent dd87f57971
commit 22a7d662c9
4 changed files with 157 additions and 72 deletions
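Note: the commit message names minimal compression, but none of the hunks below show where the deflate level is configured. For context, rubyzip exposes a global setting for this. A minimal sketch, assuming it is set in an initializer elsewhere in the app (not part of this diff):

    require 'zip'
    require 'zlib'

    # Assumed initializer, not shown in this commit: Zlib::BEST_SPEED (level 1)
    # deflates much faster than Zlib::BEST_COMPRESSION (level 9) at the cost of
    # a somewhat larger archive, i.e. the speed/size balance named above.
    Zip.default_compression = Zlib::BEST_SPEED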

View file

@@ -159,43 +159,66 @@ class Users::ExportData
   end

   def export
-    # TODO: Implement
-    # 1. Export user settings
-    # 2. Export user points
-    # 4. Export user visits
-    # 8. Export user places
-    # 11. Zip all the files
     timestamp = Time.current.strftime('%Y%m%d_%H%M%S')
     export_directory = Rails.root.join('tmp', "#{user.email.gsub(/[^0-9A-Za-z._-]/, '_')}_#{timestamp}")
     files_directory = export_directory.join('files')
     FileUtils.mkdir_p(files_directory)

     begin
-      data = {}
-      data[:settings] = user.safe_settings.settings
-      data[:areas] = Users::ExportData::Areas.new(user).call
-      data[:imports] = Users::ExportData::Imports.new(user, files_directory).call
-      data[:exports] = Users::ExportData::Exports.new(user, files_directory).call
-      data[:trips] = Users::ExportData::Trips.new(user).call
-      data[:stats] = Users::ExportData::Stats.new(user).call
-      data[:notifications] = Users::ExportData::Notifications.new(user).call
-      data[:points] = Users::ExportData::Points.new(user).call
-      data[:visits] = Users::ExportData::Visits.new(user).call
-      data[:places] = Users::ExportData::Places.new(user).call
+      # Temporarily disable SQL logging for better performance
+      old_logger = ActiveRecord::Base.logger
+      ActiveRecord::Base.logger = nil if Rails.env.production?

       json_file_path = export_directory.join('data.json')
-      File.write(json_file_path, data.to_json)
+
+      # Stream JSON writing instead of building in memory
+      File.open(json_file_path, 'w') do |file|
+        file.write('{"settings":')
+        file.write(user.safe_settings.settings.to_json)
+        file.write(',"areas":')
+        file.write(Users::ExportData::Areas.new(user).call.to_json)
+        file.write(',"imports":')
+        file.write(Users::ExportData::Imports.new(user, files_directory).call.to_json)
+        file.write(',"exports":')
+        file.write(Users::ExportData::Exports.new(user, files_directory).call.to_json)
+        file.write(',"trips":')
+        file.write(Users::ExportData::Trips.new(user).call.to_json)
+        file.write(',"stats":')
+        file.write(Users::ExportData::Stats.new(user).call.to_json)
+        file.write(',"notifications":')
+        file.write(Users::ExportData::Notifications.new(user).call.to_json)
+        file.write(',"points":')
+        file.write(Users::ExportData::Points.new(user).call.to_json)
+        file.write(',"visits":')
+        file.write(Users::ExportData::Visits.new(user).call.to_json)
+        file.write(',"places":')
+        file.write(Users::ExportData::Places.new(user).call.to_json)
+        file.write('}')
+      end

       zip_file_path = export_directory.join('export.zip')
-      create_zip_archive(zip_file_path)
+      create_zip_archive(export_directory, zip_file_path)

-      # Move the zip file to a final location (e.g., tmp root) before cleanup
-      final_zip_path = Rails.root.join('tmp', "#{user.email}_export_#{Time.current.strftime('%Y%m%d_%H%M%S')}.zip")
+      # Move the zip file to a safe location before cleanup
+      final_zip_path = Rails.root.join('tmp', "export_#{timestamp}.zip")
       FileUtils.mv(zip_file_path, final_zip_path)

       final_zip_path
     ensure
-      cleanup_temporary_files
+      # Restore logger
+      ActiveRecord::Base.logger = old_logger if old_logger
+
+      cleanup_temporary_files(export_directory) if export_directory&.exist?
     end
   end
@@ -211,7 +234,8 @@ class Users::ExportData
     @files_directory ||= export_directory.join('files')
   end

-  def create_zip_archive(zip_file_path)
+  def create_zip_archive(export_directory, zip_file_path)
+    # Create zip archive with standard compression
     Zip::File.open(zip_file_path, Zip::File::CREATE) do |zipfile|
       Dir.glob(export_directory.join('**', '*')).each do |file|
         next if File.directory?(file) || file == zip_file_path.to_s
@@ -222,7 +246,7 @@ class Users::ExportData
       end
     end

-  def cleanup_temporary_files
+  def cleanup_temporary_files(export_directory)
     return unless File.directory?(export_directory)

     Rails.logger.info "Cleaning up temporary export directory: #{export_directory}"
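
The export method above makes two independent changes: it serializes each top-level section straight to disk, so peak memory is bounded by the largest single section rather than the whole data hash, and it silences the SQL logger in production for the duration of the export. A minimal sketch of the streaming idea in isolation (the file name and sections are illustrative, not from the diff):

    require 'json'

    sections = {
      'settings' => -> { { 'theme' => 'dark' } },
      'points'   => -> { [{ 'id' => 1 }, { 'id' => 2 }] }
    }

    File.open('data.json', 'w') do |file|
      file.write('{')
      sections.each_with_index do |(key, build), index|
        file.write(',') if index.positive?
        # Build, serialize, and write one section, then let it be GC'd
        # before the next section is computed.
        file.write("#{key.to_json}:#{build.call.to_json}")
      end
      file.write('}')
    end

Each section is written as soon as it is built, which is what keeps the resident set small for large exports.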

View file

@@ -1,5 +1,7 @@
 # frozen_string_literal: true

+require 'parallel'
+
 class Users::ExportData::Exports
   def initialize(user, files_directory)
     @user = user
@@ -7,8 +9,17 @@ class Users::ExportData::Exports
   end

   def call
-    user.exports.includes(:file_attachment).map do |export|
-      process_export(export)
+    exports_with_files = user.exports.includes(:file_attachment).to_a
+
+    # Only use parallel processing if we have multiple exports
+    if exports_with_files.size > 1
+      # Use fewer threads to avoid database connection issues
+      results = Parallel.map(exports_with_files, in_threads: 2) do |export|
+        process_export(export)
+      end
+      results
+    else
+      exports_with_files.map { |export| process_export(export) }
     end
   end
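
Parallel.map with in_threads runs the block in plain Ruby threads, and every thread that touches ActiveRecord needs its own database connection, which is why the thread count is capped at 2 here. A common hardening step (an assumption, not shown in this diff) is to check a connection out of the pool explicitly per block, reusing the names from the hunk above:

    results = Parallel.map(exports_with_files, in_threads: 2) do |export|
      # Each thread borrows a connection and returns it when the block exits.
      ActiveRecord::Base.connection_pool.with_connection do
        process_export(export)
      end
    end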

View file

@@ -1,5 +1,7 @@
 # frozen_string_literal: true

+require 'parallel'
+
 class Users::ExportData::Imports
   def initialize(user, files_directory)
     @user = user
@@ -7,8 +9,17 @@ class Users::ExportData::Imports
   end

   def call
-    user.imports.includes(:file_attachment).map do |import|
-      process_import(import)
+    imports_with_files = user.imports.includes(:file_attachment).to_a
+
+    # Only use parallel processing if we have multiple imports
+    if imports_with_files.size > 1
+      # Use fewer threads to avoid database connection issues
+      results = Parallel.map(imports_with_files, in_threads: 2) do |import|
+        process_import(import)
+      end
+      results
+    else
+      imports_with_files.map { |import| process_import(import) }
     end
   end
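
This hunk is the same fan-out logic as in Exports with import swapped in. A hypothetical extraction (helper name and placement assumed, not part of this commit) could remove the duplication:

    require 'parallel'

    # Hypothetical shared helper both services could call.
    def map_maybe_in_parallel(records, threads: 2, &block)
      # Thread setup is not worth paying for zero or one record.
      return records.map(&block) if records.size <= 1

      Parallel.map(records, in_threads: threads, &block)
    end

Call sites would then read map_maybe_in_parallel(imports_with_files) { |import| process_import(import) }.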

View file

@@ -6,59 +6,98 @@ class Users::ExportData::Points
   end

   def call
-    points_data = Point.where(user_id: user.id).order(id: :asc).as_json(except: %w[user_id])
-
-    return [] if points_data.empty?
-
-    # Get unique IDs for batch loading
-    import_ids = points_data.filter_map { |row| row['import_id'] }.uniq
-    country_ids = points_data.filter_map { |row| row['country_id'] }.uniq
-    visit_ids = points_data.filter_map { |row| row['visit_id'] }.uniq
-
-    # Load all imports in one query
-    imports_map = {}
-    if import_ids.any?
-      Import.where(id: import_ids).find_each do |import|
-        imports_map[import.id] = {
-          'name' => import.name,
-          'source' => import.source,
-          'created_at' => import.created_at.iso8601
-        }
-      end
-    end
-
-    # Load all countries in one query
-    countries_map = {}
-    if country_ids.any?
-      Country.where(id: country_ids).find_each do |country|
-        countries_map[country.id] = {
-          'name' => country.name,
-          'iso_a2' => country.iso_a2,
-          'iso_a3' => country.iso_a3
-        }
-      end
-    end
-
-    # Load all visits in one query
-    visits_map = {}
-    if visit_ids.any?
-      Visit.where(id: visit_ids).find_each do |visit|
-        visits_map[visit.id] = {
-          'name' => visit.name,
-          'started_at' => visit.started_at&.iso8601,
-          'ended_at' => visit.ended_at&.iso8601
-        }
-      end
-    end
-
-    # Build the final result
-    points_data.map do |row|
-      point_hash = row.except('import_id', 'country_id', 'visit_id', 'id').to_h
-
-      # Add relationship references
-      point_hash['import_reference'] = imports_map[row['import_id']]
-      point_hash['country_info'] = countries_map[row['country_id']]
-      point_hash['visit_reference'] = visits_map[row['visit_id']]
+    # Single optimized query with all joins to avoid N+1 queries
+    points_sql = <<-SQL
+      SELECT
+        p.battery_status, p.battery, p.timestamp, p.altitude, p.velocity, p.accuracy,
+        p.ping, p.tracker_id, p.topic, p.trigger, p.bssid, p.ssid, p.connection,
+        p.vertical_accuracy, p.mode, p.inrids, p.in_regions, p.raw_data, p.city, p.country,
+        p.geodata, p.reverse_geocoded_at, p.course, p.course_accuracy, p.external_track_id,
+        p.created_at, p.updated_at,
+        ST_X(p.lonlat::geometry) as longitude,
+        ST_Y(p.lonlat::geometry) as latitude,
+        -- Import reference
+        i.name as import_name,
+        i.source as import_source,
+        i.created_at as import_created_at,
+        -- Country info
+        c.name as country_name,
+        c.iso_a2 as country_iso_a2,
+        c.iso_a3 as country_iso_a3,
+        -- Visit reference
+        v.name as visit_name,
+        v.started_at as visit_started_at,
+        v.ended_at as visit_ended_at
+      FROM points p
+      LEFT JOIN imports i ON p.import_id = i.id
+      LEFT JOIN countries c ON p.country_id = c.id
+      LEFT JOIN visits v ON p.visit_id = v.id
+      WHERE p.user_id = $1
+      ORDER BY p.id
+    SQL
+
+    result = ActiveRecord::Base.connection.exec_query(points_sql, 'Points Export', [user.id])
+
+    Rails.logger.info "Processing #{result.count} points for export..."
+
+    # Process results efficiently
+    result.map do |row|
+      point_hash = {
+        'battery_status' => row['battery_status'],
+        'battery' => row['battery'],
+        'timestamp' => row['timestamp'],
+        'altitude' => row['altitude'],
+        'velocity' => row['velocity'],
+        'accuracy' => row['accuracy'],
+        'ping' => row['ping'],
+        'tracker_id' => row['tracker_id'],
+        'topic' => row['topic'],
+        'trigger' => row['trigger'],
+        'bssid' => row['bssid'],
+        'ssid' => row['ssid'],
+        'connection' => row['connection'],
+        'vertical_accuracy' => row['vertical_accuracy'],
+        'mode' => row['mode'],
+        'inrids' => row['inrids'],
+        'in_regions' => row['in_regions'],
+        'raw_data' => row['raw_data'],
+        'city' => row['city'],
+        'country' => row['country'],
+        'geodata' => row['geodata'],
+        'reverse_geocoded_at' => row['reverse_geocoded_at'],
+        'course' => row['course'],
+        'course_accuracy' => row['course_accuracy'],
+        'external_track_id' => row['external_track_id'],
+        'created_at' => row['created_at'],
+        'updated_at' => row['updated_at'],
+        'longitude' => row['longitude'],
+        'latitude' => row['latitude']
+      }
+
+      # Add relationship references only if they exist
+      if row['import_name']
+        point_hash['import_reference'] = {
+          'name' => row['import_name'],
+          'source' => row['import_source'],
+          'created_at' => row['import_created_at']
+        }
+      end
+
+      if row['country_name']
+        point_hash['country_info'] = {
+          'name' => row['country_name'],
+          'iso_a2' => row['country_iso_a2'],
+          'iso_a3' => row['country_iso_a3']
+        }
+      end
+
+      if row['visit_name']
+        point_hash['visit_reference'] = {
+          'name' => row['visit_name'],
+          'started_at' => row['visit_started_at'],
+          'ended_at' => row['visit_ended_at']
+        }
+      end

       point_hash
     end
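
The rewrite replaces one as_json pass plus three batch lookups (and the in-Ruby joins between them) with a single LEFT JOIN query, then regroups the flat row columns into nested references. The guards work because an unmatched LEFT JOIN leaves the joined columns NULL. A toy illustration of the regrouping (values assumed):

    row = { 'city' => 'Berlin', 'import_name' => 'march.gpx', 'country_name' => nil }

    point_hash = { 'city' => row['city'] }

    # The import matched, so a nested reference is added.
    point_hash['import_reference'] = { 'name' => row['import_name'] } if row['import_name']

    # The country did not match: the key is omitted entirely, whereas the
    # old code always assigned it (possibly as nil).
    point_hash['country_info'] = { 'name' => row['country_name'] } if row['country_name']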