diff --git a/.app_version b/.app_version index 93d4c1ef..19199bcc 100644 --- a/.app_version +++ b/.app_version @@ -1 +1 @@ -0.36.0 +0.36.1 diff --git a/CHANGELOG.md b/CHANGELOG.md index 6c022da7..b3e01da2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,13 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](http://keepachangelog.com/) and this project adheres to [Semantic Versioning](http://semver.org/). +# [0.36.1] - 2025-11-29 + +## Fixed + +- Exporting user data now works a lot faster and consumes less memory. +- Fixed the restart loop. #1937 #1975 + # [0.36.0] - 2025-11-24 ## OIDC and KML support release diff --git a/Gemfile b/Gemfile index 36cf0d9c..de3aafef 100644 --- a/Gemfile +++ b/Gemfile @@ -55,6 +55,7 @@ gem 'stimulus-rails' gem 'tailwindcss-rails', '= 3.3.2' gem 'turbo-rails', '>= 2.0.17' gem 'tzinfo-data', platforms: %i[mingw mswin x64_mingw jruby] +gem 'foreman' group :development, :test, :staging do gem 'brakeman', require: false @@ -81,7 +82,6 @@ end group :development do gem 'database_consistency', '>= 2.0.5', require: false - gem 'foreman' gem 'rubocop-rails', '>= 2.33.4', require: false gem 'strong_migrations', '>= 2.4.0' end diff --git a/app/services/users/export_data.rb b/app/services/users/export_data.rb index 29caa8dd..80e6c486 100644 --- a/app/services/users/export_data.rb +++ b/app/services/users/export_data.rb @@ -273,7 +273,7 @@ class Users::ExportData file.write(Users::ExportData::Notifications.new(user).call.to_json) file.write(',"points":') - file.write(Users::ExportData::Points.new(user).call.to_json) + Users::ExportData::Points.new(user, file).call file.write(',"visits":') file.write(Users::ExportData::Visits.new(user).call.to_json) diff --git a/app/services/users/export_data/points.rb b/app/services/users/export_data/points.rb index ef98e30c..cf224afa 100644 --- a/app/services/users/export_data/points.rb +++ b/app/services/users/export_data/points.rb @@ -1,12 +1,75 @@ 
# frozen_string_literal: true class Users::ExportData::Points - def initialize(user) + BATCH_SIZE = 10_000 + PROGRESS_LOG_INTERVAL = 50_000 + + def initialize(user, output_file = nil) @user = user + @output_file = output_file end + # For backward compatibility: returns array when no output_file provided + # For streaming mode: writes directly to file when output_file provided def call - points_sql = <<-SQL + if @output_file + stream_to_file + nil # Don't return array in streaming mode + else + # Legacy mode: load all into memory (deprecated for large datasets) + load_all_points + end + end + + private + + attr_reader :user, :output_file + + def stream_to_file + total_count = user.points.count + processed = 0 + first_record = true + + Rails.logger.info "Streaming #{total_count} points to file..." + puts "Starting export of #{total_count} points..." + + output_file.write('[') + + user.points.find_in_batches(batch_size: BATCH_SIZE).with_index do |batch, batch_index| + batch_sql = build_batch_query(batch.map(&:id)) + result = ActiveRecord::Base.connection.exec_query(batch_sql, 'Points Export Batch') + + result.each do |row| + point_hash = build_point_hash(row) + next unless point_hash # Skip points without coordinates + + output_file.write(',') unless first_record + output_file.write(point_hash.to_json) + first_record = false + processed += 1 + + log_progress(processed, total_count) if (processed % PROGRESS_LOG_INTERVAL).zero? + end + + # Show progress after each batch + percentage = (processed.to_f / total_count * 100).round(1) + puts "Exported #{processed}/#{total_count} points (#{percentage}%)" + end + + output_file.write(']') + Rails.logger.info "Completed streaming #{processed} points to file" + puts "Export completed: #{processed} points written" + end + + def load_all_points + result = ActiveRecord::Base.connection.exec_query(build_full_query, 'Points Export', [user.id]) + Rails.logger.info "Processing #{result.count} points for export..." 
+ + result.filter_map { |row| build_point_hash(row) } + end + + def build_full_query + <<-SQL SELECT p.id, p.battery_status, p.battery, p.timestamp, p.altitude, p.velocity, p.accuracy, p.ping, p.tracker_id, p.topic, p.trigger, p.bssid, p.ssid, p.connection, @@ -14,18 +77,14 @@ class Users::ExportData::Points p.city, p.country, p.geodata, p.reverse_geocoded_at, p.course, p.course_accuracy, p.external_track_id, p.created_at, p.updated_at, p.lonlat, p.longitude, p.latitude, - -- Extract coordinates from lonlat if individual fields are missing COALESCE(p.longitude, ST_X(p.lonlat::geometry)) as computed_longitude, COALESCE(p.latitude, ST_Y(p.lonlat::geometry)) as computed_latitude, - -- Import reference i.name as import_name, i.source as import_source, i.created_at as import_created_at, - -- Country info c.name as country_name, c.iso_a2 as country_iso_a2, c.iso_a3 as country_iso_a3, - -- Visit reference v.name as visit_name, v.started_at as visit_started_at, v.ended_at as visit_ended_at @@ -36,85 +95,112 @@ class Users::ExportData::Points WHERE p.user_id = $1 ORDER BY p.id SQL + end - result = ActiveRecord::Base.connection.exec_query(points_sql, 'Points Export', [user.id]) + def build_batch_query(point_ids) + <<-SQL + SELECT + p.id, p.battery_status, p.battery, p.timestamp, p.altitude, p.velocity, p.accuracy, + p.ping, p.tracker_id, p.topic, p.trigger, p.bssid, p.ssid, p.connection, + p.vertical_accuracy, p.mode, p.inrids, p.in_regions, p.raw_data, + p.city, p.country, p.geodata, p.reverse_geocoded_at, p.course, + p.course_accuracy, p.external_track_id, p.created_at, p.updated_at, + p.lonlat, p.longitude, p.latitude, + COALESCE(p.longitude, ST_X(p.lonlat::geometry)) as computed_longitude, + COALESCE(p.latitude, ST_Y(p.lonlat::geometry)) as computed_latitude, + i.name as import_name, + i.source as import_source, + i.created_at as import_created_at, + c.name as country_name, + c.iso_a2 as country_iso_a2, + c.iso_a3 as country_iso_a3, + v.name as visit_name, + v.started_at 
as visit_started_at, + v.ended_at as visit_ended_at + FROM points p + LEFT JOIN imports i ON p.import_id = i.id + LEFT JOIN countries c ON p.country_id = c.id + LEFT JOIN visits v ON p.visit_id = v.id + WHERE p.id IN (#{point_ids.join(',')}) + ORDER BY p.id + SQL + end - Rails.logger.info "Processing #{result.count} points for export..." + def build_point_hash(row) + has_lonlat = row['lonlat'].present? + has_coordinates = row['computed_longitude'].present? && row['computed_latitude'].present? - result.filter_map do |row| - has_lonlat = row['lonlat'].present? - has_coordinates = row['computed_longitude'].present? && row['computed_latitude'].present? + unless has_lonlat || has_coordinates + Rails.logger.debug "Skipping point without coordinates: id=#{row['id'] || 'unknown'}" + return nil + end - unless has_lonlat || has_coordinates - Rails.logger.debug "Skipping point without coordinates: id=#{row['id'] || 'unknown'}" - next - end + point_hash = { + 'battery_status' => row['battery_status'], + 'battery' => row['battery'], + 'timestamp' => row['timestamp'], + 'altitude' => row['altitude'], + 'velocity' => row['velocity'], + 'accuracy' => row['accuracy'], + 'ping' => row['ping'], + 'tracker_id' => row['tracker_id'], + 'topic' => row['topic'], + 'trigger' => row['trigger'], + 'bssid' => row['bssid'], + 'ssid' => row['ssid'], + 'connection' => row['connection'], + 'vertical_accuracy' => row['vertical_accuracy'], + 'mode' => row['mode'], + 'inrids' => row['inrids'] || [], + 'in_regions' => row['in_regions'] || [], + 'raw_data' => row['raw_data'], + 'city' => row['city'], + 'country' => row['country'], + 'geodata' => row['geodata'], + 'reverse_geocoded_at' => row['reverse_geocoded_at'], + 'course' => row['course'], + 'course_accuracy' => row['course_accuracy'], + 'external_track_id' => row['external_track_id'], + 'created_at' => row['created_at'], + 'updated_at' => row['updated_at'] + } - point_hash = { - 'battery_status' => row['battery_status'], - 'battery' => 
row['battery'], - 'timestamp' => row['timestamp'], - 'altitude' => row['altitude'], - 'velocity' => row['velocity'], - 'accuracy' => row['accuracy'], - 'ping' => row['ping'], - 'tracker_id' => row['tracker_id'], - 'topic' => row['topic'], - 'trigger' => row['trigger'], - 'bssid' => row['bssid'], - 'ssid' => row['ssid'], - 'connection' => row['connection'], - 'vertical_accuracy' => row['vertical_accuracy'], - 'mode' => row['mode'], - 'inrids' => row['inrids'] || [], - 'in_regions' => row['in_regions'] || [], - 'raw_data' => row['raw_data'], - 'city' => row['city'], - 'country' => row['country'], - 'geodata' => row['geodata'], - 'reverse_geocoded_at' => row['reverse_geocoded_at'], - 'course' => row['course'], - 'course_accuracy' => row['course_accuracy'], - 'external_track_id' => row['external_track_id'], - 'created_at' => row['created_at'], - 'updated_at' => row['updated_at'] + populate_coordinate_fields(point_hash, row) + add_relationship_references(point_hash, row) + + point_hash + end + + def add_relationship_references(point_hash, row) + if row['import_name'] + point_hash['import_reference'] = { + 'name' => row['import_name'], + 'source' => row['import_source'], + 'created_at' => row['import_created_at'] } + end - # Ensure all coordinate fields are populated - populate_coordinate_fields(point_hash, row) + if row['country_name'] + point_hash['country_info'] = { + 'name' => row['country_name'], + 'iso_a2' => row['country_iso_a2'], + 'iso_a3' => row['country_iso_a3'] + } + end - # Add relationship references only if they exist - if row['import_name'] - point_hash['import_reference'] = { - 'name' => row['import_name'], - 'source' => row['import_source'], - 'created_at' => row['import_created_at'] - } - end - - if row['country_name'] - point_hash['country_info'] = { - 'name' => row['country_name'], - 'iso_a2' => row['country_iso_a2'], - 'iso_a3' => row['country_iso_a3'] - } - end - - if row['visit_name'] - point_hash['visit_reference'] = { - 'name' => 
row['visit_name'], - 'started_at' => row['visit_started_at'], - 'ended_at' => row['visit_ended_at'] - } - end - - point_hash + if row['visit_name'] + point_hash['visit_reference'] = { + 'name' => row['visit_name'], + 'started_at' => row['visit_started_at'], + 'ended_at' => row['visit_ended_at'] + } end end - private - - attr_reader :user + def log_progress(processed, total) + percentage = (processed.to_f / total * 100).round(1) + Rails.logger.info "Points export progress: #{processed}/#{total} (#{percentage}%)" + end def populate_coordinate_fields(point_hash, row) longitude = row['computed_longitude'] diff --git a/spec/services/users/export_data/points_spec.rb b/spec/services/users/export_data/points_spec.rb index b2fa0a52..3f9ead9a 100644 --- a/spec/services/users/export_data/points_spec.rb +++ b/spec/services/users/export_data/points_spec.rb @@ -264,5 +264,35 @@ RSpec.describe Users::ExportData::Points, type: :service do expect(point_data).to be_nil end end + + context 'streaming mode' do + let!(:points) { create_list(:point, 25, user: user) } + let(:output) { StringIO.new } + let(:streaming_service) { described_class.new(user, output) } + + it 'writes JSON array directly to file without loading all into memory' do + streaming_service.call + output.rewind + json_output = output.read + + expect(json_output).to start_with('[') + expect(json_output).to end_with(']') + + parsed = JSON.parse(json_output) + expect(parsed).to be_an(Array) + expect(parsed.size).to eq(25) + end + + it 'returns nil in streaming mode instead of array' do + expect(streaming_service.call).to be_nil + end + + it 'logs progress for large datasets' do + expect(Rails.logger).to receive(:info).with(/Streaming \d+ points to file.../) + expect(Rails.logger).to receive(:info).with(/Completed streaming \d+ points to file/) + + streaming_service.call + end + end end end