Update track generation

Commit d8033a1e27 (parent 4044e77fcd)

17 changed files with 8,103 additions and 29 deletions
Gemfile (7 changes)

@@ -52,6 +52,7 @@ gem 'jwt'

 group :development, :test do
   gem 'brakeman', require: false
+  gem 'bullet'
   gem 'bundler-audit', require: false
   gem 'debug', platforms: %i[mri mingw x64_mingw]
   gem 'dotenv-rails'
@@ -78,3 +79,9 @@ group :development do
   gem 'foreman'
   gem 'rubocop-rails', require: false
 end
+
+# group :production do
+#   gem 'uglifier'
+# end
+
+# gem 'sassc-rails'
Gemfile.lock

@@ -113,6 +113,9 @@ GEM
    brakeman (7.0.2)
      racc
    builder (3.3.0)
+   bullet (8.0.8)
+     activesupport (>= 3.0.0)
+     uniform_notifier (~> 1.11)
    bundler-audit (0.9.2)
      bundler (>= 1.2.0, < 3)
      thor (~> 1.0)
@@ -486,6 +489,7 @@ GEM
    unicode-display_width (3.1.4)
      unicode-emoji (~> 4.0, >= 4.0.4)
    unicode-emoji (4.0.4)
+   uniform_notifier (1.17.0)
    uri (1.0.3)
    useragent (0.16.11)
    warden (1.2.9)
@@ -519,6 +523,7 @@ DEPENDENCIES
  aws-sdk-s3 (~> 1.177.0)
  bootsnap
  brakeman
+ bullet
  bundler-audit
  capybara
  chartkick
File diff suppressed because one or more lines are too long
@@ -50,16 +50,43 @@ module Distanceable

     return 0 if points.length < 2

-    total_meters = points.each_cons(2).sum do |point1, point2|
-      connection.select_value(
-        'SELECT ST_Distance(ST_GeomFromEWKT($1)::geography, ST_GeomFromEWKT($2)::geography)',
-        nil,
-        [point1.lonlat, point2.lonlat]
-      )
-    end
+    # OPTIMIZED: Single SQL query instead of N individual queries
+    total_meters = calculate_batch_distances(points).sum

     total_meters.to_f / ::DISTANCE_UNITS[unit.to_sym]
   end

+  # Optimized batch distance calculation using single SQL query
+  def calculate_batch_distances(points)
+    return [] if points.length < 2
+
+    point_pairs = points.each_cons(2).to_a
+    return [] if point_pairs.empty?
+
+    # Create a VALUES clause with all point pairs
+    values_clause = point_pairs.map.with_index do |(p1, p2), index|
+      "(#{index}, ST_GeomFromEWKT('#{p1.lonlat}')::geography, ST_GeomFromEWKT('#{p2.lonlat}')::geography)"
+    end.join(', ')
+
+    # Single query to calculate all distances
+    results = connection.execute(<<-SQL.squish)
+      WITH point_pairs AS (
+        SELECT
+          pair_id,
+          point1,
+          point2
+        FROM (VALUES #{values_clause}) AS t(pair_id, point1, point2)
+      )
+      SELECT
+        pair_id,
+        ST_Distance(point1, point2) as distance_meters
+      FROM point_pairs
+      ORDER BY pair_id
+    SQL
+
+    # Return array of distances in meters
+    results.map { |row| row['distance_meters'].to_f }
+  end
 end

 def distance_to(other_point, unit = :km)
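For orientation, a minimal usage sketch of the batched helper above (hedged: the call site and the `points` relation are illustrative, and `calculate_batch_distances` is assumed to be reachable through the model that includes this concern, e.g. `Point`):

```ruby
# Illustrative only: an ordered slice of a user's points.
points = user.tracked_points.order(:timestamp).limit(1_000).to_a

# One round-trip returns every consecutive-pair distance in meters,
# replacing the previous N-1 individual ST_Distance queries.
distances = Point.calculate_batch_distances(points)
total_km  = distances.sum / 1_000.0
```

One trade-off worth noting: unlike the removed per-pair version, the `VALUES` clause interpolates the `lonlat` geometries into the SQL string rather than binding them as parameters, which is workable for trusted, server-generated geometry values but gives up the protection of bind parameters.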
@@ -25,6 +25,112 @@ class Track < ApplicationRecord
       .first
   end

+  # Optimized SQL segmentation using PostgreSQL window functions
+  def self.segment_points_in_sql(user_id, start_timestamp, end_timestamp, time_threshold_minutes, distance_threshold_meters, untracked_only: false)
+    time_threshold_seconds = time_threshold_minutes * 60
+
+    where_clause = if untracked_only
+      "WHERE user_id = $1 AND timestamp BETWEEN $2 AND $3 AND track_id IS NULL"
+    else
+      "WHERE user_id = $1 AND timestamp BETWEEN $2 AND $3"
+    end
+
+    sql = <<~SQL
+      WITH points_with_gaps AS (
+        SELECT
+          id,
+          timestamp,
+          lonlat,
+          LAG(lonlat) OVER (ORDER BY timestamp) as prev_lonlat,
+          LAG(timestamp) OVER (ORDER BY timestamp) as prev_timestamp,
+          ST_Distance(
+            lonlat::geography,
+            LAG(lonlat) OVER (ORDER BY timestamp)::geography
+          ) as distance_meters,
+          (timestamp - LAG(timestamp) OVER (ORDER BY timestamp)) as time_diff_seconds
+        FROM points
+        #{where_clause}
+        ORDER BY timestamp
+      ),
+      segment_breaks AS (
+        SELECT *,
+          CASE
+            WHEN prev_lonlat IS NULL THEN 1
+            WHEN time_diff_seconds > $4 THEN 1
+            WHEN distance_meters > $5 THEN 1
+            ELSE 0
+          END as is_break
+        FROM points_with_gaps
+      ),
+      segments AS (
+        SELECT *,
+          SUM(is_break) OVER (ORDER BY timestamp ROWS UNBOUNDED PRECEDING) as segment_id
+        FROM segment_breaks
+      )
+      SELECT
+        segment_id,
+        array_agg(id ORDER BY timestamp) as point_ids,
+        count(*) as point_count,
+        min(timestamp) as start_timestamp,
+        max(timestamp) as end_timestamp,
+        sum(COALESCE(distance_meters, 0)) as total_distance_meters
+      FROM segments
+      GROUP BY segment_id
+      HAVING count(*) >= 2
+      ORDER BY segment_id
+    SQL
+
+    results = Point.connection.exec_query(
+      sql,
+      'segment_points_in_sql',
+      [user_id, start_timestamp, end_timestamp, time_threshold_seconds, distance_threshold_meters]
+    )
+
+    # Convert results to segment data
+    segments_data = []
+    results.each do |row|
+      segments_data << {
+        segment_id: row['segment_id'].to_i,
+        point_ids: parse_postgres_array(row['point_ids']),
+        point_count: row['point_count'].to_i,
+        start_timestamp: row['start_timestamp'].to_i,
+        end_timestamp: row['end_timestamp'].to_i,
+        total_distance_meters: row['total_distance_meters'].to_f
+      }
+    end
+
+    segments_data
+  end
+
+  # Get actual Point objects for each segment with pre-calculated distances
+  def self.get_segments_with_points(user_id, start_timestamp, end_timestamp, time_threshold_minutes, distance_threshold_meters, untracked_only: false)
+    segments_data = segment_points_in_sql(user_id, start_timestamp, end_timestamp, time_threshold_minutes, distance_threshold_meters, untracked_only: untracked_only)
+
+    # Get all point IDs we need
+    all_point_ids = segments_data.flat_map { |seg| seg[:point_ids] }
+
+    # Single query to get all points
+    points_by_id = Point.where(id: all_point_ids).index_by(&:id)
+
+    # Build segments with actual Point objects
+    segments_data.map do |seg_data|
+      {
+        points: seg_data[:point_ids].map { |id| points_by_id[id] }.compact,
+        pre_calculated_distance: seg_data[:total_distance_meters],
+        start_timestamp: seg_data[:start_timestamp],
+        end_timestamp: seg_data[:end_timestamp]
+      }
+    end
+  end
+
+  # Parse PostgreSQL array format like "{1,2,3}" into Ruby array
+  def self.parse_postgres_array(pg_array_string)
+    return [] if pg_array_string.nil? || pg_array_string.empty?
+
+    # Remove curly braces and split by comma
+    pg_array_string.gsub(/[{}]/, '').split(',').map(&:to_i)
+  end
+
   private

   def broadcast_track_created
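The `CASE`/`SUM ... OVER` pair above implements the classic "gaps and islands" pattern: every row that opens a new segment contributes 1 to a running sum, so all points up to the next break share one `segment_id`. A hedged invocation sketch of the new class method (the user, range, and thresholds are illustrative values, not defaults from the codebase):

```ruby
# Illustrative call; thresholds are made-up example values.
segments = Track.get_segments_with_points(
  user.id,
  30.days.ago.to_i,     # start_timestamp (Unix seconds)
  Time.current.to_i,    # end_timestamp
  30,                   # minutes of silence that split a track
  500,                  # meters of jump that split a track
  untracked_only: true  # incremental mode: only points with track_id IS NULL
)

segments.first[:points]                   # => ordered Point records
segments.first[:pre_calculated_distance]  # => meters, summed in SQL
```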
@@ -40,21 +40,32 @@ class Tracks::Generator
   def call
     clean_existing_tracks if should_clean_tracks?

-    points = load_points
-    Rails.logger.debug "Generator: loaded #{points.size} points for user #{user.id} in #{mode} mode"
-    return 0 if points.empty?
+    # Get timestamp range for SQL query
+    start_timestamp, end_timestamp = get_timestamp_range
+
+    Rails.logger.debug "Generator: querying points for user #{user.id} in #{mode} mode"
+
+    # Use optimized SQL segmentation with pre-calculated distances
+    untracked_only = (mode == :incremental)
+    segments = Track.get_segments_with_points(
+      user.id,
+      start_timestamp,
+      end_timestamp,
+      time_threshold_minutes,
+      distance_threshold_meters,
+      untracked_only: untracked_only
+    )

-    segments = split_points_into_segments(points)
-    Rails.logger.debug "Generator: created #{segments.size} segments"
+    Rails.logger.debug "Generator: created #{segments.size} segments via SQL"

     tracks_created = 0

-    segments.each do |segment|
-      track = create_track_from_segment(segment)
+    segments.each do |segment_data|
+      track = create_track_from_segment_optimized(segment_data)
       tracks_created += 1 if track
     end

-    Rails.logger.info "Generated #{tracks_created} tracks for user #{user.id} in #{mode} mode"
+    Rails.logger.info "Generated #{tracks_created} tracks for user #{user.id} in optimized #{mode} mode"
     tracks_created
   end
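For reference, the three generator modes resolve to timestamp ranges via `get_timestamp_range` (added further down); the calls below are illustrative, but the keyword interface matches the constructor used elsewhere in this commit:

```ruby
# Illustrative invocations of the rewritten generator:
Tracks::Generator.new(user, mode: :bulk).call          # full history; cleans existing tracks first
Tracks::Generator.new(user, mode: :daily,
                      start_at: Date.yesterday).call   # one-day window
Tracks::Generator.new(user, mode: :incremental).call   # only points with track_id IS NULL
```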
@@ -99,6 +110,18 @@ class Tracks::Generator
     user.tracked_points.where(timestamp: day_range).order(:timestamp)
   end

+  def create_track_from_segment_optimized(segment_data)
+    points = segment_data[:points]
+    pre_calculated_distance = segment_data[:pre_calculated_distance]
+
+    Rails.logger.debug "Generator: processing segment with #{points.size} points"
+    return unless points.size >= 2
+
+    track = create_track_from_points_optimized(points, pre_calculated_distance)
+    Rails.logger.debug "Generator: created track #{track&.id}"
+    track
+  end
+
   def create_track_from_segment(segment)
     Rails.logger.debug "Generator: processing segment with #{segment.size} points"
     return unless segment.size >= 2
@@ -171,6 +194,31 @@ class Tracks::Generator
     scope.destroy_all
   end

+  # Get timestamp range for SQL query based on mode
+  def get_timestamp_range
+    case mode
+    when :bulk
+      if start_at && end_at
+        [start_at.to_i, end_at.to_i]
+      else
+        # Get full range for user
+        first_point = user.tracked_points.order(:timestamp).first
+        last_point = user.tracked_points.order(:timestamp).last
+        [first_point&.timestamp || 0, last_point&.timestamp || Time.current.to_i]
+      end
+    when :daily
+      day = start_at&.to_date || Date.current
+      [day.beginning_of_day.to_i, day.end_of_day.to_i]
+    when :incremental
+      # For incremental, we need all untracked points up to end_at
+      first_point = user.tracked_points.where(track_id: nil).order(:timestamp).first
+      end_timestamp = end_at ? end_at.to_i : Time.current.to_i
+      [first_point&.timestamp || 0, end_timestamp]
+    else
+      raise ArgumentError, "Unknown mode: #{mode}"
+    end
+  end
+
   # Threshold methods from safe_settings
   def distance_threshold_meters
     @distance_threshold_meters ||= user.safe_settings.meters_between_routes.to_i
@@ -86,11 +86,15 @@ module Tracks::Segmentation
   end

   def calculate_km_distance_between_points(point1, point2)
-    lat1, lon1 = point_coordinates(point1)
-    lat2, lon2 = point_coordinates(point2)
-
-    # Use Geocoder to match behavior with frontend (same library used elsewhere in app)
-    Geocoder::Calculations.distance_between([lat1, lon1], [lat2, lon2], units: :km)
+    # OPTIMIZED: Use PostGIS for more accurate distance calculation (same as track distance)
+    # This maintains consistency with track distance calculations
+    distance_meters = Point.connection.select_value(
+      'SELECT ST_Distance(ST_GeomFromEWKT($1)::geography, ST_GeomFromEWKT($2)::geography)',
+      nil,
+      [point1.lonlat, point2.lonlat]
+    )
+
+    distance_meters.to_f / 1000.0 # Convert meters to kilometers
   end

   def should_finalize_segment?(segment_points, grace_period_minutes = 5)
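A note on the swap: Geocoder's `distance_between` computes a haversine (spherical-earth) distance, while `ST_Distance` on `geography` values uses the WGS84 spheroid, so individual pair distances shift slightly, typically by well under one percent. A hedged illustration (coordinates real, distances approximate):

```ruby
# Illustrative comparison only; expect small spherical-vs-spheroidal differences.
berlin = [52.5200, 13.4050]
paris  = [48.8566, 2.3522]

Geocoder::Calculations.distance_between(berlin, paris, units: :km)
# ~878 km on a sphere; the PostGIS path above returns the spheroidal
# distance, matching how track distances are computed elsewhere.
```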
@@ -82,6 +82,38 @@ module Tracks::TrackBuilder
     end
   end

+  # Optimized version that uses pre-calculated distance from SQL
+  def create_track_from_points_optimized(points, pre_calculated_distance)
+    return nil if points.size < 2
+
+    track = Track.new(
+      user_id: user.id,
+      start_at: Time.zone.at(points.first.timestamp),
+      end_at: Time.zone.at(points.last.timestamp),
+      original_path: build_path(points)
+    )
+
+    # Use pre-calculated distance from SQL instead of recalculating
+    track.distance = pre_calculated_distance.round
+    track.duration = calculate_duration(points)
+    track.avg_speed = calculate_average_speed(track.distance, track.duration)
+
+    # Calculate elevation statistics (no DB queries needed)
+    elevation_stats = calculate_elevation_stats(points)
+    track.elevation_gain = elevation_stats[:gain]
+    track.elevation_loss = elevation_stats[:loss]
+    track.elevation_max = elevation_stats[:max]
+    track.elevation_min = elevation_stats[:min]
+
+    if track.save
+      Point.where(id: points.map(&:id)).update_all(track_id: track.id)
+      track
+    else
+      Rails.logger.error "Failed to create track for user #{user.id}: #{track.errors.full_messages.join(', ')}"
+      nil
+    end
+  end
+
   def build_path(points)
     Tracks::BuildPath.new(points).call
   end
@@ -21,7 +21,9 @@
   <% end %>
 </div>

-<%= link_to 'Update stats', update_all_stats_path, data: { turbo_method: :put }, class: 'btn btn-primary mt-5' %>
+<% if current_user.active? %>
+  <%= link_to 'Update stats', update_all_stats_path, data: { turbo_method: :put }, class: 'btn btn-primary mt-5' %>
+<% end %>

 <div class="mt-6 grid grid-cols-1 sm:grid-cols-1 md:grid-cols-2 lg:grid-cols-2 gap-6">
   <% @stats.each do |year, stats| %>
@@ -33,7 +35,7 @@
     <%= link_to '[Map]', map_url(year_timespan(year)), class: 'underline hover:no-underline' %>
   </div>
   <div class="gap-2">
-    <span class='text-xs text-gray-500'>Last updated: <%= human_date(stats.first.updated_at) %></span>
+    <span class='text-xs text-gray-500'>Last update: <%= human_date(stats.first.updated_at) %></span>
     <%= link_to '🔄', update_year_month_stats_path(year, :all), data: { turbo_method: :put }, class: 'text-sm text-gray-500 hover:underline' %>
   </div>
 </h2>
@@ -3,6 +3,17 @@
 require 'active_support/core_ext/integer/time'

 Rails.application.configure do
+  unless ENV['SELF_HOSTED'] == 'true'
+    config.after_initialize do
+      Bullet.enable = true
+      Bullet.alert = true
+      Bullet.bullet_logger = true
+      Bullet.console = true
+      Bullet.rails_logger = true
+      Bullet.add_footer = true
+    end
+  end
+
   # Settings specified here will take precedence over those in config/application.rb.

   # In the development environment your application's code is reloaded any time
@@ -8,6 +8,12 @@ require 'active_support/core_ext/integer/time'
 # and recreated between test runs. Don't rely on the data there!

 Rails.application.configure do
+  config.after_initialize do
+    Bullet.enable = true
+    Bullet.bullet_logger = true
+    Bullet.raise = true # raise an error if n+1 query occurs
+  end
+
   # Settings specified here will take precedence over those in config/application.rb.

   # While tests run files are not watched, reloading is not necessary.
lib/optimized_tracks_v1.rb (new file, 145 lines)

# frozen_string_literal: true

# Optimization V1: LAG-based distance calculation with Ruby segmentation
# This keeps the existing Ruby segmentation logic but uses PostgreSQL LAG
# for batch distance calculations instead of individual queries

module OptimizedTracksV1
  extend ActiveSupport::Concern

  module ClassMethods
    # V1: Use LAG to get all consecutive distances in a single query
    def calculate_all_consecutive_distances(points)
      return [] if points.length < 2

      point_ids = points.map(&:id).join(',')

      results = connection.execute(<<-SQL.squish)
        WITH points_with_previous AS (
          SELECT
            id,
            timestamp,
            lonlat,
            LAG(lonlat) OVER (ORDER BY timestamp) as prev_lonlat,
            LAG(timestamp) OVER (ORDER BY timestamp) as prev_timestamp,
            LAG(id) OVER (ORDER BY timestamp) as prev_id
          FROM points
          WHERE id IN (#{point_ids})
        )
        SELECT
          id,
          prev_id,
          timestamp,
          prev_timestamp,
          ST_Distance(lonlat::geography, prev_lonlat::geography) as distance_meters,
          (timestamp - prev_timestamp) as time_diff_seconds
        FROM points_with_previous
        WHERE prev_lonlat IS NOT NULL
        ORDER BY timestamp
      SQL

      # Return hash mapping point_id => {distance_to_previous, time_diff}
      distance_map = {}
      results.each do |row|
        distance_map[row['id'].to_i] = {
          distance_meters: row['distance_meters'].to_f,
          time_diff_seconds: row['time_diff_seconds'].to_i,
          prev_id: row['prev_id'].to_i
        }
      end

      distance_map
    end

    # V1: Optimized total distance using LAG (already exists in distanceable.rb)
    def total_distance_lag(points, unit = :m)
      unless ::DISTANCE_UNITS.key?(unit.to_sym)
        raise ArgumentError, "Invalid unit. Supported units are: #{::DISTANCE_UNITS.keys.join(', ')}"
      end

      return 0 if points.length < 2

      point_ids = points.map(&:id).join(',')

      distance_in_meters = connection.select_value(<<-SQL.squish)
        WITH points_with_previous AS (
          SELECT
            lonlat,
            LAG(lonlat) OVER (ORDER BY timestamp) as prev_lonlat
          FROM points
          WHERE id IN (#{point_ids})
        )
        SELECT COALESCE(
          SUM(ST_Distance(lonlat::geography, prev_lonlat::geography)),
          0
        )
        FROM points_with_previous
        WHERE prev_lonlat IS NOT NULL
      SQL

      distance_in_meters.to_f / ::DISTANCE_UNITS[unit.to_sym]
    end
  end
end

# Optimized segmentation module using pre-calculated distances
module OptimizedSegmentationV1
  extend ActiveSupport::Concern

  private

  def split_points_into_segments_v1(points)
    return [] if points.empty?

    # V1: Pre-calculate all distances and time diffs in one query
    if points.size > 1
      distance_data = Point.calculate_all_consecutive_distances(points)
    else
      distance_data = {}
    end

    segments = []
    current_segment = []

    points.each do |point|
      if current_segment.empty?
        # First point always starts a segment
        current_segment = [point]
      elsif should_start_new_segment_v1?(point, current_segment.last, distance_data)
        # Finalize current segment if it has enough points
        segments << current_segment if current_segment.size >= 2
        current_segment = [point]
      else
        current_segment << point
      end
    end

    # Don't forget the last segment
    segments << current_segment if current_segment.size >= 2

    segments
  end

  def should_start_new_segment_v1?(current_point, previous_point, distance_data)
    return false if previous_point.nil?

    # Get pre-calculated data for this point
    point_data = distance_data[current_point.id]
    return false unless point_data

    # Check time threshold
    time_threshold_seconds = time_threshold_minutes.to_i * 60
    return true if point_data[:time_diff_seconds] > time_threshold_seconds

    # Check distance threshold
    distance_meters = point_data[:distance_meters]
    return true if distance_meters > distance_threshold_meters

    false
  end
end

# Add methods to Point class
class Point
  extend OptimizedTracksV1::ClassMethods
end
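The file is loaded explicitly by the benchmark script further down (`require_relative 'optimized_tracks_v1'`); a hedged sketch of exercising it in isolation, say from a Rails console (the `points` relation is illustrative):

```ruby
# Illustrative: load the experiment and inspect its two entry points.
require Rails.root.join('lib/optimized_tracks_v1').to_s

points = user.tracked_points.order(:timestamp).limit(500).to_a

Point.calculate_all_consecutive_distances(points)
# => { point_id => { distance_meters:, time_diff_seconds:, prev_id: } }

Point.total_distance_lag(points, :km)
# => total distance, computed in a single COALESCE(SUM(...)) query
```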
lib/optimized_tracks_v2.rb (new file, 291 lines)

# frozen_string_literal: true

# Optimization V2: Full SQL segmentation using PostgreSQL window functions
# This does both distance calculation AND segmentation entirely in SQL

module OptimizedTracksV2
  extend ActiveSupport::Concern

  module ClassMethods
    # V2: Complete segmentation in SQL using LAG and window functions
    def segment_points_in_sql(user_id, start_timestamp, end_timestamp, time_threshold_minutes, distance_threshold_meters)
      time_threshold_seconds = time_threshold_minutes * 60

      sql = <<~SQL
        WITH points_with_gaps AS (
          SELECT
            id,
            timestamp,
            lonlat,
            LAG(lonlat) OVER (ORDER BY timestamp) as prev_lonlat,
            LAG(timestamp) OVER (ORDER BY timestamp) as prev_timestamp,
            ST_Distance(
              lonlat::geography,
              LAG(lonlat) OVER (ORDER BY timestamp)::geography
            ) as distance_meters,
            (timestamp - LAG(timestamp) OVER (ORDER BY timestamp)) as time_diff_seconds
          FROM points
          WHERE user_id = $1
            AND timestamp BETWEEN $2 AND $3
          ORDER BY timestamp
        ),
        segment_breaks AS (
          SELECT *,
            CASE
              WHEN prev_lonlat IS NULL THEN 1
              WHEN time_diff_seconds > $4 THEN 1
              WHEN distance_meters > $5 THEN 1
              ELSE 0
            END as is_break
          FROM points_with_gaps
        ),
        segments AS (
          SELECT *,
            SUM(is_break) OVER (ORDER BY timestamp ROWS UNBOUNDED PRECEDING) as segment_id
          FROM segment_breaks
        )
        SELECT
          segment_id,
          array_agg(id ORDER BY timestamp) as point_ids,
          count(*) as point_count,
          min(timestamp) as start_timestamp,
          max(timestamp) as end_timestamp,
          sum(COALESCE(distance_meters, 0)) as total_distance_meters
        FROM segments
        GROUP BY segment_id
        HAVING count(*) >= 2
        ORDER BY segment_id
      SQL

      results = connection.exec_query(
        sql,
        'segment_points_in_sql',
        [user_id, start_timestamp, end_timestamp, time_threshold_seconds, distance_threshold_meters]
      )

      # Convert results to segment data
      segments_data = []
      results.each do |row|
        segments_data << {
          segment_id: row['segment_id'].to_i,
          point_ids: parse_postgres_array(row['point_ids']),
          point_count: row['point_count'].to_i,
          start_timestamp: row['start_timestamp'].to_i,
          end_timestamp: row['end_timestamp'].to_i,
          total_distance_meters: row['total_distance_meters'].to_f
        }
      end

      segments_data
    end

    # V2: Get actual Point objects for each segment
    def get_segments_with_points(user_id, start_timestamp, end_timestamp, time_threshold_minutes, distance_threshold_meters)
      segments_data = segment_points_in_sql(user_id, start_timestamp, end_timestamp, time_threshold_minutes, distance_threshold_meters)

      # Get all point IDs we need
      all_point_ids = segments_data.flat_map { |seg| seg[:point_ids] }

      # Single query to get all points
      points_by_id = Point.where(id: all_point_ids).index_by(&:id)

      # Build segments with actual Point objects
      segments_data.map do |seg_data|
        {
          points: seg_data[:point_ids].map { |id| points_by_id[id] }.compact,
          pre_calculated_distance: seg_data[:total_distance_meters],
          start_timestamp: seg_data[:start_timestamp],
          end_timestamp: seg_data[:end_timestamp]
        }
      end
    end

    private

    # Parse PostgreSQL array format like "{1,2,3}" into Ruby array
    def parse_postgres_array(pg_array_string)
      return [] if pg_array_string.nil? || pg_array_string.empty?

      # Remove curly braces and split by comma
      pg_array_string.gsub(/[{}]/, '').split(',').map(&:to_i)
    end
  end
end

# Optimized generator using V2 SQL segmentation
class OptimizedTracksGeneratorV2
  attr_reader :user, :start_at, :end_at, :mode

  def initialize(user, start_at: nil, end_at: nil, mode: :bulk)
    @user = user
    @start_at = start_at
    @end_at = end_at
    @mode = mode.to_sym
  end

  def call
    clean_existing_tracks if should_clean_tracks?

    # Get timestamp range for SQL query
    start_timestamp, end_timestamp = get_timestamp_range

    Rails.logger.debug "OptimizedGeneratorV2: querying points for user #{user.id} in #{mode} mode"

    # V2: Get segments directly from SQL with pre-calculated distances
    segments = Point.get_segments_with_points(
      user.id,
      start_timestamp,
      end_timestamp,
      time_threshold_minutes,
      distance_threshold_meters
    )

    Rails.logger.debug "OptimizedGeneratorV2: created #{segments.size} segments via SQL"

    tracks_created = 0

    segments.each do |segment_data|
      track = create_track_from_segment_v2(segment_data)
      tracks_created += 1 if track
    end

    Rails.logger.info "Generated #{tracks_created} tracks for user #{user.id} in optimized V2 #{mode} mode"
    tracks_created
  end

  private

  def create_track_from_segment_v2(segment_data)
    points = segment_data[:points]
    pre_calculated_distance = segment_data[:pre_calculated_distance]

    Rails.logger.debug "OptimizedGeneratorV2: processing segment with #{points.size} points"
    return unless points.size >= 2

    track = Track.new(
      user_id: user.id,
      start_at: Time.zone.at(points.first.timestamp),
      end_at: Time.zone.at(points.last.timestamp),
      original_path: build_path(points)
    )

    # V2: Use pre-calculated distance from SQL
    track.distance = pre_calculated_distance.round
    track.duration = calculate_duration(points)
    track.avg_speed = calculate_average_speed(track.distance, track.duration)

    # Calculate elevation statistics (no DB queries needed)
    elevation_stats = calculate_elevation_stats(points)
    track.elevation_gain = elevation_stats[:gain]
    track.elevation_loss = elevation_stats[:loss]
    track.elevation_max = elevation_stats[:max]
    track.elevation_min = elevation_stats[:min]

    if track.save
      Point.where(id: points.map(&:id)).update_all(track_id: track.id)
      track
    else
      Rails.logger.error "Failed to create track for user #{user.id}: #{track.errors.full_messages.join(', ')}"
      nil
    end
  end

  def get_timestamp_range
    case mode
    when :bulk
      if start_at && end_at
        [start_at.to_i, end_at.to_i]
      else
        # Get full range for user
        first_point = user.tracked_points.order(:timestamp).first
        last_point = user.tracked_points.order(:timestamp).last
        [first_point&.timestamp || 0, last_point&.timestamp || Time.current.to_i]
      end
    when :daily
      day = start_at&.to_date || Date.current
      [day.beginning_of_day.to_i, day.end_of_day.to_i]
    when :incremental
      # For incremental, we need all untracked points up to end_at
      first_point = user.tracked_points.where(track_id: nil).order(:timestamp).first
      end_timestamp = end_at ? end_at.to_i : Time.current.to_i
      [first_point&.timestamp || 0, end_timestamp]
    end
  end

  def should_clean_tracks?
    case mode
    when :bulk, :daily then true
    else false
    end
  end

  def clean_existing_tracks
    case mode
    when :bulk
      scope = user.tracks
      if start_at && end_at
        scope = scope.where(start_at: start_at..end_at)
      end
      scope.destroy_all
    when :daily
      day = start_at&.to_date || Date.current
      range = day.beginning_of_day..day.end_of_day
      user.tracks.where(start_at: range).destroy_all
    end
  end

  # Helper methods (same as original)
  def build_path(points)
    Tracks::BuildPath.new(points).call
  end

  def calculate_duration(points)
    points.last.timestamp - points.first.timestamp
  end

  def calculate_average_speed(distance_in_meters, duration_seconds)
    return 0.0 if duration_seconds <= 0 || distance_in_meters <= 0

    speed_mps = distance_in_meters.to_f / duration_seconds
    (speed_mps * 3.6).round(2) # m/s to km/h
  end

  def calculate_elevation_stats(points)
    altitudes = points.map(&:altitude).compact
    return { gain: 0, loss: 0, max: 0, min: 0 } if altitudes.empty?

    elevation_gain = 0
    elevation_loss = 0
    previous_altitude = altitudes.first

    altitudes[1..].each do |altitude|
      diff = altitude - previous_altitude
      if diff > 0
        elevation_gain += diff
      else
        elevation_loss += diff.abs
      end
      previous_altitude = altitude
    end

    {
      gain: elevation_gain.round,
      loss: elevation_loss.round,
      max: altitudes.max,
      min: altitudes.min
    }
  end

  def distance_threshold_meters
    @distance_threshold_meters ||= user.safe_settings.meters_between_routes.to_i
  end

  def time_threshold_minutes
    @time_threshold_minutes ||= user.safe_settings.minutes_between_routes.to_i
  end
end

# Add methods to Point class
class Point
  extend OptimizedTracksV2::ClassMethods
end
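`OptimizedTracksGeneratorV2` mirrors `Tracks::Generator`'s constructor, so swapping it into an experiment is a one-line change; a hedged sketch (dates illustrative):

```ruby
# Illustrative: run the V2 generator over one month in bulk mode.
require Rails.root.join('lib/optimized_tracks_v2').to_s

OptimizedTracksGeneratorV2.new(
  user,
  start_at: Time.zone.parse('2024-01-01'),
  end_at: Time.zone.parse('2024-01-31').end_of_day,
  mode: :bulk
).call # => number of tracks created
```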
lib/results.md (new file, 122 lines)

## Original

Generator: created track 227296
Generated 1437 tracks for user 1 in bulk mode
✅ Generation completed successfully

============================================================
📊 BENCHMARK RESULTS
============================================================
Status: ✅ SUCCESS
Execution Time: 1m 28.5s
Tracks Created: 1437
Timeframe Coverage: 8.0% of user's total data

💾 Memory Usage:
  Start: 210.9MB
  End: 433.2MB
  Memory Increase: +222.3MB

🗄️ Database Performance:
  Total Queries: 115920
  Total Query Time: 50453.1ms
  Average Query Time: 0.44ms
  Slow Queries (>100ms): 63
    1. 983.24ms - SELECT COUNT(*) FROM "points" WHERE "points"."user_id" = $1 AND "points"."timestamp" BETWEEN $2 A...
    2. 2826.02ms - SELECT "points".* FROM "points" WHERE "points"."user_id" = $1 AND "points"."timestamp" BETWEEN $2...
    3. 217.02ms - UPDATE "points" SET "track_id" = $1 WHERE "points"."id" IN ($2, $3, $4, $5, $6, $7, $8, $9, $10, ...

✔️ Post-Generation Validation:
  Points in Timeframe: 111609
  Points with Tracks: 110167
  Points without Tracks: 1442
  Track Records: 1437
  ✅ Data integrity: PASS

🔍 Performance Analysis:
  Speed Rating: 🚀 Excellent (1m 28.5s)
  Memory Rating: 🧡 High (433.2MB peak)
  Recommendation: Consider database optimization or smaller batch sizes

🔮 Extrapolation for Full Dataset:
  Full Dataset Size: 1,403,662 points
  Scaling Factor: 12.6x
  Estimated Full Time: 18m 32.8s
  Estimated Full Memory: 5447.6MB

============================================================
📋 BENCHMARK SUMMARY
============================================================
⏱️ Total Time: 1m 28.5s
📍 Points Processed: 111,609
🛤️ Tracks Created: 1437
🚀 Processing Speed: 1261.4 points/second
📅 Timeframe: 2024-01-01 to 2024-12-31
👤 User: demo@dawarich.app (ID: 1)
✅ Status: COMPLETED

## Iteration 1

Generator: created track 244784
Generated 1435 tracks for user 1 in optimized bulk mode
✅ Generation completed successfully

============================================================
📊 BENCHMARK RESULTS
============================================================
Status: ✅ SUCCESS
Execution Time: 56.4s
Tracks Created: 1435
Points Processed: 111,609
Processing Speed: 1978.3 points/second
Average Points/Track: 77.8
Timeframe Coverage: 8.0% of user's total data

💾 Memory Usage:
  Start: 297.2MB
  End: 407.5MB
  Memory Increase: +110.3MB

🗄️ Database Performance:
  Total Queries: 7178
  Total Query Time: 44521.33ms
  Average Query Time: 6.2ms
  Slow Queries (>100ms): 88
    1. 2338.43ms - WITH points_with_gaps AS (
         SELECT
           id,
           timestamp,
           lonlat,
           LAG(lonlat) OVER (ORDE...
    2. 4156.84ms - SELECT "points".* FROM "points" WHERE "points"."id" IN (2163775, 2163776, 2163777, 2163778, 21637...
    3. 298.62ms - UPDATE "points" SET "track_id" = $1 WHERE "points"."id" IN ($2, $3, $4, $5, $6, $7, $8, $9, $10, ...

✔️ Post-Generation Validation:
  Points in Timeframe: 111609
  Points with Tracks: 110123
  Points without Tracks: 1486
  Track Records: 1435
  ✅ Data integrity: PASS

🔍 Performance Analysis:
  Speed Rating: 🚀 Excellent (56.4s)
  Memory Rating: 🧡 High (407.5MB peak)
  Recommendation: Consider database optimization or smaller batch sizes

🔮 Extrapolation for Full Dataset:
  Full Dataset Size: 1,403,662 points
  Scaling Factor: 12.6x
  Estimated Full Time: 11m 49.5s
  Estimated Full Memory: 5125.0MB

============================================================
📋 BENCHMARK SUMMARY
============================================================
⏱️ Total Time: 56.4s
📍 Points Processed: 111,609
🛤️ Tracks Created: 1435
🚀 Processing Speed: 1978.3 points/second
📅 Timeframe: 2024-01-01 to 2024-12-31
👤 User: demo@dawarich.app (ID: 1)
✅ Status: COMPLETED
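For quick reference, the two runs above compare as follows (derived from the reported numbers): execution time 1m 28.5s to 56.4s (about 1.6x faster), total queries 115,920 to 7,178 (about 94% fewer), memory growth +222.3MB to +110.3MB, and extrapolated full-dataset time 18m 32.8s to 11m 49.5s. Track counts differ slightly (1437 vs 1435, with 1442 vs 1486 points left untracked), consistent with minor boundary-handling differences between the Ruby and SQL segmentation.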
@@ -1,7 +1,6 @@
 # frozen_string_literal: true

 module Timestamps
-
   def self.parse_timestamp(timestamp)
     begin
       # if the timestamp is in ISO 8601 format, try to parse it
lib/tracks_optimization_benchmark.rb (new file, 625 lines)

# frozen_string_literal: true

require_relative 'optimized_tracks_v1'
require_relative 'optimized_tracks_v2'

# Benchmark script to compare three different track generation approaches:
# - Original: Individual distance queries (current implementation)
# - V1: LAG-based distance pre-calculation with Ruby segmentation
# - V2: Full SQL segmentation with PostgreSQL window functions
#
# Usage:
#   rails runner lib/tracks_optimization_benchmark.rb USER_ID START_DATE END_DATE

class TracksOptimizationBenchmark
  attr_reader :user, :start_date, :end_date, :start_timestamp, :end_timestamp

  def initialize(user_id, start_date, end_date)
    @user = User.find(user_id)
    @start_date = Date.parse(start_date)
    @end_date = Date.parse(end_date)
    @start_timestamp = @start_date.beginning_of_day.to_i
    @end_timestamp = @end_date.end_of_day.to_i

    puts "🔬 Track Generation Optimization Benchmark"
    puts "👤 User: #{user.email} (ID: #{user.id})"
    puts "📅 Timeframe: #{start_date} to #{end_date}"

    check_data_availability
  end

  def run_all_benchmarks
    results = {}

    puts "\n" + "=" * 80
    puts "🏃 RUNNING ALL BENCHMARKS"
    puts "=" * 80

    # Test Original approach
    puts "\n1️⃣ Testing ORIGINAL approach..."
    results[:original] = benchmark_original

    # Test V1 approach
    puts "\n2️⃣ Testing V1 (LAG + Ruby) approach..."
    results[:v1] = benchmark_v1

    # Test V2 approach
    puts "\n3️⃣ Testing V2 (Full SQL) approach..."
    results[:v2] = benchmark_v2

    # Compare results
    puts "\n" + "=" * 80
    puts "📊 PERFORMANCE COMPARISON"
    puts "=" * 80
    compare_results(results)

    # Save results to files
    save_results_to_files(results)

    results
  end

  private

  def check_data_availability
    point_count = user.tracked_points.where(timestamp: start_timestamp..end_timestamp).count
    existing_tracks = user.tracks.where(start_at: Time.zone.at(start_timestamp)..Time.zone.at(end_timestamp)).count

    puts "📊 Dataset: #{point_count.to_s.reverse.gsub(/(\d{3})(?=\d)/, '\\1,').reverse} points"
    puts "🛤️ Existing tracks: #{existing_tracks}"

    if point_count == 0
      puts "❌ No points found in timeframe"
      exit 1
    end

    if point_count > 50000
      puts "⚠️ Large dataset detected. This benchmark may take a while..."
    end
  end

  def benchmark_original
    puts "  Using standard Tracks::Generator..."

    # Clean existing tracks
    cleanup_tracks

    # Monitor performance
    memory_start = get_memory_mb
    query_monitor = QueryMonitor.new
    query_monitor.start

    start_time = Time.current

    begin
      generator = Tracks::Generator.new(
        user,
        start_at: Time.zone.at(start_timestamp),
        end_at: Time.zone.at(end_timestamp),
        mode: :bulk
      )
      tracks_created = generator.call
      success = true
    rescue => e
      success = false
      error = e.message
      tracks_created = 0
    end

    end_time = Time.current
    memory_end = get_memory_mb
    query_monitor.stop

    execution_time = end_time - start_time

    result = {
      approach: "Original",
      success: success,
      error: error,
      execution_time: execution_time,
      tracks_created: tracks_created,
      memory_increase: memory_end - memory_start,
      query_count: query_monitor.query_count,
      query_time_ms: query_monitor.total_time_ms
    }

    print_result(result)
    result
  end

  def benchmark_v1
    puts "  Using V1: LAG + Ruby segmentation..."

    # Clean existing tracks
    cleanup_tracks

    # For V1, we need to modify the existing generator to use our optimized methods
    # This is a simplified test - in practice we'd modify the actual generator

    memory_start = get_memory_mb
    query_monitor = QueryMonitor.new
    query_monitor.start

    start_time = Time.current

    begin
      # Load points
      points = user.tracked_points
                   .where(timestamp: start_timestamp..end_timestamp)
                   .order(:timestamp)

      # V1: Use optimized segmentation with pre-calculated distances
      if points.size > 1
        distance_data = Point.calculate_all_consecutive_distances(points)
      else
        distance_data = {}
      end

      # Segment using V1 approach (simplified for benchmark)
      segments = split_points_with_precalculated_distances(points, distance_data)

      tracks_created = 0
      segments.each do |segment|
        if segment.size >= 2
          track = create_track_v1(segment)
          tracks_created += 1 if track
        end
      end

      success = true
    rescue => e
      success = false
      error = e.message
      tracks_created = 0
    end

    end_time = Time.current
    memory_end = get_memory_mb
    query_monitor.stop

    execution_time = end_time - start_time

    result = {
      approach: "V1 (LAG + Ruby)",
      success: success,
      error: error,
      execution_time: execution_time,
      tracks_created: tracks_created,
      memory_increase: memory_end - memory_start,
      query_count: query_monitor.query_count,
      query_time_ms: query_monitor.total_time_ms
    }

    print_result(result)
    result
  end

  def benchmark_v2
    puts "  Using V2: Full SQL segmentation..."

    cleanup_tracks

    memory_start = get_memory_mb
    query_monitor = QueryMonitor.new
    query_monitor.start

    start_time = Time.current

    begin
      generator = OptimizedTracksGeneratorV2.new(
        user,
        start_at: Time.zone.at(start_timestamp),
        end_at: Time.zone.at(end_timestamp),
        mode: :bulk
      )
      tracks_created = generator.call
      success = true
    rescue => e
      success = false
      error = e.message
      tracks_created = 0
    end

    end_time = Time.current
    memory_end = get_memory_mb
    query_monitor.stop

    execution_time = end_time - start_time

    result = {
      approach: "V2 (Full SQL)",
      success: success,
      error: error,
      execution_time: execution_time,
      tracks_created: tracks_created,
      memory_increase: memory_end - memory_start,
      query_count: query_monitor.query_count,
      query_time_ms: query_monitor.total_time_ms
    }

    print_result(result)
    result
  end

  def split_points_with_precalculated_distances(points, distance_data)
    return [] if points.empty?

    segments = []
    current_segment = []

    points.each do |point|
      if current_segment.empty?
        current_segment = [point]
      elsif should_break_segment_v1?(point, current_segment.last, distance_data)
        segments << current_segment if current_segment.size >= 2
        current_segment = [point]
      else
        current_segment << point
      end
    end

    segments << current_segment if current_segment.size >= 2
    segments
  end

  def should_break_segment_v1?(current_point, previous_point, distance_data)
    return false if previous_point.nil?

    point_data = distance_data[current_point.id]
    return false unless point_data

    time_threshold_seconds = user.safe_settings.minutes_between_routes.to_i * 60
    distance_threshold_meters = user.safe_settings.meters_between_routes.to_i

    return true if point_data[:time_diff_seconds] > time_threshold_seconds
    return true if point_data[:distance_meters] > distance_threshold_meters

    false
  end

  def create_track_v1(points)
    return nil if points.size < 2

    track = Track.new(
      user_id: user.id,
      start_at: Time.zone.at(points.first.timestamp),
      end_at: Time.zone.at(points.last.timestamp),
      original_path: build_path(points)
    )

    # Use LAG-based distance calculation
    track.distance = Point.total_distance_lag(points, :m).round
    track.duration = points.last.timestamp - points.first.timestamp
    track.avg_speed = calculate_average_speed(track.distance, track.duration)

    # Elevation stats (same as original)
    elevation_stats = calculate_elevation_stats(points)
    track.elevation_gain = elevation_stats[:gain]
    track.elevation_loss = elevation_stats[:loss]
    track.elevation_max = elevation_stats[:max]
    track.elevation_min = elevation_stats[:min]

    if track.save
      Point.where(id: points.map(&:id)).update_all(track_id: track.id)
      track
    else
      nil
    end
  end

  def cleanup_tracks
    user.tracks.where(start_at: Time.zone.at(start_timestamp)..Time.zone.at(end_timestamp)).destroy_all
  end

  def print_result(result)
    status = result[:success] ? "✅ SUCCESS" : "❌ FAILED"
    puts "  #{status}"
    puts "  ⏱️ Time: #{format_duration(result[:execution_time])}"
    puts "  🛤️ Tracks: #{result[:tracks_created]}"
    puts "  💾 Memory: +#{result[:memory_increase].round(1)}MB"
    puts "  🗄️ Queries: #{result[:query_count]} (#{result[:query_time_ms].round(1)}ms)"
    puts "  ❌ Error: #{result[:error]}" if result[:error]
  end

  def compare_results(results)
    return unless results[:original] && results[:v1] && results[:v2]

    puts sprintf("%-20s %-10s %-12s %-10s %-15s %-10s",
                 "Approach", "Time", "Tracks", "Memory", "Queries", "Query Time")
    puts "-" * 80

    [:original, :v1, :v2].each do |approach|
      result = results[approach]
      next unless result[:success]

      puts sprintf("%-20s %-10s %-12s %-10s %-15s %-10s",
                   result[:approach],
                   format_duration(result[:execution_time]),
                   result[:tracks_created],
                   "+#{result[:memory_increase].round(1)}MB",
                   result[:query_count],
                   "#{result[:query_time_ms].round(1)}ms")
    end

    # Calculate improvements
    if results[:original][:success]
      original_time = results[:original][:execution_time]
      original_queries = results[:original][:query_count]

      puts "\n🚀 Performance Improvements vs Original:"

      if results[:v1][:success]
        v1_speedup = (original_time / results[:v1][:execution_time]).round(2)
        v1_query_reduction = ((original_queries - results[:v1][:query_count]) / original_queries.to_f * 100).round(1)
        puts "  V1: #{v1_speedup}x faster, #{v1_query_reduction}% fewer queries"
      end

      if results[:v2][:success]
        v2_speedup = (original_time / results[:v2][:execution_time]).round(2)
        v2_query_reduction = ((original_queries - results[:v2][:query_count]) / original_queries.to_f * 100).round(1)
        puts "  V2: #{v2_speedup}x faster, #{v2_query_reduction}% fewer queries"
      end
    end
  end

  def save_results_to_files(results)
    timestamp = Time.current.strftime('%Y%m%d_%H%M%S')
    point_count = user.tracked_points.where(timestamp: start_timestamp..end_timestamp).count

    # Create detailed results structure
    benchmark_data = {
      meta: {
        timestamp: Time.current.iso8601,
        user_id: user.id,
        user_email: user.email,
        start_date: start_date.strftime('%Y-%m-%d'),
        end_date: end_date.strftime('%Y-%m-%d'),
        point_count: point_count,
        ruby_version: RUBY_VERSION,
        rails_version: Rails.version,
        database_adapter: ActiveRecord::Base.connection.adapter_name
      },
      results: results,
      performance_analysis: analyze_performance_data(results)
    }

    # Save JSON results for programmatic analysis
    json_filename = "tracks_optimization_#{timestamp}.json"
    json_path = Rails.root.join('lib', json_filename)
    File.write(json_path, JSON.pretty_generate(benchmark_data))

    # Save human-readable markdown report
    md_filename = "tracks_optimization_#{timestamp}.md"
    md_path = Rails.root.join('lib', md_filename)
    File.write(md_path, generate_markdown_report(benchmark_data))

    puts "\n💾 Results saved:"
    puts "  📄 JSON: #{json_path}"
    puts "  📝 Report: #{md_path}"
  end

  def analyze_performance_data(results)
    return {} unless results[:original] && results[:original][:success]

    original = results[:original]
    analysis = {
      baseline: {
        execution_time: original[:execution_time],
        query_count: original[:query_count],
        memory_usage: original[:memory_increase]
      }
    }

    [:v1, :v2].each do |version|
      next unless results[version] && results[version][:success]

      result = results[version]
      analysis[version] = {
        speedup_factor: (original[:execution_time] / result[:execution_time]).round(2),
        query_reduction_percent: ((original[:query_count] - result[:query_count]) / original[:query_count].to_f * 100).round(1),
        memory_change_percent: ((result[:memory_increase] - original[:memory_increase]) / original[:memory_increase].to_f * 100).round(1),
        execution_time_saved: (original[:execution_time] - result[:execution_time]).round(2)
      }
    end

    analysis
  end

  def generate_markdown_report(benchmark_data)
    meta = benchmark_data[:meta]
    results = benchmark_data[:results]
    analysis = benchmark_data[:performance_analysis]

    report = <<~MD
      # Tracks Generation Optimization Benchmark Report

      **Generated:** #{meta[:timestamp]}
      **User:** #{meta[:user_email]} (ID: #{meta[:user_id]})
      **Timeframe:** #{meta[:start_date]} to #{meta[:end_date]}
      **Dataset:** #{meta[:point_count].to_s.reverse.gsub(/(\d{3})(?=\d)/, '\\1,').reverse} points
      **Environment:** Ruby #{meta[:ruby_version]}, Rails #{meta[:rails_version]}, #{meta[:database_adapter]}

      ## Summary

      This benchmark compares three approaches to track generation:
      - **Original:** Individual PostGIS queries for each distance calculation
      - **V1 (LAG + Ruby):** PostgreSQL LAG for batch distance calculation, Ruby segmentation
      - **V2 (Full SQL):** Complete segmentation using PostgreSQL window functions

      ## Results

      | Approach | Status | Time | Tracks | Memory | Queries | Query Time |
      |----------|--------|------|--------|--------|---------|------------|
    MD

    [:original, :v1, :v2].each do |approach|
      next unless results[approach]

      result = results[approach]
      status = result[:success] ? "✅" : "❌"

      report += "| #{result[:approach]} | #{status} | #{format_duration(result[:execution_time])} | #{result[:tracks_created]} | +#{result[:memory_increase].round(1)}MB | #{result[:query_count]} | #{result[:query_time_ms].round(1)}ms |\n"
    end

    if analysis[:v1] || analysis[:v2]
      report += "\n## Performance Improvements\n\n"

      if analysis[:v1]
        v1 = analysis[:v1]
        report += "### V1 (LAG + Ruby) vs Original\n"
        report += "- **#{v1[:speedup_factor]}x faster** execution\n"
        report += "- **#{v1[:query_reduction_percent]}% fewer** database queries\n"
        report += "- **#{format_duration(v1[:execution_time_saved])} time saved**\n"
        report += "- Memory change: #{v1[:memory_change_percent] > 0 ? '+' : ''}#{v1[:memory_change_percent]}%\n\n"
      end

      if analysis[:v2]
        v2 = analysis[:v2]
        report += "### V2 (Full SQL) vs Original\n"
        report += "- **#{v2[:speedup_factor]}x faster** execution\n"
        report += "- **#{v2[:query_reduction_percent]}% fewer** database queries\n"
        report += "- **#{format_duration(v2[:execution_time_saved])} time saved**\n"
        report += "- Memory change: #{v2[:memory_change_percent] > 0 ? '+' : ''}#{v2[:memory_change_percent]}%\n\n"
      end
    end

    # Add detailed results
    report += "## Detailed Results\n\n"

    [:original, :v1, :v2].each do |approach|
      next unless results[approach]

      result = results[approach]
      report += "### #{result[:approach]}\n\n"

      if result[:success]
        report += "- ✅ **Status:** Success\n"
        report += "- ⏱️ **Execution Time:** #{format_duration(result[:execution_time])}\n"
        report += "- 🛤️ **Tracks Created:** #{result[:tracks_created]}\n"
        report += "- 💾 **Memory Increase:** +#{result[:memory_increase].round(1)}MB\n"
        report += "- 🗄️ **Database Queries:** #{result[:query_count]}\n"
        report += "- ⚡ **Query Time:** #{result[:query_time_ms].round(1)}ms\n"

        if result[:query_count] > 0
          avg_query_time = (result[:query_time_ms] / result[:query_count]).round(2)
          report += "- 📊 **Average Query Time:** #{avg_query_time}ms\n"
        end
      else
        report += "- ❌ **Status:** Failed\n"
        report += "- 🚨 **Error:** #{result[:error]}\n"
      end

      report += "\n"
    end

    report += "## Recommendations\n\n"

    if analysis[:v2] && analysis[:v2][:speedup_factor] > analysis.dig(:v1, :speedup_factor).to_f
      report += "🚀 **V2 (Full SQL)** shows the best performance with #{analysis[:v2][:speedup_factor]}x speedup.\n\n"
      report += "Benefits:\n"
      report += "- Minimal database queries (#{results.dig(:v2, :query_count)} vs #{results.dig(:original, :query_count)})\n"
      report += "- Fastest execution time\n"
      report += "- Leverages PostgreSQL's optimized window functions\n\n"
    elsif analysis[:v1]
      report += "🏃 **V1 (LAG + Ruby)** provides good performance improvements with #{analysis[:v1][:speedup_factor]}x speedup.\n\n"
    end

    if results[:original] && results[:original][:query_count] > 50000
      report += "⚠️ **Current implementation** makes excessive database queries (#{results[:original][:query_count]}) for this dataset size.\n\n"
    end

    report += "---\n*Generated by TracksOptimizationBenchmark*"

    report
  end

  # Helper methods
  def get_memory_mb
    `ps -o rss= -p #{Process.pid}`.to_i / 1024.0
  end

  def format_duration(seconds)
    if seconds < 60
      "#{seconds.round(1)}s"
    else
      minutes = (seconds / 60).floor
      remaining_seconds = (seconds % 60).round(1)
      "#{minutes}m #{remaining_seconds}s"
    end
  end

  def build_path(points)
    Tracks::BuildPath.new(points).call
  end

  def calculate_average_speed(distance_in_meters, duration_seconds)
    return 0.0 if duration_seconds <= 0 || distance_in_meters <= 0

    speed_mps = distance_in_meters.to_f / duration_seconds
    (speed_mps * 3.6).round(2)
  end

  def calculate_elevation_stats(points)
    altitudes = points.map(&:altitude).compact
    return { gain: 0, loss: 0, max: 0, min: 0 } if altitudes.empty?

    elevation_gain = 0
    elevation_loss = 0
    previous_altitude = altitudes.first

    altitudes[1..].each do |altitude|
      diff = altitude - previous_altitude
      if diff > 0
        elevation_gain += diff
      else
        elevation_loss += diff.abs
      end
      previous_altitude = altitude
    end

    { gain: elevation_gain.round, loss: elevation_loss.round, max: altitudes.max, min: altitudes.min }
  end
end

# Simple query monitor for this benchmark
class QueryMonitor
  attr_reader :query_count, :total_time_ms

  def initialize
    @query_count = 0
    @total_time_ms = 0
  end

  def start
    @subscription = ActiveSupport::Notifications.subscribe('sql.active_record') do |*args|
      event = ActiveSupport::Notifications::Event.new(*args)
      next if event.payload[:name]&.include?('SCHEMA')

      @query_count += 1
      @total_time_ms += event.duration
    end
  end

  def stop
    ActiveSupport::Notifications.unsubscribe(@subscription) if @subscription
  end
end

# Command line interface
if __FILE__ == $0
  if ARGV.length < 3
    puts "Usage: rails runner #{__FILE__} USER_ID START_DATE END_DATE"
    puts ""
    puts "Example:"
    puts "  rails runner #{__FILE__} 1 2024-01-01 2024-01-31"
    exit 1
  end

  user_id = ARGV[0].to_i
  start_date = ARGV[1]
  end_date = ARGV[2]

  benchmark = TracksOptimizationBenchmark.new(user_id, start_date, end_date)
  results = benchmark.run_all_benchmarks

  puts "\n🎉 Benchmark completed! Check results above."
end
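Since `QueryMonitor` rides the standard `sql.active_record` notification, it is reusable outside this script; a hedged sketch:

```ruby
# Illustrative: count queries issued by an arbitrary block of work.
monitor = QueryMonitor.new
monitor.start
user.tracked_points.where(track_id: nil).count
monitor.stop

puts "#{monitor.query_count} queries, #{monitor.total_time_ms.round(1)}ms total"
```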
235
tracks_performance_optimization_options.md
Normal file
235
tracks_performance_optimization_options.md
Normal file
|
|
@ -0,0 +1,235 @@
|
|||
# Tracks Feature Performance Optimization Options

## Current State Analysis

### Performance Characteristics
- **Time complexity:** O(n log n), where n = number of GPS points
- **Memory usage:** loads the entire dataset into memory (~200-400 bytes per point)
- **Processing mode:** single-threaded, sequential segmentation
- **Database load:** multiple PostGIS distance calculations per point pair
### Performance Estimates (Bulk Mode)

| Points | Processing Time | Memory Usage | Database Load |
|--------|-----------------|--------------|---------------|
| 10K    | 30-60 seconds   | ~50 MB       | Low           |
| 100K   | 5-15 minutes    | ~200 MB      | Medium        |
| 1M+    | 30-90 minutes   | 400+ MB      | High          |
### Current Bottlenecks
1. **Memory constraints** - loading all points at once
2. **PostGIS distance calculations** - sequential, not optimized
3. **Single-threaded processing** - no parallelization
4. **No progress indication** - users can't track long-running operations

---

## Optimization Options
### Option 1: Enhanced Time-Based Batching
**Complexity:** Low | **Impact:** High | **Risk:** Low

#### Implementation
- Extend the existing `:daily` mode with configurable batch sizes
- Add a 1-point overlap between batches to maintain segmentation accuracy
- Implement batch-aware progress reporting

#### Benefits
- **Memory reduction:** 90%+ (from ~400 MB to ~40 MB for 1M points)
- **Better UX:** progress indication and cancellation support
- **Incremental processing:** interrupted operations can be resumed
- **Lower DB pressure:** smaller query result sets

#### Changes Required
```ruby
# Enhanced generator with configurable batching
Tracks::Generator.new(
  user,
  mode: :batched,
  batch_size: 24.hours,
  enable_overlap: true
).call
```

#### Edge Cases to Handle
- Tracks spanning batch boundaries (solved with the overlap; see the sketch below)
- Midnight-crossing tracks in daily mode
- Deduplication of overlapping segments
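
A minimal sketch of the overlap mechanic, assuming points carry integer epoch-second timestamps; `each_batch_with_overlap` is an illustrative helper, not existing API:

```ruby
# Hypothetical helper: iterate a user's points in fixed time windows,
# prepending the previous window's last point so a gap that spans the
# boundary is still evaluated by the segmentation logic.
def each_batch_with_overlap(user, batch_size: 24.hours)
  min_ts = user.points.minimum(:timestamp)
  max_ts = user.points.maximum(:timestamp)
  return if min_ts.nil?

  carry = nil
  (min_ts..max_ts).step(batch_size.to_i) do |batch_start|
    batch = user.points
                .where(timestamp: batch_start...(batch_start + batch_size.to_i))
                .order(:timestamp)
                .to_a
    next if batch.empty?

    yield [carry, *batch].compact # overlap point first, if any
    carry = batch.last
  end
end
```

Each yielded slice can be handed to the existing segmentation; the deduplication item above covers the case where a boundary segment is produced by both neighbouring batches.
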
---

### Option 2: Spatial Indexing Optimization
**Complexity:** Medium | **Impact:** Medium | **Risk:** Low

#### Implementation
- Replace individual PostGIS calls with batch distance calculations
- Implement spatial clustering of nearby points before segmentation
- Use PostGIS window functions for distance calculations

#### Benefits
- **Faster distance calculations:** batch operations instead of individual queries
- **Fewer DB round-trips:** a single query computes many distances
- **Better index utilization:** leverages existing spatial indexes

#### Changes Required
```sql
-- Batch distance calculation approach. distance_to_previous is NULL for
-- the first row, so the outer filter naturally skips it.
WITH point_distances AS (
  SELECT
    id,
    timestamp,
    ST_Distance(
      lonlat::geography,
      LAG(lonlat::geography) OVER (ORDER BY timestamp)
    ) AS distance_to_previous
  FROM points
  WHERE user_id = ?
)
SELECT * FROM point_distances WHERE distance_to_previous > ?
```
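
From Ruby, the boundary lookup becomes a single round-trip. The sketch below uses the `$1`/`$2` bind style with plain-value binds, mirroring how the app already issues raw PostGIS queries; `user_id` and `distance_threshold_meters` are assumed to be in scope:

```ruby
# Sketch: fetch the IDs of points that start a new segment in one query.
sql = <<~SQL.squish
  WITH point_distances AS (
    SELECT
      id,
      ST_Distance(
        lonlat::geography,
        LAG(lonlat::geography) OVER (ORDER BY timestamp)
      ) AS distance_to_previous
    FROM points
    WHERE user_id = $1
  )
  SELECT id FROM point_distances WHERE distance_to_previous > $2
SQL

boundary_ids = ActiveRecord::Base.connection.select_values(
  sql, nil, [user_id, distance_threshold_meters]
)
```
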
---

### Option 3: Parallel Processing with Worker Pools
**Complexity:** High | **Impact:** High | **Risk:** Medium

#### Implementation
- Split large datasets into non-overlapping time ranges
- Process multiple ranges in parallel using Sidekiq workers
- Implement a coordination mechanism for dependent segments

#### Benefits
- **Faster processing:** utilizes multiple CPU cores
- **Scalable:** performance scales with worker capacity
- **Background processing:** non-blocking for users

#### Challenges
- **Complex coordination:** managing dependencies between batches
- **Resource contention:** multiple workers accessing the same user's data
- **Error handling:** partial-failure scenarios

#### Architecture
```ruby
# Parallel processing coordinator
class Tracks::ParallelGenerator
  def initialize(user_id)
    @user_id = user_id
  end

  def call
    split_into_parallel_ranges.map do |range|
      Tracks::BatchProcessorJob.perform_later(@user_id, range)
    end
  end
end
```
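
A coordination sketch under stated assumptions: the `range:` option on the generator, `Tracks::ReconcileBoundariesJob`, and the cache counter (primed by the coordinator with the number of ranges) are all hypothetical, not existing interfaces:

```ruby
# Hypothetical worker: process one non-overlapping time range, then let
# the last finisher enqueue boundary reconciliation between ranges.
class Tracks::BatchProcessorJob < ApplicationJob
  queue_as :tracks

  def perform(user_id, range)
    user = User.find(user_id)
    Tracks::Generator.new(user, mode: :bulk, range: range).call

    # The decrement must be atomic in practice (e.g. a Redis-backed
    # cache); otherwise two workers can both see a non-zero remainder.
    remaining = Rails.cache.decrement("tracks_batches:#{user_id}")
    Tracks::ReconcileBoundariesJob.perform_later(user_id) if remaining&.zero?
  end
end
```
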
---

### Option 4: Incremental Algorithm Enhancement
**Complexity:** Medium | **Impact:** Medium | **Risk:** Medium

#### Implementation
- Enhance the existing `:incremental` mode with smarter buffering
- Implement a sliding-window approach for active track detection (see the sketch after this list)
- Add automatic track finalization based on time gaps
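
A minimal sketch of the sliding-window idea, again assuming integer epoch-second timestamps; the class and its interface are hypothetical:

```ruby
# Hypothetical buffer for incremental mode: points accumulate in memory,
# and the current track is finalized automatically once the time gap to
# the newest point exceeds the threshold.
class Tracks::SlidingWindowBuffer
  def initialize(time_threshold_seconds: 30 * 60)
    @time_threshold_seconds = time_threshold_seconds
    @buffer = []
  end

  # Returns the finalized segment (an array of points) when a gap closes
  # the current track, otherwise nil.
  def push(point)
    finalized = nil
    if @buffer.any? && (point.timestamp - @buffer.last.timestamp) > @time_threshold_seconds
      finalized = @buffer
      @buffer = []
    end
    @buffer << point
    finalized
  end
end
```
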
#### Benefits
- **Real-time processing:** points are processed as they arrive
- **Lower memory footprint:** only active segments stay in memory
- **Better for live tracking:** immediate track updates

#### Current Limitations
- The existing incremental mode processes untracked points only
- No automatic track finalization
- Limited to a single active track per user

---
### Option 5: Database-Level Optimization
**Complexity:** Low-Medium | **Impact:** Medium | **Risk:** Low

#### Implementation
- Add composite indexes for common query patterns
- Implement materialized views for expensive calculations (sketched after the indexes)
- Use database-level segmentation logic

#### Benefits
- **Faster queries:** better index utilization
- **Reduced Ruby processing:** logic moves to the database
- **Consistent performance:** database optimizations benefit all modes

#### Proposed Indexes
```sql
-- Optimized for bulk processing
CREATE INDEX CONCURRENTLY idx_points_user_timestamp_track
ON points(user_id, timestamp) WHERE track_id IS NULL;

-- Optimized for incremental processing
CREATE INDEX CONCURRENTLY idx_points_untracked_timestamp
ON points(timestamp) WHERE track_id IS NULL;
```
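
The materialized-view idea could be sketched as a Rails migration; the view name `point_gap_stats` and the migration version number are assumptions:

```ruby
# Hypothetical migration: cache each point's distance to its predecessor
# so track generation reads precomputed values instead of re-running
# ST_Distance on every pass.
class CreatePointGapStats < ActiveRecord::Migration[7.1]
  def up
    execute <<~SQL
      CREATE MATERIALIZED VIEW point_gap_stats AS
      SELECT
        id,
        user_id,
        timestamp,
        ST_Distance(
          lonlat::geography,
          LAG(lonlat::geography) OVER (PARTITION BY user_id ORDER BY timestamp)
        ) AS distance_to_previous
      FROM points
    SQL

    # A unique index allows REFRESH MATERIALIZED VIEW CONCURRENTLY.
    add_index :point_gap_stats, :id, unique: true
  end

  def down
    execute 'DROP MATERIALIZED VIEW point_gap_stats'
  end
end
```

The view would be refreshed after large imports with `REFRESH MATERIALIZED VIEW CONCURRENTLY point_gap_stats`.
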
---

## Recommended Implementation Strategy

### Phase 1: Quick Wins (Weeks 1-2)
1. **Implement Enhanced Time-Based Batching** (Option 1)
   - Extend the existing daily mode with overlap
   - Add progress reporting
   - Make batch sizes configurable

### Phase 2: Database Optimization (Week 3)
2. **Add Database-Level Optimizations** (Option 5)
   - Create the optimized indexes
   - Implement batch distance calculations

### Phase 3: Advanced Features (Weeks 4-6)
3. **Spatial Indexing Optimization** (Option 2)
   - Replace individual distance calculations
   - Implement spatial clustering

### Phase 4: Future Enhancements
4. **Parallel Processing** (Option 3) - consider for v2
5. **Incremental Enhancement** (Option 4) - for real-time features

---

## Risk Assessment

### Low Risk
- **Time-based batching:** builds on the existing daily mode
- **Database indexes:** standard optimization technique
- **Progress reporting:** UI enhancement only

### Medium Risk
- **Spatial optimization:** requires careful testing of distance calculations
- **Incremental enhancement:** changes existing algorithm logic

### High Risk
- **Parallel processing:** complex coordination, potential race conditions
- **Major algorithm changes:** could introduce segmentation bugs

---

## Success Metrics

### Performance Targets
- **Memory usage:** < 100 MB for datasets up to 1M points
- **Processing time:** < 10 minutes for 1M points
- **User experience:** progress indication and cancellation support

### Monitoring Points
- Database query performance
- Memory consumption during processing
- User-reported processing times
- Track generation accuracy (no regression)

---

## Next Steps

1. **Choose the initial approach** based on urgency and resources
2. **Create a feature branch** for the selected optimization
3. **Implement comprehensive testing**, including edge cases
4. **Monitor performance** in a staging environment
5. **Roll out gradually** behind feature flags