Mirror of https://github.com/Freika/dawarich.git, synced 2026-01-11 09:41:40 -05:00

Commit d8033a1e27: Update track generation
Parent: 4044e77fcd
17 changed files with 8103 additions and 29 deletions
Gemfile (7 changed lines)

@@ -52,6 +52,7 @@ gem 'jwt'
 group :development, :test do
   gem 'brakeman', require: false
+  gem 'bullet'
   gem 'bundler-audit', require: false
   gem 'debug', platforms: %i[mri mingw x64_mingw]
   gem 'dotenv-rails'
@@ -78,3 +79,9 @@ group :development do
   gem 'foreman'
   gem 'rubocop-rails', require: false
 end
+
+# group :production do
+#   gem 'uglifier'
+# end
+
+# gem 'sassc-rails'
Gemfile.lock

@@ -113,6 +113,9 @@ GEM
     brakeman (7.0.2)
       racc
     builder (3.3.0)
+    bullet (8.0.8)
+      activesupport (>= 3.0.0)
+      uniform_notifier (~> 1.11)
     bundler-audit (0.9.2)
       bundler (>= 1.2.0, < 3)
       thor (~> 1.0)
@@ -486,6 +489,7 @@ GEM
     unicode-display_width (3.1.4)
       unicode-emoji (~> 4.0, >= 4.0.4)
     unicode-emoji (4.0.4)
+    uniform_notifier (1.17.0)
     uri (1.0.3)
     useragent (0.16.11)
     warden (1.2.9)
@@ -519,6 +523,7 @@ DEPENDENCIES
   aws-sdk-s3 (~> 1.177.0)
   bootsnap
   brakeman
+  bullet
   bundler-audit
   capybara
   chartkick

File diff suppressed because one or more lines are too long
@@ -50,16 +50,43 @@ module Distanceable
     return 0 if points.length < 2

-    total_meters = points.each_cons(2).sum do |point1, point2|
-      connection.select_value(
-        'SELECT ST_Distance(ST_GeomFromEWKT($1)::geography, ST_GeomFromEWKT($2)::geography)',
-        nil,
-        [point1.lonlat, point2.lonlat]
-      )
-    end
+    # OPTIMIZED: Single SQL query instead of N individual queries
+    total_meters = calculate_batch_distances(points).sum

     total_meters.to_f / ::DISTANCE_UNITS[unit.to_sym]
   end

+  # Optimized batch distance calculation using single SQL query
+  def calculate_batch_distances(points)
+    return [] if points.length < 2
+
+    point_pairs = points.each_cons(2).to_a
+    return [] if point_pairs.empty?
+
+    # Create a VALUES clause with all point pairs
+    values_clause = point_pairs.map.with_index do |(p1, p2), index|
+      "(#{index}, ST_GeomFromEWKT('#{p1.lonlat}')::geography, ST_GeomFromEWKT('#{p2.lonlat}')::geography)"
+    end.join(', ')
+
+    # Single query to calculate all distances
+    results = connection.execute(<<-SQL.squish)
+      WITH point_pairs AS (
+        SELECT
+          pair_id,
+          point1,
+          point2
+        FROM (VALUES #{values_clause}) AS t(pair_id, point1, point2)
+      )
+      SELECT
+        pair_id,
+        ST_Distance(point1, point2) as distance_meters
+      FROM point_pairs
+      ORDER BY pair_id
+    SQL
+
+    # Return array of distances in meters
+    results.map { |row| row['distance_meters'].to_f }
+  end
 end

 def distance_to(other_point, unit = :km)
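The change replaces N-1 round-trips with a single query. A minimal usage sketch, assuming `Point` exposes the concern's class methods and `points` is an ordered array of persisted points (return values are illustrative, not real output):

    points = Point.where(user_id: 1).order(:timestamp).limit(4).to_a

    # One SQL round-trip computes every consecutive pair distance
    Point.calculate_batch_distances(points)  # => [12.3, 8.7, 15.1] meters per pair (illustrative)

The enclosing distance method then sums that array and divides by `::DISTANCE_UNITS[unit]` to convert.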
@@ -25,6 +25,112 @@ class Track < ApplicationRecord
       .first
   end

+  # Optimized SQL segmentation using PostgreSQL window functions
+  def self.segment_points_in_sql(user_id, start_timestamp, end_timestamp, time_threshold_minutes, distance_threshold_meters, untracked_only: false)
+    time_threshold_seconds = time_threshold_minutes * 60
+
+    where_clause = if untracked_only
+      "WHERE user_id = $1 AND timestamp BETWEEN $2 AND $3 AND track_id IS NULL"
+    else
+      "WHERE user_id = $1 AND timestamp BETWEEN $2 AND $3"
+    end
+
+    sql = <<~SQL
+      WITH points_with_gaps AS (
+        SELECT
+          id,
+          timestamp,
+          lonlat,
+          LAG(lonlat) OVER (ORDER BY timestamp) as prev_lonlat,
+          LAG(timestamp) OVER (ORDER BY timestamp) as prev_timestamp,
+          ST_Distance(
+            lonlat::geography,
+            LAG(lonlat) OVER (ORDER BY timestamp)::geography
+          ) as distance_meters,
+          (timestamp - LAG(timestamp) OVER (ORDER BY timestamp)) as time_diff_seconds
+        FROM points
+        #{where_clause}
+        ORDER BY timestamp
+      ),
+      segment_breaks AS (
+        SELECT *,
+          CASE
+            WHEN prev_lonlat IS NULL THEN 1
+            WHEN time_diff_seconds > $4 THEN 1
+            WHEN distance_meters > $5 THEN 1
+            ELSE 0
+          END as is_break
+        FROM points_with_gaps
+      ),
+      segments AS (
+        SELECT *,
+          SUM(is_break) OVER (ORDER BY timestamp ROWS UNBOUNDED PRECEDING) as segment_id
+        FROM segment_breaks
+      )
+      SELECT
+        segment_id,
+        array_agg(id ORDER BY timestamp) as point_ids,
+        count(*) as point_count,
+        min(timestamp) as start_timestamp,
+        max(timestamp) as end_timestamp,
+        sum(COALESCE(distance_meters, 0)) as total_distance_meters
+      FROM segments
+      GROUP BY segment_id
+      HAVING count(*) >= 2
+      ORDER BY segment_id
+    SQL
+
+    results = Point.connection.exec_query(
+      sql,
+      'segment_points_in_sql',
+      [user_id, start_timestamp, end_timestamp, time_threshold_seconds, distance_threshold_meters]
+    )
+
+    # Convert results to segment data
+    segments_data = []
+    results.each do |row|
+      segments_data << {
+        segment_id: row['segment_id'].to_i,
+        point_ids: parse_postgres_array(row['point_ids']),
+        point_count: row['point_count'].to_i,
+        start_timestamp: row['start_timestamp'].to_i,
+        end_timestamp: row['end_timestamp'].to_i,
+        total_distance_meters: row['total_distance_meters'].to_f
+      }
+    end
+
+    segments_data
+  end
+
+  # Get actual Point objects for each segment with pre-calculated distances
+  def self.get_segments_with_points(user_id, start_timestamp, end_timestamp, time_threshold_minutes, distance_threshold_meters, untracked_only: false)
+    segments_data = segment_points_in_sql(user_id, start_timestamp, end_timestamp, time_threshold_minutes, distance_threshold_meters, untracked_only: untracked_only)
+
+    # Get all point IDs we need
+    all_point_ids = segments_data.flat_map { |seg| seg[:point_ids] }
+
+    # Single query to get all points
+    points_by_id = Point.where(id: all_point_ids).index_by(&:id)
+
+    # Build segments with actual Point objects
+    segments_data.map do |seg_data|
+      {
+        points: seg_data[:point_ids].map { |id| points_by_id[id] }.compact,
+        pre_calculated_distance: seg_data[:total_distance_meters],
+        start_timestamp: seg_data[:start_timestamp],
+        end_timestamp: seg_data[:end_timestamp]
+      }
+    end
+  end
+
+  # Parse PostgreSQL array format like "{1,2,3}" into Ruby array
+  def self.parse_postgres_array(pg_array_string)
+    return [] if pg_array_string.nil? || pg_array_string.empty?
+
+    # Remove curly braces and split by comma
+    pg_array_string.gsub(/[{}]/, '').split(',').map(&:to_i)
+  end
+
   private

   def broadcast_track_created
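The heart of `segment_points_in_sql` is the running total: `SUM(is_break) OVER (ORDER BY timestamp)` converts per-row break flags into monotonically increasing segment ids. The same arithmetic in plain Ruby, with made-up gaps (a sketch, not part of the diff):

    time_threshold = 3600                     # e.g. 60 minutes, in seconds
    gaps = [nil, 30, 45, 4000, 20]            # seconds since previous point; nil = first row
    breaks = gaps.map { |g| g.nil? || g > time_threshold ? 1 : 0 }   # => [1, 0, 0, 1, 0]
    segment_ids = breaks.each_with_object([]) { |b, ids| ids << ids.last.to_i + b }
    segment_ids                               # => [1, 1, 1, 2, 2], i.e. two segments

`GROUP BY segment_id` then collapses each run into one row, and `parse_postgres_array("{1,2,3}")` turns the aggregated `point_ids` column back into `[1, 2, 3]`.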
@@ -40,21 +40,32 @@ class Tracks::Generator
   def call
     clean_existing_tracks if should_clean_tracks?

-    points = load_points
-    Rails.logger.debug "Generator: loaded #{points.size} points for user #{user.id} in #{mode} mode"
-    return 0 if points.empty?
-
-    segments = split_points_into_segments(points)
-    Rails.logger.debug "Generator: created #{segments.size} segments"
+    # Get timestamp range for SQL query
+    start_timestamp, end_timestamp = get_timestamp_range
+
+    Rails.logger.debug "Generator: querying points for user #{user.id} in #{mode} mode"
+
+    # Use optimized SQL segmentation with pre-calculated distances
+    untracked_only = (mode == :incremental)
+    segments = Track.get_segments_with_points(
+      user.id,
+      start_timestamp,
+      end_timestamp,
+      time_threshold_minutes,
+      distance_threshold_meters,
+      untracked_only: untracked_only
+    )
+
+    Rails.logger.debug "Generator: created #{segments.size} segments via SQL"

     tracks_created = 0

-    segments.each do |segment|
-      track = create_track_from_segment(segment)
+    segments.each do |segment_data|
+      track = create_track_from_segment_optimized(segment_data)
       tracks_created += 1 if track
     end

-    Rails.logger.info "Generated #{tracks_created} tracks for user #{user.id} in #{mode} mode"
+    Rails.logger.info "Generated #{tracks_created} tracks for user #{user.id} in optimized #{mode} mode"
     tracks_created
   end
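The generator's public interface is unchanged; the constructor keywords below match the invocation used by the benchmark script later in this diff (a sketch):

    # Rebuild all tracks in a window (bulk mode cleans existing tracks first)
    Tracks::Generator.new(user, start_at: 1.month.ago, end_at: Time.current, mode: :bulk).call

    # Process only points with track_id IS NULL (the untracked_only branch above)
    Tracks::Generator.new(user, mode: :incremental).call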
@@ -99,6 +110,18 @@ class Tracks::Generator
     user.tracked_points.where(timestamp: day_range).order(:timestamp)
   end

+  def create_track_from_segment_optimized(segment_data)
+    points = segment_data[:points]
+    pre_calculated_distance = segment_data[:pre_calculated_distance]
+
+    Rails.logger.debug "Generator: processing segment with #{points.size} points"
+    return unless points.size >= 2
+
+    track = create_track_from_points_optimized(points, pre_calculated_distance)
+    Rails.logger.debug "Generator: created track #{track&.id}"
+    track
+  end
+
   def create_track_from_segment(segment)
     Rails.logger.debug "Generator: processing segment with #{segment.size} points"
     return unless segment.size >= 2
@@ -171,6 +194,31 @@ class Tracks::Generator
     scope.destroy_all
   end

+  # Get timestamp range for SQL query based on mode
+  def get_timestamp_range
+    case mode
+    when :bulk
+      if start_at && end_at
+        [start_at.to_i, end_at.to_i]
+      else
+        # Get full range for user
+        first_point = user.tracked_points.order(:timestamp).first
+        last_point = user.tracked_points.order(:timestamp).last
+        [first_point&.timestamp || 0, last_point&.timestamp || Time.current.to_i]
+      end
+    when :daily
+      day = start_at&.to_date || Date.current
+      [day.beginning_of_day.to_i, day.end_of_day.to_i]
+    when :incremental
+      # For incremental, we need all untracked points up to end_at
+      first_point = user.tracked_points.where(track_id: nil).order(:timestamp).first
+      end_timestamp = end_at ? end_at.to_i : Time.current.to_i
+      [first_point&.timestamp || 0, end_timestamp]
+    else
+      raise ArgumentError, "Unknown mode: #{mode}"
+    end
+  end
+
   # Threshold methods from safe_settings
   def distance_threshold_meters
     @distance_threshold_meters ||= user.safe_settings.meters_between_routes.to_i
@@ -86,11 +86,15 @@ module Tracks::Segmentation
   end

   def calculate_km_distance_between_points(point1, point2)
-    lat1, lon1 = point_coordinates(point1)
-    lat2, lon2 = point_coordinates(point2)
-
-    # Use Geocoder to match behavior with frontend (same library used elsewhere in app)
-    Geocoder::Calculations.distance_between([lat1, lon1], [lat2, lon2], units: :km)
+    # OPTIMIZED: Use PostGIS for more accurate distance calculation (same as track distance)
+    # This maintains consistency with track distance calculations
+    distance_meters = Point.connection.select_value(
+      'SELECT ST_Distance(ST_GeomFromEWKT($1)::geography, ST_GeomFromEWKT($2)::geography)',
+      nil,
+      [point1.lonlat, point2.lonlat]
+    )
+
+    distance_meters.to_f / 1000.0 # Convert meters to kilometers
   end

   def should_finalize_segment?(segment_points, grace_period_minutes = 5)
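The removed path computed great-circle distance in Ruby via Geocoder; the new one delegates to PostGIS. For intuition, the two typically agree to within a fraction of a percent (coordinates below are arbitrary examples, not from the app):

    require 'geocoder'

    berlin  = [52.5200, 13.4050]
    potsdam = [52.3906, 13.0645]

    # Haversine on a sphere, as the removed code did
    Geocoder::Calculations.distance_between(berlin, potsdam, units: :km)  # roughly 27

    # ST_Distance on ::geography evaluates on a spheroid, so it differs
    # slightly but now matches the distances stored on tracks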
@@ -82,6 +82,38 @@ module Tracks::TrackBuilder
     end
   end

+  # Optimized version that uses pre-calculated distance from SQL
+  def create_track_from_points_optimized(points, pre_calculated_distance)
+    return nil if points.size < 2
+
+    track = Track.new(
+      user_id: user.id,
+      start_at: Time.zone.at(points.first.timestamp),
+      end_at: Time.zone.at(points.last.timestamp),
+      original_path: build_path(points)
+    )
+
+    # Use pre-calculated distance from SQL instead of recalculating
+    track.distance = pre_calculated_distance.round
+    track.duration = calculate_duration(points)
+    track.avg_speed = calculate_average_speed(track.distance, track.duration)
+
+    # Calculate elevation statistics (no DB queries needed)
+    elevation_stats = calculate_elevation_stats(points)
+    track.elevation_gain = elevation_stats[:gain]
+    track.elevation_loss = elevation_stats[:loss]
+    track.elevation_max = elevation_stats[:max]
+    track.elevation_min = elevation_stats[:min]
+
+    if track.save
+      Point.where(id: points.map(&:id)).update_all(track_id: track.id)
+      track
+    else
+      Rails.logger.error "Failed to create track for user #{user.id}: #{track.errors.full_messages.join(', ')}"
+      nil
+    end
+  end
+
   def build_path(points)
     Tracks::BuildPath.new(points).call
   end
@@ -21,7 +21,9 @@
   <% end %>
 </div>

+<% if current_user.active? %>
 <%= link_to 'Update stats', update_all_stats_path, data: { turbo_method: :put }, class: 'btn btn-primary mt-5' %>
+<% end %>

 <div class="mt-6 grid grid-cols-1 sm:grid-cols-1 md:grid-cols-2 lg:grid-cols-2 gap-6">
   <% @stats.each do |year, stats| %>
@@ -33,7 +35,7 @@
       <%= link_to '[Map]', map_url(year_timespan(year)), class: 'underline hover:no-underline' %>
     </div>
     <div class="gap-2">
-      <span class='text-xs text-gray-500'>Last updated: <%= human_date(stats.first.updated_at) %></span>
+      <span class='text-xs text-gray-500'>Last update: <%= human_date(stats.first.updated_at) %></span>
       <%= link_to '🔄', update_year_month_stats_path(year, :all), data: { turbo_method: :put }, class: 'text-sm text-gray-500 hover:underline' %>
     </div>
   </h2>
config/environments/development.rb

@@ -3,6 +3,17 @@
 require 'active_support/core_ext/integer/time'

 Rails.application.configure do
+  unless ENV['SELF_HOSTED'] == 'true'
+    config.after_initialize do
+      Bullet.enable = true
+      Bullet.alert = true
+      Bullet.bullet_logger = true
+      Bullet.console = true
+      Bullet.rails_logger = true
+      Bullet.add_footer = true
+    end
+  end
+
   # Settings specified here will take precedence over those in config/application.rb.

   # In the development environment your application's code is reloaded any time
config/environments/test.rb

@@ -8,6 +8,12 @@ require 'active_support/core_ext/integer/time'
 # and recreated between test runs. Don't rely on the data there!

 Rails.application.configure do
+  config.after_initialize do
+    Bullet.enable = true
+    Bullet.bullet_logger = true
+    Bullet.raise = true # raise an error if n+1 query occurs
+  end
+
   # Settings specified here will take precedence over those in config/application.rb.

   # While tests run files are not watched, reloading is not necessary.
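Bullet instruments ActiveRecord at runtime: in development it logs, alerts, and annotates pages; in test, `Bullet.raise = true` turns any detection into a failing spec. A hypothetical example of the pattern it flags (the association names are illustrative, not taken from this repo):

    # N+1: one query for the tracks, then one points query per track
    user.tracks.each { |track| puts track.points.count }

    # Eager-loaded version Bullet would suggest
    user.tracks.includes(:points).each { |track| puts track.points.size }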
lib/optimized_tracks_v1.rb (new file, 145 lines)

@@ -0,0 +1,145 @@
# frozen_string_literal: true

# Optimization V1: LAG-based distance calculation with Ruby segmentation
# This keeps the existing Ruby segmentation logic but uses PostgreSQL LAG
# for batch distance calculations instead of individual queries

module OptimizedTracksV1
  extend ActiveSupport::Concern

  module ClassMethods
    # V1: Use LAG to get all consecutive distances in a single query
    def calculate_all_consecutive_distances(points)
      return [] if points.length < 2

      point_ids = points.map(&:id).join(',')

      results = connection.execute(<<-SQL.squish)
        WITH points_with_previous AS (
          SELECT
            id,
            timestamp,
            lonlat,
            LAG(lonlat) OVER (ORDER BY timestamp) as prev_lonlat,
            LAG(timestamp) OVER (ORDER BY timestamp) as prev_timestamp,
            LAG(id) OVER (ORDER BY timestamp) as prev_id
          FROM points
          WHERE id IN (#{point_ids})
        )
        SELECT
          id,
          prev_id,
          timestamp,
          prev_timestamp,
          ST_Distance(lonlat::geography, prev_lonlat::geography) as distance_meters,
          (timestamp - prev_timestamp) as time_diff_seconds
        FROM points_with_previous
        WHERE prev_lonlat IS NOT NULL
        ORDER BY timestamp
      SQL

      # Return hash mapping point_id => {distance_to_previous, time_diff}
      distance_map = {}
      results.each do |row|
        distance_map[row['id'].to_i] = {
          distance_meters: row['distance_meters'].to_f,
          time_diff_seconds: row['time_diff_seconds'].to_i,
          prev_id: row['prev_id'].to_i
        }
      end

      distance_map
    end

    # V1: Optimized total distance using LAG (already exists in distanceable.rb)
    def total_distance_lag(points, unit = :m)
      unless ::DISTANCE_UNITS.key?(unit.to_sym)
        raise ArgumentError, "Invalid unit. Supported units are: #{::DISTANCE_UNITS.keys.join(', ')}"
      end

      return 0 if points.length < 2

      point_ids = points.map(&:id).join(',')

      distance_in_meters = connection.select_value(<<-SQL.squish)
        WITH points_with_previous AS (
          SELECT
            lonlat,
            LAG(lonlat) OVER (ORDER BY timestamp) as prev_lonlat
          FROM points
          WHERE id IN (#{point_ids})
        )
        SELECT COALESCE(
          SUM(ST_Distance(lonlat::geography, prev_lonlat::geography)),
          0
        )
        FROM points_with_previous
        WHERE prev_lonlat IS NOT NULL
      SQL

      distance_in_meters.to_f / ::DISTANCE_UNITS[unit.to_sym]
    end
  end
end

# Optimized segmentation module using pre-calculated distances
module OptimizedSegmentationV1
  extend ActiveSupport::Concern

  private

  def split_points_into_segments_v1(points)
    return [] if points.empty?

    # V1: Pre-calculate all distances and time diffs in one query
    if points.size > 1
      distance_data = Point.calculate_all_consecutive_distances(points)
    else
      distance_data = {}
    end

    segments = []
    current_segment = []

    points.each do |point|
      if current_segment.empty?
        # First point always starts a segment
        current_segment = [point]
      elsif should_start_new_segment_v1?(point, current_segment.last, distance_data)
        # Finalize current segment if it has enough points
        segments << current_segment if current_segment.size >= 2
        current_segment = [point]
      else
        current_segment << point
      end
    end

    # Don't forget the last segment
    segments << current_segment if current_segment.size >= 2

    segments
  end

  def should_start_new_segment_v1?(current_point, previous_point, distance_data)
    return false if previous_point.nil?

    # Get pre-calculated data for this point
    point_data = distance_data[current_point.id]
    return false unless point_data

    # Check time threshold
    time_threshold_seconds = time_threshold_minutes.to_i * 60
    return true if point_data[:time_diff_seconds] > time_threshold_seconds

    # Check distance threshold
    distance_meters = point_data[:distance_meters]
    return true if distance_meters > distance_threshold_meters

    false
  end
end

# Add methods to Point class
class Point
  extend OptimizedTracksV1::ClassMethods
end
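A sketch of V1 in use after loading this file (names as defined above; returned values are illustrative):

    points = user.tracked_points.order(:timestamp).limit(1_000).to_a

    distance_map = Point.calculate_all_consecutive_distances(points)
    distance_map[points[1].id]
    # => { distance_meters: 14.2, time_diff_seconds: 31, prev_id: 101 }  (illustrative)

    Point.total_distance_lag(points, :km)  # whole sum in one query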
lib/optimized_tracks_v2.rb (new file, 291 lines)

@@ -0,0 +1,291 @@
# frozen_string_literal: true

# Optimization V2: Full SQL segmentation using PostgreSQL window functions
# This does both distance calculation AND segmentation entirely in SQL

module OptimizedTracksV2
  extend ActiveSupport::Concern

  module ClassMethods
    # V2: Complete segmentation in SQL using LAG and window functions
    def segment_points_in_sql(user_id, start_timestamp, end_timestamp, time_threshold_minutes, distance_threshold_meters)
      time_threshold_seconds = time_threshold_minutes * 60

      sql = <<~SQL
        WITH points_with_gaps AS (
          SELECT
            id,
            timestamp,
            lonlat,
            LAG(lonlat) OVER (ORDER BY timestamp) as prev_lonlat,
            LAG(timestamp) OVER (ORDER BY timestamp) as prev_timestamp,
            ST_Distance(
              lonlat::geography,
              LAG(lonlat) OVER (ORDER BY timestamp)::geography
            ) as distance_meters,
            (timestamp - LAG(timestamp) OVER (ORDER BY timestamp)) as time_diff_seconds
          FROM points
          WHERE user_id = $1
            AND timestamp BETWEEN $2 AND $3
          ORDER BY timestamp
        ),
        segment_breaks AS (
          SELECT *,
            CASE
              WHEN prev_lonlat IS NULL THEN 1
              WHEN time_diff_seconds > $4 THEN 1
              WHEN distance_meters > $5 THEN 1
              ELSE 0
            END as is_break
          FROM points_with_gaps
        ),
        segments AS (
          SELECT *,
            SUM(is_break) OVER (ORDER BY timestamp ROWS UNBOUNDED PRECEDING) as segment_id
          FROM segment_breaks
        )
        SELECT
          segment_id,
          array_agg(id ORDER BY timestamp) as point_ids,
          count(*) as point_count,
          min(timestamp) as start_timestamp,
          max(timestamp) as end_timestamp,
          sum(COALESCE(distance_meters, 0)) as total_distance_meters
        FROM segments
        GROUP BY segment_id
        HAVING count(*) >= 2
        ORDER BY segment_id
      SQL

      results = connection.exec_query(
        sql,
        'segment_points_in_sql',
        [user_id, start_timestamp, end_timestamp, time_threshold_seconds, distance_threshold_meters]
      )

      # Convert results to segment data
      segments_data = []
      results.each do |row|
        segments_data << {
          segment_id: row['segment_id'].to_i,
          point_ids: parse_postgres_array(row['point_ids']),
          point_count: row['point_count'].to_i,
          start_timestamp: row['start_timestamp'].to_i,
          end_timestamp: row['end_timestamp'].to_i,
          total_distance_meters: row['total_distance_meters'].to_f
        }
      end

      segments_data
    end

    # V2: Get actual Point objects for each segment
    def get_segments_with_points(user_id, start_timestamp, end_timestamp, time_threshold_minutes, distance_threshold_meters)
      segments_data = segment_points_in_sql(user_id, start_timestamp, end_timestamp, time_threshold_minutes, distance_threshold_meters)

      # Get all point IDs we need
      all_point_ids = segments_data.flat_map { |seg| seg[:point_ids] }

      # Single query to get all points
      points_by_id = Point.where(id: all_point_ids).index_by(&:id)

      # Build segments with actual Point objects
      segments_data.map do |seg_data|
        {
          points: seg_data[:point_ids].map { |id| points_by_id[id] }.compact,
          pre_calculated_distance: seg_data[:total_distance_meters],
          start_timestamp: seg_data[:start_timestamp],
          end_timestamp: seg_data[:end_timestamp]
        }
      end
    end

    private

    # Parse PostgreSQL array format like "{1,2,3}" into Ruby array
    def parse_postgres_array(pg_array_string)
      return [] if pg_array_string.nil? || pg_array_string.empty?

      # Remove curly braces and split by comma
      pg_array_string.gsub(/[{}]/, '').split(',').map(&:to_i)
    end
  end
end

# Optimized generator using V2 SQL segmentation
class OptimizedTracksGeneratorV2
  attr_reader :user, :start_at, :end_at, :mode

  def initialize(user, start_at: nil, end_at: nil, mode: :bulk)
    @user = user
    @start_at = start_at
    @end_at = end_at
    @mode = mode.to_sym
  end

  def call
    clean_existing_tracks if should_clean_tracks?

    # Get timestamp range for SQL query
    start_timestamp, end_timestamp = get_timestamp_range

    Rails.logger.debug "OptimizedGeneratorV2: querying points for user #{user.id} in #{mode} mode"

    # V2: Get segments directly from SQL with pre-calculated distances
    segments = Point.get_segments_with_points(
      user.id,
      start_timestamp,
      end_timestamp,
      time_threshold_minutes,
      distance_threshold_meters
    )

    Rails.logger.debug "OptimizedGeneratorV2: created #{segments.size} segments via SQL"

    tracks_created = 0

    segments.each do |segment_data|
      track = create_track_from_segment_v2(segment_data)
      tracks_created += 1 if track
    end

    Rails.logger.info "Generated #{tracks_created} tracks for user #{user.id} in optimized V2 #{mode} mode"
    tracks_created
  end

  private

  def create_track_from_segment_v2(segment_data)
    points = segment_data[:points]
    pre_calculated_distance = segment_data[:pre_calculated_distance]

    Rails.logger.debug "OptimizedGeneratorV2: processing segment with #{points.size} points"
    return unless points.size >= 2

    track = Track.new(
      user_id: user.id,
      start_at: Time.zone.at(points.first.timestamp),
      end_at: Time.zone.at(points.last.timestamp),
      original_path: build_path(points)
    )

    # V2: Use pre-calculated distance from SQL
    track.distance = pre_calculated_distance.round
    track.duration = calculate_duration(points)
    track.avg_speed = calculate_average_speed(track.distance, track.duration)

    # Calculate elevation statistics (no DB queries needed)
    elevation_stats = calculate_elevation_stats(points)
    track.elevation_gain = elevation_stats[:gain]
    track.elevation_loss = elevation_stats[:loss]
    track.elevation_max = elevation_stats[:max]
    track.elevation_min = elevation_stats[:min]

    if track.save
      Point.where(id: points.map(&:id)).update_all(track_id: track.id)
      track
    else
      Rails.logger.error "Failed to create track for user #{user.id}: #{track.errors.full_messages.join(', ')}"
      nil
    end
  end

  def get_timestamp_range
    case mode
    when :bulk
      if start_at && end_at
        [start_at.to_i, end_at.to_i]
      else
        # Get full range for user
        first_point = user.tracked_points.order(:timestamp).first
        last_point = user.tracked_points.order(:timestamp).last
        [first_point&.timestamp || 0, last_point&.timestamp || Time.current.to_i]
      end
    when :daily
      day = start_at&.to_date || Date.current
      [day.beginning_of_day.to_i, day.end_of_day.to_i]
    when :incremental
      # For incremental, we need all untracked points up to end_at
      first_point = user.tracked_points.where(track_id: nil).order(:timestamp).first
      end_timestamp = end_at ? end_at.to_i : Time.current.to_i
      [first_point&.timestamp || 0, end_timestamp]
    end
  end

  def should_clean_tracks?
    case mode
    when :bulk, :daily then true
    else false
    end
  end

  def clean_existing_tracks
    case mode
    when :bulk
      scope = user.tracks
      if start_at && end_at
        scope = scope.where(start_at: start_at..end_at)
      end
      scope.destroy_all
    when :daily
      day = start_at&.to_date || Date.current
      range = day.beginning_of_day..day.end_of_day
      user.tracks.where(start_at: range).destroy_all
    end
  end

  # Helper methods (same as original)
  def build_path(points)
    Tracks::BuildPath.new(points).call
  end

  def calculate_duration(points)
    points.last.timestamp - points.first.timestamp
  end

  def calculate_average_speed(distance_in_meters, duration_seconds)
    return 0.0 if duration_seconds <= 0 || distance_in_meters <= 0

    speed_mps = distance_in_meters.to_f / duration_seconds
    (speed_mps * 3.6).round(2) # m/s to km/h
  end

  def calculate_elevation_stats(points)
    altitudes = points.map(&:altitude).compact
    return { gain: 0, loss: 0, max: 0, min: 0 } if altitudes.empty?

    elevation_gain = 0
    elevation_loss = 0
    previous_altitude = altitudes.first

    altitudes[1..].each do |altitude|
      diff = altitude - previous_altitude
      if diff > 0
        elevation_gain += diff
      else
        elevation_loss += diff.abs
      end
      previous_altitude = altitude
    end

    {
      gain: elevation_gain.round,
      loss: elevation_loss.round,
      max: altitudes.max,
      min: altitudes.min
    }
  end

  def distance_threshold_meters
    @distance_threshold_meters ||= user.safe_settings.meters_between_routes.to_i
  end

  def time_threshold_minutes
    @time_threshold_minutes ||= user.safe_settings.minutes_between_routes.to_i
  end
end

# Add methods to Point class
class Point
  extend OptimizedTracksV2::ClassMethods
end
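V2 is exercised end to end by the benchmark script below; standalone it would be driven like this (a sketch):

    generator = OptimizedTracksGeneratorV2.new(
      user,
      start_at: Time.zone.parse('2024-01-01'),
      end_at: Time.zone.parse('2024-12-31'),
      mode: :bulk
    )
    generator.call  # => number of tracks created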
lib/results.md (new file, 122 lines)

@@ -0,0 +1,122 @@
## Original

Generator: created track 227296
Generated 1437 tracks for user 1 in bulk mode
✅ Generation completed successfully

============================================================
📊 BENCHMARK RESULTS
============================================================
Status: ✅ SUCCESS
Execution Time: 1m 28.5s
Tracks Created: 1437
Timeframe Coverage: 8.0% of user's total data

💾 Memory Usage:
Start: 210.9MB
End: 433.2MB
Memory Increase: +222.3MB

🗄️ Database Performance:
Total Queries: 115920
Total Query Time: 50453.1ms
Average Query Time: 0.44ms
Slow Queries (>100ms): 63
1. 983.24ms - SELECT COUNT(*) FROM "points" WHERE "points"."user_id" = $1 AND "points"."timestamp" BETWEEN $2 A...
2. 2826.02ms - SELECT "points".* FROM "points" WHERE "points"."user_id" = $1 AND "points"."timestamp" BETWEEN $2...
3. 217.02ms - UPDATE "points" SET "track_id" = $1 WHERE "points"."id" IN ($2, $3, $4, $5, $6, $7, $8, $9, $10, ...

✔️ Post-Generation Validation:
Points in Timeframe: 111609
Points with Tracks: 110167
Points without Tracks: 1442
Track Records: 1437
✅ Data integrity: PASS

🔍 Performance Analysis:
Speed Rating: 🚀 Excellent (1m 28.5s)
Memory Rating: 🧡 High (433.2MB peak)
Recommendation: Consider database optimization or smaller batch sizes

🔮 Extrapolation for Full Dataset:
Full Dataset Size: 1,403,662 points
Scaling Factor: 12.6x
Estimated Full Time: 18m 32.8s
Estimated Full Memory: 5447.6MB

============================================================
📋 BENCHMARK SUMMARY
============================================================
⏱️ Total Time: 1m 28.5s
📍 Points Processed: 111,609
🛤️ Tracks Created: 1437
🚀 Processing Speed: 1261.4 points/second
📅 Timeframe: 2024-01-01 to 2024-12-31
👤 User: demo@dawarich.app (ID: 1)
✅ Status: COMPLETED

## Iteration 1

Generator: created track 244784
Generated 1435 tracks for user 1 in optimized bulk mode
✅ Generation completed successfully

============================================================
📊 BENCHMARK RESULTS
============================================================
Status: ✅ SUCCESS
Execution Time: 56.4s
Tracks Created: 1435
Points Processed: 111,609
Processing Speed: 1978.3 points/second
Average Points/Track: 77.8
Timeframe Coverage: 8.0% of user's total data

💾 Memory Usage:
Start: 297.2MB
End: 407.5MB
Memory Increase: +110.3MB

🗄️ Database Performance:
Total Queries: 7178
Total Query Time: 44521.33ms
Average Query Time: 6.2ms
Slow Queries (>100ms): 88
1. 2338.43ms - WITH points_with_gaps AS ( SELECT id, timestamp, lonlat, LAG(lonlat) OVER (ORDE...
2. 4156.84ms - SELECT "points".* FROM "points" WHERE "points"."id" IN (2163775, 2163776, 2163777, 2163778, 21637...
3. 298.62ms - UPDATE "points" SET "track_id" = $1 WHERE "points"."id" IN ($2, $3, $4, $5, $6, $7, $8, $9, $10, ...

✔️ Post-Generation Validation:
Points in Timeframe: 111609
Points with Tracks: 110123
Points without Tracks: 1486
Track Records: 1435
✅ Data integrity: PASS

🔍 Performance Analysis:
Speed Rating: 🚀 Excellent (56.4s)
Memory Rating: 🧡 High (407.5MB peak)
Recommendation: Consider database optimization or smaller batch sizes

🔮 Extrapolation for Full Dataset:
Full Dataset Size: 1,403,662 points
Scaling Factor: 12.6x
Estimated Full Time: 11m 49.5s
Estimated Full Memory: 5125.0MB

============================================================
📋 BENCHMARK SUMMARY
============================================================
⏱️ Total Time: 56.4s
📍 Points Processed: 111,609
🛤️ Tracks Created: 1435
🚀 Processing Speed: 1978.3 points/second
📅 Timeframe: 2024-01-01 to 2024-12-31
👤 User: demo@dawarich.app (ID: 1)
✅ Status: COMPLETED
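Headline deltas between the two runs above, derived only from the numbers reported there:

    88.5 / 56.4            # => ~1.57, iteration 1 is about 1.6x faster
    1 - 7_178.0 / 115_920  # => ~0.938, about 93.8% fewer queries
    222.3 - 110.3          # => 112.0 MB less memory growth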
@@ -1,7 +1,6 @@
 # frozen_string_literal: true

 module Timestamps
-
   def self.parse_timestamp(timestamp)
     begin
       # if the timestamp is in ISO 8601 format, try to parse it
625
lib/tracks_optimization_benchmark.rb
Normal file
625
lib/tracks_optimization_benchmark.rb
Normal file
|
|
@ -0,0 +1,625 @@
|
||||||
|
# frozen_string_literal: true
|
||||||
|
|
||||||
|
require_relative 'optimized_tracks_v1'
|
||||||
|
require_relative 'optimized_tracks_v2'
|
||||||
|
|
||||||
|
# Benchmark script to compare three different track generation approaches:
|
||||||
|
# - Original: Individual distance queries (current implementation)
|
||||||
|
# - V1: LAG-based distance pre-calculation with Ruby segmentation
|
||||||
|
# - V2: Full SQL segmentation with PostgreSQL window functions
|
||||||
|
#
|
||||||
|
# Usage:
|
||||||
|
# rails runner lib/tracks_optimization_benchmark.rb USER_ID START_DATE END_DATE
|
||||||
|
|
||||||
|
class TracksOptimizationBenchmark
|
||||||
|
attr_reader :user, :start_date, :end_date, :start_timestamp, :end_timestamp
|
||||||
|
|
||||||
|
def initialize(user_id, start_date, end_date)
|
||||||
|
@user = User.find(user_id)
|
||||||
|
@start_date = Date.parse(start_date)
|
||||||
|
@end_date = Date.parse(end_date)
|
||||||
|
@start_timestamp = @start_date.beginning_of_day.to_i
|
||||||
|
@end_timestamp = @end_date.end_of_day.to_i
|
||||||
|
|
||||||
|
puts "🔬 Track Generation Optimization Benchmark"
|
||||||
|
puts "👤 User: #{user.email} (ID: #{user.id})"
|
||||||
|
puts "📅 Timeframe: #{start_date} to #{end_date}"
|
||||||
|
|
||||||
|
check_data_availability
|
||||||
|
end
|
||||||
|
|
||||||
|
def run_all_benchmarks
|
||||||
|
results = {}
|
||||||
|
|
||||||
|
puts "\n" + "=" * 80
|
||||||
|
puts "🏃 RUNNING ALL BENCHMARKS"
|
||||||
|
puts "=" * 80
|
||||||
|
|
||||||
|
# Test Original approach
|
||||||
|
puts "\n1️⃣ Testing ORIGINAL approach..."
|
||||||
|
results[:original] = benchmark_original
|
||||||
|
|
||||||
|
# Test V1 approach
|
||||||
|
puts "\n2️⃣ Testing V1 (LAG + Ruby) approach..."
|
||||||
|
results[:v1] = benchmark_v1
|
||||||
|
|
||||||
|
# Test V2 approach
|
||||||
|
puts "\n3️⃣ Testing V2 (Full SQL) approach..."
|
||||||
|
results[:v2] = benchmark_v2
|
||||||
|
|
||||||
|
# Compare results
|
||||||
|
puts "\n" + "=" * 80
|
||||||
|
puts "📊 PERFORMANCE COMPARISON"
|
||||||
|
puts "=" * 80
|
||||||
|
compare_results(results)
|
||||||
|
|
||||||
|
# Save results to files
|
||||||
|
save_results_to_files(results)
|
||||||
|
|
||||||
|
results
|
||||||
|
end
|
||||||
|
|
||||||
|
private
|
||||||
|
|
||||||
|
def check_data_availability
|
||||||
|
point_count = user.tracked_points.where(timestamp: start_timestamp..end_timestamp).count
|
||||||
|
existing_tracks = user.tracks.where(start_at: Time.zone.at(start_timestamp)..Time.zone.at(end_timestamp)).count
|
||||||
|
|
||||||
|
puts "📊 Dataset: #{point_count.to_s.reverse.gsub(/(\d{3})(?=\d)/, '\\1,').reverse} points"
|
||||||
|
puts "🛤️ Existing tracks: #{existing_tracks}"
|
||||||
|
|
||||||
|
if point_count == 0
|
||||||
|
puts "❌ No points found in timeframe"
|
||||||
|
exit 1
|
||||||
|
end
|
||||||
|
|
||||||
|
if point_count > 50000
|
||||||
|
puts "⚠️ Large dataset detected. This benchmark may take a while..."
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
def benchmark_original
|
||||||
|
puts " Using standard Tracks::Generator..."
|
||||||
|
|
||||||
|
# Clean existing tracks
|
||||||
|
cleanup_tracks
|
||||||
|
|
||||||
|
# Monitor performance
|
||||||
|
memory_start = get_memory_mb
|
||||||
|
query_monitor = QueryMonitor.new
|
||||||
|
query_monitor.start
|
||||||
|
|
||||||
|
start_time = Time.current
|
||||||
|
|
||||||
|
begin
|
||||||
|
generator = Tracks::Generator.new(
|
||||||
|
user,
|
||||||
|
start_at: Time.zone.at(start_timestamp),
|
||||||
|
end_at: Time.zone.at(end_timestamp),
|
||||||
|
mode: :bulk
|
||||||
|
)
|
||||||
|
tracks_created = generator.call
|
||||||
|
success = true
|
||||||
|
rescue => e
|
||||||
|
success = false
|
||||||
|
error = e.message
|
||||||
|
tracks_created = 0
|
||||||
|
end
|
||||||
|
|
||||||
|
end_time = Time.current
|
||||||
|
memory_end = get_memory_mb
|
||||||
|
query_monitor.stop
|
||||||
|
|
||||||
|
execution_time = end_time - start_time
|
||||||
|
|
||||||
|
result = {
|
||||||
|
approach: "Original",
|
||||||
|
success: success,
|
||||||
|
error: error,
|
||||||
|
execution_time: execution_time,
|
||||||
|
tracks_created: tracks_created,
|
||||||
|
memory_increase: memory_end - memory_start,
|
||||||
|
query_count: query_monitor.query_count,
|
||||||
|
query_time_ms: query_monitor.total_time_ms
|
||||||
|
}
|
||||||
|
|
||||||
|
print_result(result)
|
||||||
|
result
|
||||||
|
end
|
||||||
|
|
||||||
|
def benchmark_v1
|
||||||
|
puts " Using V1: LAG + Ruby segmentation..."
|
||||||
|
|
||||||
|
# Clean existing tracks
|
||||||
|
cleanup_tracks
|
||||||
|
|
||||||
|
# For V1, we need to modify the existing generator to use our optimized methods
|
||||||
|
# This is a simplified test - in practice we'd modify the actual generator
|
||||||
|
|
||||||
|
memory_start = get_memory_mb
|
||||||
|
query_monitor = QueryMonitor.new
|
||||||
|
query_monitor.start
|
||||||
|
|
||||||
|
start_time = Time.current
|
||||||
|
|
||||||
|
begin
|
||||||
|
# Load points
|
||||||
|
points = user.tracked_points
|
||||||
|
.where(timestamp: start_timestamp..end_timestamp)
|
||||||
|
.order(:timestamp)
|
||||||
|
|
||||||
|
# V1: Use optimized segmentation with pre-calculated distances
|
||||||
|
if points.size > 1
|
||||||
|
distance_data = Point.calculate_all_consecutive_distances(points)
|
||||||
|
else
|
||||||
|
distance_data = {}
|
||||||
|
end
|
||||||
|
|
||||||
|
# Segment using V1 approach (simplified for benchmark)
|
||||||
|
segments = split_points_with_precalculated_distances(points, distance_data)
|
||||||
|
|
||||||
|
tracks_created = 0
|
||||||
|
segments.each do |segment|
|
||||||
|
if segment.size >= 2
|
||||||
|
track = create_track_v1(segment)
|
||||||
|
tracks_created += 1 if track
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
success = true
|
||||||
|
rescue => e
|
||||||
|
success = false
|
||||||
|
error = e.message
|
||||||
|
tracks_created = 0
|
||||||
|
end
|
||||||
|
|
||||||
|
end_time = Time.current
|
||||||
|
memory_end = get_memory_mb
|
||||||
|
query_monitor.stop
|
||||||
|
|
||||||
|
execution_time = end_time - start_time
|
||||||
|
|
||||||
|
result = {
|
||||||
|
approach: "V1 (LAG + Ruby)",
|
||||||
|
success: success,
|
||||||
|
error: error,
|
||||||
|
execution_time: execution_time,
|
||||||
|
tracks_created: tracks_created,
|
||||||
|
memory_increase: memory_end - memory_start,
|
||||||
|
query_count: query_monitor.query_count,
|
||||||
|
query_time_ms: query_monitor.total_time_ms
|
||||||
|
}
|
||||||
|
|
||||||
|
print_result(result)
|
||||||
|
result
|
||||||
|
end
|
||||||
|
|
||||||
|
def benchmark_v2
|
||||||
|
puts " Using V2: Full SQL segmentation..."
|
||||||
|
|
||||||
|
cleanup_tracks
|
||||||
|
|
||||||
|
memory_start = get_memory_mb
|
||||||
|
query_monitor = QueryMonitor.new
|
||||||
|
query_monitor.start
|
||||||
|
|
||||||
|
start_time = Time.current
|
||||||
|
|
||||||
|
begin
|
||||||
|
generator = OptimizedTracksGeneratorV2.new(
|
||||||
|
user,
|
||||||
|
start_at: Time.zone.at(start_timestamp),
|
||||||
|
end_at: Time.zone.at(end_timestamp),
|
||||||
|
mode: :bulk
|
||||||
|
)
|
||||||
|
tracks_created = generator.call
|
||||||
|
success = true
|
||||||
|
rescue => e
|
||||||
|
success = false
|
||||||
|
error = e.message
|
||||||
|
tracks_created = 0
|
||||||
|
end
|
||||||
|
|
||||||
|
end_time = Time.current
|
||||||
|
memory_end = get_memory_mb
|
||||||
|
query_monitor.stop
|
||||||
|
|
||||||
|
execution_time = end_time - start_time
|
||||||
|
|
||||||
|
result = {
|
||||||
|
approach: "V2 (Full SQL)",
|
||||||
|
success: success,
|
||||||
|
error: error,
|
||||||
|
execution_time: execution_time,
|
||||||
|
tracks_created: tracks_created,
|
||||||
|
memory_increase: memory_end - memory_start,
|
||||||
|
query_count: query_monitor.query_count,
|
||||||
|
query_time_ms: query_monitor.total_time_ms
|
||||||
|
}
|
||||||
|
|
||||||
|
print_result(result)
|
||||||
|
result
|
||||||
|
end
|
||||||
|
|
||||||
|
def split_points_with_precalculated_distances(points, distance_data)
|
||||||
|
return [] if points.empty?
|
||||||
|
|
||||||
|
segments = []
|
||||||
|
current_segment = []
|
||||||
|
|
||||||
|
points.each do |point|
|
||||||
|
if current_segment.empty?
|
||||||
|
current_segment = [point]
|
||||||
|
elsif should_break_segment_v1?(point, current_segment.last, distance_data)
|
||||||
|
segments << current_segment if current_segment.size >= 2
|
||||||
|
current_segment = [point]
|
||||||
|
else
|
||||||
|
current_segment << point
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
segments << current_segment if current_segment.size >= 2
|
||||||
|
segments
|
||||||
|
end
|
||||||
|
|
||||||
|
def should_break_segment_v1?(current_point, previous_point, distance_data)
|
||||||
|
return false if previous_point.nil?
|
||||||
|
|
||||||
|
point_data = distance_data[current_point.id]
|
||||||
|
return false unless point_data
|
||||||
|
|
||||||
|
time_threshold_seconds = user.safe_settings.minutes_between_routes.to_i * 60
|
||||||
|
distance_threshold_meters = user.safe_settings.meters_between_routes.to_i
|
||||||
|
|
||||||
|
return true if point_data[:time_diff_seconds] > time_threshold_seconds
|
||||||
|
return true if point_data[:distance_meters] > distance_threshold_meters
|
||||||
|
|
||||||
|
false
|
||||||
|
end
|
||||||
|
|
||||||
|
def create_track_v1(points)
|
||||||
|
return nil if points.size < 2
|
||||||
|
|
||||||
|
track = Track.new(
|
||||||
|
user_id: user.id,
|
||||||
|
start_at: Time.zone.at(points.first.timestamp),
|
||||||
|
end_at: Time.zone.at(points.last.timestamp),
|
||||||
|
original_path: build_path(points)
|
||||||
|
)
|
||||||
|
|
||||||
|
# Use LAG-based distance calculation
|
||||||
|
track.distance = Point.total_distance_lag(points, :m).round
|
||||||
|
track.duration = points.last.timestamp - points.first.timestamp
|
||||||
|
track.avg_speed = calculate_average_speed(track.distance, track.duration)
|
||||||
|
|
||||||
|
# Elevation stats (same as original)
|
||||||
|
elevation_stats = calculate_elevation_stats(points)
|
||||||
|
track.elevation_gain = elevation_stats[:gain]
|
||||||
|
track.elevation_loss = elevation_stats[:loss]
|
||||||
|
track.elevation_max = elevation_stats[:max]
|
||||||
|
track.elevation_min = elevation_stats[:min]
|
||||||
|
|
||||||
|
if track.save
|
||||||
|
Point.where(id: points.map(&:id)).update_all(track_id: track.id)
|
||||||
|
track
|
||||||
|
else
|
||||||
|
nil
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
def cleanup_tracks
|
||||||
|
user.tracks.where(start_at: Time.zone.at(start_timestamp)..Time.zone.at(end_timestamp)).destroy_all
|
||||||
|
end
|
||||||
|
|
||||||
|
def print_result(result)
|
||||||
|
status = result[:success] ? "✅ SUCCESS" : "❌ FAILED"
|
||||||
|
puts " #{status}"
|
||||||
|
puts " ⏱️ Time: #{format_duration(result[:execution_time])}"
|
||||||
|
puts " 🛤️ Tracks: #{result[:tracks_created]}"
|
||||||
|
puts " 💾 Memory: +#{result[:memory_increase].round(1)}MB"
|
||||||
|
puts " 🗄️ Queries: #{result[:query_count]} (#{result[:query_time_ms].round(1)}ms)"
|
||||||
|
puts " ❌ Error: #{result[:error]}" if result[:error]
|
||||||
|
end
|
||||||
|
|
||||||
|
def compare_results(results)
|
||||||
|
return unless results[:original] && results[:v1] && results[:v2]
|
||||||
|
|
||||||
|
puts sprintf("%-20s %-10s %-12s %-10s %-15s %-10s",
|
||||||
|
"Approach", "Time", "Tracks", "Memory", "Queries", "Query Time")
|
||||||
|
puts "-" * 80
|
||||||
|
|
||||||
|
[:original, :v1, :v2].each do |approach|
|
||||||
|
result = results[approach]
|
||||||
|
next unless result[:success]
|
||||||
|
|
||||||
|
puts sprintf("%-20s %-10s %-12s %-10s %-15s %-10s",
|
||||||
|
result[:approach],
|
||||||
|
format_duration(result[:execution_time]),
|
||||||
|
result[:tracks_created],
|
||||||
|
"+#{result[:memory_increase].round(1)}MB",
|
||||||
|
result[:query_count],
|
||||||
|
"#{result[:query_time_ms].round(1)}ms")
|
||||||
|
end
|
||||||
|
|
||||||
|
# Calculate improvements
|
||||||
|
if results[:original][:success]
|
||||||
|
original_time = results[:original][:execution_time]
|
||||||
|
original_queries = results[:original][:query_count]
|
||||||
|
|
||||||
|
puts "\n🚀 Performance Improvements vs Original:"
|
||||||
|
|
||||||
|
if results[:v1][:success]
|
||||||
|
v1_speedup = (original_time / results[:v1][:execution_time]).round(2)
|
||||||
|
v1_query_reduction = ((original_queries - results[:v1][:query_count]) / original_queries.to_f * 100).round(1)
|
||||||
|
puts " V1: #{v1_speedup}x faster, #{v1_query_reduction}% fewer queries"
|
||||||
|
end
|
||||||
|
|
||||||
|
if results[:v2][:success]
|
||||||
|
v2_speedup = (original_time / results[:v2][:execution_time]).round(2)
|
||||||
|
v2_query_reduction = ((original_queries - results[:v2][:query_count]) / original_queries.to_f * 100).round(1)
|
||||||
|
puts " V2: #{v2_speedup}x faster, #{v2_query_reduction}% fewer queries"
|
||||||
|
end
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
def save_results_to_files(results)
|
||||||
|
timestamp = Time.current.strftime('%Y%m%d_%H%M%S')
|
||||||
|
point_count = user.tracked_points.where(timestamp: start_timestamp..end_timestamp).count
|
||||||
|
|
||||||
|
# Create detailed results structure
|
||||||
|
benchmark_data = {
|
||||||
|
meta: {
|
||||||
|
timestamp: Time.current.iso8601,
|
||||||
|
user_id: user.id,
|
||||||
|
user_email: user.email,
|
||||||
|
start_date: start_date.strftime('%Y-%m-%d'),
|
||||||
|
end_date: end_date.strftime('%Y-%m-%d'),
|
||||||
|
point_count: point_count,
|
||||||
|
ruby_version: RUBY_VERSION,
|
||||||
|
rails_version: Rails.version,
|
||||||
|
database_adapter: ActiveRecord::Base.connection.adapter_name
|
||||||
|
},
|
||||||
|
results: results,
|
||||||
|
performance_analysis: analyze_performance_data(results)
|
||||||
|
}
|
||||||
|
|
||||||
|
# Save JSON results for programmatic analysis
|
||||||
|
json_filename = "tracks_optimization_#{timestamp}.json"
|
||||||
|
json_path = Rails.root.join('lib', json_filename)
|
||||||
|
File.write(json_path, JSON.pretty_generate(benchmark_data))
|
||||||
|
|
||||||
|
# Save human-readable markdown report
|
||||||
|
md_filename = "tracks_optimization_#{timestamp}.md"
|
||||||
|
md_path = Rails.root.join('lib', md_filename)
|
||||||
|
File.write(md_path, generate_markdown_report(benchmark_data))
|
||||||
|
|
||||||
|
puts "\n💾 Results saved:"
|
||||||
|
puts " 📄 JSON: #{json_path}"
|
||||||
|
puts " 📝 Report: #{md_path}"
|
||||||
|
end
|
||||||
|
|
||||||
|
def analyze_performance_data(results)
|
||||||
|
return {} unless results[:original] && results[:original][:success]
|
||||||
|
|
||||||
|
original = results[:original]
|
||||||
|
analysis = {
|
||||||
|
baseline: {
|
||||||
|
execution_time: original[:execution_time],
|
||||||
|
query_count: original[:query_count],
|
||||||
|
memory_usage: original[:memory_increase]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
[:v1, :v2].each do |version|
|
||||||
|
next unless results[version] && results[version][:success]
|
||||||
|
|
||||||
|
result = results[version]
|
||||||
|
analysis[version] = {
|
||||||
|
speedup_factor: (original[:execution_time] / result[:execution_time]).round(2),
|
||||||
|
query_reduction_percent: ((original[:query_count] - result[:query_count]) / original[:query_count].to_f * 100).round(1),
|
||||||
|
memory_change_percent: ((result[:memory_increase] - original[:memory_increase]) / original[:memory_increase].to_f * 100).round(1),
|
||||||
|
execution_time_saved: (original[:execution_time] - result[:execution_time]).round(2)
|
||||||
|
}
|
||||||
|
end
|
||||||
|
|
||||||
|
analysis
|
||||||
|
end
|
||||||
|
|
||||||
|
  def generate_markdown_report(benchmark_data)
    meta = benchmark_data[:meta]
    results = benchmark_data[:results]
    analysis = benchmark_data[:performance_analysis]

    report = <<~MD
      # Tracks Generation Optimization Benchmark Report

      **Generated:** #{meta[:timestamp]}
      **User:** #{meta[:user_email]} (ID: #{meta[:user_id]})
      **Timeframe:** #{meta[:start_date]} to #{meta[:end_date]}
      **Dataset:** #{meta[:point_count].to_s.reverse.gsub(/(\d{3})(?=\d)/, '\\1,').reverse} points
      **Environment:** Ruby #{meta[:ruby_version]}, Rails #{meta[:rails_version]}, #{meta[:database_adapter]}

      ## Summary

      This benchmark compares three approaches to track generation:
      - **Original:** Individual PostGIS queries for each distance calculation
      - **V1 (LAG + Ruby):** PostgreSQL LAG for batch distance calculation, Ruby segmentation
      - **V2 (Full SQL):** Complete segmentation using PostgreSQL window functions

      ## Results

      | Approach | Status | Time | Tracks | Memory | Queries | Query Time |
      |----------|--------|------|--------|--------|---------|------------|
    MD

    [:original, :v1, :v2].each do |approach|
      next unless results[approach]

      result = results[approach]
      status = result[:success] ? "✅" : "❌"

      report += "| #{result[:approach]} | #{status} | #{format_duration(result[:execution_time])} | #{result[:tracks_created]} | +#{result[:memory_increase].round(1)}MB | #{result[:query_count]} | #{result[:query_time_ms].round(1)}ms |\n"
    end

    if analysis[:v1] || analysis[:v2]
      report += "\n## Performance Improvements\n\n"

      if analysis[:v1]
        v1 = analysis[:v1]
        report += "### V1 (LAG + Ruby) vs Original\n"
        report += "- **#{v1[:speedup_factor]}x faster** execution\n"
        report += "- **#{v1[:query_reduction_percent]}% fewer** database queries\n"
        report += "- **#{format_duration(v1[:execution_time_saved])} time saved**\n"
        report += "- Memory change: #{v1[:memory_change_percent] > 0 ? '+' : ''}#{v1[:memory_change_percent]}%\n\n"
      end

      if analysis[:v2]
        v2 = analysis[:v2]
        report += "### V2 (Full SQL) vs Original\n"
        report += "- **#{v2[:speedup_factor]}x faster** execution\n"
        report += "- **#{v2[:query_reduction_percent]}% fewer** database queries\n"
        report += "- **#{format_duration(v2[:execution_time_saved])} time saved**\n"
        report += "- Memory change: #{v2[:memory_change_percent] > 0 ? '+' : ''}#{v2[:memory_change_percent]}%\n\n"
      end
    end

    # Add detailed results
    report += "## Detailed Results\n\n"

    [:original, :v1, :v2].each do |approach|
      next unless results[approach]

      result = results[approach]
      report += "### #{result[:approach]}\n\n"

      if result[:success]
        report += "- ✅ **Status:** Success\n"
        report += "- ⏱️ **Execution Time:** #{format_duration(result[:execution_time])}\n"
        report += "- 🛤️ **Tracks Created:** #{result[:tracks_created]}\n"
        report += "- 💾 **Memory Increase:** +#{result[:memory_increase].round(1)}MB\n"
        report += "- 🗄️ **Database Queries:** #{result[:query_count]}\n"
        report += "- ⚡ **Query Time:** #{result[:query_time_ms].round(1)}ms\n"

        if result[:query_count] > 0
          avg_query_time = (result[:query_time_ms] / result[:query_count]).round(2)
          report += "- 📊 **Average Query Time:** #{avg_query_time}ms\n"
        end
      else
        report += "- ❌ **Status:** Failed\n"
        report += "- 🚨 **Error:** #{result[:error]}\n"
      end

      report += "\n"
    end

    report += "## Recommendations\n\n"

    if analysis[:v2] && analysis[:v2][:speedup_factor] > analysis.dig(:v1, :speedup_factor).to_f
      report += "🚀 **V2 (Full SQL)** shows the best performance with #{analysis[:v2][:speedup_factor]}x speedup.\n\n"
      report += "Benefits:\n"
      report += "- Minimal database queries (#{results.dig(:v2, :query_count)} vs #{results.dig(:original, :query_count)})\n"
      report += "- Fastest execution time\n"
      report += "- Leverages PostgreSQL's optimized window functions\n\n"
    elsif analysis[:v1]
      report += "🏃 **V1 (LAG + Ruby)** provides good performance improvements with #{analysis[:v1][:speedup_factor]}x speedup.\n\n"
    end

    if results[:original] && results[:original][:query_count] > 50_000
      report += "⚠️ **Current implementation** makes excessive database queries (#{results[:original][:query_count]}) for this dataset size.\n\n"
    end

    report += "---\n*Generated by TracksOptimizationBenchmark*"

    report
  end

  # Helper methods

  # Resident set size of the current process in megabytes (shells out to `ps`).
  def get_memory_mb
    `ps -o rss= -p #{Process.pid}`.to_i / 1024.0
  end

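  # Formats a duration as "42.0s" below one minute, otherwise as "2m 5.0s".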
  def format_duration(seconds)
    if seconds < 60
      "#{seconds.round(1)}s"
    else
      minutes = (seconds / 60).floor
      remaining_seconds = (seconds % 60).round(1)
      "#{minutes}m #{remaining_seconds}s"
    end
  end

  def build_path(points)
    Tracks::BuildPath.new(points).call
  end

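  # Average speed in km/h: metres per second multiplied by 3.6.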
  def calculate_average_speed(distance_in_meters, duration_seconds)
    return 0.0 if duration_seconds <= 0 || distance_in_meters <= 0

    speed_mps = distance_in_meters.to_f / duration_seconds
    (speed_mps * 3.6).round(2)
  end

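  # Walks consecutive altitude readings, accumulating positive differences as
  # gain and negative differences as loss.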
  def calculate_elevation_stats(points)
    altitudes = points.map(&:altitude).compact
    return { gain: 0, loss: 0, max: 0, min: 0 } if altitudes.empty?

    elevation_gain = 0
    elevation_loss = 0
    previous_altitude = altitudes.first

    altitudes[1..].each do |altitude|
      diff = altitude - previous_altitude
      if diff > 0
        elevation_gain += diff
      else
        elevation_loss += diff.abs
      end
      previous_altitude = altitude
    end

    { gain: elevation_gain.round, loss: elevation_loss.round, max: altitudes.max, min: altitudes.min }
  end
end

# Simple query monitor for this benchmark
class QueryMonitor
  attr_reader :query_count, :total_time_ms

  def initialize
    @query_count = 0
    @total_time_ms = 0
  end

  def start
    @subscription = ActiveSupport::Notifications.subscribe('sql.active_record') do |*args|
      event = ActiveSupport::Notifications::Event.new(*args)
      # Ignore schema reflection queries so only application SQL is counted
      next if event.payload[:name]&.include?('SCHEMA')

      @query_count += 1
      @total_time_ms += event.duration
    end
  end

  def stop
    ActiveSupport::Notifications.unsubscribe(@subscription) if @subscription
  end
end

# Command line interface
if __FILE__ == $0
  if ARGV.length < 3
    puts "Usage: rails runner #{__FILE__} USER_ID START_DATE END_DATE"
    puts ''
    puts 'Example:'
    puts "  rails runner #{__FILE__} 1 2024-01-01 2024-01-31"
    exit 1
  end

  user_id = ARGV[0].to_i
  start_date = ARGV[1]
  end_date = ARGV[2]

  benchmark = TracksOptimizationBenchmark.new(user_id, start_date, end_date)
  results = benchmark.run_all_benchmarks

  puts "\n🎉 Benchmark completed! Check results above."
end

235  tracks_performance_optimization_options.md  Normal file

@@ -0,0 +1,235 @@
# Tracks Feature Performance Optimization Options

## Current State Analysis

### Performance Characteristics

- **Time Complexity:** O(n log n) where n = number of GPS points
- **Memory Usage:** Loads entire dataset into memory (~200-400 bytes per point)
- **Processing Mode:** Single-threaded, sequential segmentation
- **Database Load:** Multiple PostGIS distance calculations per point pair

### Performance Estimates (Bulk Mode)

| Points | Processing Time | Memory Usage | Database Load |
|--------|-----------------|--------------|---------------|
| 10K    | 30-60 seconds   | ~50 MB       | Low           |
| 100K   | 5-15 minutes    | ~200 MB      | Medium        |
| 1M+    | 30-90 minutes   | 400+ MB      | High          |

### Current Bottlenecks

1. **Memory constraints** - Loading all points at once
2. **PostGIS distance calculations** - Sequential, not optimized
3. **Single-threaded processing** - No parallelization
4. **No progress indication** - Users can't track long-running operations

---

## Optimization Options

### Option 1: Enhanced Time-Based Batching
**Complexity:** Low | **Impact:** High | **Risk:** Low

#### Implementation
- Extend existing `:daily` mode with configurable batch sizes
- Add 1-point overlap between batches to maintain segmentation accuracy
- Implement batch-aware progress reporting

#### Benefits
- **Memory reduction:** 90%+ (from 400 MB to ~40 MB for 1M points)
- **Better UX:** Progress indication and cancellation support
- **Incremental processing:** Can resume interrupted operations
- **Lower DB pressure:** Smaller query result sets

#### Changes Required
```ruby
# Enhanced generator with configurable batching
Tracks::Generator.new(
  user,
  mode: :batched,
  batch_size: 24.hours,
  enable_overlap: true
).call
```

#### Edge Cases to Handle
- Tracks spanning batch boundaries (solved with overlap; see the sketch below)
- Midnight-crossing tracks in daily mode
- Deduplication of overlapping segments

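A minimal sketch of how overlap-aware batching could look. This is an illustration under stated assumptions, not the final design: it presumes a `tracked_points` association with an integer epoch `timestamp` column (as used by the benchmark script above), and the class name, window size, and carry-over handling are placeholders.

```ruby
# Hypothetical sketch of overlap-aware batching (not the shipped implementation).
# Each yielded batch starts with the last point of the previous batch so that
# time/distance gaps spanning a boundary are still detected.
class Tracks::BatchedIterator
  def initialize(user, batch_size: 24.hours)
    @user = user
    @batch_size = batch_size.to_i # window length in seconds
  end

  def each_batch
    scope = @user.tracked_points.order(:timestamp)
    window_start = scope.minimum(:timestamp)
    return unless window_start

    last_timestamp = scope.maximum(:timestamp)
    carried_point = nil

    while window_start <= last_timestamp
      points = scope.where(timestamp: window_start...(window_start + @batch_size)).to_a
      # Prepend the overlap point; it already belongs to the previous batch,
      # so persistence logic must skip it when writing segments (deduplication).
      yield([carried_point, *points].compact) if points.any?

      carried_point = points.last || carried_point
      window_start += @batch_size
    end
  end
end
```

With the carried point excluded on write, batch boundaries add no duplicate segments.
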
---

### Option 2: Spatial Indexing Optimization
**Complexity:** Medium | **Impact:** Medium | **Risk:** Low

#### Implementation
- Replace individual PostGIS calls with batch distance calculations
- Implement spatial clustering for nearby points before segmentation
- Use PostGIS window functions for distance calculations

#### Benefits
- **Faster distance calculations:** Batch operations vs individual queries
- **Reduced DB round-trips:** Single query for multiple distance calculations
- **Better index utilization:** Leverage existing spatial indexes

#### Changes Required
```sql
-- Batch distance calculation approach
WITH point_distances AS (
  SELECT
    id,
    timestamp,
    ST_Distance(
      lonlat::geography,
      LAG(lonlat::geography) OVER (ORDER BY timestamp)
    ) AS distance_to_previous
  FROM points
  WHERE user_id = ?
  ORDER BY timestamp
)
SELECT * FROM point_distances WHERE distance_to_previous > ?
```

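From Ruby, a query of this shape could be issued once per user with bound parameters. A hedged sketch, assuming the `points` schema above; the 500 m threshold and the statement name are purely illustrative:

```ruby
# Illustrative only: runs the LAG-based distance query with bind parameters.
sql = <<~SQL
  WITH point_distances AS (
    SELECT id, timestamp,
           ST_Distance(
             lonlat::geography,
             LAG(lonlat::geography) OVER (ORDER BY timestamp)
           ) AS distance_to_previous
    FROM points
    WHERE user_id = $1
    ORDER BY timestamp
  )
  SELECT * FROM point_distances WHERE distance_to_previous > $2
SQL

binds = [
  ActiveRecord::Relation::QueryAttribute.new('user_id', user.id, ActiveRecord::Type::Integer.new),
  ActiveRecord::Relation::QueryAttribute.new('threshold', 500, ActiveRecord::Type::Integer.new)
]
rows = ActiveRecord::Base.connection.exec_query(sql, 'point_distances', binds)
```
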
---

### Option 3: Parallel Processing with Worker Pools
**Complexity:** High | **Impact:** High | **Risk:** Medium

#### Implementation
- Split large datasets into non-overlapping time ranges
- Process multiple batches in parallel using Sidekiq workers
- Implement a coordination mechanism for dependent segments

#### Benefits
- **Faster processing:** Utilize multiple CPU cores
- **Scalable:** Performance scales with worker capacity
- **Background processing:** Non-blocking for users

#### Challenges
- **Complex coordination:** Managing dependencies between batches
- **Resource competition:** Multiple workers accessing the same user's data
- **Error handling:** Partial failure scenarios

#### Architecture
```ruby
# Parallel processing coordinator
class Tracks::ParallelGenerator
  def call
    time_ranges = split_into_parallel_ranges

    time_ranges.map do |range|
      Tracks::BatchProcessorJob.perform_later(user_id, range)
    end
  end
end
```

---

### Option 4: Incremental Algorithm Enhancement
**Complexity:** Medium | **Impact:** Medium | **Risk:** Medium

#### Implementation
- Enhance existing `:incremental` mode with smarter buffering
- Implement a sliding-window approach for active track detection
- Add automatic track finalization based on time gaps (see the sketch below)

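A minimal sketch of the sliding-window idea under stated assumptions: points arrive ordered by an integer epoch `timestamp`, and the buffer class, 30-minute gap threshold, and `finalize!` hook are hypothetical names, not existing APIs.

```ruby
# Hypothetical sketch: buffer points for the active track and close the
# window automatically once the time gap to the previous point is too large.
class Tracks::SlidingWindowBuffer
  GAP_THRESHOLD = 30 * 60 # seconds; illustrative value

  def initialize(&finalizer)
    @buffer = []
    @finalizer = finalizer # called with each completed window of points
  end

  def push(point)
    finalize! if @buffer.any? && point.timestamp - @buffer.last.timestamp > GAP_THRESHOLD
    @buffer << point
  end

  # Flush the current window as a finished track candidate.
  def finalize!
    @finalizer.call(@buffer) if @buffer.size >= 2
    @buffer = []
  end
end
```

Each arriving point either extends the active window or closes it, so only the current segment stays in memory.
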
#### Benefits
- **Real-time processing:** Process points as they arrive
- **Lower memory footprint:** Only active segments in memory
- **Better for live tracking:** Immediate track updates

#### Current Limitations
- Existing incremental mode processes untracked points only
- No automatic track finalization
- Limited to single active track per user

---

### Option 5: Database-Level Optimization
**Complexity:** Low-Medium | **Impact:** Medium | **Risk:** Low

#### Implementation
- Add composite indexes for common query patterns
- Implement materialized views for expensive calculations
- Use database-level segmentation logic

#### Benefits
- **Faster queries:** Better index utilization
- **Reduced Ruby processing:** Move logic to the database
- **Consistent performance:** Database optimizations benefit all modes

#### Proposed Indexes
```sql
-- Optimized for bulk processing
CREATE INDEX CONCURRENTLY idx_points_user_timestamp_track
ON points(user_id, timestamp) WHERE track_id IS NULL;

-- Optimized for incremental processing
CREATE INDEX CONCURRENTLY idx_points_untracked_timestamp
ON points(timestamp) WHERE track_id IS NULL;
```

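In a Rails app these indexes would typically ship as a migration. A sketch under stated assumptions: the migration class name and version tag are illustrative, and `algorithm: :concurrently` requires running outside a DDL transaction:

```ruby
# Illustrative migration; class name and Rails version tag are placeholders.
class AddPartialIndexesForTrackGeneration < ActiveRecord::Migration[8.0]
  # CREATE INDEX CONCURRENTLY cannot run inside a transaction.
  disable_ddl_transaction!

  def change
    add_index :points, %i[user_id timestamp],
              where: 'track_id IS NULL',
              name: 'idx_points_user_timestamp_track',
              algorithm: :concurrently
    add_index :points, :timestamp,
              where: 'track_id IS NULL',
              name: 'idx_points_untracked_timestamp',
              algorithm: :concurrently
  end
end
```
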
---

## Recommended Implementation Strategy

### Phase 1: Quick Wins (Weeks 1-2)
1. **Implement Enhanced Time-Based Batching** (Option 1)
   - Extend existing daily mode with overlap
   - Add progress reporting
   - Configurable batch sizes

### Phase 2: Database Optimization (Week 3)
2. **Add Database-Level Optimizations** (Option 5)
   - Create optimized indexes
   - Implement batch distance calculations

### Phase 3: Advanced Features (Weeks 4-6)
3. **Spatial Indexing Optimization** (Option 2)
   - Replace individual distance calculations
   - Implement spatial clustering

### Phase 4: Future Enhancements
4. **Parallel Processing** (Option 3) - Consider for v2
5. **Incremental Enhancement** (Option 4) - For real-time features

---

## Risk Assessment

### Low Risk
- **Time-based batching:** Builds on existing daily mode
- **Database indexes:** Standard optimization technique
- **Progress reporting:** UI enhancement only

### Medium Risk
- **Spatial optimization:** Requires careful testing of distance calculations
- **Incremental enhancement:** Changes to existing algorithm logic

### High Risk
- **Parallel processing:** Complex coordination, potential race conditions
- **Major algorithm changes:** Could introduce segmentation bugs

---

## Success Metrics

### Performance Targets
- **Memory usage:** < 100 MB for datasets up to 1M points
- **Processing time:** < 10 minutes for 1M points
- **User experience:** Progress indication and cancellation

### Monitoring Points
- Database query performance
- Memory consumption during processing
- User-reported processing times
- Track generation accuracy (no regression)

---

## Next Steps

1. **Choose initial approach** based on urgency and resources
2. **Create feature branch** for selected optimization
3. **Implement comprehensive testing** including edge cases
4. **Monitor performance** in staging environment
5. **Gradual rollout** with feature flags