dawarich/lib/tracks_optimization_benchmark.rb
2025-07-23 18:21:21 +02:00

625 lines
No EOL
20 KiB
Ruby
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# frozen_string_literal: true
require_relative 'optimized_tracks_v1'
require_relative 'optimized_tracks_v2'
# Benchmark script to compare three different track generation approaches:
# - Original: Individual distance queries (current implementation)
# - V1: LAG-based distance pre-calculation with Ruby segmentation
# - V2: Full SQL segmentation with PostgreSQL window functions
#
# Usage:
# rails runner lib/tracks_optimization_benchmark.rb USER_ID START_DATE END_DATE
# Benchmark harness comparing three track-generation approaches over a
# user's tracked points in a date range:
#   - Original: Tracks::Generator (individual distance queries)
#   - V1:       LAG-based distance pre-calculation + Ruby segmentation
#   - V2:       full SQL segmentation via PostgreSQL window functions
#
# Prints a live report to stdout and writes JSON + Markdown summaries
# into Rails.root/lib.
class TracksOptimizationBenchmark
  attr_reader :user, :start_date, :end_date, :start_timestamp, :end_timestamp

  # user_id    - id of the User whose points are benchmarked.
  # start_date - inclusive start date string, 'YYYY-MM-DD'.
  # end_date   - inclusive end date string, 'YYYY-MM-DD'.
  #
  # Raises ActiveRecord::RecordNotFound for an unknown user and
  # Date::Error for unparseable dates. Exits with status 1 when the
  # timeframe contains no tracked points (see check_data_availability).
  def initialize(user_id, start_date, end_date)
    @user = User.find(user_id)
    @start_date = Date.parse(start_date)
    @end_date = Date.parse(end_date)
    @start_timestamp = @start_date.beginning_of_day.to_i
    @end_timestamp = @end_date.end_of_day.to_i

    puts "🔬 Track Generation Optimization Benchmark"
    puts "👤 User: #{user.email} (ID: #{user.id})"
    puts "📅 Timeframe: #{start_date} to #{end_date}"

    check_data_availability
  end

  # Runs the three benchmarks sequentially, prints a comparison table,
  # saves JSON/Markdown reports, and returns a Hash keyed by
  # :original, :v1 and :v2.
  def run_all_benchmarks
    results = {}

    puts "\n" + "=" * 80
    puts "🏃 RUNNING ALL BENCHMARKS"
    puts "=" * 80

    puts "\n1⃣ Testing ORIGINAL approach..."
    results[:original] = benchmark_original

    puts "\n2⃣ Testing V1 (LAG + Ruby) approach..."
    results[:v1] = benchmark_v1

    puts "\n3⃣ Testing V2 (Full SQL) approach..."
    results[:v2] = benchmark_v2

    puts "\n" + "=" * 80
    puts "📊 PERFORMANCE COMPARISON"
    puts "=" * 80
    compare_results(results)

    save_results_to_files(results)

    results
  end

  private

  # Exits the process (status 1) when the window contains no points;
  # warns when the dataset is large enough to make the benchmark slow.
  def check_data_availability
    point_count = user.tracked_points.where(timestamp: start_timestamp..end_timestamp).count
    existing_tracks = user.tracks.where(start_at: Time.zone.at(start_timestamp)..Time.zone.at(end_timestamp)).count

    puts "📊 Dataset: #{format_number(point_count)} points"
    puts "🛤️ Existing tracks: #{existing_tracks}"

    if point_count == 0
      puts "❌ No points found in timeframe"
      exit 1
    end

    puts "⚠️ Large dataset detected. This benchmark may take a while..." if point_count > 50_000
  end

  # Shared measurement harness for the three benchmark_* methods.
  # Cleans existing tracks, then measures wall-clock time, RSS growth
  # and SQL activity around the given block. The block must return the
  # number of tracks created; any exception is captured into the result
  # Hash instead of propagating, so one failing approach does not abort
  # the whole comparison. Prints and returns the result Hash.
  def run_measured(approach_label)
    cleanup_tracks

    memory_start = get_memory_mb
    query_monitor = QueryMonitor.new
    query_monitor.start
    start_time = Time.current

    begin
      tracks_created = yield
      success = true
      error = nil
    rescue => e
      success = false
      error = e.message
      tracks_created = 0
    end

    end_time = Time.current
    memory_end = get_memory_mb
    query_monitor.stop

    result = {
      approach: approach_label,
      success: success,
      error: error,
      execution_time: end_time - start_time,
      tracks_created: tracks_created,
      memory_increase: memory_end - memory_start,
      query_count: query_monitor.query_count,
      query_time_ms: query_monitor.total_time_ms
    }

    print_result(result)
    result
  end

  # Baseline: the current production generator.
  def benchmark_original
    puts " Using standard Tracks::Generator..."

    run_measured("Original") do
      Tracks::Generator.new(
        user,
        start_at: Time.zone.at(start_timestamp),
        end_at: Time.zone.at(end_timestamp),
        mode: :bulk
      ).call
    end
  end

  # V1: pre-calculates all consecutive distances in one LAG query, then
  # segments and persists tracks in Ruby. This is a simplified stand-in
  # for a modified generator, sufficient for comparative measurement.
  def benchmark_v1
    puts " Using V1: LAG + Ruby segmentation..."

    run_measured("V1 (LAG + Ruby)") do
      points = user.tracked_points
                   .where(timestamp: start_timestamp..end_timestamp)
                   .order(:timestamp)

      distance_data = points.size > 1 ? Point.calculate_all_consecutive_distances(points) : {}

      segments = split_points_with_precalculated_distances(points, distance_data)
      # A segment yields a track only when it has >= 2 points AND the
      # save succeeded (create_track_v1 returns nil on failure).
      segments.count { |segment| segment.size >= 2 && create_track_v1(segment) }
    end
  end

  # V2: segmentation performed entirely inside PostgreSQL.
  def benchmark_v2
    puts " Using V2: Full SQL segmentation..."

    run_measured("V2 (Full SQL)") do
      OptimizedTracksGeneratorV2.new(
        user,
        start_at: Time.zone.at(start_timestamp),
        end_at: Time.zone.at(end_timestamp),
        mode: :bulk
      ).call
    end
  end

  # Splits timestamp-ordered points into segments wherever the
  # pre-calculated time/distance gap exceeds the user's thresholds.
  # Segments shorter than two points are discarded.
  def split_points_with_precalculated_distances(points, distance_data)
    return [] if points.empty?

    segments = []
    current_segment = []

    points.each do |point|
      if current_segment.empty?
        current_segment = [point]
      elsif should_break_segment_v1?(point, current_segment.last, distance_data)
        segments << current_segment if current_segment.size >= 2
        current_segment = [point]
      else
        current_segment << point
      end
    end

    segments << current_segment if current_segment.size >= 2
    segments
  end

  # True when the gap before current_point exceeds the user's configured
  # time (minutes_between_routes) or distance (meters_between_routes)
  # thresholds. Points absent from distance_data never break a segment.
  def should_break_segment_v1?(current_point, previous_point, distance_data)
    return false if previous_point.nil?

    point_data = distance_data[current_point.id]
    return false unless point_data

    time_threshold_seconds = user.safe_settings.minutes_between_routes.to_i * 60
    distance_threshold_meters = user.safe_settings.meters_between_routes.to_i

    return true if point_data[:time_diff_seconds] > time_threshold_seconds
    return true if point_data[:distance_meters] > distance_threshold_meters

    false
  end

  # Builds and persists a Track from a segment of points, using the
  # LAG-based batch distance calculation. Returns the saved Track, or
  # nil when the segment is too short or the save failed.
  def create_track_v1(points)
    return nil if points.size < 2

    track = Track.new(
      user_id: user.id,
      start_at: Time.zone.at(points.first.timestamp),
      end_at: Time.zone.at(points.last.timestamp),
      original_path: build_path(points)
    )

    track.distance = Point.total_distance_lag(points, :m).round
    track.duration = points.last.timestamp - points.first.timestamp
    track.avg_speed = calculate_average_speed(track.distance, track.duration)

    # Elevation stats computed the same way as the original approach.
    elevation_stats = calculate_elevation_stats(points)
    track.elevation_gain = elevation_stats[:gain]
    track.elevation_loss = elevation_stats[:loss]
    track.elevation_max = elevation_stats[:max]
    track.elevation_min = elevation_stats[:min]

    if track.save
      Point.where(id: points.map(&:id)).update_all(track_id: track.id)
      track
    else
      nil
    end
  end

  # Removes tracks overlapping the benchmark window so every approach
  # starts from an identical state.
  def cleanup_tracks
    user.tracks.where(start_at: Time.zone.at(start_timestamp)..Time.zone.at(end_timestamp)).destroy_all
  end

  # Prints one approach's result Hash to stdout.
  def print_result(result)
    status = result[:success] ? "✅ SUCCESS" : "❌ FAILED"
    puts " #{status}"
    puts " ⏱️ Time: #{format_duration(result[:execution_time])}"
    puts " 🛤️ Tracks: #{result[:tracks_created]}"
    puts " 💾 Memory: +#{result[:memory_increase].round(1)}MB"
    puts " 🗄️ Queries: #{result[:query_count]} (#{result[:query_time_ms].round(1)}ms)"
    puts " ❌ Error: #{result[:error]}" if result[:error]
  end

  # Prints the side-by-side comparison table, then the speedup and
  # query-reduction summary versus the original approach.
  def compare_results(results)
    return unless results[:original] && results[:v1] && results[:v2]

    puts sprintf("%-20s %-10s %-12s %-10s %-15s %-10s",
      "Approach", "Time", "Tracks", "Memory", "Queries", "Query Time")
    puts "-" * 80

    %i[original v1 v2].each do |approach|
      result = results[approach]
      next unless result[:success]

      puts sprintf("%-20s %-10s %-12s %-10s %-15s %-10s",
        result[:approach],
        format_duration(result[:execution_time]),
        result[:tracks_created],
        "+#{result[:memory_increase].round(1)}MB",
        result[:query_count],
        "#{result[:query_time_ms].round(1)}ms")
    end

    # Improvements only make sense against a successful baseline.
    return unless results[:original][:success]

    original_time = results[:original][:execution_time]
    original_queries = results[:original][:query_count]

    puts "\n🚀 Performance Improvements vs Original:"

    if results[:v1][:success]
      v1_speedup = (original_time / results[:v1][:execution_time]).round(2)
      v1_query_reduction = percent_reduction(original_queries, results[:v1][:query_count])
      puts " V1: #{v1_speedup}x faster, #{v1_query_reduction}% fewer queries"
    end

    if results[:v2][:success]
      v2_speedup = (original_time / results[:v2][:execution_time]).round(2)
      v2_query_reduction = percent_reduction(original_queries, results[:v2][:query_count])
      puts " V2: #{v2_speedup}x faster, #{v2_query_reduction}% fewer queries"
    end
  end

  # Writes a machine-readable JSON file and a human-readable Markdown
  # report into Rails.root/lib, stamped with the current time.
  def save_results_to_files(results)
    timestamp = Time.current.strftime('%Y%m%d_%H%M%S')
    point_count = user.tracked_points.where(timestamp: start_timestamp..end_timestamp).count

    benchmark_data = {
      meta: {
        timestamp: Time.current.iso8601,
        user_id: user.id,
        user_email: user.email,
        start_date: start_date.strftime('%Y-%m-%d'),
        end_date: end_date.strftime('%Y-%m-%d'),
        point_count: point_count,
        ruby_version: RUBY_VERSION,
        rails_version: Rails.version,
        database_adapter: ActiveRecord::Base.connection.adapter_name
      },
      results: results,
      performance_analysis: analyze_performance_data(results)
    }

    # JSON for programmatic analysis.
    json_filename = "tracks_optimization_#{timestamp}.json"
    json_path = Rails.root.join('lib', json_filename)
    File.write(json_path, JSON.pretty_generate(benchmark_data))

    # Markdown for humans.
    md_filename = "tracks_optimization_#{timestamp}.md"
    md_path = Rails.root.join('lib', md_filename)
    File.write(md_path, generate_markdown_report(benchmark_data))

    puts "\n💾 Results saved:"
    puts " 📄 JSON: #{json_path}"
    puts " 📝 Report: #{md_path}"
  end

  # Derives speedup factors and query/memory deltas for V1/V2 relative
  # to the original run. Returns {} when the baseline failed.
  def analyze_performance_data(results)
    return {} unless results[:original] && results[:original][:success]

    original = results[:original]
    analysis = {
      baseline: {
        execution_time: original[:execution_time],
        query_count: original[:query_count],
        memory_usage: original[:memory_increase]
      }
    }

    %i[v1 v2].each do |version|
      next unless results[version] && results[version][:success]

      result = results[version]
      analysis[version] = {
        speedup_factor: (original[:execution_time] / result[:execution_time]).round(2),
        query_reduction_percent: percent_reduction(original[:query_count], result[:query_count]),
        memory_change_percent: percent_change(original[:memory_increase], result[:memory_increase]),
        execution_time_saved: (original[:execution_time] - result[:execution_time]).round(2)
      }
    end

    analysis
  end

  # Renders the full Markdown report string from the benchmark data
  # Hash produced by save_results_to_files.
  def generate_markdown_report(benchmark_data)
    meta = benchmark_data[:meta]
    results = benchmark_data[:results]
    analysis = benchmark_data[:performance_analysis]

    report = <<~MD
      # Tracks Generation Optimization Benchmark Report

      **Generated:** #{meta[:timestamp]}
      **User:** #{meta[:user_email]} (ID: #{meta[:user_id]})
      **Timeframe:** #{meta[:start_date]} to #{meta[:end_date]}
      **Dataset:** #{format_number(meta[:point_count])} points
      **Environment:** Ruby #{meta[:ruby_version]}, Rails #{meta[:rails_version]}, #{meta[:database_adapter]}

      ## Summary

      This benchmark compares three approaches to track generation:

      - **Original:** Individual PostGIS queries for each distance calculation
      - **V1 (LAG + Ruby):** PostgreSQL LAG for batch distance calculation, Ruby segmentation
      - **V2 (Full SQL):** Complete segmentation using PostgreSQL window functions

      ## Results

      | Approach | Status | Time | Tracks | Memory | Queries | Query Time |
      |----------|--------|------|--------|--------|---------|------------|
    MD

    %i[original v1 v2].each do |approach|
      next unless results[approach]

      result = results[approach]
      # Explicit markers; the previous version used invisible characters
      # here, leaving the Status column visually empty.
      status = result[:success] ? "✅" : "❌"
      report += "| #{result[:approach]} | #{status} | #{format_duration(result[:execution_time])} | #{result[:tracks_created]} | +#{result[:memory_increase].round(1)}MB | #{result[:query_count]} | #{result[:query_time_ms].round(1)}ms |\n"
    end

    if analysis[:v1] || analysis[:v2]
      report += "\n## Performance Improvements\n\n"

      if analysis[:v1]
        v1 = analysis[:v1]
        report += "### V1 (LAG + Ruby) vs Original\n"
        report += "- **#{v1[:speedup_factor]}x faster** execution\n"
        report += "- **#{v1[:query_reduction_percent]}% fewer** database queries\n"
        report += "- **#{format_duration(v1[:execution_time_saved])} time saved**\n"
        report += "- Memory change: #{v1[:memory_change_percent] > 0 ? '+' : ''}#{v1[:memory_change_percent]}%\n\n"
      end

      if analysis[:v2]
        v2 = analysis[:v2]
        report += "### V2 (Full SQL) vs Original\n"
        report += "- **#{v2[:speedup_factor]}x faster** execution\n"
        report += "- **#{v2[:query_reduction_percent]}% fewer** database queries\n"
        report += "- **#{format_duration(v2[:execution_time_saved])} time saved**\n"
        report += "- Memory change: #{v2[:memory_change_percent] > 0 ? '+' : ''}#{v2[:memory_change_percent]}%\n\n"
      end
    end

    report += "## Detailed Results\n\n"

    %i[original v1 v2].each do |approach|
      next unless results[approach]

      result = results[approach]
      report += "### #{result[:approach]}\n\n"

      if result[:success]
        report += "- ✅ **Status:** Success\n"
        report += "- ⏱️ **Execution Time:** #{format_duration(result[:execution_time])}\n"
        report += "- 🛤️ **Tracks Created:** #{result[:tracks_created]}\n"
        report += "- 💾 **Memory Increase:** +#{result[:memory_increase].round(1)}MB\n"
        report += "- 🗄️ **Database Queries:** #{result[:query_count]}\n"
        report += "- ⚡ **Query Time:** #{result[:query_time_ms].round(1)}ms\n"

        if result[:query_count] > 0
          avg_query_time = (result[:query_time_ms] / result[:query_count]).round(2)
          report += "- 📊 **Average Query Time:** #{avg_query_time}ms\n"
        end
      else
        report += "- ❌ **Status:** Failed\n"
        report += "- 🚨 **Error:** #{result[:error]}\n"
      end

      report += "\n"
    end

    report += "## Recommendations\n\n"

    if analysis[:v2] && analysis[:v2][:speedup_factor] > analysis.dig(:v1, :speedup_factor).to_f
      report += "🚀 **V2 (Full SQL)** shows the best performance with #{analysis[:v2][:speedup_factor]}x speedup.\n\n"
      report += "Benefits:\n"
      report += "- Minimal database queries (#{results.dig(:v2, :query_count)} vs #{results.dig(:original, :query_count)})\n"
      report += "- Fastest execution time\n"
      report += "- Leverages PostgreSQL's optimized window functions\n\n"
    elsif analysis[:v1]
      report += "🏃 **V1 (LAG + Ruby)** provides good performance improvements with #{analysis[:v1][:speedup_factor]}x speedup.\n\n"
    end

    if results[:original] && results[:original][:query_count] > 50000
      report += "⚠️ **Current implementation** makes excessive database queries (#{results[:original][:query_count]}) for this dataset size.\n\n"
    end

    report += "---\n*Generated by TracksOptimizationBenchmark*"
    report
  end

  # Resident set size of the current process in megabytes (Unix `ps`).
  def get_memory_mb
    `ps -o rss= -p #{Process.pid}`.to_i / 1024.0
  end

  # "42.3s" below one minute, otherwise "3m 12.5s".
  def format_duration(seconds)
    if seconds < 60
      "#{seconds.round(1)}s"
    else
      minutes = (seconds / 60).floor
      remaining_seconds = (seconds % 60).round(1)
      "#{minutes}m #{remaining_seconds}s"
    end
  end

  # Adds thousands separators: 1234567 -> "1,234,567".
  def format_number(number)
    number.to_s.reverse.gsub(/(\d{3})(?=\d)/, '\\1,').reverse
  end

  # Percentage reduction from old_value to new_value, rounded to one
  # decimal. Returns 0.0 when the baseline is zero, avoiding the
  # NaN/Infinity the previous inline division produced.
  def percent_reduction(old_value, new_value)
    return 0.0 if old_value.zero?

    ((old_value - new_value) / old_value.to_f * 100).round(1)
  end

  # Signed percentage change from old_value to new_value; 0.0 when the
  # baseline is zero.
  def percent_change(old_value, new_value)
    return 0.0 if old_value.zero?

    ((new_value - old_value) / old_value.to_f * 100).round(1)
  end

  def build_path(points)
    Tracks::BuildPath.new(points).call
  end

  # Average speed in km/h from meters and seconds; 0.0 for zero or
  # negative distance/duration.
  def calculate_average_speed(distance_in_meters, duration_seconds)
    return 0.0 if duration_seconds <= 0 || distance_in_meters <= 0

    speed_mps = distance_in_meters.to_f / duration_seconds
    (speed_mps * 3.6).round(2)
  end

  # Cumulative elevation gain/loss plus min/max altitude across the
  # segment; points without an altitude are ignored.
  def calculate_elevation_stats(points)
    altitudes = points.map(&:altitude).compact
    return { gain: 0, loss: 0, max: 0, min: 0 } if altitudes.empty?

    elevation_gain = 0
    elevation_loss = 0

    altitudes.each_cons(2) do |previous_altitude, altitude|
      diff = altitude - previous_altitude
      if diff > 0
        elevation_gain += diff
      else
        elevation_loss += diff.abs
      end
    end

    { gain: elevation_gain.round, loss: elevation_loss.round, max: altitudes.max, min: altitudes.min }
  end
end
# Simple SQL query monitor for this benchmark: counts queries and
# accumulates their duration (ms) via ActiveSupport::Notifications,
# skipping schema queries.
class QueryMonitor
  attr_reader :query_count, :total_time_ms

  def initialize
    @query_count = 0
    @total_time_ms = 0.0
    @subscription = nil
  end

  # Begins monitoring 'sql.active_record' events. Restartable: resets
  # the counters and detaches any subscription left from a previous
  # start (the previous version leaked subscriptions and kept stale
  # counts when reused).
  def start
    stop
    @query_count = 0
    @total_time_ms = 0.0

    @subscription = ActiveSupport::Notifications.subscribe('sql.active_record') do |*args|
      event = ActiveSupport::Notifications::Event.new(*args)
      # Ignore Rails' own schema introspection queries.
      next if event.payload[:name]&.include?('SCHEMA')

      @query_count += 1
      @total_time_ms += event.duration
    end
  end

  # Detaches the subscriber; safe to call multiple times.
  def stop
    return unless @subscription

    ActiveSupport::Notifications.unsubscribe(@subscription)
    @subscription = nil
  end
end
# Command line interface — runs only when this file is executed
# directly, e.g.:
#   rails runner lib/tracks_optimization_benchmark.rb 1 2024-01-01 2024-01-31
if __FILE__ == $PROGRAM_NAME
  if ARGV.length < 3
    puts "Usage: rails runner #{__FILE__} USER_ID START_DATE END_DATE"
    puts ""
    puts "Example:"
    puts " rails runner #{__FILE__} 1 2024-01-01 2024-01-31"
    exit 1
  end

  user_id = ARGV[0].to_i
  start_date = ARGV[1]
  end_date = ARGV[2]

  benchmark = TracksOptimizationBenchmark.new(user_id, start_date, end_date)
  benchmark.run_all_benchmarks

  puts "\n🎉 Benchmark completed! Check results above."
end