Update calculation of time spent in a country for year-end digest email

This commit is contained in:
Eugene Burmakin 2026-01-03 13:20:02 +01:00
parent b4c2def2be
commit c3ed22bb5e
7 changed files with 177 additions and 41 deletions

View file

@ -1 +1 @@
0.37.1
0.37.2

View file

@ -4,6 +4,12 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](http://keepachangelog.com/)
and this project adheres to [Semantic Versioning](http://semver.org/).
# [0.37.2] - 2026-01-03
## Fixed
- Time spent in a country and city is now calculated correctly for the year-end digest email. #2104
# [0.37.1] - 2025-12-30
## Fixed

View file

@ -88,35 +88,85 @@ module Users
end
def calculate_time_spent
country_time = Hash.new(0)
{
'countries' => calculate_country_time_spent,
'cities' => calculate_city_time_spent
}
end
def calculate_country_time_spent
country_days = build_country_days_map
# Convert days to minutes (days * 24 * 60) and return top 10
country_days
.transform_values { |days| days.size * 24 * 60 }
.sort_by { |_, minutes| -minutes }
.first(10)
.map { |name, minutes| { 'name' => name, 'minutes' => minutes } }
end
def build_country_days_map
year_points = fetch_year_points_with_country
country_days = Hash.new { |h, k| h[k] = Set.new }
year_points.each do |point|
date = Time.zone.at(point.timestamp).to_date
country_days[point.country_name].add(date)
end
country_days
end
def fetch_year_points_with_country
start_of_year = Time.zone.local(year, 1, 1, 0, 0, 0)
end_of_year = start_of_year.end_of_year
user.points
.where('timestamp >= ? AND timestamp <= ?', start_of_year.to_i, end_of_year.to_i)
.where.not(country_name: [nil, ''])
.select(:country_name, :timestamp)
end
def calculate_city_time_spent
city_time = aggregate_city_time_from_monthly_stats
city_time
.sort_by { |_, minutes| -minutes }
.first(10)
.map { |name, minutes| { 'name' => name, 'minutes' => minutes } }
end
def aggregate_city_time_from_monthly_stats
city_time = Hash.new(0)
monthly_stats.each do |stat|
toponyms = stat.toponyms
next unless toponyms.is_a?(Array)
toponyms.each do |toponym|
next unless toponym.is_a?(Hash)
country = toponym['country']
next unless toponym['cities'].is_a?(Array)
toponym['cities'].each do |city|
next unless city.is_a?(Hash)
stayed_for = city['stayed_for'].to_i
city_name = city['city']
country_time[country] += stayed_for if country.present?
city_time[city_name] += stayed_for if city_name.present?
end
end
process_stat_toponyms(stat, city_time)
end
{
'countries' => country_time.sort_by { |_, v| -v }.first(10).map { |name, minutes| { 'name' => name, 'minutes' => minutes } },
'cities' => city_time.sort_by { |_, v| -v }.first(10).map { |name, minutes| { 'name' => name, 'minutes' => minutes } }
}
city_time
end
def process_stat_toponyms(stat, city_time)
toponyms = stat.toponyms
return unless toponyms.is_a?(Array)
toponyms.each do |toponym|
process_toponym_cities(toponym, city_time)
end
end
def process_toponym_cities(toponym, city_time)
return unless toponym.is_a?(Hash)
return unless toponym['cities'].is_a?(Array)
toponym['cities'].each do |city|
next unless city.is_a?(Hash)
stayed_for = city['stayed_for'].to_i
city_name = city['city']
city_time[city_name] += stayed_for if city_name.present?
end
end
def calculate_first_time_visits

View file

@ -3,21 +3,19 @@ class InstallRailsPulseTables < ActiveRecord::Migration[8.0]
def change
# Load and execute the Rails Pulse schema directly
# This ensures the migration is always in sync with the schema file
schema_file = File.join(::Rails.root.to_s, "db/rails_pulse_schema.rb")
schema_file = Rails.root.join('db/rails_pulse_schema.rb').to_s
if File.exist?(schema_file)
say "Loading Rails Pulse schema from db/rails_pulse_schema.rb"
raise 'Rails Pulse schema file not found at db/rails_pulse_schema.rb' unless File.exist?(schema_file)
# Load the schema file to define RailsPulse::Schema
load schema_file
say 'Loading Rails Pulse schema from db/rails_pulse_schema.rb'
# Execute the schema in the context of this migration
RailsPulse::Schema.call(connection)
# Load the schema file to define RailsPulse::Schema
load schema_file
say "Rails Pulse tables created successfully"
say "The schema file db/rails_pulse_schema.rb remains as your single source of truth"
else
raise "Rails Pulse schema file not found at db/rails_pulse_schema.rb"
end
# Execute the schema in the context of this migration
RailsPulse::Schema.call(connection)
say 'Rails Pulse tables created successfully'
say 'The schema file db/rails_pulse_schema.rb remains as your single source of truth'
end
end

View file

@ -0,0 +1,21 @@
class AddIndexesToPointsForStatsQuery < ActiveRecord::Migration[8.0]
disable_ddl_transaction!
def change
# Index for counting reverse geocoded points
# This speeds up: COUNT(reverse_geocoded_at)
add_index :points, [:user_id, :reverse_geocoded_at],
where: "reverse_geocoded_at IS NOT NULL",
algorithm: :concurrently,
if_not_exists: true,
name: 'index_points_on_user_id_and_reverse_geocoded_at'
# Index for finding points with empty geodata
# This speeds up: COUNT(CASE WHEN geodata = '{}'::jsonb THEN 1 END)
add_index :points, [:user_id, :geodata],
where: "geodata = '{}'::jsonb",
algorithm: :concurrently,
if_not_exists: true,
name: 'index_points_on_user_id_and_empty_geodata'
end
end

3
db/schema.rb generated
View file

@ -10,7 +10,7 @@
#
# It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema[8.0].define(version: 2025_12_28_163703) do
ActiveRecord::Schema[8.0].define(version: 2026_01_03_114630) do
# These are extensions that must be enabled in order to support this database
enable_extension "pg_catalog.plpgsql"
enable_extension "postgis"
@ -260,6 +260,7 @@ ActiveRecord::Schema[8.0].define(version: 2025_12_28_163703) do
t.index ["track_id"], name: "index_points_on_track_id"
t.index ["user_id", "city"], name: "idx_points_user_city"
t.index ["user_id", "country_name"], name: "idx_points_user_country_name"
t.index ["user_id", "geodata"], name: "index_points_on_user_id_and_empty_geodata", where: "(geodata = '{}'::jsonb)"
t.index ["user_id", "reverse_geocoded_at"], name: "index_points_on_user_id_and_reverse_geocoded_at", where: "(reverse_geocoded_at IS NOT NULL)"
t.index ["user_id", "timestamp", "track_id"], name: "idx_points_track_generation"
t.index ["user_id", "timestamp"], name: "idx_points_user_visit_null_timestamp", where: "(visit_id IS NULL)"

View file

@ -77,18 +77,78 @@ RSpec.describe Users::Digests::CalculateYear do
end
it 'calculates time spent by location' do
# Create points to enable country time calculation based on unique days
jan_1 = Time.zone.local(2024, 1, 1, 10, 0, 0).to_i
jan_2 = Time.zone.local(2024, 1, 2, 10, 0, 0).to_i
feb_1 = Time.zone.local(2024, 2, 1, 10, 0, 0).to_i
create(:point, user: user, timestamp: jan_1, country_name: 'Germany', city: 'Berlin')
create(:point, user: user, timestamp: jan_2, country_name: 'Germany', city: 'Munich')
create(:point, user: user, timestamp: feb_1, country_name: 'France', city: 'Paris')
countries = calculate_digest.time_spent_by_location['countries']
cities = calculate_digest.time_spent_by_location['cities']
expect(countries.first['name']).to eq('Germany')
expect(countries.first['minutes']).to eq(720) # 480 + 240
# Countries: based on unique days (2 days in Germany, 1 day in France)
germany_country = countries.find { |c| c['name'] == 'Germany' }
expect(germany_country['minutes']).to eq(2 * 24 * 60) # 2 days = 2880 minutes
# Cities: based on stayed_for from monthly stats (sum across months)
expect(cities.first['name']).to eq('Berlin')
expect(cities.first['minutes']).to eq(480)
end
it 'calculates all time stats' do
expect(calculate_digest.all_time_stats['total_distance']).to eq('125000')
end
context 'when user visits same country across multiple months' do
it 'does not double-count days' do
# Create a user who was in Germany for 10 days in March and 10 days in July
# If we summed the stayed_for values from cities, we might get inflated numbers
# The fix counts unique days to prevent exceeding 365 days per year
mar_start = Time.zone.local(2024, 3, 1, 10, 0, 0).to_i
jul_start = Time.zone.local(2024, 7, 1, 10, 0, 0).to_i
# Create 10 days of points in March
10.times do |i|
timestamp = mar_start + (i * 24 * 60 * 60)
create(:point, user: user, timestamp: timestamp, country_name: 'Germany', city: 'Berlin')
end
# Create 10 days of points in July
10.times do |i|
timestamp = jul_start + (i * 24 * 60 * 60)
create(:point, user: user, timestamp: timestamp, country_name: 'Germany', city: 'Munich')
end
# Create the monthly stats (simulating what would be created by the stats calculation)
create(:stat, user: user, year: 2024, month: 3, distance: 10_000, toponyms: [
{ 'country' => 'Germany', 'cities' => [
{ 'city' => 'Berlin', 'stayed_for' => 14_400 } # 10 days in minutes
] }
])
create(:stat, user: user, year: 2024, month: 7, distance: 15_000, toponyms: [
{ 'country' => 'Germany', 'cities' => [
{ 'city' => 'Munich', 'stayed_for' => 14_400 } # 10 days in minutes
] }
])
digest = calculate_digest
countries = digest.time_spent_by_location['countries']
germany = countries.find { |c| c['name'] == 'Germany' }
# Should be 20 days total (10 unique days in Mar + 10 unique days in Jul)
expected_minutes = 20 * 24 * 60 # 28,800 minutes
expect(germany['minutes']).to eq(expected_minutes)
# Verify this is less than 365 days (the bug would cause inflated numbers)
total_days = germany['minutes'] / (24 * 60)
expect(total_days).to be <= 365
end
end
context 'when digest already exists' do
let!(:existing_digest) do
create(:users_digest, user: user, year: 2024, period_type: :yearly, distance: 10_000)