Consider MIN_MINUTES_SPENT_IN_CITY during stats calculation

This commit is contained in:
Eugene Burmakin 2025-12-08 21:32:41 +01:00
parent 6cc8ba0fbd
commit 6cfea8f1b7
5 changed files with 134 additions and 6 deletions

View file

@ -10,6 +10,10 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
- Setting `ARCHIVE_RAW_DATA` env var to true will enable monthly raw data archiving for all users. It will look for points older than 2 months with `raw_data` column not empty and create a zip archive containing raw data files for each month. After successful archiving, raw data will be removed from the database to save space. Monthly archiving job is being run every day at 2:00 AM. Default env var value is false. - Setting `ARCHIVE_RAW_DATA` env var to true will enable monthly raw data archiving for all users. It will look for points older than 2 months with `raw_data` column not empty and create a zip archive containing raw data files for each month. After successful archiving, raw data will be removed from the database to save space. Monthly archiving job is being run every day at 2:00 AM. Default env var value is false.
## Fixed
- Cities visited during a trip are now being calculated correctly. #547
# [0.36.2] - 2025-12-06 # [0.36.2] - 2025-12-06

View file

@ -66,8 +66,7 @@ class Stats::CalculateMonth
.points .points
.without_raw_data .without_raw_data
.where(timestamp: start_timestamp..end_timestamp) .where(timestamp: start_timestamp..end_timestamp)
.select(:city, :country_name) .select(:city, :country_name, :timestamp)
.distinct
CountriesAndCities.new(toponym_points).call CountriesAndCities.new(toponym_points).call
end end

View file

@ -0,0 +1,18 @@
# frozen_string_literal: true
class AddCompositeIndexToStats < ActiveRecord::Migration[8.0]
disable_ddl_transaction!
def change
# Add composite index for the most common stats lookup pattern:
# Stat.find_or_initialize_by(year:, month:, user:)
# This query is called on EVERY stats calculation
#
# Using algorithm: :concurrently to avoid locking the table during index creation
# This is crucial for production deployments with existing data
add_index :stats, %i[user_id year month],
name: 'index_stats_on_user_id_year_month',
unique: true,
algorithm: :concurrently
end
end

7
db/schema.rb generated
View file

@ -10,7 +10,7 @@
# #
# It's strongly recommended that you check this file into your version control system. # It's strongly recommended that you check this file into your version control system.
ActiveRecord::Schema[8.0].define(version: 2025_12_06_000004) do ActiveRecord::Schema[8.0].define(version: 2025_12_08_210410) do
# These are extensions that must be enabled in order to support this database # These are extensions that must be enabled in order to support this database
enable_extension "pg_catalog.plpgsql" enable_extension "pg_catalog.plpgsql"
enable_extension "postgis" enable_extension "postgis"
@ -226,8 +226,6 @@ ActiveRecord::Schema[8.0].define(version: 2025_12_06_000004) do
t.string "country_name" t.string "country_name"
t.boolean "raw_data_archived", default: false, null: false t.boolean "raw_data_archived", default: false, null: false
t.bigint "raw_data_archive_id" t.bigint "raw_data_archive_id"
t.integer "timestamp_year"
t.integer "timestamp_month"
t.index ["altitude"], name: "index_points_on_altitude" t.index ["altitude"], name: "index_points_on_altitude"
t.index ["battery"], name: "index_points_on_battery" t.index ["battery"], name: "index_points_on_battery"
t.index ["battery_status"], name: "index_points_on_battery_status" t.index ["battery_status"], name: "index_points_on_battery_status"
@ -251,7 +249,6 @@ ActiveRecord::Schema[8.0].define(version: 2025_12_06_000004) do
t.index ["user_id", "country_name"], name: "idx_points_user_country_name" t.index ["user_id", "country_name"], name: "idx_points_user_country_name"
t.index ["user_id", "reverse_geocoded_at"], name: "index_points_on_user_id_and_reverse_geocoded_at", where: "(reverse_geocoded_at IS NOT NULL)" t.index ["user_id", "reverse_geocoded_at"], name: "index_points_on_user_id_and_reverse_geocoded_at", where: "(reverse_geocoded_at IS NOT NULL)"
t.index ["user_id", "timestamp", "track_id"], name: "idx_points_track_generation" t.index ["user_id", "timestamp", "track_id"], name: "idx_points_track_generation"
t.index ["user_id", "timestamp_year", "timestamp_month", "raw_data_archived"], name: "index_points_on_user_time_archived"
t.index ["user_id"], name: "index_points_on_user_id" t.index ["user_id"], name: "index_points_on_user_id"
t.index ["visit_id"], name: "index_points_on_visit_id" t.index ["visit_id"], name: "index_points_on_visit_id"
end end
@ -288,6 +285,7 @@ ActiveRecord::Schema[8.0].define(version: 2025_12_06_000004) do
t.index ["h3_hex_ids"], name: "index_stats_on_h3_hex_ids", where: "((h3_hex_ids IS NOT NULL) AND (h3_hex_ids <> '{}'::jsonb))", using: :gin t.index ["h3_hex_ids"], name: "index_stats_on_h3_hex_ids", where: "((h3_hex_ids IS NOT NULL) AND (h3_hex_ids <> '{}'::jsonb))", using: :gin
t.index ["month"], name: "index_stats_on_month" t.index ["month"], name: "index_stats_on_month"
t.index ["sharing_uuid"], name: "index_stats_on_sharing_uuid", unique: true t.index ["sharing_uuid"], name: "index_stats_on_sharing_uuid", unique: true
t.index ["user_id", "year", "month"], name: "index_stats_on_user_id_year_month", unique: true
t.index ["user_id"], name: "index_stats_on_user_id" t.index ["user_id"], name: "index_stats_on_user_id"
t.index ["year"], name: "index_stats_on_year" t.index ["year"], name: "index_stats_on_year"
end end
@ -374,6 +372,7 @@ ActiveRecord::Schema[8.0].define(version: 2025_12_06_000004) do
t.string "utm_term" t.string "utm_term"
t.string "utm_content" t.string "utm_content"
t.index ["email"], name: "index_users_on_email", unique: true t.index ["email"], name: "index_users_on_email", unique: true
t.index ["provider", "uid"], name: "index_users_on_provider_and_uid", unique: true
t.index ["reset_password_token"], name: "index_users_on_reset_password_token", unique: true t.index ["reset_password_token"], name: "index_users_on_reset_password_token", unique: true
end end

View file

@ -93,6 +93,114 @@ RSpec.describe Stats::CalculateMonth do
expect(user.stats.last.distance).to be_within(1000).of(340_000) expect(user.stats.last.distance).to be_within(1000).of(340_000)
end end
end end
context 'when calculating visited cities and countries' do
let(:timestamp_base) { DateTime.new(year, month, 1, 12).to_i }
let!(:import) { create(:import, user:) }
context 'when user spent more than MIN_MINUTES_SPENT_IN_CITY in a city' do
let!(:berlin_points) do
[
create(:point, user:, import:, timestamp: timestamp_base,
city: 'Berlin', country_name: 'Germany',
lonlat: 'POINT(13.404954 52.520008)'),
create(:point, user:, import:, timestamp: timestamp_base + 30.minutes,
city: 'Berlin', country_name: 'Germany',
lonlat: 'POINT(13.404954 52.520008)'),
create(:point, user:, import:, timestamp: timestamp_base + 70.minutes,
city: 'Berlin', country_name: 'Germany',
lonlat: 'POINT(13.404954 52.520008)')
]
end
it 'includes the city in toponyms' do
calculate_stats
stat = user.stats.last
expect(stat.toponyms).not_to be_empty
expect(stat.toponyms.first['country']).to eq('Germany')
expect(stat.toponyms.first['cities']).not_to be_empty
expect(stat.toponyms.first['cities'].first['city']).to eq('Berlin')
end
end
context 'when user spent less than MIN_MINUTES_SPENT_IN_CITY in a city' do
let!(:prague_points) do
[
create(:point, user:, import:, timestamp: timestamp_base,
city: 'Prague', country_name: 'Czech Republic',
lonlat: 'POINT(14.4378 50.0755)'),
create(:point, user:, import:, timestamp: timestamp_base + 10.minutes,
city: 'Prague', country_name: 'Czech Republic',
lonlat: 'POINT(14.4378 50.0755)'),
create(:point, user:, import:, timestamp: timestamp_base + 20.minutes,
city: 'Prague', country_name: 'Czech Republic',
lonlat: 'POINT(14.4378 50.0755)')
]
end
it 'excludes the city from toponyms' do
calculate_stats
stat = user.stats.last
expect(stat.toponyms).not_to be_empty
# Country should be listed but with no cities
czech_country = stat.toponyms.find { |t| t['country'] == 'Czech Republic' }
expect(czech_country).not_to be_nil
expect(czech_country['cities']).to be_empty
end
end
context 'when user visited multiple cities with mixed durations' do
let!(:mixed_points) do
[
# Berlin: 70 minutes (should be included)
create(:point, user:, import:, timestamp: timestamp_base,
city: 'Berlin', country_name: 'Germany',
lonlat: 'POINT(13.404954 52.520008)'),
create(:point, user:, import:, timestamp: timestamp_base + 70.minutes,
city: 'Berlin', country_name: 'Germany',
lonlat: 'POINT(13.404954 52.520008)'),
# Prague: 20 minutes (should be excluded)
create(:point, user:, import:, timestamp: timestamp_base + 100.minutes,
city: 'Prague', country_name: 'Czech Republic',
lonlat: 'POINT(14.4378 50.0755)'),
create(:point, user:, import:, timestamp: timestamp_base + 120.minutes,
city: 'Prague', country_name: 'Czech Republic',
lonlat: 'POINT(14.4378 50.0755)'),
# Vienna: 90 minutes (should be included)
create(:point, user:, import:, timestamp: timestamp_base + 150.minutes,
city: 'Vienna', country_name: 'Austria',
lonlat: 'POINT(16.3738 48.2082)'),
create(:point, user:, import:, timestamp: timestamp_base + 240.minutes,
city: 'Vienna', country_name: 'Austria',
lonlat: 'POINT(16.3738 48.2082)')
]
end
it 'only includes cities where user spent >= MIN_MINUTES_SPENT_IN_CITY' do
calculate_stats
stat = user.stats.last
expect(stat.toponyms).not_to be_empty
# Get all cities from all countries
all_cities = stat.toponyms.flat_map { |t| t['cities'].map { |c| c['city'] } }
# Berlin and Vienna should be included
expect(all_cities).to include('Berlin', 'Vienna')
# Prague should NOT be included
expect(all_cities).not_to include('Prague')
# Should have exactly 2 cities
expect(all_cities.size).to eq(2)
end
end
end
end end
end end
end end