Add duplicate detection to business import

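- Skip businesses that already exist (matched by the slug generated from the name)
- Track skipped count in import results
- Display skipped count in admin import UI
- Make seeds idempotent: create only missing categories, and stop deleting existing data and seeding sample businesses
- Allow the test database to be configured via DATABASE_URL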

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
Kevin Sivic 2025-12-01 08:14:42 -05:00
parent f62ddffb80
commit 94cb0870ff
5 changed files with 143 additions and 191 deletions

View file

@ -5,13 +5,26 @@ import Config
# The MIX_TEST_PARTITION environment variable can be used # The MIX_TEST_PARTITION environment variable can be used
# to provide built-in test partitioning in CI environment. # to provide built-in test partitioning in CI environment.
# Run `mix help test` for more information. # Run `mix help test` for more information.
config :localspot, Localspot.Repo, database_url = System.get_env("DATABASE_URL")
username: "kevinsivic",
password: "", db_config =
hostname: "localhost", if database_url do
database: "localspot_test#{System.get_env("MIX_TEST_PARTITION")}", [url: database_url]
pool: Ecto.Adapters.SQL.Sandbox, else
pool_size: System.schedulers_online() * 2 [
username: "kevinsivic",
password: "",
hostname: "localhost",
database: "localspot_test#{System.get_env("MIX_TEST_PARTITION")}"
]
end
config :localspot,
Localspot.Repo,
Keyword.merge(db_config,
pool: Ecto.Adapters.SQL.Sandbox,
pool_size: System.schedulers_online() * 2
)
# We don't run a server during test. If one is required, # We don't run a server during test. If one is required,
# you can enable the server option below. # you can enable the server option below.

View file

@ -41,7 +41,9 @@ defmodule Localspot.Businesses.Import do
require Logger require Logger
@type import_result :: {:ok, %{imported: non_neg_integer(), errors: list()}} | {:error, term()} @type import_result ::
{:ok, %{imported: non_neg_integer(), skipped: non_neg_integer(), errors: list()}}
| {:error, term()}
@doc """ @doc """
Imports businesses from a JSON file path. Imports businesses from a JSON file path.
@ -73,18 +75,20 @@ defmodule Localspot.Businesses.Import do
defp import_businesses(businesses) do defp import_businesses(businesses) do
categories = load_categories() categories = load_categories()
existing_slugs = load_existing_slugs()
results = results =
businesses businesses
|> Enum.with_index(1) |> Enum.with_index(1)
|> Enum.map(fn {business_data, index} -> |> Enum.map(fn {business_data, index} ->
import_single_business(business_data, categories, index) import_single_business(business_data, categories, existing_slugs, index)
end) end)
imported = Enum.count(results, &match?({:ok, _}, &1)) imported = Enum.count(results, &match?({:ok, _}, &1))
skipped = Enum.count(results, &match?({:skipped, _, _}, &1))
errors = Enum.filter(results, &match?({:error, _, _}, &1)) errors = Enum.filter(results, &match?({:error, _, _}, &1))
{:ok, %{imported: imported, errors: errors}} {:ok, %{imported: imported, skipped: skipped, errors: errors}}
end end
defp load_categories do defp load_categories do
@ -96,7 +100,24 @@ defmodule Localspot.Businesses.Import do
end) end)
end end
defp import_single_business(data, categories, index) do defp load_existing_slugs do
Businesses.list_all_businesses()
|> Enum.map(& &1.slug)
|> MapSet.new()
end
# Imports one business entry unless its slug is already known.
#
# The slug is generated from the entry's "name" (a missing name falls back to
# an empty string before slug generation). When that slug is present in
# `existing_slugs`, the entry is logged and reported as
# `{:skipped, index, slug}`; otherwise the result of `do_import_business/3`
# is returned unchanged.
defp import_single_business(data, categories, existing_slugs, index) do
  name = data["name"]
  slug = Businesses.generate_slug(name || "")

  case MapSet.member?(existing_slugs, slug) do
    true ->
      Logger.info("Skipping duplicate business: #{name} (slug: #{slug})")
      {:skipped, index, slug}

    false ->
      do_import_business(data, categories, index)
  end
end
defp do_import_business(data, categories, index) do
with {:ok, business_attrs} <- build_business_attrs(data, categories), with {:ok, business_attrs} <- build_business_attrs(data, categories),
{:ok, hours_attrs} <- build_hours_attrs(data), {:ok, hours_attrs} <- build_hours_attrs(data),
{:ok, photos_attrs} <- build_photos_attrs(data), {:ok, photos_attrs} <- build_photos_attrs(data),

View file

@ -144,12 +144,17 @@ defmodule LocalspotWeb.AdminLive.Import do
</.form> </.form>
<div :if={@import_result} class="mt-6"> <div :if={@import_result} class="mt-6">
<div :if={match?(%{imported: _, errors: _}, @import_result)} class="space-y-4"> <div :if={match?(%{imported: _, skipped: _, errors: _}, @import_result)} class="space-y-4">
<div class="alert alert-success"> <div class="alert alert-success">
<.icon name="hero-check-circle" class="w-5 h-5" /> <.icon name="hero-check-circle" class="w-5 h-5" />
<span>Successfully imported {@import_result.imported} business(es)</span> <span>Successfully imported {@import_result.imported} business(es)</span>
</div> </div>
<div :if={@import_result.skipped > 0} class="alert alert-info">
<.icon name="hero-information-circle" class="w-5 h-5" />
<span>Skipped {@import_result.skipped} duplicate(s)</span>
</div>
<div :if={length(@import_result.errors) > 0} class="alert alert-warning"> <div :if={length(@import_result.errors) > 0} class="alert alert-warning">
<.icon name="hero-exclamation-triangle" class="w-5 h-5" /> <.icon name="hero-exclamation-triangle" class="w-5 h-5" />
<div> <div>

View file

@ -11,192 +11,75 @@
# and so on) as they will fail if something goes wrong. # and so on) as they will fail if something goes wrong.
alias Localspot.Repo alias Localspot.Repo
alias Localspot.Businesses.{Category, Business, BusinessHour, BusinessPhoto} alias Localspot.Businesses.Category
# Clear existing data # Create categories (only if they don't exist)
Repo.delete_all(BusinessPhoto) categories_data = [
Repo.delete_all(BusinessHour)
Repo.delete_all(Business)
Repo.delete_all(Category)
# Create categories
categories =
[
%{
name: "Restaurants",
slug: "restaurants",
description: "Local dining establishments",
icon: "hero-cake"
},
%{
name: "Coffee Shops",
slug: "coffee-shops",
description: "Cafes and coffee houses",
icon: "hero-cup-soda"
},
%{name: "Retail", slug: "retail", description: "Shops and stores", icon: "hero-shopping-bag"},
%{
name: "Services",
slug: "services",
description: "Professional services",
icon: "hero-wrench-screwdriver"
},
%{
name: "Arts & Entertainment",
slug: "arts-entertainment",
description: "Galleries, theaters, and venues",
icon: "hero-paint-brush"
}
]
|> Enum.map(fn attrs ->
%Category{}
|> Category.changeset(attrs)
|> Repo.insert!()
end)
[restaurants, coffee, retail, services, arts] = categories
# Sample businesses - using Columbus, OH area coordinates
businesses_data = [
%{ %{
name: "The Cozy Bean", name: "Restaurants",
slug: "the-cozy-bean", slug: "restaurants",
description: description: "Local dining establishments",
"A family-owned coffee shop serving locally roasted beans and homemade pastries since 1998.", icon: "hero-cake"
street_address: "123 High Street",
city: "Columbus",
state: "OH",
zip_code: "43215",
latitude: Decimal.new("39.9612"),
longitude: Decimal.new("-82.9988"),
phone: "6145551234",
email: "hello@cozybean.example",
website: "https://cozybean.example",
locally_owned: true,
category_id: coffee.id
}, },
%{ %{
name: "Mama Rosa's Kitchen", name: "Coffee Shops",
slug: "mama-rosas-kitchen", slug: "coffee-shops",
description: description: "Cafes and coffee houses",
"Authentic Italian cuisine made with recipes passed down through four generations.", icon: "hero-cup-soda"
street_address: "456 Main Street", },
city: "Columbus", %{name: "Retail", slug: "retail", description: "Shops and stores", icon: "hero-shopping-bag"},
state: "OH", %{
zip_code: "43215", name: "Services",
latitude: Decimal.new("39.9650"), slug: "services",
longitude: Decimal.new("-83.0020"), description: "Professional services",
phone: "6145555678", icon: "hero-wrench-screwdriver"
email: "reservations@mamarosas.example",
website: "https://mamarosas.example",
locally_owned: true,
category_id: restaurants.id
}, },
%{ %{
name: "Buckeye Books", name: "Arts & Entertainment",
slug: "buckeye-books", slug: "arts-entertainment",
description: "Independent bookstore specializing in local authors and rare finds.", description: "Galleries, theaters, and venues",
street_address: "789 Oak Avenue", icon: "hero-paint-brush"
city: "Columbus",
state: "OH",
zip_code: "43215",
latitude: Decimal.new("39.9580"),
longitude: Decimal.new("-82.9950"),
phone: "6145559012",
locally_owned: true,
category_id: retail.id
}, },
%{ %{
name: "Short North Gallery", name: "Breweries",
slug: "short-north-gallery", slug: "breweries",
description: "Contemporary art gallery featuring works by Ohio artists.", description: "Craft breweries and taprooms",
street_address: "321 Short North Ave", icon: "hero-beaker"
city: "Columbus",
state: "OH",
zip_code: "43201",
latitude: Decimal.new("39.9750"),
longitude: Decimal.new("-83.0030"),
locally_owned: true,
category_id: arts.id
}, },
%{ %{
name: "Fix-It Fred's", name: "Wineries",
slug: "fix-it-freds", slug: "wineries",
description: description: "Wineries and vineyards",
"Family-owned repair shop for electronics, appliances, and more. If it's broken, Fred can fix it!", icon: "hero-sparkles"
street_address: "555 Repair Lane",
city: "Columbus",
state: "OH",
zip_code: "43215",
latitude: Decimal.new("39.9520"),
longitude: Decimal.new("-83.0100"),
phone: "6145553456",
email: "fred@fixitfreds.example",
locally_owned: true,
category_id: services.id
}, },
%{ %{
name: "German Village Bakery", name: "Outdoor Recreation",
slug: "german-village-bakery", slug: "outdoor-recreation",
description: "Traditional German pastries and breads baked fresh daily.", description: "Outdoor gear, guides, and adventure",
street_address: "888 Schiller Park", icon: "hero-sun"
city: "Columbus", },
state: "OH", %{
zip_code: "43206", name: "Farm Markets",
latitude: Decimal.new("39.9430"), slug: "farm-markets",
longitude: Decimal.new("-82.9920"), description: "Farm stands, orchards, and local produce",
phone: "6145557890", icon: "hero-shopping-cart"
locally_owned: true,
category_id: restaurants.id
} }
] ]
businesses = created_count =
businesses_data categories_data
|> Enum.map(fn attrs -> |> Enum.reduce(0, fn attrs, count ->
%Business{} case Repo.get_by(Category, slug: attrs.slug) do
|> Business.changeset(attrs) nil ->
|> Repo.insert!() %Category{}
|> Category.changeset(attrs)
|> Repo.insert!()
count + 1
_existing ->
count
end
end) end)
# Add hours for each business (most open 9-5 or similar) IO.puts("Seeded #{created_count} new categories (#{length(categories_data)} total defined)")
for business <- businesses do
# Monday through Friday: 9 AM - 5 PM (or restaurant hours)
is_restaurant = business.category_id == restaurants.id
for day <- 1..5 do
%BusinessHour{}
|> BusinessHour.changeset(%{
business_id: business.id,
day_of_week: day,
opens_at: if(is_restaurant, do: ~T[11:00:00], else: ~T[09:00:00]),
closes_at: if(is_restaurant, do: ~T[21:00:00], else: ~T[17:00:00]),
closed: false
})
|> Repo.insert!()
end
# Saturday: shorter hours
%BusinessHour{}
|> BusinessHour.changeset(%{
business_id: business.id,
day_of_week: 6,
opens_at: ~T[10:00:00],
closes_at: ~T[15:00:00],
closed: false
})
|> Repo.insert!()
# Sunday: closed (except restaurants)
%BusinessHour{}
|> BusinessHour.changeset(%{
business_id: business.id,
day_of_week: 0,
opens_at: if(is_restaurant, do: ~T[12:00:00], else: nil),
closes_at: if(is_restaurant, do: ~T[20:00:00], else: nil),
closed: !is_restaurant
})
|> Repo.insert!()
end
IO.puts("Seeded #{length(categories)} categories and #{length(businesses)} businesses")

View file

@ -37,13 +37,40 @@ defmodule Localspot.Businesses.ImportTest do
} }
""" """
assert {:ok, %{imported: 1, errors: []}} = Import.from_json(json) assert {:ok, %{imported: 1, skipped: 0, errors: []}} = Import.from_json(json)
business = Businesses.get_business_by_slug("test-business") business = Businesses.get_business_by_slug("test-business")
assert business.name == "Test Business" assert business.name == "Test Business"
assert business.category_id == category.id assert business.category_id == category.id
end end
# Importing the same JSON payload twice must not create a second record:
# the first run imports the business, the second run reports it as skipped.
# The test context's category is pattern-matched but not used directly; the
# payload refers to it by the slug "test-category" (presumably created in a
# setup block that is not visible here).
test "skips duplicate businesses", %{category: _category} do
json = """
{
"businesses": [
{
"name": "Duplicate Test",
"category": "test-category",
"street_address": "123 Test St",
"city": "Columbus",
"state": "OH",
"zip_code": "43215"
}
]
}
"""
# First import should succeed: nothing with this name exists yet, so the
# entry is counted as imported and nothing is skipped.
assert {:ok, %{imported: 1, skipped: 0, errors: []}} = Import.from_json(json)
# Second import should skip the duplicate: the identical payload is counted
# under `skipped`, not `imported`, and is not reported as an error.
assert {:ok, %{imported: 0, skipped: 1, errors: []}} = Import.from_json(json)
# Should still only have one business
# NOTE(review): assumes the `query` filter of list_businesses/1 matches on
# the business name — confirm against Businesses.list_businesses/1.
businesses = Businesses.list_businesses(%{query: "Duplicate Test"})
assert length(businesses) == 1
end
test "imports business with hours", %{category: _category} do test "imports business with hours", %{category: _category} do
json = """ json = """
{ {
@ -64,7 +91,7 @@ defmodule Localspot.Businesses.ImportTest do
} }
""" """
assert {:ok, %{imported: 1, errors: []}} = Import.from_json(json) assert {:ok, %{imported: 1, skipped: 0, errors: []}} = Import.from_json(json)
business = Businesses.get_business_by_slug("business-with-hours") business = Businesses.get_business_by_slug("business-with-hours")
assert length(business.hours) == 2 assert length(business.hours) == 2
@ -96,7 +123,7 @@ defmodule Localspot.Businesses.ImportTest do
} }
""" """
assert {:ok, %{imported: 1, errors: []}} = Import.from_json(json) assert {:ok, %{imported: 1, skipped: 0, errors: []}} = Import.from_json(json)
business = Businesses.get_business_by_slug("business-with-photos") business = Businesses.get_business_by_slug("business-with-photos")
assert length(business.photos) == 1 assert length(business.photos) == 1
@ -128,7 +155,7 @@ defmodule Localspot.Businesses.ImportTest do
} }
""" """
assert {:ok, %{imported: 2, errors: []}} = Import.from_json(json) assert {:ok, %{imported: 2, skipped: 0, errors: []}} = Import.from_json(json)
end end
test "reports errors for invalid businesses", %{category: _category} do test "reports errors for invalid businesses", %{category: _category} do
@ -150,7 +177,7 @@ defmodule Localspot.Businesses.ImportTest do
} }
""" """
assert {:ok, %{imported: 1, errors: errors}} = Import.from_json(json) assert {:ok, %{imported: 1, skipped: 0, errors: errors}} = Import.from_json(json)
assert length(errors) == 1 assert length(errors) == 1
end end
@ -179,8 +206,11 @@ defmodule Localspot.Businesses.ImportTest do
""" """
assert {:ok, assert {:ok,
%{imported: 0, errors: [{:error, 1, {:unknown_category, "nonexistent-category"}}]}} = %{
Import.from_json(json) imported: 0,
skipped: 0,
errors: [{:error, 1, {:unknown_category, "nonexistent-category"}}]
}} = Import.from_json(json)
end end
end end