私が思いついた解決策は、ある種のパーティショニングを使用することです。ほとんどの操作は O(1) または O(logn) で行われ、ユーザーによって一度行われるため、時間の複雑さは約 O(n),O(logn) になります。 Dictionary の実装方法によって異なります。
int CountUnique(Person persons)
{
Dictionary<string, int> phones = new Dictionary<string, int>(); //Keep a dictionary where each phone number is mapped to a partition
Dictionary<string, int> email = new Dictionary<string, int>(); //Keep a dictionary where each email is mapped to a partition
bool[,] linked = new bool[n, n]; //Lookup table used to tell if 2 partitions are linked (represents the same person)
int count = 0;
int max = 0;
for (int i = 0; i < n; i++)
{
Person p = persons[i];
int pA = -1, pB = -1; // Partition found using the phone number, Partition found using email
if (phones.ContainsKey(p.Phone))
{
pA = phones[p.Phone];
}
if (emails.ContainsKey(p.Email))
{
pB = emails[p.Email];
}
if (pA == -1 && pB == -1) // First case, not found: Add both phones and email and create a new partition. Number of unique persons is also incremented.
{
phones.Add(p.Phone.Trim(), max);
emails.Add(p.Email.Trim().ToLower(), max);
max++;
count++;
}
else
{
if (pA != -1 && pB != -1 && pA != pB) // Found using both parameters on different partitions
{
if (!linked[pA, pB] && !linked[pB, pA]) // If the partition are not linked, link them
{
count--; // We'lost one partition => one unique person less
linked[pA, pB] = linked[pB, pA] = true;
}
}
if (pA == -1) // We did find an existing email but no phone
{
phones.Add(p.Phone.Trim(), pB); // Add the phone number
max++;
}
if (pB == -1) // We did find an existing phone but no email
{
emails.Add(p.Email.Trim().ToLower(), pA); // Add the email number
max++;
}
}
}
return count;
}