This code uses a technique to map out related entities, placing in the same group. This is useful for applications such as identifying entities that share one or more relationships.
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
-- =============================================
-- Author: Jay Kidd
-- Create date: 2024-05-23
-- Description: Rationalises Entity relationships into a common network
-- =============================================
CREATE OR ALTER PROCEDURE [utl].[Entity Grouping] (
@Relations [utl].[Entity Relations In] READONLY,
@MaxIterations TINYINT = 255 -- max number of UPDATE passes until failure
)
AS
BEGIN
/* LOGIC:
1. Make relationships symmetric, so {A->B} becomes {A-B} + {B->A} to simplify network exploration query to a single JOIN
2. Pick an initial Group that is the lowest id of the entities. End-game is the Group id will be lowest Entity id within the network.
3. Iterate over the relationships:
Look for the lowest group a entity is associated with. If lower than its current group, set the lower value (rationalise the network).
For example, relationships {A->B} and {B->C} would be processed as:
Iteration 1:
input : {A-B}=>'A', {B-A}=>'A', {B-C}=>'B', {C-B}=>'B'
process: {A}'s lowest group is currently 'A', so no change. {B}'s lowest group is 'A', but {B-C}=>'B' so needs to be updated. {C}'s lowest group is currently 'B', so no change.
output : {A-B}=>'A', {B-A}=>'A', {B-C}=>*'A', {C-B}=>'B'
changes: 1
Iteration 2:
input : {A-B}=>'A', {B-A}=>'A', {B-C}=>'A', {C-B}=>'B'
process: {A}'s lowest group is currently 'A', so no change. {B}'s lowest group is 'A', so no change. {C}'s lowest group is currently 'A', but {C-B}=>'B' so needs to be updated.
output : {A-B}=>'A', {B-A}=>'A', {B-C}=>'A', {C-B}=>*'A'
changes: 1
Iteration 3:
input : {A-B}=>'A', {B-A}=>'A', {B-C}=>'A', {C-B}=>'A'
process: {A}'s lowest group is currently 'A', so no change. {B}'s lowest group is 'A', so no change. {C}'s lowest group is currently 'B', so no change.
output : {A-B}=>'A', {B-A}=>'A', {B-C}=>'A', {C-B}=>'A'
changes: 0 = *STOP* Network is stable!
4. Ensure the maximum number of iterations wasn't exceeded; if so error
5. Return distinct entities and their lowest Entity group
*/
-- SET NOCOUNT ON added to prevent extra result sets from interfering with SELECT statements.
SET NOCOUNT ON;
-- setup variables
DECLARE @msg NVARCHAR(max);
-- create an analysis table; make it a temp table to support large datasets that don't fit in memory
DROP TABLE IF EXISTS #entity_grouping_analysis_internal;
CREATE TABLE #entity_grouping_internal (
[Group] UNIQUEIDENTIFIER NOT NULL,
[Entity A] UNIQUEIDENTIFIER NOT NULL,
[Entity B] UNIQUEIDENTIFIER NOT NULL,
PRIMARY KEY CLUSTERED ([Entity A], [Entity B])
);
-- populate it with initial values
INSERT INTO
#entity_grouping_internal
SELECT DISTINCT
CASE WHEN [Entity A] < [Entity B] THEN [Entity A] ELSE [Entity B] END AS [Group], -- choose the lowest id
[Entity A],
[Entity B]
FROM
@Relations AS r
UNION -- needs to be symmetric
SELECT DISTINCT
CASE WHEN [Entity A] < [Entity B] THEN [Entity A] ELSE [Entity B] END AS [Group], -- choose the lowest id
[Entity B] AS [Entity A],
[Entity A] AS [Entity B]
FROM
@Relations AS r
-- create a table to track changes
DECLARE @chg TABLE ([chg] BIT);
-- cycle until no updates
DECLARE @chgs INT = 0, @iterations TINYINT = 0;
WHILE @iterations < @MaxIterations
BEGIN
-- detect wider relationships
;WITH relations AS (
-- look for a lower GROUP within the network
SELECT
c.[Group],
MIN(l.[Group]) AS [Lower]
FROM
#entity_grouping_internal AS c
INNER JOIN
#entity_grouping_internal AS l
ON c.[Entity A] = l.[Entity B]
GROUP BY
c.[Group]
HAVING
MIN(l.[Group]) < c.[Group]
)
-- update
UPDATE
c
SET
c.[Group] = r.[Lower] -- update the group to the lower group id
OUTPUT 1 INTO @chg -- track how many record changes
FROM
#entity_grouping_internal AS c
INNER JOIN
relations AS r
ON c.[Group] = r.[Group] -- group
-- check if any changes
SELECT @chgs = COUNT(*) FROM @chg
SET @msg = ' Iteration ' + FORMAT(@iterations, '#,##0') + ' with ' + FORMAT(@chgs,'#,##0') + ' changes...'
RAISERROR(@msg, 0, 0) WITH NOWAIT;
IF @chgs = 0 BREAK; -- if no changes, break out of looping - the network is now rational and stable!
-- prepare for another cycle
DELETE FROM @chg;
SET @iterations += 1;
END
-- check of max number of iterations failure
IF @MaxIterations <= @iterations
THROW 50000, 'The maximum number of grouping UPDATE iterations was hit, without the network stabilising!', 0
-- show the result
SELECT DISTINCT
c.[Group],
c.[Entity A] AS [Entity Member]
FROM
#entity_grouping_internal AS c
END
GO
Using this code:
/*** CREATE TYPES - This is a one off... ***/
-- create a table type for passing relationships in
-- DROP TYPE [utl].[Entity Relations In]
IF TYPE_ID(N'utl.Entity Relations In') IS NULL
CREATE TYPE [utl].[Entity Relations In] AS TABLE (
[Entity A] UNIQUEIDENTIFIER NOT NULL,
[Entity B] UNIQUEIDENTIFIER NOT NULL,
PRIMARY KEY CLUSTERED ([Entity A], [Entity B]),
CHECK ([Entity A] != [Entity B])
);
-- create a table type for receiving relationships back
-- DROP TYPE [utl].[Entity Groups Out]
IF TYPE_ID(N'utl.Entity Groups Out') IS NULL
CREATE TYPE [utl].[Entity Groups Out] AS TABLE (
[Group] UNIQUEIDENTIFIER NOT NULL,
[Entity Member] UNIQUEIDENTIFIER NOT NULL PRIMARY KEY CLUSTERED
);
/*** CREATE TEST DATA ***/
SET NOCOUNT ON;
DECLARE @TestData AS [utl].[Entity Relations In];
INSERT INTO @TestData VALUES
-- forward relations A-B-C and one back D-C
('AAAAAAAA-0000-0000-0000-000000000000', 'BBBBBBBB-0000-0000-0000-000000000000'),
('BBBBBBBB-0000-0000-0000-000000000000', 'CCCCCCCC-0000-0000-0000-000000000000'),
('DDDDDDDD-0000-0000-0000-000000000000', 'CCCCCCCC-0000-0000-0000-000000000000'),
-- back relations F-E
('FFFFFFFF-0000-0000-0000-000000000000', 'EEEEEEEE-0000-0000-0000-000000000000'),
-- groups of relations 1-3, 2-4, 5-6, 5-2, 1-6 -- checks to ensure groups don't split / normalise together
('11111111-0000-0000-0000-000000000000', '33333333-0000-0000-0000-000000000000'),
('22222222-0000-0000-0000-000000000000', '44444444-0000-0000-0000-000000000000'),
('55555555-0000-0000-0000-000000000000', '66666666-0000-0000-0000-000000000000'),
('55555555-0000-0000-0000-000000000000', '22222222-0000-0000-0000-000000000000'),
('11111111-0000-0000-0000-000000000000', '66666666-0000-0000-0000-000000000000');
-- perform the grouping
DECLARE @Grouped AS [utl].[Entity Groups Out];
INSERT INTO
@Grouped
EXEC [utl].[Entity Grouping] @Relations=@TestData
-- display: In data
SELECT * FROM @TestData
-- display: Grouped data
SELECT * FROM @Grouped
This would create the output:


Leave a comment