Sunday, September 14, 2014

LEFT JOIN GROUP BY HAVING COUNT on Entity Framework

Objective, create a Linq query that shows all persons with pay history count of zero or three on entities


This is how our Linq's query should be like in SQL:

select p.BusinessEntityID, p.FirstName, TheCount = count(ph.BusinessEntityId)
from Person.Person p
left join HumanResources.EmployeePayHistory ph on p.BusinessEntityID = ph.BusinessEntityID
group by p.BusinessEntityID, p.FirstName
having count(ph.BusinessEntityId) = 0 or count(ph.BusinessEntityId) = 3
order by TheCount



The two domain entities however doesn't belong in same aggregate..

namespace Domain
{
    public static class ThePerson
    {
        public class Person
        {
            public int    BusinessEntityId { get; set; }

            public string Title            { get; set; }
            public string FirstName        { get; set; }
            public string MiddleName       { get; set; }
            public string LastName         { get; set; }

            // public virtual ICollection<TheHumanResources.EmployeePayHistory> PayHistories { get; set; }
        }
    }
}


namespace Domain
{
    public static class TheHumanResources
    {
        public class EmployeePayHistory
        {
            public int                       BusinessEntityId { get; set; }
            public DateTime                  RateChangeDate   { get; set; }

            public virtual ThePerson.Person  Person           { get; set; }
            public decimal                   Rate             { get; set; } 
        }
    }
}


namespace DomainMapping
{
    public class TheDbContext : DbContext
    {
        protected override void OnModelCreating(DbModelBuilder modelBuilder)
        {
            base.OnModelCreating(modelBuilder);

            modelBuilder.Entity<ThePerson.Person>().ToTable("Person.Person")
                .HasKey(x => x.BusinessEntityId);
                // .HasMany(x => x.PayHistories).WithRequired().HasForeignKey(x => x.BusinessEntityId);

                
               


            modelBuilder.Entity<TheHumanResources.EmployeePayHistory>()
                .ToTable("HumanResources.EmployeePayHistory")
                .HasKey(x => new { x.BusinessEntityId, x.RateChangeDate } )
                .HasRequired(x => x.Person)
                .WithRequiredDependent()
                .Map(m => m.MapKey("BusinessEntityID"));

        }
    }
}


..hence we can't make a simpler query like this:
var query =
    from person in ctx.Set<ThePerson.Person>().SelectMany(x => x.PayHistories.DefaultIfEmpty(), (person, payHistory) => new { person, payHistory })
    
    group person by new { person.person.BusinessEntityId, person.person.FirstName } into persons
    
    let TheCount = persons.Sum(x => x.payHistory != null ? 1 : 0)
    
    where TheCount == 0 || TheCount == 3
    
    orderby TheCount
    
    select new { persons.Key.BusinessEntityId, persons.Key.FirstName, TheCount };


Output:
SELECT
    [GroupBy1].[K1] AS [BusinessEntityId],
    [GroupBy1].[K2] AS [FirstName],
    [GroupBy1].[A1] AS [C1]
    FROM ( SELECT
        [Join1].[K1] AS [K1],
        [Join1].[K2] AS [K2],
        SUM([Join1].[A1]) AS [A1]
        FROM ( SELECT
            [Extent1].[BusinessEntityId] AS [K1],
            [Extent1].[FirstName] AS [K2],
            CASE WHEN ( NOT (([Extent2].[BusinessEntityId] IS NULL) AND ([Extent2].[RateChangeDate] IS NULL))) THEN 1 ELSE 0 END AS [A1]
            FROM  [Person].[Person] AS [Extent1]
            LEFT OUTER JOIN [HumanResources].[EmployeePayHistory] AS [Extent2] ON [Extent1].[BusinessEntityId] = [Extent2].[Person_BusinessEntityId1]
        )  AS [Join1]
        GROUP BY [K1], [K2]
    )  AS [GroupBy1]
    WHERE [GroupBy1].[A1] IN (0,3)
    ORDER BY [GroupBy1].[A1] ASC




Following is easy to read Linq with left join group by and having, but it's not efficient, this creates a subquery in SELECT:

var query = from person in ctx.Set<ThePerson.Person>()

            join payHistory in ctx.Set<TheHumanResources.EmployeePayHistory>() on person.BusinessEntityId equals payHistory.BusinessEntityId into payHistories
            from payHistoryNullable in payHistories.DefaultIfEmpty()

            let TheCount = payHistories.Count()

            where TheCount == 0 || TheCount == 3

            orderby TheCount ascending

            select new { person.BusinessEntityId, person.FirstName, TheCount };


Output:
SELECT
    [Project1].[BusinessEntityId] AS [BusinessEntityId],
    [Project1].[FirstName] AS [FirstName],
    [Project1].[C1] AS [C1]
    FROM ( SELECT
        [Extent1].[BusinessEntityId] AS [BusinessEntityId],
        [Extent1].[FirstName] AS [FirstName],
        -- subquery        
        (SELECT
            COUNT(1) AS [A1]
            FROM [HumanResources].[EmployeePayHistory] AS [Extent3]
            WHERE [Extent1].[BusinessEntityId] = [Extent3].[BusinessEntityId]) AS [C1]
        FROM  [Person].[Person] AS [Extent1]
        LEFT OUTER JOIN [HumanResources].[EmployeePayHistory] AS [Extent2] ON [Extent1].[BusinessEntityId] = [Extent2].[BusinessEntityId]
    )  AS [Project1]
    WHERE [Project1].[C1] IN (0,3)
    ORDER BY [Project1].[C1] ASC



Not as readable as the Linq query above, but this is better as this uses GROUP BY:

var query = from person in ctx.Set<ThePerson.Person>()

            join payHistory in ctx.Set<TheHumanResources.EmployeePayHistory>() on person.BusinessEntityId equals payHistory.BusinessEntityId into payHistories
            from payHistoryNullable in payHistories.DefaultIfEmpty()

            group payHistoryNullable by new { person.BusinessEntityId, person.FirstName } into payHistoriesNullable

            let TheCount = payHistoriesNullable.Sum(eachPayHistoryNullable => eachPayHistoryNullable != null ? 1 : 0)

            where TheCount == 0 || TheCount == 3

            orderby TheCount ascending
            select new { payHistoriesNullable.Key.BusinessEntityId, payHistoriesNullable.Key.FirstName, TheCount };


Output:

SELECT
    [GroupBy1].[K1] AS [BusinessEntityId],
    [GroupBy1].[K2] AS [FirstName],
    [GroupBy1].[A1] AS [C1]
    FROM ( SELECT
        [Join1].[K1] AS [K1],
        [Join1].[K2] AS [K2],
        SUM([Join1].[A1]) AS [A1]
        FROM ( SELECT
            [Extent1].[BusinessEntityId] AS [K1],
            [Extent1].[FirstName] AS [K2],
            CASE WHEN ( NOT (([Extent2].[BusinessEntityId] IS NULL) AND ([Extent2].[RateChangeDate] IS NULL))) THEN 1 ELSE 0 END AS [A1]
            FROM  [Person].[Person] AS [Extent1]
            LEFT OUTER JOIN [HumanResources].[EmployeePayHistory] AS [Extent2] ON [Extent1].[BusinessEntityId] = [Extent2].[BusinessEntityId]
        )  AS [Join1]
        -- efficient, uses GROUP BY 
        GROUP BY [K1], [K2] 
    )  AS [GroupBy1]
    WHERE [GroupBy1].[A1] IN (0,3)
    ORDER BY [GroupBy1].[A1] ASC            



Happy Coding!

No comments:

Post a Comment