Split one large, denormalized table into a normalized database

I have a large CSV file (5 million rows, 300+ columns) that I need to import into a staging table on SQL Server. I then need to run a script that splits each row up and inserts the data into the corresponding tables of a normalized database. The format of the source table looks something like this:

(fName, lName, licenseNumber1, licenseIssuer1, licenseNumber2, licenseIssuer2..., specialtyName1, specialtyState1, specialtyName2, specialtyState2..., identifier1, identifier2...)

There are 50 licenseNumber/licenseIssuer column pairs, 15 specialtyName/specialtyState column pairs, and 15 identifier columns. There is always at least one of each, but the remaining 49 or 14 may be null. The first identifier is unique, but it is not used as the primary key of the Person in our schema.

My database schema is as follows

People(ID int Identity(1,1))
Names(ID int, personID int, lName varchar, fName varchar)
Licenses(ID int, personID int, number varchar, issuer varchar)
Specialties(ID int, personID int, name varchar, state varchar)
Identifiers(ID int, personID int, value)

The database will already be populated with some People before the new ones from the CSV are added.

?

select top 1:

-- Row-by-row split of the staging table: each pass creates one People row,
-- copies the name and the populated license pairs of a staging row into the
-- normalized tables, then removes one row from staging. The loop ends when
-- staging is empty.
-- @LastInsertedID is not declared in this fragment; declare it before the loop.
-- NOTE(review): TOP 1 without ORDER BY does not guarantee that every statement
-- in one pass reads the same staging row, nor that the row deleted at the end
-- is the row that was just copied. Pin the row by a unique key (the first
-- identifier is stated to be unique) if this loop is kept -- TODO confirm.
WHILE EXISTS (Select top 1 * from staging)
BEGIN
    INSERT INTO People Default Values
    SET @LastInsertedID = SCOPE_IDENTITY() -- might use the output clause to get this instead

    INSERT INTO Names (personID, lName, fName) 
    SELECT top 1 @LastInsertedID, lName, fName from staging

    -- The first license pair is always populated, so it is copied unconditionally.
    INSERT INTO Licenses(personID, number, issuer)
    SELECT top 1 @LastInsertedID, licenseNumber1, licenseIssuer1 from staging

    -- Later license pairs may be NULL and are copied only when present.
    IF (select top 1 licenseNumber2 from staging) is not null
    BEGIN
        INSERT INTO Licenses(personID, number, issuer)
        SELECT top 1 @LastInsertedID, licenseNumber2, licenseIssuer2 from staging
    END

    -- Repeat the above 49 times, etc...

    -- TOP in a DELETE statement must be written with parentheses in T-SQL.
    DELETE TOP (1) from staging
END

, , . , 300 Fetch INTO.

, ? , , , , INSERT People , - .

, ""? :

-- Cursor-based sketch: fetch one staging row into variables, place all 50
-- license pairs into a temp table, then insert only the populated pairs for
-- the person created in this pass (@LastInsertedID).
FETCH INTO ...@LicenseNumber1, @LicenseIssuer1, @LicenseNumber2, @LicenseIssuer2...
INSERT INTO #LicenseTemp (number, issuer) Values
(@LicenseNumber1, @LicenseIssuer1),
(@LicenseNumber2, @LicenseIssuer2),
... Repeat 48 more times...
.
.
.
-- NOTE(review): presumably #LicenseTemp is emptied between fetches in the
-- elided steps; otherwise rows of earlier people would be inserted again
-- under the current @LastInsertedID -- TODO confirm.
-- The temp table name is spelled with one casing throughout so the statements
-- also resolve under a case-sensitive collation.
INSERT INTO Licenses(personID, number, issuer)
SELECT @LastInsertedID, number, issuer
FROM #LicenseTemp
WHERE number is not null

, - .

, :

  • , /
  • , / ( ).

, , .

+4
2

, SQL Server, "", .

personId @Shnugo , personId

personID SQL Server 2012 . , . (-) personId, , personID

, 1 n ( , n 50.. )

: , , , .

:

:

 -- Create one People row per staging row, reusing the personId that was
 -- assigned to the staging row beforehand.
 -- The People table is declared as People(ID int Identity(1,1)): its only
 -- column is ID, and explicit values for an identity column are accepted only
 -- while IDENTITY_INSERT is ON for that table.
 -- NOTE(review): assumes staging.personId is populated, unique, and does not
 -- collide with IDs already present in People -- TODO confirm.
 SET IDENTITY_INSERT People ON;

 INSERT INTO People (ID)
 SELECT personId
 FROM staging;

 SET IDENTITY_INSERT People OFF;

:

 -- Copy every staged person's last and first name into Names, keyed by the
 -- personId carried on the staging row.
 INSERT INTO Names
     (personID, lName, fName)
 SELECT
     s.personId,
     s.lName,
     s.fName
 FROM staging AS s;

: Number

 -- Unpivot the 50 licenseNumberN / licenseIssuerN column pairs into rows.
 -- Each staging row is cross-joined with the integers 1..50; for each integer
 -- the CASE expressions pick the matching column pair, and pairs whose
 -- license number is NULL are filtered out before the insert.
 -- NOTE(review): presumably "numbers" is a helper table holding consecutive
 -- integers in column n -- verify it exists and covers 1..50.
 INSERT INTO Licenses (personId, number, issuer)
 SELECT unpivoted.personId,
        unpivoted.licenseNumber,
        unpivoted.licenseIssuer
 FROM (
    SELECT personId, 
           case nbrs.n 
                when 1 then licenseNumber1 
                when 2 then licenseNumber2
                ...
                when 50 then licenseNumber50
            end as licenseNumber,    
           case nbrs.n 
                when 1 then licenseIssuer1 
                when 2 then licenseIssuer2
                ...
                when 50 then licenseIssuer50
            end as licenseIssuer
      from staging 
           cross join 
           (select n from numbers where n>=1 and n<=50) nbrs
  ) AS unpivoted -- SQL Server requires an alias on every derived table
 WHERE unpivoted.licenseNumber is not null;

:

 -- Unpivot the 15 specialtyNameN / specialtyStateN column pairs into rows.
 -- Each staging row is cross-joined with the integers 1..15; for each integer
 -- the CASE expressions pick the matching column pair, and pairs whose
 -- specialty name is NULL are filtered out before the insert.
 -- NOTE(review): presumably "numbers" is a helper table holding consecutive
 -- integers in column n -- verify it exists and covers 1..15.
 INSERT INTO Specialties(personId, name, state)
 SELECT unpivoted.personId,
        unpivoted.specialtyName,
        unpivoted.specialtyState
 FROM (
    SELECT personId, 
           case nbrs.n 
                when 1 then specialtyName1
                when 2 then specialtyName2
                ...
                when 15 then specialtyName15
            end as specialtyName,    
           case nbrs.n 
                when 1 then specialtyState1
                when 2 then specialtyState2
                ...
                when 15 then specialtyState15
            end as specialtyState
      from staging 
           cross join 
           (select n from numbers where n>=1 and n<=15) nbrs
 ) AS unpivoted -- SQL Server requires an alias on every derived table
 WHERE unpivoted.specialtyName is not null;

:

 -- Unpivot the 15 identifierN columns into rows.
 -- Each staging row is cross-joined with the integers 1..15; for each integer
 -- the CASE expression picks the matching identifier column, and NULL
 -- identifiers are filtered out before the insert.
 -- NOTE(review): presumably "numbers" is a helper table holding consecutive
 -- integers in column n -- verify it exists and covers 1..15.
 INSERT INTO Identifiers(personId, value)
 SELECT unpivoted.personId,
        unpivoted.value
 FROM (
    SELECT personId, 
           case nbrs.n 
                when 1 then identifier1
                when 2 then identifier2
                ...
                when 15 then identifier15
            end as value
      from staging 
           cross join 
           (select n from numbers where n>=1 and n<=15) nbrs
 ) AS unpivoted -- SQL Server requires an alias on every derived table
 WHERE unpivoted.value is not null;

, .

+3

:

  • PersonID INT NOT NULL DENSE_RANK() OVER(ORDER BY fname,lname)

  • PersonID

  • GROUP BY, People

:

-- Turn the repeated SomeValueN columns into one row per (PersonID, value):
-- every column contributes its own SELECT, the results are stacked with
-- UNION ALL, and NULL values are dropped by the outer filter.
SELECT
    Stacked.PersonID,
    Stacked.TheValue
FROM
(
    SELECT PersonID, SomeValue1 AS TheValue
    FROM StagingTable
    UNION ALL
    SELECT PersonID, SomeValue2
    FROM StagingTable
    UNION ALL ... 
) AS Stacked
WHERE Stacked.TheValue IS NOT NULL

UPDATE

: , People

People...

- , ?

-- Placeholder statement: writes a PersonID value (shown as xyz) onto the
-- staging rows selected by the elided WHERE condition.
UPDATE StagingTable SET PersonID=xyz WHERE ...

, -

-- Assign a PersonID to every staging row that does not have one yet.
-- Rows sharing the same fname/lname receive the same DENSE_RANK value, and
-- the offset moves the new IDs past the highest ID already in use.
-- A window function cannot be used directly in the SET clause of an UPDATE,
-- so the rank is computed in a CTE and the update is applied through it.
-- NOTE(review): the offset is taken from People.ID here -- confirm this is the
-- intended source of the existing maximum.
DECLARE @MaxExistingID int = (SELECT COALESCE(MAX(ID), 0) FROM People);

WITH Unassigned AS
(
    SELECT PersonID,
           DENSE_RANK() OVER(ORDER BY fname, lname) AS NewRank
    FROM StagingTable
    WHERE PersonID IS NULL
)
UPDATE Unassigned
SET PersonID = NewRank + @MaxExistingID;

PersonID, NULL.

+1

Source: https://habr.com/ru/post/1660005/


All Articles