What is the best way to manipulate huge amounts of data in SQL Server?

We need to perform the following operation in our database:

There is table A, which has a column B_ID, which is a foreign key to table B. Table A has many rows that have the same B_ID value, and we want to fix this by cloning the corresponding rows into B and redirecting the rows from A to them.

All this is relatively simple, and we have already created a script that solves this by iterating with a cursor and calling a stored procedure to clone the row in table B. Now the problem is that tables A and B are huge, and there are also a huge number of groups of rows in table A that point to the same row in B.

As a result, after a couple of minutes of execution the transaction log fills up and the script fails. We even tried to divide the work into batches of a reasonable size and run them one by one, but this also eventually fills the log.

Besides somehow clearing the log, is there a way to handle massive data inserts / updates in SQL Server that is faster and does not blow up the log at all?

+3
source share
6 answers

( ). @KM , , / ; , .

script :

-- Lookup table: one row per color, keyed by an auto-numbered ColorID.
CREATE TABLE Colors
(
    ColorID   int         NOT NULL IDENTITY(1, 1),
    ColorName varchar(50) NOT NULL,
    PRIMARY KEY (ColorID)
);

-- Each marker references exactly one color through ColorID.
CREATE TABLE Markers
(
    MarkerID   int         NOT NULL IDENTITY(1, 1),
    MarkerName varchar(50) NOT NULL,
    ColorID    int         NOT NULL,
    PRIMARY KEY (MarkerID),
    CONSTRAINT FK_Markers_Colors
        FOREIGN KEY (ColorID) REFERENCES Colors (ColorID)
);

-- Seed the colors one statement at a time so that the IDENTITY column
-- numbers them 1, 2, 3 in this order (the marker rows below rely on it).
INSERT INTO Colors (ColorName) VALUES ('Red');
INSERT INTO Colors (ColorName) VALUES ('Green');
INSERT INTO Colors (ColorName) VALUES ('Blue');

-- Seed the markers: three share color 1, two share color 2, two share color 3.
INSERT INTO Markers (MarkerName, ColorID) VALUES ('Test1', 1);
INSERT INTO Markers (MarkerName, ColorID) VALUES ('Test2', 1);
INSERT INTO Markers (MarkerName, ColorID) VALUES ('Test3', 1);
INSERT INTO Markers (MarkerName, ColorID) VALUES ('Test4', 2);
INSERT INTO Markers (MarkerName, ColorID) VALUES ('Test5', 2);
INSERT INTO Markers (MarkerName, ColorID) VALUES ('Test6', 3);
INSERT INTO Markers (MarkerName, ColorID) VALUES ('Test7', 3);

, 1: , 1:1. ( , ):

-- Staging table: one row per marker that must be redirected to a cloned color.
--   MarkerID  - the marker to redirect
--   ColorName - the value to copy into the cloned Colors row
--   Seq       - 1-based position of this marker among all staged markers that
--               share the same ColorName; used later to pair it with one clone
-- The primary key is deliberately left unnamed: an explicitly named constraint
-- on a #temp table has to be unique within tempdb, so two sessions running this
-- script at the same time would collide on the name.
CREATE TABLE #NewColors
(
    MarkerID int NOT NULL PRIMARY KEY,
    ColorName varchar(50) NOT NULL,
    Seq int NOT NULL
);

CREATE INDEX IX_#NewColors
ON #NewColors (ColorName, Seq);

-- Refs ranks the markers that share a ColorID. The marker with the lowest
-- MarkerID (DupRank = 1) keeps the original color; ordering by MarkerID makes
-- that choice repeatable instead of arbitrary.
WITH Refs AS
(
    SELECT
        MarkerID,
        ColorID,
        ROW_NUMBER() OVER (PARTITION BY ColorID ORDER BY MarkerID) AS DupRank
    FROM Markers
)
INSERT INTO #NewColors (MarkerID, ColorName, Seq)
SELECT
    r.MarkerID,
    c.ColorName,
    -- Numbered per ColorName (not per ColorID) because the later pairing step
    -- matches on (ColorName, Seq); this keeps that pair unique even when two
    -- different Colors rows happen to carry the same name.
    ROW_NUMBER() OVER (PARTITION BY c.ColorName ORDER BY r.MarkerID) AS Seq
FROM Refs r
INNER JOIN Colors c
    ON c.ColorID = r.ColorID
WHERE r.DupRank > 1;

, . :

-- Receives the generated ColorID and the name of every cloned Colors row.
DECLARE @InsertedColors TABLE
(
    ColorID   int         NOT NULL PRIMARY KEY,
    ColorName varchar(50) NOT NULL
);

-- Add one new Colors row per staged marker; the OUTPUT clause records the
-- identity value assigned to each new row alongside its name.
INSERT INTO Colors (ColorName)
OUTPUT inserted.ColorID, inserted.ColorName
    INTO @InsertedColors (ColorID, ColorName)
SELECT staged.ColorName
FROM #NewColors AS staged;

, , (, temp ):

-- NumberedClones: numbers the freshly inserted colors within each ColorName,
-- in ColorID order, so they can be paired with the staged Seq values.
WITH NumberedClones AS
(
    SELECT
        ins.ColorID,
        ins.ColorName,
        ROW_NUMBER() OVER (PARTITION BY ins.ColorName ORDER BY ins.ColorID) AS Seq
    FROM @InsertedColors AS ins
),
-- Remap: pairs every staged marker with the clone that has the same name and
-- the same sequence number.
Remap AS
(
    SELECT
        staged.MarkerID,
        numbered.ColorID AS NewColorID
    FROM NumberedClones AS numbered
    INNER JOIN #NewColors AS staged
        ON  staged.ColorName = numbered.ColorName
        AND staged.Seq = numbered.Seq
)
-- Point each staged marker at its own cloned color.
MERGE Markers AS dst
USING Remap AS src
    ON dst.MarkerID = src.MarkerID
WHEN MATCHED THEN
    UPDATE SET dst.ColorID = src.NewColorID;

-- The staging table is no longer needed.
DROP TABLE #NewColors;

, - . .

:

-- Verification query: list every marker together with the color it now
-- references. ORDER BY makes the row order repeatable; without it SQL Server
-- is free to return the rows in any order.
SELECT
    m.MarkerID,
    m.MarkerName,
    c.ColorID,
    c.ColorName
FROM Markers m
INNER JOIN Colors c
    ON c.ColorID = m.ColorID
ORDER BY m.MarkerID;

:

MarkerID     MarkerName   ColorID   ColorName
1            Test1        1         Red
2            Test2        6         Red
3            Test3        7         Red
4            Test4        2         Green
5            Test5        5         Green
6            Test6        3         Blue
7            Test7        4         Blue

, , ? , . tempdb, temp table/table . , , .

+2

, , , .

, , .., .. .

. , .

+2

, . ? , ?

, , , , ? , , . , , , , . , , , . , .

SSIS, , . , .

+2

, , :

-- Sample data: @TableA rows reference @TableB rows through B_ID. The first row
-- (lowest RowID) of each B_ID keeps the original @TableB row; every further
-- row is marked "need to copy" and gets its own clone.
DECLARE @TableA table (RowID int, B_ID int);
INSERT INTO @TableA (RowID, B_ID) VALUES (1,1);
INSERT INTO @TableA (RowID, B_ID) VALUES (2,1); --need to copy
INSERT INTO @TableA (RowID, B_ID) VALUES (3,2);
INSERT INTO @TableA (RowID, B_ID) VALUES (4,2); --need to copy
INSERT INTO @TableA (RowID, B_ID) VALUES (5,2); --need to copy
INSERT INTO @TableA (RowID, B_ID) VALUES (6,1); --need to copy
INSERT INTO @TableA (RowID, B_ID) VALUES (7,3);
INSERT INTO @TableA (RowID, B_ID) VALUES (8,3); --need to copy

DECLARE @TableB table (B_ID int, BValues varchar(10));
INSERT INTO @TableB (B_ID, BValues) VALUES (1,'one');
INSERT INTO @TableB (B_ID, BValues) VALUES (2,'two');
INSERT INTO @TableB (B_ID, BValues) VALUES (3,'three');

-- New B_ID values are allocated directly above the current maximum.
DECLARE @Max_B_ID int;
SELECT @Max_B_ID = MAX(B_ID) FROM @TableB;

-- Mapping of every @TableA row that needs a clone to the B_ID of that clone.
-- It is computed once and reused by both the INSERT and the UPDATE below, so
-- the two statements cannot disagree about which row receives which new B_ID.
DECLARE @Clones table (RowID int, New_B_ID int, BValues varchar(10));

WITH Ranked AS
(
    -- RowNumber = 1 is the row that keeps the original B_ID; ordering by RowID
    -- makes that choice deterministic.
    SELECT
        a.RowID,
        a.B_ID,
        ROW_NUMBER() OVER (PARTITION BY a.B_ID ORDER BY a.RowID) AS RowNumber
    FROM @TableA a
)
INSERT INTO @Clones (RowID, New_B_ID, BValues)
SELECT
    r.RowID,
    @Max_B_ID + ROW_NUMBER() OVER (ORDER BY r.B_ID, r.RowID),
    b.BValues
FROM Ranked r
INNER JOIN @TableB b
    ON b.B_ID = r.B_ID
WHERE r.RowNumber > 1;

-- Create the clones. If B_ID is an IDENTITY column in the real table, explicit
-- values must be allowed for this statement (SET IDENTITY_INSERT ... ON / OFF).
INSERT INTO @TableB (B_ID, BValues)
SELECT New_B_ID, BValues
FROM @Clones;

-- Redirect every duplicated reference to its own clone.
UPDATE a
SET B_ID = c.New_B_ID
FROM @TableA a
INNER JOIN @Clones c
    ON c.RowID = a.RowID;

-- Show the result in a stable order.
SELECT RowID, B_ID FROM @TableA ORDER BY RowID;
SELECT B_ID, BValues FROM @TableB ORDER BY B_ID;

OUTPUT:

RowID       B_ID
----------- -------
1           1
2           4
3           2
4           6
5           7
6           5
7           3
8           8

(8 row(s) affected)

B_ID        BValues
----------- -------
1           one
2           two
3           three
4           one
5           one
6           two
7           two
8           three

(8 row(s) affected)
+2

( A B) ( A B), , , A, B .

B, . - :

-- Copy col1 and col2 from the referenced tableB row into each tableA row.
-- tableB has to carry the alias B: without it the references B.ID, B.col1 and
-- B.col2 cannot be bound and the statement fails.
UPDATE A
SET
    col1 = B.col1,
    col2 = B.col2
FROM tableA AS A
INNER JOIN tableB AS B
    ON B.ID = A.B_ID;
0

, :

, (A, B) , (C) ExtractData.sql:

-- Extract query: three columns of A followed by every column of the B row
-- that has the same id.
SELECT
    A.id,
    A.xxx,
    A.yyy,
    B.*
FROM A
INNER JOIN B
    ON A.id = B.id;

cmd , :

REM Run the extract script ExtractData.sql and write its result set to
REM ExtractData.dat: "|" as column separator (-s), no header rows (-h -1),
REM trailing spaces removed (-W).
sqlcmd.exe -S [Server] -U [user] -P [pass] -d [dbname] -i ExtractData.sql -s "|" -h -1 -W -o ExtractData.dat

, :

-- Switch the database to the SIMPLE recovery model.
ALTER DATABASE [database name]
    SET RECOVERY SIMPLE;

a TRUNCATE TABLE C ( - , ).

cmd C:

REM Bulk-load ExtractData.dat into dbname.dbo.C in character mode (-c), with
REM "|" as field terminator (-t) and \n as row terminator (-r); rows that
REM cannot be loaded are written to ExtractData.err (-e).
bcp.exe dbname.dbo.C in ExtractData.dat -c -t "|" -r \n -e ExtractData.err -S [Server] -U [user] -P [pass]

ExtractData.err, , C, // , .

FULL :

-- Switch the database back to the FULL recovery model.
ALTER DATABASE [database name]
    SET RECOVERY FULL;
0

Source: https://habr.com/ru/post/1735617/


All Articles