Improved clustered columnstore index rebuild–potential segment fullness

Earlier, I posted a maintenance script that could be used with clustered columnstore indexes to try to determine when they should be rebuilt. Reorganize is not a very useful operation for these indexes as it basically just forces a close of the existing delta store, ready for compression.

One of the problems with a clustered columnstore index is that it doesn’t work as well if the segments have been created at less than the maximum potential size of 1048576 rows per segment. Over use of reorganize options could cause this, but more likely it will be to do with loading the index with small batches of data, rather than loading large batches in BULK INSERT operations.

I’ve updated the maintenance script so that you can now also choose to rebuild if your segments are much less full than their optimal size ie: you could have 1,048,560,000 rows of data in 10,000 segments. So if you say 90 percent as the cutoff and your segments hold less than 90 percent of their potential, that will also trigger a rebuild.

Hope this helps someone:

-———————————————————————

-- Rebuild clustered columnstore indexes when necessary

-- Dr Greg Low v2.0

-———————————————————————

-- rebuild when more than supplied % of rows have been deleted

-- rebuild when any segments contain more than supplied % deleted rows

-- rebuild if more than supplied number of segments are empty

-- rebuild when segments are on average less than the supplied % of

--     maximum capacity

DECLARE @DeletedTotalRowPercentage int = 10;   

DECLARE @DeletedSegmentsRowPercentage int = 20;

DECLARE @EmptySegmentsAllowed int = 0;

DECLARE @SegmentPotentialFullnessPercentage int = 90;

-———————————————————————

DECLARE @IndexesToRebuild TABLE (SchemaName sysname,

                                 TableName sysname,

                                 IndexName sysname);

WITH ClusteredColumnstoreIndexes

AS

( SELECT t.object_id AS ObjectID,

         SCHEMA_NAME(t.schema_id) AS SchemaName,

         t.name AS TableName,

         i.name AS IndexName

  FROM sys.indexes AS i

  INNER JOIN sys.tables AS t

  ON i.object_id = t.object_id

  WHERE i.type = 5

),

RowGroups

AS

( SELECT csrg.object_id AS ObjectID,

         csrg.total_rows AS TotalRows,

         csrg.deleted_rows AS DeletedRows,

         csrg.deleted_rows * 100.0 / csrg.total_rows AS DeletedPercentage,

         CASE WHEN csrg.total_rows = csrg.deleted_rows

              THEN 1 ELSE 0

         END AS IsEmptySegment

  FROM sys.column_store_row_groups AS csrg

),

IndexStats

AS

( SELECT cci.ObjectID,

         cci.SchemaName,

         cci.TableName,

         cci.IndexName,

         SUM(CAST(rg.TotalRows AS decimal(18,0))) AS TotalRows,

         SUM(CAST(rg.DeletedRows AS decimal(18,0))) AS DeletedRows,

         SUM(CAST(rg.DeletedRows AS decimal(18,0))) * 100.0

           / SUM(CAST(rg.TotalRows AS decimal(18,0)))
           AS DeletedPercentage,

         SUM(rg.IsEmptySegment) aS EmptySegments,

         COUNT(rg.TotalRows) AS TotalSegments

  FROM ClusteredColumnstoreIndexes AS cci

  INNER JOIN RowGroups AS rg

  ON cci.ObjectID = rg.ObjectID

  GROUP BY cci.ObjectID, cci.SchemaName, cci.TableName, cci.IndexName

)

INSERT @IndexesToRebuild (SchemaName, TableName, IndexName)

SELECT s.SchemaName, s.TableName, s.IndexName

FROM IndexStats AS s

WHERE s.DeletedPercentage > @DeletedTotalRowPercentage

OR s.EmptySegments > @EmptySegmentsAllowed

OR EXISTS(SELECT 1 FROM RowGroups AS rg

                   WHERE rg.ObjectID = s.ObjectID

                   AND rg.DeletedPercentage > @DeletedSegmentsRowPercentage)

OR (s.TotalRows * 100 / (s.TotalSegments * 1048576.0))
            < @SegmentPotentialFullnessPercentage;

DECLARE @SchemaName sysname;

DECLARE @TableName sysname;

DECLARE @IndexName sysname;

DECLARE @SqlCommand nvarchar(max);

DECLARE IndexList CURSOR FAST_FORWARD READ_ONLY

FOR

  SELECT SchemaName, TableName, IndexName

  FROM @IndexesToRebuild

  ORDER BY SchemaName, TableName, IndexName;

OPEN IndexList;

FETCH NEXT FROM IndexList INTO @SchemaName, @TableName, @IndexName;

WHILE @@FETCH_STATUS = 0

BEGIN

  SET @SqlCommand = N’ALTER INDEX ’ + QUOTENAME(@IndexName)

                  + N’ ON ’ + QUOTENAME(@SchemaName)

                  + N’.’ + QUOTENAME(@TableName)

                  + N’ REBUILD PARTITION = ALL;';

  PRINT @SqlCommand;

  EXEC(@SqlCommand); 

  FETCH NEXT FROM IndexList INTO @SchemaName, @TableName, @IndexName;

END;

CLOSE IndexList;

DEALLOCATE IndexList;

2015-01-09