Search for a String in all Tables of a Database

When I work in customer sites, there is a quite a difference between how different sites get data for testing. But it basically comes down to a variation of one of these five:

  • Testing? That’s for wimps. (Well perhaps for users)
  • I type in all the test data that I need.
  • We use a data generator to produce our test data.
  • We just periodically get a copy of production data.
  • We get production data and it gets masked before we can use it.

It’s the last option that I’m concerned with today. Masking data is challenging and usually much more difficult than imagined. This week, I was working with a masked database and we ended up finding all client details tucked inside a chunk of XML that was used to hold the metadata for a series of images. They had done a good job of removing it from all the normal columns from all tables in the database, but it’s so easy to miss extras like this.

So after we fixed the issue, I was left wondering how effectively it really had all been masked.

The following script takes a while to run on a large database. It takes a string to search for, and looks to find any locations in the database that hold that string. Optionally, it dumps out a copy of all rows containing the string. It does this for char, nchar, varchar, nvarchar, varchar(max), nvarchar(max), and xml data types.

I hope someone finds it useful.

-- Search for a string anywhere in a database

-- v1.0 Dr Greg Low, 11 June 2016

DECLARE @StringToSearchFor nvarchar(max) = N’Jones';

DECLARE @IncludeActualRows bit = 1;

DECLARE @SchemaName sysname;

DECLARE @TableName sysname;

DECLARE @ColumnName sysname;

DECLARE @IsNullable bit;

DECLARE @TableObjectID int;

DECLARE @Message nvarchar(max);

DECLARE @FullTableName nvarchar(max);

DECLARE @BaseDataTypeName sysname;

DECLARE @WereStringColumnsFound bit;

DECLARE @Predicate nvarchar(max);

DECLARE @SQL nvarchar(max);

DECLARE @SummarySQL nvarchar(max) = N’';

DECLARE @NumberOfTables int;

DECLARE @TableCounter int = 0;

IF OBJECT_ID(N’tempdb..#FoundLocations’) IS NOT NULL

BEGIN

       DROP TABLE #FoundLocations;

END;

CREATE TABLE #FoundLocations

(

    FullTableName nvarchar(max),

    NumberOfRows bigint

);

SET @NumberOfTables = (SELECT COUNT(*) FROM sys.tables AS t

                                       WHERE t.is_ms_shipped = 0

                                       AND t.type = N’U’);

DECLARE TableList CURSOR FAST_FORWARD READ_ONLY

FOR

SELECT SCHEMA_NAME(schema_id) AS SchemaName, name AS TableName, object_id AS TableObjectID

FROM sys.tables AS t

WHERE t.is_ms_shipped = 0

AND t.type = N’U'

ORDER BY SchemaName, TableName;

OPEN TableList;

FETCH NEXT FROM TableList INTO @SchemaName, @TableName, @TableObjectID;

WHILE @@FETCH_STATUS = 0

BEGIN

    SET @TableCounter += 1;

    SET @FullTableName = QUOTENAME(@SchemaName) + N’.’ + QUOTENAME(@TableName);

    SET @Message = N’Checking table '

                      + CAST(@TableCounter AS nvarchar(20))

                      + N’ of '

                      + CAST(@NumberOfTables AS nvarchar(20))

                      + N’: '

                      + @FullTableName;

    PRINT @Message;

    SET @WereStringColumnsFound = 0;

    SET @Predicate = N’';

    DECLARE ColumnList CURSOR FAST_FORWARD READ_ONLY

    FOR

    SELECT c.name AS ColumnName, t.name AS BaseDataTypeName

    FROM sys.columns AS c

    INNER JOIN sys.types AS t

    ON t.system_type_id = c.system_type_id

    AND t.user_type_id = c.system_type_id – note: want the base type not the actual type

    WHERE c.object_id = @TableObjectID

    AND t.name IN (N’text’, N’ntext’, N’varchar’, N’nvarchar’, N’char’, N’nchar’, N’xml')

       AND (c.max_length >= LEN(@StringToSearchFor) OR c.max_length < 0) – allow for max types

    ORDER BY ColumnName;

    OPEN ColumnList;

    FETCH NEXT FROM ColumnList INTO @ColumnName, @BaseDataTypeName;

    WHILE @@FETCH_STATUS = 0

    BEGIN

          SET @WereStringColumnsFound = 1;

          IF @Predicate <> N''

          BEGIN

          SET @Predicate += N’ OR ‘;

          END;

          SET @Predicate += CASE WHEN @BaseDataTypeName = N’xml’

                                 THEN N’CAST(’ + QUOTENAME(@ColumnName) + N’ AS nvarchar(max))'

                                 ELSE QUOTENAME(@ColumnName)

                            END

                          + N’ LIKE N’’%’ + @StringToSearchFor + N’%’’';

          FETCH NEXT FROM ColumnList INTO @ColumnName, @BaseDataTypeName;

    END;

    CLOSE ColumnList;

    DEALLOCATE ColumnList;

    IF @WereStringColumnsFound <> 0

          BEGIN

          SET @SQL = N’SET NOCOUNT ON;

                       INSERT #FoundLocations (FullTableName, NumberOfRows)

                       SELECT N’’’ + @FullTableName + N’’’, COUNT_BIG(*) FROM '

                   + @FullTableName

                        + N’ WHERE '

                        + @Predicate

                                            + N’;';

          EXECUTE (@SQL);

          IF (SELECT NumberOfRows FROM #FoundLocations WHERE FullTableName = @FullTableName) > 0

          BEGIN

              SET @SummarySQL += N’SELECT * FROM ’ + @FullTableName + N’ WHERE ’ + @Predicate + N’;’ + NCHAR(13) + NCHAR(10);

          END;

    END;

    FETCH NEXT FROM TableList INTO @SchemaName, @TableName, @TableObjectID;

END;

CLOSE TableList;

DEALLOCATE TableList;

SELECT *

FROM #FoundLocations

WHERE NumberOfRows > 0

ORDER BY FullTableName;

DROP TABLE #FoundLocations;

IF @SummarySQL <> N’’ AND @IncludeActualRows <> 0

BEGIN

    EXECUTE (@SummarySQL);

END;

2016-06-12