Saturday, 16 February 2013

How To Estimate Data Utilization


SQL Server collects stats about every time an index is used and how it is used (i.e. whether a user seeked or scanned the index, etc.). It also provides a DMV to view these stats:sys.dm_db_index_usage_stats.
This DMV provides a wealth of great information, but to answer our question of “What data is actually being used?”, we have to refine our criteria. Are we talking in terms of table counts or data size? I’d argue that data size is more important than table counts; one unqueried millow-row table is more wasteful than a hundred ten-row tables.




USE master;
GO
 
/* 
    This will give you an approximation of how much data is being utilized on a server.
    Since the data is only valid as of the last server reboot, we should start off with
    an idea of how much data we've accrued.  
*/
 
/* Find out when the server was last rebooted */
-- 2008
SELECT sqlserver_start_time FROM sys.dm_os_sys_info;
-- 2005
SELECT create_date FROM sys.databases WHERE name = 'tempdb';
 
 
/* Create a temporary table to hold our data, since we're going to iterate through databases */
IF OBJECT_ID('tempdb..#Results') IS NOT NULL
    DROP TABLE #Results;
 
CREATE TABLE #Results
(
      databaseName  NVARCHAR(128)
    , tableName     NVARCHAR(128)
    , indexID       INT
    , records       BIGINT
    , activity      BIGINT
    , totalPages    BIGINT
);
 
/*  
    sp_foreachdb was written by SQL MVP Aaron Bertrand and can be downloaded 
    at http://www.mssqltips.com/sqlservertip/2201/making-a-more-reliable-and-flexible-spmsforeachdb/
    Alternatively, you can also use sys.sp_MSforeachdb
*/
--EXECUTE master.dbo.sp_foreachdb
EXECUTE sys.sp_MSforeachdb
'   USE ?; 
 
    -- You can gleam a lot of information about historical data usage from partitions
    -- but for now, we will just roll up any partitions we may have
    WITH myCTE AS
    (
        SELECT p.[object_id] AS objectID
            , p.index_id
            , SUM(p.[rows]) AS records
            , SUM(au.total_pages) AS totalPages
        FROM sys.partitions AS p WITH (NOLOCK)
        JOIN sys.allocation_units AS au WITH (NOLOCK)
            ON p.hobt_id = au.container_id
        GROUP BY p.[object_id] 
            , p.index_id
    )
 
    -- Grab all tables and join to our usage stats DMV
    INSERT INTO #Results
    SELECT DB_NAME() AS databaseName
        , t.name
        , x.index_id
        , MAX(x.records) AS records
        , ISNULL(SUM(us.user_lookups + us.user_scans + us.user_seeks), 0) AS activity
        , SUM(x.totalPages) AS totalPages
    FROM sys.tables AS t WITH (NOLOCK)
    JOIN myCTE AS x
        ON t.[object_id] = x.objectID
    LEFT JOIN sys.dm_db_index_usage_stats AS us WITH (NOLOCK)
        ON us.[object_id] = x.objectID
        AND us.index_id = x.index_id
        AND us.database_id = DB_ID()
    GROUP BY t.name
    , x.index_id;'
 
/* Because we're looping through databases, make sure we're not missing any major ones */
SELECT * FROM sys.databases WHERE name NOT IN (SELECT databaseName FROM #Results);
 
/* Retrieve actual % data utilization, which is performed at the index level */
SELECT databaseName
    , SUM(queriedPages) AS TotalQueriedPages
    , SUM(totalPages) AS TotalPages
    , CAST(SUM(queriedPages) AS FLOAT) / REPLACE(SUM(totalPages), 0, 1) AS '%DataUtil'
FROM (
    SELECT databaseName
        , tableName
        , indexID
        , CASE -- If we have any activity at all on an index, count it as activity
            WHEN activity = 0 THEN 0.0
            ELSE totalPages
          END AS queriedPages
        , totalPages
    FROM #Results
    WHERE databaseName NOT IN ('master', 'tempdb', 'msdb', 'model')
) x
GROUP BY databaseName
ORDER BY databaseName;
 
/* Retrieve % content utilization, which is performed at the table level */
SELECT databaseName
    , SUM(queriedPages) AS TotalQueriedPages
    , SUM(totalPages) AS TotalPages
    , CAST(SUM(queriedPages) AS FLOAT) / REPLACE(SUM(totalPages), 0, 1) AS '%ContentUtil'
FROM (
    SELECT databaseName
        , tableName
        , MAX(records) AS records
        , CASE WHEN SUM(activity) > 0 THEN SUM(totalPages) ELSE 0 END AS queriedPages
        , SUM(totalPages) AS totalPages
    FROM #Results
    WHERE databaseName NOT IN ('master', 'tempdb', 'msdb', 'model')
    GROUP BY databaseName
        , tableName
) x
GROUP BY databaseName
ORDER BY databaseName;




The first result set examines the utilization of indexes, and the second result set examines the utilization of data at the content (table) level. For example, if we look at Database6, we’ll see that we are only utilizing 77% of our indexes, but we’re looking at 99% of our table data. So this is a good indicator that we have some unused indexes to clean up in that database.

No comments:

Post a Comment