Wednesday, 30 October 2013

Issue with BLOB rows



Method 1: Set the SubscriptionStreams parameter of the Distribution Agent to a value of 1.

Method 2: Set the OleDbStreamThreshold parameter of the Distribution Agent to a value that is greater than the largest data size of the binary large object (BLOB) columns that have to be replicated. The Distribution Agent will then not bind the BLOB data as a stream. By default, starting with SQL Server 2008, BLOB data is bound as a stream when its size exceeds the value of the OleDbStreamThreshold parameter.
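Both parameters go on the Distribution Agent command line. A minimal sketch of applying them, assuming the agent runs as a SQL Agent job (the job name, step id, and command text below are placeholders; in practice, retrieve the existing command with sp_help_jobstep and append the parameter to it):

USE msdb;
GO
-- Inspect the Distribution Agent job to find its "Run agent." step and current command text
EXEC dbo.sp_help_jobstep @job_name = N'MyDistributionAgentJob';
GO
-- Method 1: append -SubscriptionStreams 1 (or, for Method 2: -OleDbStreamThreshold <bytes>)
EXEC dbo.sp_update_jobstep
    @job_name = N'MyDistributionAgentJob',
    @step_id  = 2,   -- usually the "Run agent." step
    @command  = N'<existing agent command> -SubscriptionStreams 1';
GO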

USE TestDB;
GO
EXEC sp_configure 'show advanced options', 1 ;
RECONFIGURE ;
GO
EXEC sp_configure 'max text repl size';
GO

-- OUTPUT:
-- name                    minimum  maximum     config_value run_value
-- ----------------------- -------- ----------- ------------ -----------
-- max text repl size (B)  -1       2147483647  65536        65536
The output shows that 'max text repl size' is currently set to 65536 bytes. Execute the appropriate set of statements below to resolve the issue.
-- For SQL Server 2005
USE TestDB;
GO
EXEC sp_configure 'show advanced options', 1 ;
RECONFIGURE ;
GO
EXEC sp_configure 'max text repl size', 2147483647 ; 
GO
RECONFIGURE;
GO

-- For SQL Server 2008 (and 2008 R2)
USE TestDB;
GO
EXEC sp_configure 'show advanced options', 1 ;
RECONFIGURE ;
GO
EXEC sp_configure 'max text repl size', -1 ;
GO
RECONFIGURE;
GO

Tuesday, 15 October 2013

SQL SERVER REPLICATION

Setting up transactional replication using T-SQL

Step 1: Set up a shared folder for snapshots.

Step 2: Configure the distributor and publisher:

use master
exec sp_adddistributor @distributor = N'SSLMATTB2'
 , @password = N''
GO
exec sp_adddistributiondb @database = N'distribution'
 , @data_folder = N'C:\MSSQL\SQLData'
 , @log_folder = N'C:\MSSQL\SQLLogs'
 , @log_file_size = 2
 , @min_distretention = 0
 , @max_distretention = 72
 , @history_retention = 48
 , @security_mode = 1
GO

use [distribution] 
if (not exists (
  select * 
  from sysobjects 
  where name = 'UIProperties' and type = 'U ')) 
 create table UIProperties(id int)

if (exists (
  select * 
  from ::fn_listextendedproperty('SnapshotFolder'
   , 'user'
   , 'dbo'
   , 'table'
   , 'UIProperties'
   , null, null))) 
 EXEC sp_updateextendedproperty N'SnapshotFolder'
  , N'C:\MSSQL\SQL_Share'
  , 'user'
  , dbo
  , 'table'
  , 'UIProperties' 
else 
 EXEC sp_addextendedproperty N'SnapshotFolder'
  , N'C:\MSSQL\SQL_Share'
  , 'user'
  , dbo
  , 'table'
  , 'UIProperties'
GO

exec sp_adddistpublisher @publisher = N'sslmattb2'
 , @distribution_db = N'distribution'
 , @security_mode = 1
 , @working_directory = N'C:\MSSQL\SQL_Share'
 , @trusted = N'false'
 , @thirdparty_flag = 0
 , @publisher_type = N'MSSQLSERVER'
GO

Step 3: Configure a database for replication, create a publication,  and add an article:

use [AdventureWorks2008]
exec sp_replicationdboption @dbname = N'AdventureWorks2008'
 , @optname = N'publish'
 , @value = N'true'
GO

use [AdventureWorks2008]
exec sp_addpublication @publication = N'AW_products'
 , @sync_method = N'concurrent'
 , @retention = 0
 , @allow_push = N'true'
 , @allow_pull = N'true'
 , @allow_anonymous = N'false'
 , @enabled_for_internet = N'false'
 , @snapshot_in_defaultfolder = N'true'
 , @compress_snapshot = N'false'
 , @ftp_port = 21
 , @allow_subscription_copy = N'false'
 , @add_to_active_directory = N'false'
 , @repl_freq = N'continuous'
 , @status = N'active'
 , @independent_agent = N'true'
 , @immediate_sync = N'false'
 , @allow_sync_tran = N'false'
 , @allow_queued_tran = N'false'
 , @allow_dts = N'false'
 , @replicate_ddl = 1
 , @allow_initialize_from_backup = N'false'
 , @enabled_for_p2p = N'false'
 , @enabled_for_het_sub = N'false'
GO

exec sp_addpublication_snapshot @publication = N'AW_products'
 , @frequency_type = 1
 , @frequency_interval = 1
 , @frequency_relative_interval = 1
 , @frequency_recurrence_factor = 0
 , @frequency_subday = 8
 , @frequency_subday_interval = 1
 , @active_start_time_of_day = 0
 , @active_end_time_of_day = 235959
 , @active_start_date = 0
 , @active_end_date = 0
 , @job_login = null
 , @job_password = null
 , @publisher_security_mode = 1

use [AdventureWorks2008]
exec sp_addarticle @publication = N'AW_products'
 , @article = N'Product'
 , @source_owner = N'Production'
 , @source_object = N'Product'
 , @type = N'logbased'
 , @description = null
 , @creation_script = null
 , @pre_creation_cmd = N'drop'
 , @schema_option = 0x000000000803509F
 , @identityrangemanagementoption = N'manual'
 , @destination_table = N'Product'
 , @destination_owner = N'Production'
 , @vertical_partition = N'false'
 , @ins_cmd = N'CALL sp_MSins_ProductionProduct'
 , @del_cmd = N'CALL sp_MSdel_ProductionProduct'
 , @upd_cmd = N'SCALL sp_MSupd_ProductionProduct'
GO

Step 4: Backup the database on the publisher and restore to the subscription instance.
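A minimal sketch of this step (the backup path and logical file names are assumed; adjust them to your environment):

-- On the publisher
BACKUP DATABASE AdventureWorks2008
TO DISK = N'C:\MSSQL\Backup\AdventureWorks2008.bak'
WITH INIT;
GO

-- On the subscriber instance (sslmattb2\INST2), restored as the subscription database
RESTORE DATABASE AW_products
FROM DISK = N'C:\MSSQL\Backup\AdventureWorks2008.bak'
WITH MOVE N'AdventureWorks2008_Data' TO N'C:\MSSQL\SQLData\AW_products.mdf',
     MOVE N'AdventureWorks2008_Log'  TO N'C:\MSSQL\SQLLogs\AW_products_log.ldf',
     REPLACE;
GO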

Step 5: Configure a subscription (because I am creating a push subscription this script should be run on the publisher).


use [AdventureWorks2008]
exec sp_addsubscription @publication = N'AW_pub'
 , @subscriber = N'sslmattb2\INST2'
 , @destination_db = N'AW_products'
 , @subscription_type = N'Push'
 , @sync_type = N'automatic'
 , @article = N'all'
 , @update_mode = N'read only'
 , @subscriber_type = 0
exec sp_addpushsubscription_agent @publication = N'AW_pub'
 , @subscriber = N'sslmattb2\INST2'
 , @subscriber_db = N'AW_products'
 , @job_login = N'NT AUTHORITY\SYSTEM'
 , @job_password = null
 , @subscriber_security_mode = 1
 , @frequency_type = 64
 , @frequency_interval = 0
 , @frequency_relative_interval = 0
 , @frequency_recurrence_factor = 0
 , @frequency_subday = 0
 , @frequency_subday_interval = 0
 , @active_start_time_of_day = 0
 , @active_end_time_of_day = 235959
 , @active_start_date = 20120514
 , @active_end_date = 99991231
 , @enabled_for_syncmgr = N'False'
 , @dts_package_location = N'Distributor'
GO
Transactional Replication is what we used to call "tightly coupled" replication, or 
realtime replication. Essentially, transactional replication uses the Microsoft 
Distributed Transaction Coordinator, and it requires all of the servers involved in 
replication to complete their transactions before they are finalized. In other words, 
if any of the servers that are involved in replication are not available, no transactions 
can complete. The upside is that there can never be any conflicts because all of the 
servers involved in the replication are always consistent with each other.


Merge replication is a little bit looser than that. If a server isn't available, the updates 
to that server are queued up, and when the server becomes available it will receive all 
of its updates. Unfortunately, if two users update a record at the same time, merge 
replication will result in conflicts which will need to be manually resolved.


Either one of these types of replication is valid when there are multiple servers which may update 
a database. The differences are in overhead and reliability. Transactional replication 
will have a higher overhead in terms of CPU utilization, network traffic, disk time, 
and record locks. Merge replication has more administrative overhead, because someone 
has to handle conflict resolution.


So, when do you use Transactional Replication? Well, if you can deal with the overhead 
(minimal if you are careful) and if you can guarantee 
that all of the servers will always be able to talk to each other, then you can use 
Transactional Replication. Transactional replication shouldn't be used for failover. 
Why not? Well, if one of the servers involved in the replication becomes disabled because 
of a hardware issue (It's never a software issue, after all, Windows 2000 never crashes, 
right?) then there won't be any writes allowed in the data involved in replication even 
on the server that is still online. A good example of a system that would work well with 
transactional replication is an accounting system that is used to generate a lot of 
reports with minimal updates. Then the reporting load could be balanced across the two 
boxes and the data could be guaranteed consistent.


Merge replication is great for doing things like load balancing for geographically redundant 
sites. For example, if you have an Internet order taking system, having multiple SQL 
Servers available to replicate those transactions around is a good idea in case there is a 
fibre cut or some such nonsense. Then when full service is restored the servers will 
probably come back online with no issues except for a few manual conflicts to resolve. 
Since it is an order taking system, chances are pretty good that the only conflicts 
will be related to orders that were 
being changed when the server failed.
Source: http://www.sqlteam.com/article/choosing-a-replication-type
 Replication Optimization Tips

1. Avoid publishing unnecessary data.

Try to restrict the amount of published data. This can result in significant performance benefits as 
SQL Server will publish only the amount of data required. At the same time, this can reduce network 
traffic and boost the overall replication performance.
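A minimal sketch of restricting published rows on the AW_products publication created earlier (the filter name and clause are illustrative; column filtering can similarly be done with sp_articlecolumn and @vertical_partition):

USE AdventureWorks2008;
GO
-- Publish only the rows subscribers actually need
EXEC sp_articlefilter
     @publication   = N'AW_products',
     @article       = N'Product',
     @filter_name   = N'FLT_Product_Current',
     @filter_clause = N'SellEndDate IS NULL',
     @force_invalidate_snapshot = 1,
     @force_reinvalidate_subscription = 1;

EXEC sp_articleview
     @publication   = N'AW_products',
     @article       = N'Product',
     @view_name     = N'VW_Product_Current',
     @filter_clause = N'SellEndDate IS NULL',
     @force_invalidate_snapshot = 1,
     @force_reinvalidate_subscription = 1;
GO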


2. Place the published database log and distribution database log on separate disk drives.

Because logging is more write-intensive, it is important that the disk arrays containing the SQL 
Server log files have sufficient disk I/O performance. Separating the logs onto two drives ensures 
high disk I/O performance.


3. Do not configure the distribution database to expand or shrink automatically.

Microsoft recommends setting a fixed size for the distribution database. Setting a database to 
grow automatically results in some performance degradation; thus, you should set a reasonable 
initial size for the distribution database, as in the sketch below.
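A minimal sketch, assuming the default logical file names created by sp_adddistributiondb (check sys.database_files if yours differ); the sizes are illustrative:

USE master;
GO
ALTER DATABASE distribution
MODIFY FILE (NAME = N'distribution', SIZE = 2048MB, FILEGROWTH = 0);      -- fixed size, no autogrow
ALTER DATABASE distribution
MODIFY FILE (NAME = N'distribution_log', SIZE = 512MB, FILEGROWTH = 0);
GO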


4. Place the distribution component of replication on its own dedicated server.

This topology is used for performance reasons when the level of replication activity increases 
or the server resources become constrained. It reduces Publisher loading, but it increases 
overall network traffic. This topology requires separate Microsoft SQL Server installations -- 
one for the Publisher and one for the Distributor.


5. Run the Snapshot Agent as infrequently as possible.

The Snapshot Agent bulk copies data from the Publisher to the Distributor, which results 
in some performance degradation. Try to schedule the agent during CPU idle time and slow 
production periods to minimize performance loss.


6. Avoid using continuous replication.

If possible, schedule replication to occur at regular intervals instead of using continuous replication.


7. Avoid replicating text, ntext and image columns.

These data types require more storage space and processing than other column data types.


8. Replicate the execution of stored procedures when a large number of rows are affected.

For example, instead of replicating a very large number of insert, update and delete statements, 
you can create a stored procedure which contains all of these statements. Replicate to subscriber 
only the execution of this stored procedure. This can reduce network traffic and boost overall 
replication performance.
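A minimal sketch of publishing a procedure execution in the AW_products publication from the earlier post (the procedure name is an assumption; it must already exist in the publication database):

USE AdventureWorks2008;
GO
EXEC sp_addarticle
     @publication   = N'AW_products',
     @article       = N'usp_ArchiveOldProducts',
     @source_owner  = N'dbo',
     @source_object = N'usp_ArchiveOldProducts',
     @type          = N'serializable proc exec';   -- replicate the EXEC call, not the affected rows
GO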


9. Set the "Maximize Throughput for Network Applications" option.

This can increase SQL Server performance as Windows NT will allocate more RAM to SQL Server 
than to its file cache. To set this option, do the following:

    Double-click the Network icon in Control Panel.
    Click the Services tab.
    Click Server to select it, and then click the Properties button.
    Click Maximize Throughput for Network Applications, and then click OK.
    Restart the computer.


10. Specify the 'min server memory' option.

This option is used to set a minimum amount of memory allocated to SQL Server. If the server 
is a remote Distributor or a combined Publisher and Distributor, Microsoft recommends that 
the 'min server memory' option be set to at least 16 MB of memory to avoid low memory 
availability during replication activities.

You can also change these options when SQL Server works on the same computer with other 
applications. In this case, the 'min server memory' option is used to allow SQL Server to 
work when other applications attempt to use all available memory.
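A minimal sketch of setting the option to the 16 MB floor mentioned above:

EXEC sp_configure 'show advanced options', 1;
RECONFIGURE;
GO
EXEC sp_configure 'min server memory (MB)', 16;
RECONFIGURE;
GO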


11. Try to enable pull or anonymous subscriptions to increase the Distributor performance.

This can increase the Distributor performance, because the Distribution Agent processing 
will be moved from the Distributor to Subscribers.


12. Increase the MaxBcpThreads property of the Snapshot Agent.

This property specifies the number of bulk copy operations that can be performed in parallel. 
By increasing this value, bulk copy operations can run faster, because they will be performed 
in parallel.

To increase the MaxBcpThreads value in the Snapshot Agent profile, you can do the following:

    Run SQL Server Enterprise Manager.
    Expand a server group, then expand a server.
    Expand Replication Monitor, then expand the Agents and click the Snapshot Agents folder.
    Right-click appropriate publication and select Agent Profiles...
    Click the New Profile button to create the new profile with the appropriate MaxBcpThreads value.
    Choose the newly created profile.

Note. Do not set this property too high -- it can result in some performance degradation, 
because SQL Server will have to spend extra time managing the extra threads. First increase 
this property to 2 and continue monitoring performance.
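Where Enterprise Manager is not available, the same change can be made by appending the parameter to the Snapshot Agent job step; a sketch follows (the job name and existing command text are placeholders):

USE msdb;
GO
EXEC dbo.sp_update_jobstep
     @job_name = N'MyPublisher-AdventureWorks2008-AW_products-1',  -- assumed Snapshot Agent job name
     @step_id  = 2,                                                -- usually the "Run agent." step
     @command  = N'<existing snapshot agent command> -MaxBcpThreads 2';
GO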


13. Set the OutputVerboseLevel property of the Distribution Agent, the Log Reader Agent, 
the Merge Agent, and the Snapshot Agent to 0.

This property specifies whether the output should be verbose. There are three available values:

    0 - only error messages are printed
    1 - all of the progress report messages are printed
    2 - all error messages and progress report messages are printed 

The default value is 2. You can increase performance by printing only error messages.

To set the OutputVerboseLevel value to 0, you can do the following:

    Run SQL Server Enterprise Manager.
    Expand a server group, then expand a server.
    Expand Replication Monitor, then expand the Agents and click the appropriate agent folder.
    Right-click appropriate publication and select Agent Properties...
    On the Steps tab, double-click the Run agent step, and then add the -OutputVerboseLevel 
 0 in the Command text box.


14. You can minimize the performance effect of history logging by selecting 1 for the 
HistoryVerboseLevel property of the Distribution Agent, the Log Reader Agent, the Merge Agent, 
and the Snapshot Agent.

This property specifies the amount of history logged during distribution operation 
(for a Distribution Agent), during a log reader operation (for a Log Reader Agent), 
during a merge operation (for a Merge Agent), or during a snapshot operation (for a Snapshot Agent).

To set the HistoryVerboseLevel value to 1, you can do the following:

    Run SQL Server Enterprise Manager.
    Expand a server group, then expand a server.
    Expand Replication Monitor, then expand the Agents and click the appropriate agent folder.
    Right-click appropriate publication and select Agent Properties...
    On the Steps tab, double-click the Run agent step, and then add the -HistoryVerboseLevel 
 1 in the Command text box.


15. If you work with SQL Server 2000, consider using the -UseInprocLoader agent property.

If this option is set, the in-process BULK INSERT command will be used when applying snapshot 
files to the Subscriber. You cannot use this property with character mode bcp, and it cannot 
be used by OLE DB or ODBC Subscribers.

To set the UseInprocLoader property, you can do the following:

    Run SQL Server Enterprise Manager.
    Expand a server group, then expand a server.
    Expand Replication Monitor, then expand the Agents and click the Distribution Agents or 
 Merge Agents folder.
    Right-click appropriate publication and select Agent Properties...
    On the Steps tab, double-click the subscription agent step, and then add the -UseInprocLoader 
 property in the Command text box.


16. Increase the Log Reader Agent ReadBatchSize parameter.

This parameter specifies the maximum number of transactions read out of the transaction log 
of the publishing database. The default value is 500. This option should be used when a large 
number of transactions are written to a publishing database but only a small subset of those 
are marked for replication.


17. If you work with transactional replication, increase the Distribution Agent CommitBatchSize 
parameter.

This parameter specifies the number of transactions to be issued to the Subscriber before a 
COMMIT statement is issued. The default value is 100.
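A minimal sketch of where these two parameters go, shown as agent command lines (server and database names are placeholders; in practice the switches are appended to the corresponding agent job step or agent profile):

-- Log Reader Agent (tip 16):
--   logread.exe -Publisher [PUB] -PublisherDB [PubDB] -Distributor [DIST] -ReadBatchSize 1000
--
-- Distribution Agent (tip 17):
--   distrib.exe -Publisher [PUB] -PublisherDB [PubDB] -Subscriber [SUB] -SubscriberDB [SubDB] -CommitBatchSize 200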


18. Create an index on each of the columns used in the filter's WHERE clause.

If you do not use indexes on the columns used in filters, then SQL Server must perform a table scan.
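For example, if articles were filtered on the SellEndDate column of Production.Product (an assumption for illustration), an index on that column keeps the filter from scanning the table:

USE AdventureWorks2008;
GO
CREATE NONCLUSTERED INDEX IX_Product_SellEndDate
ON Production.Product (SellEndDate);
GO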


19. If you work with merge replication, use static instead of dynamic filters.

Because SQL Server requires more overhead to process the dynamic filters than static filters, 
for best performance you should use static filters whenever possible. 
 
 




 
 
 
 
 
 
 
 
 
 
 

Monday, 16 September 2013

The transaction log for database tempdb is full

If you get the following error message indicating that the tempdb transaction log is full:

Msg 9002, Level 17, State 4, Procedure sp_helpdb, Line 19

The transaction log for database 'tempdb' is full. To find out why space in the log cannot be reused, see the log_reuse_wait_desc column in sys.databases
Solution 1:

-- First check Tempdb Tran log file size using  dbcc sqlperf(logspace)

-- tempdb 999.9922 99.99628 0

USE MASTER
GO
ALTER DATABASE TEMPDB MODIFY FILE (NAME='templog', SIZE=1500MB)
Solution 2:

ALTER DATABASE tempdb
ADD LOG FILE
( NAME = tempdblog2,
  FILENAME = 'E:\MSSQL.1\MSSQL\DATA\tempdblog2.ldf',
  SIZE = 10MB,
  MAXSIZE = 100MB,
  FILEGROWTH = 10MB )








Tuesday, 3 September 2013

Script to Compress Tables and Indexes

This script is for SQL Server 2008 Enterprise Edition. Run the script in SSMS with "results to text". It will generate the compression statements for all tables and indexes in the database in which it is run. It creates the compression scripts in ascending order based on table/index size, so smaller tables/indexes are compressed first, creating additional space for larger tables/indexes to be compressed.
--Creates the ALTER TABLE Statements

SET NOCOUNT ON
SELECT 'ALTER TABLE ' + '[' + s.[name] + ']'+'.' + '[' + o.[name] + ']' + ' REBUILD WITH (DATA_COMPRESSION=PAGE);'
FROM sys.objects AS o WITH (NOLOCK)
INNER JOIN sys.indexes AS i WITH (NOLOCK)
ON o.[object_id] = i.[object_id]
INNER JOIN sys.schemas AS s WITH (NOLOCK)
ON o.[schema_id] = s.[schema_id]
INNER JOIN sys.dm_db_partition_stats AS ps WITH (NOLOCK)
ON i.[object_id] = ps.[object_id]
AND ps.[index_id] = i.[index_id]
WHERE o.[type] = 'U' AND i.[index_id] < 2 -- heap or clustered index only, so each table is listed once
ORDER BY ps.[reserved_page_count]




--Creates the ALTER INDEX Statements

SET NOCOUNT ON
SELECT 'ALTER INDEX '+ '[' + i.[name] + ']' + ' ON ' + '[' + s.[name] + ']' + '.' + '[' + o.[name] + ']' + ' REBUILD WITH (DATA_COMPRESSION=PAGE);'
FROM sys.objects AS o WITH (NOLOCK)
INNER JOIN sys.indexes AS i WITH (NOLOCK)
ON o.[object_id] = i.[object_id]
INNER JOIN sys.schemas s WITH (NOLOCK)
ON o.[schema_id] = s.[schema_id]
INNER JOIN sys.dm_db_partition_stats AS ps WITH (NOLOCK)
ON i.[object_id] = ps.[object_id]
AND ps.[index_id] = i.[index_id]
WHERE o.type = 'U' AND i.[index_id] >0
ORDER BY ps.[reserved_page_count]


Script out/store all indexes on a server

The last script loops through the entire server/database you specify and creates the indexes for you, sending you an email for each index that fails to get created.
The first script creates a table to store the indexes.
The second script creates the SQL Agent job. You can either paste the main segment of code directly into the job step where it says "INSERT CODE FROM ABOVE INTO THIS JOB STEP", or create a stored procedure and use that instead.
Deploy the job/procedure to any server whose index definitions you wish to back up, setting an appropriate schedule for it to run.
Use the final portion of code to loop through the table created in step 1 to automagically create the indexes on the target server.
/* Create table to hold indexes */
CREATE TABLE [dbo].[MasterIndexes](
 [ServerName] [varchar](75) NOT NULL,
 [DBName] [varchar](75) NOT NULL,
 [IndexTable] [varchar](75) NOT NULL,
 [Type] [varchar](3) NOT NULL,
 [IndexName] [varchar](500) NOT NULL,
 [FileGroup] [varchar](25) NOT NULL,
 [IndexText] [nvarchar](max) NOT NULL,
 CONSTRAINT [PK_MasterIndexes] PRIMARY KEY CLUSTERED 
(
 [ServerName] ASC,
 [DBName] ASC,
 [IndexTable] ASC,
 [Type] ASC,
 [IndexName] ASC
)WITH (PAD_INDEX  = OFF, STATISTICS_NORECOMPUTE  = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS  = ON, ALLOW_PAGE_LOCKS  = ON) ON [PRIMARY]
) ON [PRIMARY]

GO

/* Code to gather indexes in each database on the server */
SET NOCOUNT ON 
GO

IF  OBJECT_ID(N'TempDB..#Results') IS NOT NULL
DROP TABLE #Results
GO
IF  OBJECT_ID(N'TempDB..#Indexes') IS NOT NULL
DROP TABLE #Indexes
GO
IF  OBJECT_ID(N'TempDB..#RecCount') IS NOT NULL
DROP TABLE #RecCount
GO

DECLARE @i   int
DECLARE @Recs  int
DECLARE @Table  sysname
DECLARE @SQL  varchar(MAX)
DECLARE @Version varchar(3), @Count int
SET @Version = UPPER(CONVERT(varchar(3), SERVERPROPERTY('edition')))

/* ######################################### START MAIN PROCEDURE HERE ########################################## */

/* Create Temp Table to store the results in */
CREATE TABLE #Results (
 Idx int IDENTITY(1,1), TName sysname
)

/* Stores the record counts for the indexes in each database */
CREATE TABLE #RecCount ( 
 RecCount int
)

CREATE TABLE #Indexes (
 DBName sysname, [TableName] sysname, TableID int, IndexID int, IndexName sysname, Sts tinyint, 
 IsUnique tinyint, IsClustered tinyint, IndexFillFactor tinyint, FileGroup varchar(75), 
 [Online] varchar(3), keycolumns varchar(8000), includes varchar(8000)
)

/* Remove Prior Server's Index Records */
DELETE FROM [YOURSERVER].IndexManagement.dbo.MasterIndexes
WHERE ServerName = @@SERVERNAME
 
/* Fetch All the DB's on the Server */
INSERT INTO #Results
 EXEC sp_MSForEachDB 'Use [?]; SELECT DB_NAME()'

/* Get rid of the ones we don't want to index */
DELETE FROM #Results 
 WHERE TName IN ('MASTER', 'TEMPDB', 'MODEL', 'MSDB', 'DISTRIBUTION', 'AdventureWorks', 'ReportServer', 'ReportServerTempDB')

/* Loop through the DB's and kick off the magic */
SET @recs = (SELECT COUNT(1) FROM #Results)
 WHILE @Recs <> 0
 BEGIN
  SET @TABLE = (SELECT Top 1 TName FROM #Results )

  SET @SQL = '
  DECLARE @Version varchar(3), @Count int
  SET @Version = UPPER(CONVERT(varchar(3), SERVERPROPERTY(''edition'')))

  USE  ' + CAST(@Table as varchar(250)) + '; 
  INSERT INTO #Indexes
  SELECT 
   ''' + RTRIM(@TABLE) + ''',
   OBJECT_NAME(i.object_id) [tablename], 
   i.object_id [tableid], 
   i.index_id [indexid], 
   i.name [indexname],
   1 [status],
   isunique = INDEXPROPERTY(i.object_id, i.name, ''isunique''),
   isclustered = INDEXPROPERTY(i.object_id, i.name, ''isclustered''),
   indexfillfactor = INDEXPROPERTY(i.object_id, i.name, ''indexfillfactor''),
   f.name [filegroup],
   ''ON'' [Online], NULL, NULL
  FROM sys.indexes i 
  INNER JOIN sys.all_objects o ON i.object_id = o.object_id AND o.type = ''U'' AND o.is_ms_shipped = 0
  INNER JOIN sys.filegroups f ON i.data_space_id = f.data_space_id
  WHERE
   i.index_id >= 1
   AND OBJECTPROPERTY(i.object_id, ''IsUserTable'') = 1 
   AND INDEXPROPERTY(i.object_id, i.name, ''indexfillfactor'') >= 0
   AND i.index_id < 255                      
   AND LEFT(OBJECT_NAME(i.object_id), 3) NOT IN (''sys'', ''dt_'', ''MSp'', ''z_d'')  
   AND o.name NOT IN (
   SELECT DISTINCT
       a.name
     FROM    ' + RTRIM(@TABLE) + '.sys.sysobjects AS a WITH ( READUNCOMMITTED )
       JOIN ' + RTRIM(@TABLE) + '.sys.syscolumns AS b WITH ( READUNCOMMITTED ) ON a.id = b.id
       JOIN ' + RTRIM(@TABLE) + '.sys.syscolumns AS c WITH ( READUNCOMMITTED ) ON c.xtype = b.xtype
     WHERE   b.xType IN ( ''34'', ''35'', ''99'', ''165'',  ''241'' ))
  UNION ALL
  SELECT 
   ''' + RTRIM(@TABLE) + ''',
   OBJECT_NAME(i.object_id) [tablename], 
   i.object_id [tableid], 
   i.index_id [indexid], 
   i.name [indexname],
   1 [status],
   isunique = INDEXPROPERTY(i.object_id, i.name, ''isunique''),
   isclustered = INDEXPROPERTY(i.object_id, i.name, ''isclustered''),
   indexfillfactor = INDEXPROPERTY(i.object_id, i.name, ''indexfillfactor''),
   f.name [filegroup],
   ''OFF'' [Online], NULL, NULL
  FROM sys.indexes i 
  INNER JOIN sys.all_objects o ON i.object_id = o.object_id AND o.type = ''U'' AND o.is_ms_shipped = 0
  INNER JOIN sys.filegroups f ON i.data_space_id = f.data_space_id
  WHERE
   i.index_id >= 1
   AND OBJECTPROPERTY(i.object_id, ''IsUserTable'') = 1 
   AND INDEXPROPERTY(i.object_id, i.name, ''indexfillfactor'') >= 0
   AND i.index_id < 255                      
   AND LEFT(OBJECT_NAME(i.object_id), 3) NOT IN (''sys'', ''dt_'', ''MSp'', ''z_d'')  
   AND o.name IN (
   SELECT DISTINCT
       a.name
     FROM    ' + RTRIM(@TABLE) + '.sys.sysobjects AS a WITH ( READUNCOMMITTED )
       JOIN ' + RTRIM(@TABLE) + '.sys.syscolumns AS b WITH ( READUNCOMMITTED ) ON a.id = b.id
       JOIN ' + RTRIM(@TABLE) + '.sys.syscolumns AS c WITH ( READUNCOMMITTED ) ON c.xtype = b.xtype
     WHERE   b.xType IN ( ''34'', ''35'', ''99'', ''165'',  ''241'' ))

  SET @Count = @@ROWCOUNT
  INSERT INTO #RecCount SELECT ISNULL(@Count, 0) '
  EXECUTE (@SQL)

  SET @SQL = '
  DECLARE @Version varchar(3), @Count int
  SET @Version = UPPER(CONVERT(varchar(3), SERVERPROPERTY(''edition'')))
  DECLARE
   @isql_key varchar(8000),
   @isql_incl varchar(8000),
   @tableid int,
   @indexid int
  DECLARE index_cursor CURSOR
  FOR
  SELECT
   tableid,
   indexid
  FROM
   #Indexes  
  OPEN index_cursor
  FETCH NEXT FROM index_cursor INTO @tableid, @indexid
  WHILE @@fetch_status <> -1 
   BEGIN
      
    SELECT
     @isql_key = '''',
     @isql_incl = ''''
    
    SELECT 
     @isql_key = CASE ic.is_included_column
          WHEN 0 THEN CASE ic.is_descending_key
            WHEN 1 THEN @isql_key + COALESCE(sc.name, '''') + '' DESC, ''
            ELSE @isql_key + COALESCE(sc.name, '''') + '' ASC, ''
             END
          ELSE @isql_key
        END,
           
      @isql_incl = CASE ic.is_included_column
           WHEN 1 THEN CASE ic.is_descending_key
             WHEN 1 THEN @isql_incl + COALESCE(sc.name, '''') + '', ''
             ELSE @isql_incl + COALESCE(sc.name, '''') + '', ''
              END
           ELSE @isql_incl
         END
    FROM 
     ' + RTRIM(@TABLE) + '.sys.indexes i
    INNER JOIN ' + RTRIM(@TABLE) + '.sys.index_columns AS ic
     ON (ic.column_id > 0
      AND (ic.key_ordinal > 0
        OR ic.partition_ordinal = 0
        OR ic.is_included_column != 0))
        AND (ic.index_id = CAST(i.index_id AS int)
       AND ic.object_id = i.object_id)
    INNER JOIN ' + RTRIM(@TABLE) + '.sys.columns AS sc
     ON sc.object_id = ic.object_id
        AND sc.column_id = ic.column_id
    WHERE
     i.index_id > 1
     AND i.index_id < 255
     AND i.object_id = @tableid
     AND i.index_id = @indexid
    ORDER BY
     i.name,
     CASE ic.is_included_column
       WHEN 1 THEN ic.index_column_id
       ELSE ic.key_ordinal
     END
    
    IF LEN(@isql_key) > 1 
     SET @isql_key = LEFT(@isql_key, LEN(@isql_key) - 1)
     
    IF LEN(@isql_incl) > 1 
     SET @isql_incl = LEFT(@isql_incl, LEN(@isql_incl) - 1)
    
    UPDATE
     #Indexes
    SET 
     keycolumns = @isql_key,
     includes = @isql_incl
    WHERE
     tableid = @tableid
     AND indexid = @indexid
    FETCH NEXT FROM index_cursor INTO @tableid, @indexid
   END
  CLOSE index_cursor
  DEALLOCATE index_cursor'
  EXECUTE (@SQL)
      
 
 /* Add Updated Index Records to the Master List */
 IF ((SELECT COUNT(1) FROM #RecCount) > 0)
 BEGIN
 INSERT INTO [LINKEDSERVER].IndexManagement.dbo.MasterIndexes
  SELECT CAST(@@SERVERNAME as varchar(75)), CAST(@Table as varchar(75))
   , tablename [Table], CASE WHEN (IsClustered = 0) THEN 'NCI' ELSE 'CLU' END [Type],
   INDEXNAME [Index], [FileGroup],
  'USE ' + CAST(@Table as varchar(250)) 
   + '; IF  EXISTS (SELECT * FROM sys.indexes WHERE object_id = OBJECT_ID(N''[dbo].' + QUOTENAME(TABLENAME) 
   + ''') AND name = N''' + INDEXNAME + ''') DROP INDEX ' + QUOTENAME(INDEXNAME) + ' ON [dbo].' + QUOTENAME(TABLENAME) 
   + ' WITH ( ONLINE = OFF );  CREATE ' + CASE WHEN ISUNIQUE = 1 THEN 'UNIQUE '
       ELSE ''
     END + CASE WHEN ISCLUSTERED = 1 THEN 'CLUSTERED ' ELSE 'NONCLUSTERED '
    END + 'INDEX ' + QUOTENAME(INDEXNAME) + ' ON dbo.' + QUOTENAME(TABLENAME) + ' (' + keycolumns + ')'
  + CASE WHEN INDEXFILLFACTOR = 0
     AND ISCLUSTERED = 1
     AND INCLUDES = '' THEN ''
      WHEN INDEXFILLFACTOR = 0
     AND ISCLUSTERED = 0
     AND INCLUDES = '' THEN ' WITH (MAXDOP=4, ONLINE = ' 
      + CASE WHEN (@Version = 'STA') THEN 'OFF' ELSE [ONLINE] END + ') ON [' 
      + FILEGROUP + ']'
      WHEN INDEXFILLFACTOR <> 0
     AND ISCLUSTERED = 0
     AND INCLUDES = '' THEN ' WITH (MAXDOP=4, ONLINE = ' 
      + CASE WHEN (@Version = 'STA') THEN 'OFF' ELSE [ONLINE] END + ', FILLFACTOR = ' 
      + CONVERT(varchar(10), INDEXFILLFACTOR) + ') ON ['
      + FILEGROUP + ']'                
      WHEN INDEXFILLFACTOR = 0
     AND ISCLUSTERED = 0
     AND INCLUDES <> '' THEN ' INCLUDE (' + INCLUDES + ') WITH (MAXDOP=4, ONLINE = '
     + CASE WHEN (@Version = 'STA') THEN 'OFF' ELSE [ONLINE] END + ') ON ['
     + FILEGROUP + ']'
      ELSE ' INCLUDE(' + INCLUDES + ') WITH (MAXDOP=4, FILLFACTOR = ' 
     + CONVERT(varchar(10), INDEXFILLFACTOR) + ', ONLINE = '
     + CASE WHEN (@Version = 'STA') THEN 'OFF' ELSE [ONLINE] END + ') ON ['
     + FILEGROUP + ']'
    END
 FROM #Indexes
 ORDER BY tablename, indexid, indexname
 END

  DELETE FROM #Results WHERE TName = @Table
  SET @recs = (SELECT COUNT(1) FROM #Results)
  TRUNCATE TABLE #RecCount
  TRUNCATE TABLE #Indexes
 END 

DROP TABLE #Results
DROP TABLE #Indexes

SET NOCOUNT OFF
GO



/* Script to create the Agent job */
USE [msdb]
GO

/****** Object:  Job [DBA - Generate Index Create Scripts for ALL servers]    Script Date: 08/26/2013 22:11:06 ******/
BEGIN TRANSACTION
DECLARE @ReturnCode INT
SELECT @ReturnCode = 0
/****** Object:  JobCategory [Database Maintenance]    Script Date: 08/26/2013 22:11:07 ******/
IF NOT EXISTS (SELECT name FROM msdb.dbo.syscategories WHERE name=N'Database Maintenance' AND category_class=1)
BEGIN
EXEC @ReturnCode = msdb.dbo.sp_add_category @class=N'JOB', @type=N'LOCAL', @name=N'Database Maintenance'
IF (@@ERROR <> 0 OR @ReturnCode <> 0) GOTO QuitWithRollback

END

DECLARE @jobId BINARY(16)
EXEC @ReturnCode =  msdb.dbo.sp_add_job @job_name=N'DBA - Generate Index Create Scripts for ALL servers', 
  @enabled=1, 
  @notify_level_eventlog=2, 
  @notify_level_email=2, 
  @notify_level_netsend=0, 
  @notify_level_page=0, 
  @delete_level=0, 
  @description=N'1/4/2013 - Tweaked query to look through all DB''s on the entrie server, inserting them into the MasterIndexes table in the IndexManagement DB.  Runs Daily', 
  @category_name=N'Database Maintenance', 
  @owner_login_name=N'SomeUser', 
  @notify_email_operator_name=N'DBA', @job_id = @jobId OUTPUT
IF (@@ERROR <> 0 OR @ReturnCode <> 0) GOTO QuitWithRollback
/****** Object:  Step [Fetch Server Indexes]    Script Date: 08/26/2013 22:11:07 ******/
EXEC @ReturnCode = msdb.dbo.sp_add_jobstep @job_id=@jobId, @step_name=N'Fetch Server Indexes', 
  @step_id=1, 
  @cmdexec_success_code=0, 
  @on_success_action=1, 
  @on_success_step_id=0, 
  @on_fail_action=2, 
  @on_fail_step_id=0, 
  @retry_attempts=0, 
  @retry_interval=0, 
  @os_run_priority=0, @subsystem=N'TSQL', 
  @command=N'INSERT CODE FROM ABOVE INTO THIS JOB STEP', 
  @database_name=N'master', 
  @flags=4
IF (@@ERROR <> 0 OR @ReturnCode <> 0) GOTO QuitWithRollback
EXEC @ReturnCode = msdb.dbo.sp_update_job @job_id = @jobId, @start_step_id = 1
IF (@@ERROR <> 0 OR @ReturnCode <> 0) GOTO QuitWithRollback
EXEC @ReturnCode = msdb.dbo.sp_add_jobschedule @job_id=@jobId, @name=N'Daily', 
  @enabled=1, 
  @freq_type=4, 
  @freq_interval=1, 
  @freq_subday_type=1, 
  @freq_subday_interval=0, 
  @freq_relative_interval=0, 
  @freq_recurrence_factor=0, 
  @active_start_date=20121212, 
  @active_end_date=99991231, 
  @active_start_time=233700, 
  @active_end_time=235959, 
  @schedule_uid=N'cc8f8a83-7c7f-4315-b159-6832fdab97f3'
IF (@@ERROR <> 0 OR @ReturnCode <> 0) GOTO QuitWithRollback
EXEC @ReturnCode = msdb.dbo.sp_add_jobserver @job_id = @jobId, @server_name = N'(local)'
IF (@@ERROR <> 0 OR @ReturnCode <> 0) GOTO QuitWithRollback
COMMIT TRANSACTION
GOTO EndSave
QuitWithRollback:
    IF (@@TRANCOUNT > 0) ROLLBACK TRANSACTION
EndSave:

GO



DECLARE @Indexes TABLE (idx int IDENTITY( 1,1 ), IndexText varchar (MAX))
DECLARE @idx int , @iText varchar (MAX)

INSERT INTO @Indexes
SELECT IndexText
FROM [IndexManagement]. [dbo].[MasterIndexes]
WHERE
    ServerName = 'YOU SERVER'
    AND DBName IN ('YOUR DATABASES')
    AND [Type] <> 'CLU'
ORDER BY DBName DESC

WHILE (SELECT TOP 1 idx FROM @Indexes) > 0
BEGIN
        SELECT TOP 1 @idx = Idx , @iText = IndexText FROM @Indexes
        BEGIN TRY
               PRINT 'CREATING INDEX: ' + @iText
               EXEC (@iText )
               WAITFOR DELAY '00:00:00:250'
        END TRY
       
        BEGIN CATCH
        /* Required Parameters: @To, @Origin, @Object
                 If @Origin = 1 - This means a Stored-Procedure, specify @Object = Proc Name
                 If @Origin = 2 - This means a SQL Agent Job, specify @Object = Job Name
                 If @Origin = 3 - This means standard TSQL, specify @Object = 'TSQL' */
                 EXEC MyDatabase. dbo.dba_SendEmailNotification @Origin = 3,
                       @Object = 'TSQL - Issue creating indexes' , @Msg = @iText,@spid = @@SPID
        END CATCH
        DELETE FROM @Indexes WHERE idx = @idx
END

Thursday, 15 August 2013

Stored Procedure Optimization Tips – Best Practices

  • Include a SET NOCOUNT ON statement: With every SELECT and DML statement, SQL Server returns a message that indicates the number of rows affected by that statement. This information is mostly helpful in debugging the code, but it is useless after that. By setting SET NOCOUNT ON, we can disable the feature of returning this extra information. For stored procedures that contain several statements or contain Transact-SQL loops, setting SET NOCOUNT to ON can provide a significant performance boost because network traffic is greatly reduced.
CREATE PROC dbo.ProcName
AS
SET NOCOUNT ON;
--Procedure code here
SELECT column1 FROM dbo.TblTable1
-- Reset SET NOCOUNT to OFF
SET NOCOUNT OFF;
GO
  • Use the schema name with the object name: The object name is fully qualified when used with the schema name. The schema name should be used with the stored procedure name and with all objects referenced inside the stored procedure. This helps in directly finding the compiled plan instead of searching the objects in other possible schemas before finally deciding to use a cached plan, if available. This process of searching for and deciding on a schema for an object leads to a COMPILE lock on the stored procedure and decreases the stored procedure's performance. Therefore, always refer to objects with their qualified name in the stored procedure, like:
SELECT * FROM dbo.MyTable -- Preferred method
-- Instead of
SELECT * FROM MyTable -- Avoid this method

-- And finally, call the stored procedure with its qualified name:
EXEC dbo.MyProc -- Preferred method
-- Instead of
EXEC MyProc -- Avoid this method
  • Do not use the prefix "sp_" in the stored procedure name: If a stored procedure name begins with "sp_", then SQL Server first searches in the master database and then in the current session database. Searching in the master database causes extra overhead, and even a wrong result if another stored procedure with the same name is found in the master database.
  • Use IF EXISTS (SELECT 1) instead of (SELECT *): To check the existence of a record in another table, we use the IF EXISTS clause. The IF EXISTS clause returns True if any value is returned from the internal statement, whether a single value "1", all columns of a record, or a complete recordset. The output of the internal statement is not used. Hence, to minimize the data for processing and network transfer, we should use "1" in the SELECT clause of the internal statement, as shown below:
IF EXISTS (SELECT 1 FROM sysobjects WHERE name = 'MyTable' AND type = 'U')
  • Use the sp_executesql stored procedure instead of the EXECUTE statement. The sp_executesql stored procedure supports parameters, so using sp_executesql instead of the EXECUTE statement improves the re-usability of your code. The execution plan of a dynamic statement can be reused only if each and every character, including case, spaces, comments and parameters, is the same for the two statements. For example, if we execute the batch below:
DECLARE @Query VARCHAR(100)
DECLARE @Age INT
SET @Age = 25
SET @Query = 'SELECT * FROM dbo.tblPerson WHERE Age = ' + CONVERT(VARCHAR(3), @Age)
EXEC (@Query)
If we execute the above batch again using a different @Age value, then the execution plan for the SELECT statement created for @Age = 25 will not be reused. However, if we write the above batch as given below,
DECLARE @Query NVARCHAR(100)
SET @Query = N'SELECT * FROM dbo.tblPerson WHERE Age = @Age'
EXECUTE sp_executesql @Query, N'@Age int', @Age = 25
the compiled plan of this SELECT statement will be reused for different values of the @Age parameter. The reuse of the existing compiled plan will result in improved performance.
  • Try to avoid using SQL Server cursors whenever possible: A cursor uses a lot of resources for overhead processing to maintain the current record position in a recordset, and this decreases performance. If we need to process records one by one in a loop, then we should use the WHILE clause. Wherever possible, we should replace the cursor-based approach with a SET-based approach, because the SQL Server engine is designed and optimized to perform SET-based operations very fast. Again, please note that a cursor is also a kind of WHILE loop.
  • Keep the transaction as short as possible: The length of a transaction affects blocking and deadlocking. An exclusive lock is not released until the end of the transaction. At higher isolation levels, the shared locks are also held for the duration of the transaction. Therefore, a lengthy transaction means locks held for a longer time, and locks held for a longer time turn into blocking. In some cases, blocking also escalates into deadlocks. So, for faster execution and less blocking, the transaction should be kept as short as possible.
  • Use TRY-CATCH for error handling: Prior to SQL Server 2005, error-handling code made up a big portion of the actual code because an error check statement had to be written after every T-SQL statement. More code always consumes more resources and time. In SQL Server 2005, a new, simpler way was introduced for the same purpose. The syntax is as follows:
BEGIN TRY
 --Your t-sql code goes here
END TRY
BEGIN CATCH
 --Your error handling code goes here
END CATCH
Don't use "SELECT *" in a SQL query
  • Unnecessary columns may get fetched that will add expense to the data retrieval time.
  • The database engine cannot utilize the benefit of "Covered Index" (discussed in the previous article), and hence the query performs slowly.
Avoid unnecessary columns in the SELECT list and unnecessary tables in join conditions
  • Selecting unnecessary columns in a SELECT query adds overhead to the actual query, especially if the unnecessary columns are of LOB types.
  • Including unnecessary tables in join conditions forces the database engine to retrieve and fetch unnecessary data and increases the query execution time.
Do not use the COUNT() aggregate in a subquery to do an existence check
  • Do not use:
    SELECT column_list FROM table WHERE 0 < (SELECT count(*) FROM table2 WHERE ..)
    Instead, use:
    SELECT column_list FROM table WHERE EXISTS (SELECT * FROM table2 WHERE ...)
  • When you use COUNT(), SQL Server does not know that you are doing an existence check. It counts all matching values, either by doing a table scan or by scanning the smallest non-clustered index.
  • When you use EXISTS, SQL Server knows you are doing an existence check. When it finds the first matching value, it returns TRUE and stops looking. The same applies to using COUNT() instead of IN or ANY.
Try to avoid joining between two types of columns
  • When joining between two columns of different data types, one of the columns must be converted to the type of the other. The column whose type is lower is the one that is converted.
  • If you are joining tables with incompatible types, one of them can use an index, but the query optimizer cannot choose an index on the column that it converts. For example:
    SELECT column_list FROM small_table, large_table
    WHERE smalltable.float_column = large_table.int_column
    In this case, SQL Server converts the integer column to float, because int is lower in the hierarchy than float. It cannot use an index on large_table.int_column, although it can use an index on smalltable.float_column.
Try to avoid deadlocks
  • Always access tables in the same order in all your stored procedures and triggers.
  • Keep your transactions as short as possible. Touch as little data as possible during a transaction.
  • Never, ever wait for user input in the middle of a transaction.
Write TSQL using "Set based approach" rather than "Procedural approach"
  • The database engine is optimized for set-based SQL. Hence, the procedural approach (use of a cursor or UDF to process rows in a result set) should be avoided when large result sets (more than 1000 rows) have to be processed.
  • How can we get rid of "Procedural SQL"? Follow these simple tricks:
    • Use inline sub queries to replace User Defined Functions.
    • Use correlated sub queries to replace Cursor based code.
    • If procedural coding is really necessary, at least, use a table variable instead of a cursor to navigate and process the result set.
For more info on "set" and "procedural" SQL, see Understanding "Set based" and "Procedural" approaches in SQL.
Try not to use COUNT(*) to obtain the record count in a table
  • To get the total row count in a table, we usually use the following SELECT statement:
    SELECT COUNT(*) FROM dbo.orders
    This query will perform a full table scan to get the row count.
  • The following query would not require a full table scan. (Please note that this might not give you 100% perfect results always, but this is handy only if you don't need a perfect count.)
    SELECT rows FROM sysindexes 
    WHERE id = OBJECT_ID('dbo.Orders') AND indid < 2
Try to avoid dynamic SQL
Unless really required, try to avoid the use of dynamic SQL because:
  • Dynamic SQL is hard to debug and troubleshoot.
  • If the user provides the input to the dynamic SQL, then there is possibility of SQL injection attacks.
Try to avoid the use of temporary tables
  • Unless really required, try to avoid the use of temporary tables. Rather, use table variables.
  • In 99% of cases, table variables reside in memory, hence they are a lot faster. Temporary tables reside in the TempDb database, so operating on temporary tables requires inter-database communication and will hence be slower.
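A minimal sketch of the table-variable approach (the table and column names are illustrative):

DECLARE @RecentOrders TABLE (
    OrderID   INT PRIMARY KEY,
    OrderDate DATETIME
);

INSERT INTO @RecentOrders (OrderID, OrderDate)
SELECT OrderID, OrderDate
FROM dbo.Orders
WHERE OrderDate >= DATEADD(DAY, -7, GETDATE());

SELECT COUNT(*) AS RecentOrderCount FROM @RecentOrders;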
Instead of LIKE search, use full text search for searching textual data
Full text searches always outperform LIKE searches.
  • Full text searches will enable you to implement complex search criteria that can't be implemented using a LIKE search, such as searching on a single word or phrase (and optionally, ranking the result set), searching on a word or phrase close to another word or phrase, or searching on synonymous forms of a specific word.
  • Full text search is easier to implement than a LIKE search (especially in the case of complex search requirements).
  • For more info on full text search, see http://msdn.microsoft.com/en-us/library/ms142571(SQL.90).aspx
Try to use UNION to implement an "OR" operation
  • Try not to use "OR" in a query. Instead, use "UNION" to combine the result sets of two separate queries. This will improve query performance.
  • Better, use UNION ALL if a distinct result is not required. UNION ALL is faster than UNION, as it does not have to sort the result set to find the distinct values.
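A minimal sketch against the illustrative dbo.Orders table (note that UNION ALL may return a row twice if it matches both predicates, which is acceptable when a distinct result is not required):

-- Instead of:
--   SELECT OrderID FROM dbo.Orders WHERE CustomerID = 42 OR SalesPersonID = 7;
SELECT OrderID FROM dbo.Orders WHERE CustomerID    = 42
UNION ALL
SELECT OrderID FROM dbo.Orders WHERE SalesPersonID = 7;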
Implement a lazy loading strategy for large objects
  • Store Large Object columns (like VARCHAR(MAX), Image, Text etc.) in a different table than the main table, and put a reference to the large object in the main table.
  • Retrieve all the main table data in a query, and if a large object is required to be loaded, retrieve the large object data from the large object table only when it is required.
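A minimal sketch of this split design (all object names are illustrative):

CREATE TABLE dbo.Product_Main (
    ProductID   INT          PRIMARY KEY,
    ProductName VARCHAR(100) NOT NULL
);

CREATE TABLE dbo.Product_Document (
    ProductID INT          PRIMARY KEY
        REFERENCES dbo.Product_Main (ProductID),
    LargeSpec VARCHAR(MAX) NULL   -- the large object column
);
GO

-- List screens read only the narrow main table...
SELECT ProductID, ProductName FROM dbo.Product_Main;

-- ...and the large object is loaded lazily, one row at a time, only when requested.
SELECT LargeSpec FROM dbo.Product_Document WHERE ProductID = 1;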
Use VARCHAR(MAX), VARBINARY(MAX), and NVARCHAR(MAX)
  • In SQL Server 2000, a row cannot exceed 8000 bytes in size. This limitation is due to the 8 KB internal page size of SQL Server. So to store more data in a single column, you need to use TEXT, NTEXT, or IMAGE data types (BLOBs) which are stored in a collection of 8 KB data pages.
  • These are unlike the data pages that store other data in the same table. These pages are arranged in a B-tree structure. These data cannot be used as variables in a procedure or a function, and they cannot be used inside string functions such as REPLACE, CHARINDEX, or SUBSTRING. In most cases, you have to use READTEXT, WRITETEXT, and UPDATETEXT.
  • To solve this problem, use VARCHAR(MAX), NVARCHAR(MAX), and VARBINARY(MAX) in SQL Server 2005. These data types can hold the same amount of data BLOBs can hold (2 GB), and they are stored in the same type of data pages used for other data types.
  • When data in a MAX data type exceeds 8 KB, an over-flow page is used (in the ROW_OVERFLOW allocation unit), and a pointer to the page is left in the original data page in the IN_ROW allocation unit.
Implement the following good practices in User Defined Functions
  • Do not call functions repeatedly within your Stored Procedures, triggers, functions, and batches. For example, you might need the length of a string variable in many places of your procedure, but don't call the LEN function whenever it's needed; instead, call the LEN function once, and store the result in a variable for later use.
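A minimal sketch of caching the result (the variable names are illustrative):

DECLARE @CustomerName VARCHAR(200) = 'Some customer name';
DECLARE @NameLength   INT          = LEN(@CustomerName);   -- call LEN once, reuse the variable

IF @NameLength > 100 PRINT 'Name is unusually long';
IF @NameLength < 3   PRINT 'Name is suspiciously short';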
Implement the following good practices in Stored Procedures
  • Do not use "SP_XXX" as a naming convention. It causes additional searches and added I/O (because the system Stored Procedure names start with "SP_"). Using "SP_XXX" as the naming convention also increases the possibility of conflicting with an existing system Stored Procedure.
  • Use "Set Nocount On" to eliminate extra network trips.
  • Use the WITH RECOMPILE clause in the EXECUTE statement (first time) when the index structure changes (so that the compiled version of the Stored Procedure can take advantage of the newly created indexes).
  • Use default parameter values for easy testing.
Implement the following good practices in Triggers
  • Try to avoid the use of triggers. Firing a trigger and executing the triggering event is an expensive process.
  • Never use triggers that can be implemented using constraints.
  • Do not use the same trigger for different triggering events (Insert, Update, Delete).
  • Do not use transactional code inside a trigger. The trigger always runs within the transactional scope of the code that fires the trigger.
Implement the following good practices in Views
  • Use views for re-using complex TSQL blocks, and to enable it for indexed views (Will be discussed later).
  • Use views with the SCHEMABINDING option if you do not want to let users modify the table schema accidentally.
  • Do not use views that retrieve data from a single table only (that will be an unnecessary overhead). Use views for writing queries that access columns from multiple tables.
Implement the following good practices in Transactions
  • Prior to SQL Server 2005, after BEGIN TRANSACTION and each subsequent modification statement, the value of @@ERROR had to be checked. If its value was non-zero, then the last statement caused an error, and if an error occurred, the transaction had to be rolled back and an error had to be raised (for the application). In SQL Server 2005 and onwards, the Try...Catch block can be used to handle transactions in TSQL. So try to use Try...Catch based transactional code.
  • Try to avoid nested transactions. Use the @@TRANCOUNT variable to determine whether a transaction needs to be started (to avoid nested transactions).
  • Start a transaction as late as possible and commit/rollback the transaction as fast as possible to reduce the time period of resource locking.
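A minimal sketch of the TRY...CATCH transaction pattern described above (the object names are illustrative):

BEGIN TRY
    -- @@TRANCOUNT can be checked here first to avoid opening a nested transaction
    BEGIN TRANSACTION;

    UPDATE dbo.Orders            SET OrderDate = GETDATE() WHERE OrderID = 1;
    DELETE FROM dbo.OrderDetails WHERE OrderID = 1;

    COMMIT TRANSACTION;   -- commit as soon as the work is done
END TRY
BEGIN CATCH
    IF @@TRANCOUNT > 0
        ROLLBACK TRANSACTION;

    DECLARE @ErrMsg NVARCHAR(2048) = ERROR_MESSAGE();
    RAISERROR (@ErrMsg, 16, 1);   -- surface the error to the calling application
END CATCH;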

Optimization cont'd

If we follow the following tips and rules while writing SQL queries, we can improve their performance:

1. Help Query optimizer to estimate correct statistics

In SQL Server, the execution plan is decided by:

1. Cardinality: The total number of rows processed at each level of the query.

2. Query Plan: The algorithm used to perform tasks like searching, sorting, etc.

When we execute a SQL query, create an index, insert records into a table, etc., SQL Server automatically creates or updates the statistics on the key fields of the tables. In simple words, we can say a statistics object keeps information about the total number of distinct records in a table.

There are a few cases where the query optimizer is unable to get correct information from statistics, which decreases the performance of a SQL query. For example:

First we create a tblMessage table, insert 97526 records into it, and create three indexes on it:

CREATE TABLE tblMessage (
    ntMessageID BIGINT IDENTITY PRIMARY KEY,
    ntSeverity INT,
    vcMessage VARCHAR(500),
    dtDate DATETIME
)

INSERT INTO tblMessage
SELECT
    Severity,
    LEFT ([TEXT], 500),
    DATEADD (YEAR,severity,GETDATE())  
FROM Sys.messages

CREATE NONCLUSTERED INDEX NC_Severity
ON tblMessage(ntSeverity)
INCLUDE (vcMessage)

CREATE NONCLUSTERED INDEX NC_Msg
ON tblMessage(vcMessage)

CREATE NONCLUSTERED INDEX NC_Date
ON tblMessage(dtDate)
INCLUDE (ntSeverity,vcMessage)

Note: We are setting statistics IO on to know the total logical reads:

SET STATISTICS IO ON

Scenario 1:

a. Consider the following two SQL queries:

Query 1:

SELECT vcMessage FROM tblMessage
WHERE LEFT (vcMessage,1) = 'a'

Query 2:

SELECT vcMessage FROM tblMessage
WHERE vcMessage LIKE 'a%'

Both queries perform the same task from a results point of view. Now check the logical reads in the Messages tab:

Logical reads (Query 1):  1471
Logical reads (Query 2):  79

So, Query 1 has to perform approximately 18 times more logical reads than Query 2.

Now check the actual execution plan of both queries:

We can observe that Query 1 uses an index scan while Query 2 uses an index seek, and the overall cost of Query 1 is 96% while that of Query 2 is only 4%.

So now we can say Query 2 is much better than Query 1.

b. Consider the following two SQL queries:

Query 1:

SELECT * FROM tblMessage WHERE YEAR (dtDate) = '2012'

Query 2:

SELECT * FROM tblMessage
WHERE dtDate >= '2012-01-01' AND dtDate < '2013-01-01'

Both of them perform the same task from the result point of view. Now check the logical reads in the Messages tab:

Logical read (Query 1):  1589
Logical read (Query 2):  10

So Query 1 needs approximately 160 times more logical reads than Query 2.

If we check the actual execution plans of both queries, we will observe that Query 1 uses an index scan while Query 2 uses an index seek, and the overall cost of Query 1 is 99% while that of Query 2 is only 1%.

In the same way, if we analyze the following pairs of queries, Query 2 performs much better than Query 1 in each case:

c.

Query 1:

SELECT vcMessage FROM tblMessage
WHERE (ntSeverity * 5)/2 = 35

Query 2:

SELECT vcMessage FROM tblMessage
WHERE ntSeverity = (35/5) * 2

Logical read (Query 1): 1490
Logical read (Query 2): 16
Batch query cost (Query 1): 100%
Batch query cost (Query 2): 0%

d.

Query 1:

SELECT vcMessage FROM tblMessage
WHERE LOWER (vcMessage) = LOWER ('Text')

Query 2:

SELECT vcMessage FROM tblMessage
WHERE vcMessage = 'Text'

Or

SELECT vcMessage FROM tblMessage
WHERE vcMessage = 'Text' COLLATE SQL_Latin1_General_CP1_CI_AS

Logical read (Query 1): 1471
Logical read (Query 2): 3
Batch query cost (Query 1): 100%
Batch query cost (Query 2): 0%

Note: The default collation of SQL Server is case-insensitive, so there is no need to use the LOWER function to perform a case-insensitive comparison. If the default collation of your database, table, or column is case-sensitive, use the COLLATE clause with a case-insensitive collation instead.
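
For reference, this is one way to check which collation is in effect at the server, database, and column level (a quick illustration using built-in metadata functions):

-- Server-level default collation
SELECT SERVERPROPERTY('Collation') AS ServerCollation

-- Current database collation
SELECT DATABASEPROPERTYEX(DB_NAME(), 'Collation') AS DatabaseCollation

-- Column-level collation of vcMessage
SELECT name, collation_name
FROM sys.columns
WHERE object_id = OBJECT_ID('tblMessage') AND name = 'vcMessage'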

e.

Query 1:

SELECT vcMessage FROM tblMessage
WHERE CAST (ntSeverity AS VARCHAR) = '14'

Query 2:

SELECT vcMessage FROM tblMessage
WHERE ntSeverity = CAST ('14' AS INT)

Logical read (Query 1): 1490
Logical read (Query 2): 16
Batch query cost (Query 1): 99%
Batch query cost (Query 2): 1%

Question: Why does Query 2 perform better than Query 1 in all of the above examples?

Answer: If you look at the WHERE clause of Query 1 in all of the above examples, you will notice that we perform some operation on a field of the table tblMessage, while in Query 2 the equivalent value is used as a constant expression.

If we perform any of the following operations on a field of a table in a query predicate:

1. Using any system function or user-defined function
2. Scalar operations like addition, multiplication, etc.
3. Type casting

then the SQL Server query optimizer is not able to estimate the correct cardinality using statistics.

Query 1:

SELECT vcMessage FROM tblMessage
WHERE LEFT (vcMessage, 1) = 'a'

Query 2:

SELECT vcMessage FROM tblMessage
WHERE vcMessage LIKE 'a%'

Both of the above queries return 4171 records in total.

If we check the details section of the execution plan, we will get:

Actual cardinality: 4171
Estimated cardinality (Query 1): 2432
Estimated cardinality (Query 2): 4131.76

So the estimated cardinality of Query 2 (4131.76) is very near to the correct cardinality, i.e., the total number of records filtered by the WHERE clause and passed to the SELECT clause (4171).

Tips: If possible, don't perform any operation on a field of a table in the WHERE clause, ON clause, or HAVING clause. Instead, write an equivalent constant expression.

Scenario 2:

Consider the following two SQL statements:

Query 1:

SELECT vcMessage FROM tblmessage
WHERE vcMessage LIKE 'Text%'

Query 2:

DECLARE @Msg AS VARCHAR (500) = 'Text%'
SELECT vcMessage FROM tblmessage
WHERE vcMessage LIKE @Msg

If we check the execution plans of both queries:

The total query cost of Query 2 is 96% while Query 1 is only 4%!

What is wrong with Query 2?

Again, the same cause: the query optimizer is not able to get the correct cardinality using statistics.

1. Actual cardinality: 55
2. Estimated cardinality (Query 1): 40.9086
3. Estimated cardinality (Query 2): 6222.16

If we use local variables in a query predicate, the SQL Server query optimizer is not able to estimate the correct cardinality using statistics.

Tips:

Don't use local variables in the WHERE clause, ON clause, or HAVING clause. Instead, use constant expressions.

Question: How do we resolve this problem when the variable is a parameter of a stored procedure or function? This is addressed in the scenarios below.

Scenario 3:

Consider the following SQL statement:

Query 1:

SELECT vcMessage FROM tblMessage
WHERE ntSeverity * ntMessageID = 0

There are many business scenarios where we use more than one field and perform some arithmetic operation on them in a query predicate. This is called a cross relationship among fields. The above query is an example of a cross relationship between the fields ntSeverity and ntMessageID.

If there is any cross relationship among different fields of a table in a query predicate, the SQL Server query optimizer is not able to estimate the correct cardinality using statistics.

To resolve this problem, SQL Server provides computed columns, so we can create a computed column in the table tblMessage. In this example we create another table, tblMessage_Dense, insert the same records, and create a non-clustered index:

CREATE TABLE tblMessage_Dense(
    ntMessageID BIGINT IDENTITY PRIMARY KEY,
    ntSeverity INT,
    vcMessage VARCHAR(500),
    dtDate DATETIME,
    ntDensity AS ntSeverity * ntMessageID
)

INSERT tblMessage_Dense(ntSeverity,vcMessage,dtDate)
SELECT
    Severity,
    LEFT ([TEXT], 500),
    DATEADD (YEAR,severity,GETDATE())  
FROM Sys.messages

CREATE NONCLUSTERED INDEX NC_Density
ON tblMessage_Dense(ntDensity)
INCLUDE (vcMessage)

And our new sql query will be:

Query 2:

SELECT vcMessage FROM tblMessage_Dense
WHERE ntDensity = 0

OK, now compare the performance of both SQL queries:

Logical read (Query 1): 1490
Logical read (Query 2):  9

So Query 1 needs approximately 165 times more logical reads than Query 2.

Now check the actual execution plan of both queries:

We can observe that Query 1 uses an index scan while Query 2 uses an index seek, and the overall cost of Query 1 is 99% while that of Query 2 is only 1%.

So we can say Query 2 is much better than Query 1.

Note: The good news is that a non-persisted computed column is not physically stored in the table; its value is computed on the fly when the column is referenced.
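
If you do want the computed values materialized in the table (so they are not recalculated on every read), the column can be marked PERSISTED. A small illustrative sketch, using an extra column name invented for this example:

-- Illustrative only: a persisted variant of the computed column,
-- whose values are physically stored and kept up to date by SQL Server.
ALTER TABLE tblMessage_Dense
ADD ntDensityPersisted AS ntSeverity * ntMessageID PERSISTED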

Suppose our application has a table tblQueue:

CREATE TABLE tblQueue (
    ntQueueID BIGINT IDENTITY PRIMARY KEY,
    vcQueueName VARCHAR(500)
)

INSERT tblQueue (vcQueueName)
SELECT  '1_' +
    CAST(message_id AS VARCHAR) + '_' +
    CAST(severity AS VARCHAR) + '_' +
    CAST(language_id AS VARCHAR)  FROM Sys.messages

The application very frequently executes a SQL query which extracts a batch ID from the column vcQueueName:

Query 1:

SELECT * FROM tblQueue
WHERE CAST (REVERSE (LEFT (REVERSE (vcQueueName), CHARINDEX ('_', REVERSE (vcQueueName)) - 1)) AS INT) = 1031

In this situation, it would be a good idea to create a computed column for the batch ID in the table. In this example we create a new table, tblQueue_Batch:

CREATE TABLE tblQueue_Batch (
    ntQueueID BIGINT IDENTITY PRIMARY KEY,
    vcQueueName VARCHAR(500),
    ntBatchID AS  CAST( REVERSE( LEFT( REVERSE( vcQueueName), CHARINDEX('_',REVERSE(vcQueueName)) - 1)) AS INT)
)

INSERT tblQueue_Batch (vcQueueName)
SELECT '1_' +
    CAST (message_id AS VARCHAR) + '_' +
    CAST (severity AS VARCHAR) + '_' +
    CAST (language_id AS VARCHAR) FROM Sys.messages

CREATE NONCLUSTERED INDEX NC_Batch
ON tblQueue_Batch (ntBatchID)
INCLUDE (vcQueueName)

The following Query 2 is much more efficient than the previous one:

Query 2:

SELECT * FROM tblQueue_Batch  WHERE ntBatchID = 1031

Logical read (Query 1): 430
Logical read (Query 2): 44
Batch query cost (Query 1): 91%
Batch query cost (Query 2): 9%

Tips:

If there is a cross relationship among fields, or a complex expression on a field, in a query predicate, it is better to create a computed column and then create a non-clustered index on it.

Scenario 4:

In SQL Server, when a SQL statement uses a variable in a query predicate, the query optimizer can estimate the correct cardinality only if that variable is a parameter of a stored procedure or function. For example:

We create two stored procedures:

CREATE PROC uspGetMsg_1 (
    @Severity AS INT
)
AS
BEGIN
    SELECT vcMessage FROM tblMessage
WHERE ntSeverity = @Severity
END

Go

CREATE PROC uspGetMsg_2
AS
BEGIN
    SELECT vcMessage FROM tblMessage
WHERE ntSeverity = 15
END

Query 1:

EXECUTE uspGetMsg_1 15

Query 2:

EXECUTE uspGetMsg_2

From a query performance point of view, both queries have exactly the same execution cost. There is no difference whether we use a constant expression or pass the value as a parameter.

Now we alter the stored procedure uspGetMsg_2:

ALTER PROC uspGetMsg_2 (
    @Severity AS INT
)
AS
BEGIN

    SET @Severity = @Severity - 1
    SELECT vcMessage FROM tblMessage
WHERE ntSeverity = @Severity
END

Now consider the following two SQL statements:

Query 1:

EXECUTE uspGetMsg_1 15

Query 2:

EXECUTE uspGetMsg_2 16

If we check the execution plans of both queries:

The execution cost of Query 2 is 95% while Query 1 is only 5%.

1. Actual cardinality: 3102
2. Cardinality (Query 1): 3102
3. Cardinality (Query 2): 69839

So again the estimate for Query 2 does not match the actual cardinality. If you check, you will find that Query 2 is using the cardinality of the query:

SELECT vcMessage FROM tblMessage WHERE ntSeverity = 16

while we have updated the value of the variable @Severity to 15, so it should use the cardinality of the query:

SELECT vcMessage FROM tblMessage WHERE ntSeverity = 15

But that is not what happens. So we can say the query optimizer estimates the cardinality from statistics on the basis of the value passed to the parameter of a function or stored procedure, not the updated value of the parameter inside the function or stored procedure.

Tips:

If possible, don't update the value of a parameter of a function or stored procedure before using it in a SQL statement; instead, pass the already-updated value as the parameter.

Scenario 5:

In SQL Server, we can empty the plan (procedure) cache by using the statement:

DBCC FREEPROCCACHE

Note: Don't run this command on a production server.
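
If you only need to invalidate the plan of a single procedure rather than flush the whole cache (SQL Server 2008 onwards), you can pass a specific plan handle. A sketch using the plan-cache DMVs; adjust the filter to match your procedure:

-- Find the plan handle of the cached plan for uspGetMsg_1
SELECT cp.plan_handle, st.text
FROM sys.dm_exec_cached_plans AS cp
CROSS APPLY sys.dm_exec_sql_text(cp.plan_handle) AS st
WHERE st.text LIKE '%uspGetMsg_1%'

-- Then clear only that plan:
-- DBCC FREEPROCCACHE (<plan_handle value returned by the query above>)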

Consider the following two SQL statements:

Query 1:

EXECUTE uspGetMsg_1 16

Query 2:

EXECUTE uspGetMsg_1 15

If we check the execution plans, we will find:

Query 1:

Actual cardinality: 69839
Estimated cardinality: 69839

Query 2:

Actual cardinality: 3102
Estimated cardinality: 69839

Now empty the proc cache:

DBCC FREEPROCCACHE

Now execute the same two queries but in reverse order, that is:

Query 1:

EXECUTE uspGetMsg_1 15

Query 2:

EXECUTE uspGetMsg_1 16

If we check the execution plans, we will find:

Query 1:

Actual cardinality: 3102
Estimated cardinality: 3102

Query 2:

Actual cardinality: 69839
Estimated cardinality: 3102

Here we can observe that the estimated cardinality of Query 2 is equal to the estimated cardinality of Query 1. SQL Server builds the plan (and its cardinality estimates) for the parameters of a stored procedure or function on the basis of the values passed when it is executed for the first time. From then on it reuses the same estimates as long as the statistics, query plan, etc. do not change; this behaviour is commonly called parameter sniffing. Due to a wrong cardinality estimate, the query optimizer can choose a bad execution plan, which may decrease query performance.

We can optimize a SQL query for the value that the application passes most often by using the OPTIMIZE FOR query hint. It overrides the default behavior: instead of optimizing the query on the basis of whatever value was passed the first time, the plan is built for the specified value. For example:

CREATE PROC uspGetMsg(
    @Severity AS INT
)
AS
BEGIN
    SELECT vcMessage FROM tblMessage
    WHERE ntSeverity = @Severity
    OPTION (OPTIMIZE FOR (@Severity = 15))
END
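
For illustration, whichever value we now pass, the cached plan is compiled as if @Severity were 15:

-- The plan for this call is built and cached for @Severity = 15,
-- even though 12 is the value passed at run time.
EXECUTE uspGetMsg 12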

Tips:

Use the OPTIMIZE FOR hint when you want to optimize a SQL query on the basis of a specific parameter value.

Scenario 6:

There are many situations where we need to update the value of a parameter according to some condition inside a stored procedure or a function. For example:

ALTER PROC uspGetMsg(
    @Severity AS INT
)
AS
BEGIN

    IF @Severity IS NULL
         SET @Severity = (SELECT MAX(ntSeverity) FROM tblMessage)
       
    SELECT vcMessage FROM tblMessage WHERE ntSeverity = @Severity
END

Consider a situation where our application executes the following SQL query for the first time:

EXECUTE uspGetMsg NULL

The query optimizer will estimate the cardinality on the basis of:

SELECT vcMessage FROM tblMessage
WHERE ntSeverity = NULL

As we know, NULL is never equal to any value; we use IS NULL to compare null values. So this query will not return any result set, and if a query doesn't return any rows its estimated cardinality will be one.

Let's check the cardinality in the execution plan:
Actual cardinality: 110
Estimated cardinality: 1

This will affect the performance of the query drastically. The solution to this problem is to split the logic into two stored procedures. For example:

ALTER PROC uspGetMsg (
    @Severity AS INT
)
AS
BEGIN

    IF @Severity IS NULL
         SET @Severity = (SELECT MAX (ntSeverity) FROM tblMessage)
       
    EXECUTE uspGetMsgNotNull @Severity
END

Go

CREATE PROC uspGetMsgNotNull (
    @Severity AS INT
)
AS
BEGIN  
    SELECT vcMessage FROM tblMessage WHERE ntSeverity = @Severity
END

And from the application, use only the stored procedure uspGetMsg.

Tips:

If you want to update the value of a parameter of a stored procedure or function, create a similar procedure or function and execute it from the base procedure or function, passing the updated value as a parameter.

Scenario 7:

In SQL Server, the auto create and auto update statistics settings are on by default, except for asynchronous statistics updates. This means the query optimizer automatically creates and updates statistics as required. More specifically, it automatically creates statistics for primary key columns when we create a table, for the key columns of clustered and non-clustered indexes when we create those indexes, and for query predicate columns when we execute queries, and it updates statistics when a threshold of updates, deletions, or insertions of records is reached.

Limitation: The query optimizer can auto-create only single-column statistics, and it cannot create filtered statistics.
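
To verify these settings for the current database, the sys.databases catalog view can be queried (a quick check added here for convenience):

-- Check the auto create/update statistics settings of the current database
SELECT name,
       is_auto_create_stats_on,
       is_auto_update_stats_on,
       is_auto_update_stats_async_on
FROM sys.databases
WHERE name = DB_NAME()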

So there are many cases where we can improve the performance of SQL statements by creating user-defined statistics. For example:

SELECT vcMessage FROM tblMessage
WHERE ntMessageId < 1000
    AND ntSeverity IN (10, 11, 12, 16)

If we check the execution plan, we will get:

Actual cardinality: 705
Estimated cardinality: 839.486

If we create the following filtered statistics:

CREATE STATISTICS Stat_Severity
ON tblMessage(ntMessageId)
WHERE ntSeverity IN (10, 11, 12, 16)

Now the estimated cardinality of the previous query will be 705.671.

So now the query will perform better.

Tips:

Create user-defined multi-column or filtered statistics if query predicates involve more than one field of a table (see the sketch below).
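
For example, a multi-column statistics object covering both predicate columns from the query above might look like this (an illustrative sketch; the statistics name is invented):

-- Multi-column statistics on the two columns used together in the predicate
CREATE STATISTICS Stat_MsgID_Severity
ON tblMessage (ntMessageId, ntSeverity)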