Case
A client wants to export data to XML files, but SSIS has no XML destination. A script component could help him but he doesn't want to create a new script for each file/source just because the file/source lay-out is different.
Starting point
Col1 | Col2 | Col3 |
test | Test | TEST |
bla | Bla | BLA |
xxx | Xxx | XXX |
Desired situation
<?xml version="1.0" ?>
<ROOT>
<ROW>
<col1>test
</col1>
<col2>Test
</col2>
<col3>TEST
</col3>
</ROW>
<ROW>
<col1>bla
</col1>
<col2>Bla
</col2>
<col3>BLA
</col3>
</ROW>
<ROW>
<col1>xxx
</col1>
<col2>Xxx
</col2>
<col3>XXX
</col3>
</ROW>
</ROOT>
Solution
There is a request for an XML destination on the
Microsoft site which you can support.
There is an open source component:
XML Destination and there are some
third party components, but you can do it yourself with a Script component.
Let's create a flexible XML destination that is the same for each file/source. This could be resolved by reflection (
again).
1) Source
Create a random source (flatfile / OLE DB).
2) Script component
Drag a script component to your data flow and select
Destination. Connect it to the Source component.
|
Script component destination |
3) Input columns
Edit the Script components and select all input columns (or just the ones you need)
|
Select all columns as input |
4) Destination
Create a new connection named
xmldocument in the Connection Managers tab. The file type should be FILE and create a new file.
|
FILE |
|
Create file |
5) The Script
Now the script (C# and VB.net for this example). SSIS creates 3 methods which we will adjust and we need a fourth method to remove forbidden xml characters such as < and >.
// C# Code
// Flexible script that creates a XML document
// using the SSIS columns as nodes.
using System;
using System.Data;
using System.Reflection; // Added
using System.Xml; // Added
using Microsoft.SqlServer.Dts.Pipeline.Wrapper;
using Microsoft.SqlServer.Dts.Runtime.Wrapper;
[Microsoft.SqlServer.Dts.Pipeline.SSISScriptComponentEntryPointAttribute]
public class ScriptMain : UserComponent
{
// Variable that contains the XML document
XmlTextWriter textWriter;
// Start of XML document
public override void PreExecute()
{
base.PreExecute();
// Create a new XML document and use the filepath in the connection as XML-file
textWriter = new XmlTextWriter(this.Connections.xmldocument.ConnectionString.ToString(), null);
// Start writing the XML document:
textWriter.WriteStartDocument();
// Create root element
textWriter.WriteStartElement("ROOT");
}
// Close of XML document
public override void PostExecute()
{
base.PostExecute();
// Close root element:
textWriter.WriteEndElement();
// Stop writing the XML document
textWriter.WriteEndDocument();
// Close document
textWriter.Close();
}
// Method that will be started for each record in you dataflow
public override void Input0_ProcessInputRow(Input0Buffer Row)
{
// Row type to get the value of a column
Type rowType = Row.GetType();
String columnValue = "";
// Create row element:
textWriter.WriteStartElement("ROW");
// Loop through all columns and create a column element: valuevalue
foreach (IDTSInputColumn100 column in this.ComponentMetaData.InputCollection[0].InputColumnCollection)
{
// Use the SSIS column name as element name:
textWriter.WriteStartElement(column.Name);
// Get column value, will fail if null
try
{
columnValue = rowType.GetProperty(column.Name).GetValue(Row, null).ToString();
}
catch
{
// Default value for null values: "null", "" or null
columnValue = "null";
}
finally
{
textWriter.WriteString(removeForbiddenXmlChars(columnValue));
}
// Close column element:
textWriter.WriteEndElement();
}
// Close row element:
textWriter.WriteEndElement();
// Output the number of processed rows. 103 = RowsWritten
this.ComponentMetaData.IncrementPipelinePerfCounter(103, 1);
}
// Remove forbidden chars that could damage your XML document
private string removeForbiddenXmlChars(string columnValue)
{
return columnValue.Replace("&", "&").Replace("<", "<").Replace(">", ">");
}
}
And the VB.net code
' VB.Net code
' Flexible script that creates a XML document
' using the SSIS columns as nodes.
Imports System
Imports System.Data
Imports System.Reflection ' Added
Imports System.Xml ' Added
Imports System.Math
Imports Microsoft.SqlServer.Dts.Pipeline.Wrapper
Imports Microsoft.SqlServer.Dts.Runtime.Wrapper
<microsoft.sqlserver.dts.pipeline.ssisscriptcomponententrypointattribute> _
<clscompliant(false)> _
Public Class ScriptMain
Inherits UserComponent
Dim textWriter As XmlTextWriter
Public Overrides Sub PreExecute()
MyBase.PreExecute()
' Create a new XML document and use the filepath in the connection as XML-file
textWriter = New XmlTextWriter(Me.Connections.xmldocument.ConnectionString.ToString(), System.Text.Encoding.Default)
'Start writing the XML document:
textWriter.WriteStartDocument()
'Create root element
textWriter.WriteStartElement("ROOT")
End Sub
Public Overrides Sub PostExecute()
MyBase.PostExecute()
'Close root element:
textWriter.WriteEndElement()
'Stop writing the XML document
textWriter.WriteEndDocument()
'Close document
textWriter.Close()
End Sub
Public Overrides Sub Input0_ProcessInputRow(ByVal Row As Input0Buffer)
' Row type to get the value of a column
Dim rowType As Type = Row.GetType()
Dim columnValue As String = ""
' Create row element:
textWriter.WriteStartElement("ROW")
' Loop through all columns and create a column element: valuevalue
For Each column As IDTSInputColumn100 In Me.ComponentMetaData.InputCollection(0).InputColumnCollection
' Use the SSIS column name as element name:
textWriter.WriteStartElement(column.Name)
' Get column value, will fail if null
Try
columnValue = rowType.GetProperty(column.Name).GetValue(Row, Nothing).ToString()
Catch
' Default value for null values: "null", "" or null
columnValue = "null"
Finally
textWriter.WriteString(removeForbiddenXmlChars(columnValue))
End Try
' Close column element:
textWriter.WriteEndElement()
Next
' Close row element:
textWriter.WriteEndElement()
' Output the number of processed rows. 103 = RowsWritten
Me.ComponentMetaData.IncrementPipelinePerfCounter(103, 1)
End Sub
'Remove forbidden chars that could damage your XML document
Private Function removeForbiddenXmlChars(ByVal columnValue As String) As String
removeForbiddenXmlChars = columnValue.Replace("&", "&").Replace("<", "<").Replace(">", ">")
End Function
End Class
Note: there are probably some more forbidden XML chars like ' (') and " (") that you need to replace.
UPDATE:
I have a newer version which doesn't use the ugly try catch for null values, but you have to
change the .Net Framework version to 3.5.
// C# Code
// Flexible script that creates a XML document
// using the SSIS columns as nodes.
using System;
using System.Data;
using Microsoft.SqlServer.Dts.Pipeline.Wrapper;
using Microsoft.SqlServer.Dts.Runtime.Wrapper;
using System.Reflection; // Added
using System.Xml; // Added
using System.Linq; // Added
[Microsoft.SqlServer.Dts.Pipeline.SSISScriptComponentEntryPointAttribute]
public class ScriptMain : UserComponent
{
// Variable that contains the XML document
XmlTextWriter textWriter;
// Start of XML document
public override void PreExecute()
{
base.PreExecute();
// Create a new XML document and use the filepath in the connection as XML-file
textWriter = new XmlTextWriter(this.Connections.xmldocument.ConnectionString.ToString(), null);
// Start writing the XML document:
textWriter.WriteStartDocument();
// Create root element
textWriter.WriteStartElement("ROOT");
}
// Close of XML document
public override void PostExecute()
{
base.PostExecute();
// Close root element:
textWriter.WriteEndElement();
// Stop writing the XML document
textWriter.WriteEndDocument();
// Close document
textWriter.Close();
}
// Method that will be started for each record in you dataflow
public override void Input0_ProcessInputRow(Input0Buffer Row)
{
// Row type to get the value of a column
var properties = Row.GetType().GetProperties(BindingFlags.Instance | BindingFlags.Public);;
String columnValue = "";
// Create row element:
textWriter.WriteStartElement("ROW");
// Loop through all columns and create a column element: valuevalue
foreach (var property in properties.Where(p => !p.Name.EndsWith("_IsNull", StringComparison.OrdinalIgnoreCase)))
{
// Use the SSIS column name as element name:
textWriter.WriteStartElement(property.Name);
// Get column value, but that method will fail if empty
// so first check if column value is null
if (!CheckNull(Row, properties, property.Name))
{
// Get column value
columnValue = property.GetValue(Row, null).ToString();
}
else
{
// Default value for null values: "null", "" or null
columnValue = "";
}
// write column value, but first remove forbidden chars
textWriter.WriteString(removeForbiddenXmlChars(columnValue));
// Close column element:
textWriter.WriteEndElement();
}
// Close row element:
textWriter.WriteEndElement();
// Output the number of processed rows. 103 = RowsWritten
this.ComponentMetaData.IncrementPipelinePerfCounter(103, 1);
}
// Remove forbidden chars that could damage your XML document
private string removeForbiddenXmlChars(string columnValue)
{
return columnValue.Replace("&", "&").Replace("<", "<").Replace(">", ">");
}
// Method that checks wether a column value is empty.
// It uses the standard boolean [columnName]_IsNull.
private bool CheckNull(Input0Buffer Row, PropertyInfo[] properties, string propertyName)
{
var property = properties.Where(p => p.Name.Equals(propertyName + "_IsNull", StringComparison.OrdinalIgnoreCase)).FirstOrDefault();
if (property != null)
{
if ((bool)property.GetValue(Row, null))
{
return true;
}
}
return false;
}
}
And the VB.net code
' VB.Net code
' Flexible script that creates a XML document
' using the SSIS columns as nodes.
Imports System
Imports System.Data
Imports System.Math
Imports Microsoft.SqlServer.Dts.Pipeline.Wrapper
Imports Microsoft.SqlServer.Dts.Runtime.Wrapper
Imports System.Reflection ' Added
Imports System.Xml ' Added
Imports System.Linq ' Added
<Microsoft.SqlServer.Dts.Pipeline.SSISScriptComponentEntryPointAttribute> _
<CLSCompliant(False)> _
Public Class ScriptMain
Inherits UserComponent
' Variable that contains the XML document
Dim textWriter As XmlTextWriter
' Start of XML document
Public Overrides Sub PreExecute()
MyBase.PreExecute()
' Create a new XML document and use the filepath in the connection as XML-file
textWriter = New XmlTextWriter(Me.Connections.xmldocument.ConnectionString.ToString(), System.Text.Encoding.Default)
'Start writing the XML document:
textWriter.WriteStartDocument()
'Create root element
textWriter.WriteStartElement("ROOT")
End Sub
' Close of XML document
Public Overrides Sub PostExecute()
MyBase.PostExecute()
'Close root element:
textWriter.WriteEndElement()
'Stop writing the XML document
textWriter.WriteEndDocument()
'Close document
textWriter.Close()
End Sub
' Method that will be started for each record in you dataflow
Public Overrides Sub Input0_ProcessInputRow(ByVal Row As Input0Buffer)
' Row type to get the value of a column
Dim properties = Row.[GetType]().GetProperties(BindingFlags.Instance Or BindingFlags.[Public])
Dim columnValue As [String] = ""
' Create row element:
textWriter.WriteStartElement("ROW")
' Loop through all columns and create a column element: valuevalue
For Each [property] As Object In properties.Where(Function(p) Not p.Name.EndsWith("_IsNull", StringComparison.OrdinalIgnoreCase))
' Use the SSIS column name as element name:
textWriter.WriteStartElement([property].Name)
' Get column value, but that method will fail if empty
' so first check if column value is null
If Not CheckNull(Row, properties, [property].Name) Then
' Get column value
columnValue = [property].GetValue(Row, Nothing).ToString()
Else
' Default value for null values: "null", "" or null
columnValue = ""
End If
' write column value, but first remove forbidden chars
textWriter.WriteString(removeForbiddenXmlChars(columnValue))
' Close column element:
textWriter.WriteEndElement()
Next
' Close row element:
textWriter.WriteEndElement()
' Output the number of processed rows. 103 = RowsWritten
Me.ComponentMetaData.IncrementPipelinePerfCounter(103, 1)
End Sub
'Remove forbidden chars that could damage your XML document
Private Function removeForbiddenXmlChars(ByVal columnValue As String) As String
removeForbiddenXmlChars = columnValue.Replace("&", "&").Replace("<", "<").Replace(">", ">")
End Function
' Method that checks wether a column value is empty.
' It uses the standard boolean [columnName]_IsNull.
Private Function CheckNull(ByVal Row As Input0Buffer, ByVal properties As PropertyInfo(), ByVal propertyName As String) As Boolean
Dim [property] = properties.Where(Function(p) p.Name.Equals(propertyName & "_IsNull", StringComparison.OrdinalIgnoreCase)).FirstOrDefault()
If [property] IsNot Nothing Then
If CBool([property].GetValue(Row, Nothing)) Then
Return True
End If
End If
Return False
End Function
End Class