Tip General purpose DataTable conversion to/from .xls, .xlsx, .csv, .txt, .ods

Rob Sherratt

Well-known member
Joined
Dec 30, 2012
Messages
64
Location
Corfu. Greece
Programming Experience
10+
I have started using these low-cost software libraries in my application software:

xPortTools Components

In not much more than 15 minutes I was able to create and read .xlsx and .xls files, processing these from/to a very large DataTable.
There is no linkage to any Microsoft libraries such as Microsoft Office, therefore the resulting application is completely portable and independent of Office versions.
The cost is $89 for individual libraries, or $199 for the whole set. I paid $159 for just the Excel import and Excel export libraries, since I had .csv formats sorted already.
The license allows the developer to deploy his/her software and the xporttools run-time to as many users as you wish, and there are no commercial use restrictions.
Also the level of support I have received from xporttools is fantastic, most questions receiving detailed and accurate answers within 5 minutes.
Here is "my code" for creating an .xlsx file from a very large DataTable, over 65 Mbytes in size. It took 1 second to create the .xlsx file. It is blindingly fast!
I say "my code", which is actually untrue, I just copied and pasted most of this from the support web site, where these is excellent documentation.

    Imports VM.xPort
    Imports System.IO

    Private Function Table_To_XLSX(MyDataTable As DataTable, FileName As String) As Boolean

        ' Saves a DataTable as an .xlsx file. Uses the xPortTools library
        ' [url=http://www.xporttools.net]xPortTools Components[/url]

        Try
            Dim xporter As VM.xPort.DS2XL
            Dim headerStyle As VM.xPort.Style
            Dim contentStyle As VM.xPort.Style
            Dim customFont As System.Drawing.Font
            Dim formatStyle As VM.xPort.Style

            xporter = New VM.xPort.DS2XL()

            'Set columns width in 1/256th of the character's width. For example, if column
            'width should be 10 characters then we set width value to 2560. 
            ' Set all column widths to 20 characters.
            SetColumnWidth(xporter.ColWidths, "AllColumnWidths", MyDataTable.TableName, 0, MyDataTable.Columns.Count - 1, 20 * 256)

            'Make header row higher (header's index in DataTable is -1) setting it to 25 points 
            '(value of 1 is 1/20th of a point).
            SetRowHeight(xporter.RowHeights, "Header", MyDataTable.TableName, -1, -1, 60 * 20)

            'Set row height for all other rows to 15 points.
            SetRowHeight(xporter.RowHeights, "AllRows", MyDataTable.TableName, 0, MyDataTable.Rows.Count - 1, 15 * 20)

            'Define style for the header. 
            headerStyle = New VM.xPort.Style("HeaderStyle", MyDataTable.TableName, -1, 0, -1, MyDataTable.Columns.Count - 1)

            'Create font that will be used to format header text.
            customFont = New System.Drawing.Font("Calibri", 11, FontStyle.Bold)
            headerStyle.Font = customFont
            headerStyle.ForeColor = Color.White
            headerStyle.HorizontalAlignment = Style.xpHAlignment.Center
            headerStyle.VerticalAlignment = Style.xpVAlignment.Center
            headerStyle.ReadingDirection = Style.xpReadingDirection.Context
            headerStyle.LeftBorderLine = Style.xpBorderLineStyle.Thin
            headerStyle.RightBorderLine = Style.xpBorderLineStyle.Thin
            headerStyle.TopBorderLine = Style.xpBorderLineStyle.Thin
            headerStyle.BottomBorderLine = Style.xpBorderLineStyle.Thin
            headerStyle.BackgroundColor = Color.FromArgb(0, 36, 64, 98)
            headerStyle.LeftBorderColor = Color.White
            headerStyle.RightBorderColor = Color.White
            headerStyle.TopBorderColor = Color.White
            headerStyle.BottomBorderColor = Color.White
            headerStyle.WrapText = True

            'Add custom style to the collection of styles. If we do not add style, it will not be applied.
            xporter.Styles.Add(headerStyle)

            'Create style for content
            contentStyle = New Style("ContentStyle", MyDataTable.TableName, 0, 0, MyDataTable.Rows.Count - 1, MyDataTable.Columns.Count - 1)

            'Create style for data cells
            customFont = New System.Drawing.Font("Calibri", 11, FontStyle.Regular)
            contentStyle.Font = customFont
            contentStyle.HorizontalAlignment = Style.xpHAlignment.Left
            contentStyle.VerticalAlignment = Style.xpVAlignment.Top
            contentStyle.ReadingDirection = Style.xpReadingDirection.Context
            contentStyle.LeftBorderLine = Style.xpBorderLineStyle.Thin
            contentStyle.RightBorderLine = Style.xpBorderLineStyle.Thin
            contentStyle.TopBorderLine = Style.xpBorderLineStyle.Thin
            contentStyle.BottomBorderLine = Style.xpBorderLineStyle.Thin
            contentStyle.BackgroundColor = Color.FromArgb(0, 220, 230, 241)
            contentStyle.LeftBorderColor = Color.White
            contentStyle.RightBorderColor = Color.White
            contentStyle.TopBorderColor = Color.White
            contentStyle.BottomBorderColor = Color.White
            contentStyle.WrapText = True

            'Add custom style to the collection of styles
            xporter.Styles.Add(contentStyle)

            'Make a clone of the contentStyle style and set only specific properties of custom style. 
            formatStyle = contentStyle.Clone("DateValues", MyDataTable.TableName, 0, _
            MyDataTable.Columns("Application Date").Ordinal, MyDataTable.Rows.Count - 1, _
            MyDataTable.Columns("Application Date").Ordinal)

            'Set format of the date values in specific column. 
            'Refer to documentation about which built-in Excel formats are available. 
            ' Use custom (not built-in) format string.
            formatStyle.FormatIndex = VM.xPort.Style.xpFormat.Custom
            formatStyle.Format = "MMM dd, yyyy"

            'Add custom style to the collection of styles.
            xporter.Styles.Add(formatStyle)

            xporter.Export(MyDataTable, FileName, xpOutputFormat.XLSX, True, True)

            Return True

        Catch ex As Exception
            ' In place of "PrintLine" use your favorite text output method.
            PrintLine(ex.ToString())
            Return False
        End Try

    End Function

    Private Sub SetColumnWidth(ByVal widths As ColWidthCollection, _
                               ByVal styleName As String, _
                               ByVal dataTableName As String, _
                               ByVal startCol As Integer, _
                               ByVal endCol As Integer, _
                               ByVal width As Integer)

        Dim customColumnWidth As ColWidth

        customColumnWidth = New ColWidth(styleName, dataTableName, startCol, endCol, width)
        widths.Add(customColumnWidth)

    End Sub

    Private Sub SetRowHeight(ByVal heights As RowHeightCollection, _
                             ByVal styleName As String, _
                             ByVal dataTableName As String, _
                             ByVal startRow As Integer, _
                             ByVal endRow As Integer, _
                             ByVal height As Integer)

        Dim customRowHeight As RowHeight

        customRowHeight = New RowHeight(styleName, dataTableName, startRow, endRow, height)
        heights.Add(customRowHeight)

    End Sub


I stress that I am not associated with the company xporttools in any way, but I am a "happy customer" and I wanted to
let you know how good this software is. I asked Val Mazur, who is one of xporttools technical experts, if she would contribute
and provide support on this forum, and she is willing if there is sufficient interest from forum members.
 
Last edited:
Just to complete the "solution", here is generic and general purpose code to import any .xlsx file or .xls file of any size, dimension, data type, column headings etc, and produce a corresponding DataTable. It even automatically finds the first Worksheet name, so you are not worried what the first worksheet is called. The code populates a single DataTable; you would have to modify the software if you want to read in a second or third worksheet to another DataTable. This software has been fully tested and debugged with Visual Studio Express 2012. It is based on an example from xPortTools Inc, and then modified to get the first worksheet name automatically and provide generic parameters. Approximately 65 Mbytes of data were imported to a DataTable and displayed in a DataGridView in less than a second.

Imports System
Imports System.IO
Imports System.Net
Imports System.Data
Imports System.Threading
Imports System.Globalization
Imports System.Windows.Forms
Imports System.Text.RegularExpressions
Imports VM.xPort                       
Imports VM.xPort.ExcelClient

    Private Function Excel_To_Table(ByVal MyFileName As String, ByVal Format As String, ByRef MyDataTable As DataTable) As Boolean

        ' Read the first worksheet in an Excel file (long path name = MyFileName) and retrun the contents as MyDataTable
        ' Format is a string, either "xlsx" or "xls" with no dot.
        ' Result is True if successful, False if a problem was encountered.

        Try
            Dim LineCount As Integer = 0
            Dim ProgressCount As Integer = 0
            Dim ProgressStep As Integer = 0
            Dim connection As ExcelConnection
            Dim adapter As ExcelDataAdapter
            Dim command As ExcelCommand
            Dim result As New DataSet
            Dim SchemaDataTable As New DataTable
            Dim FirstSheetName As String

            If Not System.IO.File.Exists(MyFileName) Then
                PrintLine("The file """ & MyFileName & """ does not exist.")
                Return False
            End If

            ' Open the Excel file schema and get the first worksheet name

            GetSpreadsheetNames(MyFileName, Format, SchemaDataTable)

            ' Select the first sheet name to make this generic, at the moment this
            ' is hard coded.

            FirstSheetName = SchemaDataTable.Rows(0).Item("TABLE_NAME").ToString()
            SchemaDataTable.Dispose()

            ' Create connection to the Excel file and specify that first row in a 
            ' spreadsheets is a header that contains column names (HDR=true)

            connection = New ExcelConnection("Data Source=" & MyFileName & ";Format=" & Format & ";HDR=true;")
            connection.Open()

            ' Create an ExcelCommand to specify from which spreadsheets inside of  
            ' workbook to query data

            command = New ExcelCommand(FirstSheetName, connection)
            command.CommandType = CommandType.TableDirect 'It is always TableDirect

            ' Create an ExceldataAdapter to retrieve data into the DataSet

            adapter = New ExcelDataAdapter(command)

            result = New DataSet
            adapter.Fill(result)

            ' Return the first DataTable from the first worksheet

            MyDataTable = result.Tables(0)

            ' Release all the opened resources 

            adapter.Dispose()
            adapter = Nothing
            connection.Close()
            connection.Dispose()
            connection = Nothing
            command.Dispose()
            command = Nothing
            result.Dispose()
            result = Nothing

            Return True

        Catch ex As Exception
            PrintLine(ex.ToString())
            Return False
        End Try

    End Function


    Private Sub GetSpreadsheetNames(ByVal MyFileName As String, ByVal format As String, ByRef MyDataTable As DataTable)

        Dim connection As ExcelConnection

        'Open connection to Excel file

        connection = GetConnection(MyFileName, format, False)

        'Query schema information about available spreadsheets inside of Excel file

        MyDataTable = connection.GetExcelSchemaTable(ExcelConnection.ExcelClientSchemaType.Tables)

        'Release all the opened resources 

        connection.Close()
        connection.Dispose()
        connection = Nothing

    End Sub


    Private Function GetConnection(ByVal filePath As String, ByVal format As String, ByVal hasHeader As Boolean) As ExcelConnection

        'Sample code creates and opens connection to workbook file

        Dim connection As ExcelConnection

        'Open connection to the Excel file using provided connection values 

        connection = New ExcelConnection("Data Source=" & filePath & ";Format=" & format & ";HDR=" & hasHeader.ToString() & ";")
        connection.Open()

        Return connection

    End Function
 
This is an example button click handler that prompts the user for a file in .xls, .xls or .csv format to import into MyDataTable and parses out the file extension etc.
OpenButton is a Windows Forms Button, double clicking on it in Forms Designer will generate the procedure template as below, and you can copy and paste the rest.
MyDataGridView is a Windows Forms DataGridView configured using the Forms Designer with two scroll bars and a header row.
It is absolutely essential that you set the DataGridView property "CausesValidation" to "False" otherwise your software will grind to a halt when frequent data updates are performed with a bound DataTable.
In this example, there are no calls to 3rd party libraries apart from Microsoft .Net in Visual Studio Express 2012 Free Desktop version.

 
    Private Sub Button_Click_Open(sender As Object, e As EventArgs) Handles OpenButton.Click

        Dim MyDocumentsLocation As String = Environment.GetFolderPath(Environment.SpecialFolder.MyDocuments)
        Dim MyOpenFileDialog As New OpenFileDialog()
        Dim MyInputFile As String
        Dim MyDataTable As New DataTable
        Dim MyFileExtension As String

        Try
            MyOpenFileDialog.InitialDirectory = MyDocumentsLocation
            MyOpenFileDialog.Filter = "XLSX files (*.xlsx)|*.xlsx|XLS files (*.xls)|*.xls|CSV files (*.csv)|*.csv"
            MyOpenFileDialog.FilterIndex = 1
            MyOpenFileDialog.RestoreDirectory = True
            MyOpenFileDialog.SupportMultiDottedExtensions = False
            MyOpenFileDialog.Title = "Select an .xlsx, .xls or .csv file for merging with the lower table."

            If MyOpenFileDialog.ShowDialog() <> System.Windows.Forms.DialogResult.OK Then
                PrintLine("... No input file was selected.")
                Exit Sub
            End If

            MyInputFile = MyOpenFileDialog.FileName
            MyFileExtension = Mid(MyInputFile, InStrRev(MyInputFile, ".") + 1)
            Select Case MyFileExtension

                Case "xlsx"
                    If Not Excel_To_Table(MyInputFile, FileExtension, MyDataTable) Then
                        PrintLine("... A problem has been encountered when reading the .xlsx file.")
                        Exit Sub
                    End If

                Case "xls"
                    If Not Excel_To_Table(MyInputFile, FileExtension, MyDataTable) Then
                        PrintLine("... A problem has been encountered when reading the .xls file.")
                        Exit Sub
                    End If

                Case "csv"
                    If Not CSV_To_Table(MyInputFile, MyDataTable) Then
                        PrintLine("... A problem has been encountered when reading the .csv file.")
                        Exit Sub
                    End If

                Case Else
                    PrintLine("... We support only .xlsx, .xls and.csv file import.")
                    Exit Sub
            End Select

            MyDataGridView.DataSource = MyDataTable
            MyDataGridView.Show()
            Application.DoEvents()
        Exit Sub
    End Sub
 
Last edited:
Here is the general purpose Table_To_CSV software, which works with any DataTable with any column headings, of any size and with cell content of unlimited length (tested up to 32,000 characters per cell).
This makes no use whatever of any proprietary library apart from Microsoft .NET.
Elsewhere, my application truncates text in MyDataTable if its length is > 32,000.
If you are going to import a .csv file into Excel then you need to make sure no cell in your DataTable contains more than approx 32,000 characters/ bytes.
When the file is processed into .csv format, additional delimiters will be added, you need to make allowance for that. Excel treats those delimiters as characters "counted" in its cell size limit of 32,767 characters.
If you do not stick within this limit, Excel grinds to a halt, and corrupts the displayed data. It does no error checking or reporting. It's .csv import algorithm fails in these circumstances because it deletes delimiters such as quotes and commas.
This buggy aspect of Excel has existed ever since Excel '95 and is present in Excel 2012 also.

    Private Function Table_To_CSV(SourceTable As DataTable, FilePathName As String, _
                                    Optional HasHeader As Boolean = False) As Boolean

        ' TableToCSV : Saves a DataTable as a .csv file.
        ' Original functionality by Reed Kimble published here:
        ' [url=http://social.msdn.microsoft.com/Forums/en-US/vblanguage/thread/4c376915-f0cd-4411-affb-e825c9b1c524/]Reading and Sorting a .CSV file[/url]
        '
        ' Modified by Rob Sherratt,
        '       1. "hasHeader" modifications to allow for header row in the csv files.
        '       2. Exception handling.
        '       3. Resolved problem with original functionality generating .csv files that could not be parsed later.
        '               Fields containing a line-break, double-quote, and/or commas should be quoted. (If they are not, 
        '               the file will likely be impossible to process correctly). A (double) quote character in a field 
        '               must be represented by two (double) quote characters.
        '       4. Bug found in dimensioning NameArray was adding an extra row.

        Const COMMA As String = ","

        Try
            Dim sb As New System.Text.StringBuilder
            If HasHeader Then

                ' Process the Header row for the .csv file by getting column names

                Dim NameArray(SourceTable.Columns.Count - 1) As Object

                For i As Integer = 0 To SourceTable.Columns.Count - 1
                    NameArray(i) = SourceTable.Columns(i).ColumnName

                    ' Protect against any uninitialized column headings.  This should never
                    ' occur and is indicative of a coding error elsewhere, so generate a 
                    ' warning message for diagnostic reasons.

                    If String.IsNullOrEmpty(NameArray(i).ToString) Then
                        PrintLine("... (WARNING) An uninitialized column heading was found in column (" & i & ").")
                    End If
                Next i

                sb.AppendLine(String.Join(COMMA, Array_To_CSV(NameArray)))
            End If

            For Each SourceRow As DataRow In SourceTable.Rows
                sb.AppendLine(String.Join(COMMA, Array_To_CSV(SourceRow.ItemArray)))
                ProgressBar1.Value = RowCount
                Application.DoEvents()
            Next SourceRow

            System.IO.File.WriteAllText(FilePathName, sb.ToString)

            Return True

        Catch ex As Exception
            PrintLine(ex.ToString())
            Return False
        End Try

    End Function

    Private Function Array_To_CSV(ByRef ObjectArray() As Object) As String()

        ' Processes all rows in StringArray, converting them into records suitable for 
        ' writing to a CSV file. Returns the results in a StringArray() the same size as StringArray()

        Const QUOTE As String = """"
        Const CR As String = Chr(13)
        Const LF As String = Chr(10)
        Const COMMA As String = ","
        Dim StringItem As String
        Dim StringArray(ObjectArray.GetLength(0) - 1) As String
        Dim hasComma As Integer
        Dim hasNewLine As Integer
        Dim hasReturn As Integer

        For n As Integer = 0 To ObjectArray.GetLength(0) - 1

            ' Any Quotes in a record must be double Quoted in the CSV file.

            StringItem = Replace(ObjectArray(n).ToString, QUOTE, QUOTE & QUOTE)

            ' If the record has any of a comma, CR or LF character, then the 
            ' record must be "protected" by wrapping Quotes round it.

            hasComma = InStr(StringItem, COMMA)
            hasNewLine = InStr(StringItem, LF)
            hasReturn = InStr(StringItem, CR)
            If (hasComma > 0) Or (hasNewLine > 0) Or (hasReturn > 0) Then
                StringArray(n) = QUOTE & StringItem & QUOTE
            Else
                StringArray(n) = StringItem
            End If
        Next n
        Return StringArray

    End Function
 
One Example of a PrintLine() implementation. Just in case someone says PrintLine does not compile :)
This assumes MyTextBox is a TextBox designed using Forms Designer, with MultiLine=True and with double scrollbars.

Private Sub PrintLine(ByRef MyText As String)
    MyTextBox.AppendLine(MyText)
    Application.DoEvents()
End Sub
 
Back
Top