Process multiple files?

jigax

Active member
Joined
Aug 17, 2006
Messages
43
Programming Experience
Beginner
Hello everyone, I made a simple application which converts a PDF to CSV. I need some help making changes so that the app reads multiple PDFs and converts them into one CSV.

Here's my code.
VB.NET:
Imports System.IO.StreamWriter
Imports System.IO

Public Class Form1
    ' Sets the button, label and window captions, clears the open-dialog file name,
    ' and removes any temp file left behind by a previous run.
    Private Sub Form1_Load(ByVal sender As Object, ByVal e As System.EventArgs) Handles Me.Load
        btn1.Text = "Save"
        btn2.Text = "..."
        btn3.Text = "..."
        lbl1.Text = "Input File"
        lbl2.Text = "Output File"
        Me.Text = "Francis Peña - PDF to CSV"
        OpenFileDialog1.FileName = ""

        Dim fi As New FileInfo("c:\pdf2csv\temp.txt")

        ' Create the working folder first: deleting a file inside a folder that does not
        ' exist can raise an IOException at startup, and the temp file is created in this
        ' same folder later on. CreateDirectory is a no-op when the folder already exists.
        Directory.CreateDirectory(fi.DirectoryName)

        ' Delete does nothing when the file itself is absent.
        fi.Delete()
    End Sub

    ' Converts the PDF selected in OpenFileDialog1 into the CSV file selected in SaveFileDialog1.
    '
    ' Step 1: the PDF text is extracted with PDFBox into C:\pdf2csv\temp.txt.
    ' Step 2: that text is read line by line; label text is stripped, boilerplate lines are
    '         skipped, and the remaining values are written as comma-separated fields.
    '         A CSV row is terminated each time a line starting with "Service Order" is reached.
    '
    ' Any failure (missing folder, unreadable PDF, locked output file, ...) is reported to the
    ' user instead of being silently discarded.
    Private Sub btn1_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles btn1.Click
        ' Nothing can be converted until both dialogs have been used.
        If String.IsNullOrEmpty(OpenFileDialog1.FileName) OrElse String.IsNullOrEmpty(SaveFileDialog1.FileName) Then
            MsgBox("Please choose an input PDF and an output CSV file first.")
            Return
        End If

        Try
            ' ---- Step 1: dump the PDF text into the temp file ----
            Dim doc As org.pdfbox.pdmodel.PDDocument = org.pdfbox.pdmodel.PDDocument.load(OpenFileDialog1.FileName.ToString)
            Try
                Dim stripper As New org.pdfbox.util.PDFTextStripper
                ' Using closes the temp file even when text extraction throws.
                Using oWrite As System.IO.StreamWriter = IO.File.CreateText("C:\pdf2csv\temp.txt")
                    oWrite.Write(stripper.getText(doc))
                End Using
            Finally
                ' Release the PDF whether or not extraction succeeded.
                doc.close()
            End Try

            ' ---- Step 2: turn the extracted text into CSV ----
            Using reader As New IO.StreamReader("C:\pdf2csv\temp.txt")
                Using writer As New IO.StreamWriter(SaveFileDialog1.FileName.ToString)

                    Dim line As String

                    ' Header row. Every column name is followed by a comma (including the last),
                    ' matching the trailing comma written after each data field below.
                    Dim headers() As String = {"SO Number", "Requested For", "Address", "City", "State Zip",
                                               "PhoneNumber", "Tech ID", "Brief Desc", "WO ID", "CustLastName",
                                               "CustFirstName", "WOType", "SR",
                                               "OLI01", "OLI02", "OLI03", "OLI04", "OLI05",
                                               "OLI06", "OLI07", "OLI08", "OLI09", "OLI10",
                                               "OLI11", "OLI12", "OLI13", "OLI14", "OLI15"}
                    For Each header As String In headers
                        writer.Write(header & ",")
                    Next
                    writer.WriteLine()

                    ' Skip the first four lines of the extracted text.
                    For skipped As Integer = 1 To 4
                        line = reader.ReadLine()
                    Next

                    Do Until reader.EndOfStream
                        ' The empty Catch blocks inside this loop are deliberate best-effort handling:
                        ' ReadLine returns Nothing once the text runs out in the middle of a record,
                        ' and the resulting NullReferenceException simply ends that step.
                        Try
                            line = reader.ReadLine
                            ' Strip label text so that only the values remain.
                            line = Replace(line, "Brief Desc: Tech Assigned: ", "")
                            line = Replace(line, "ORDER TYPE: Sales Order ORDER SUB TYPE: ", "")

                            ' Skip boilerplate lines; each branch advances past a fixed number of lines.
                            If line.StartsWith("800 xxx-xxxx") Then
                                line = reader.ReadLine
                                line = reader.ReadLine
                            ElseIf line.StartsWith("Planned Start:") Then
                                line = reader.ReadLine
                            ElseIf line.StartsWith("Work Requested") Then
                                line = reader.ReadLine
                                line = reader.ReadLine
                            ElseIf line.StartsWith("System:") Then
                                line = reader.ReadLine
                                line = reader.ReadLine
                                line = reader.ReadLine
                                line = reader.ReadLine
                            ElseIf line.StartsWith("HC") Then
                                ' An "HC" line yields two fields: its first 10 and its last 13 characters.
                                writer.Write(Trim(Strings.Left(line, 10)))
                                writer.Write(",")
                                writer.Write(Trim(Strings.Right(line, 13)))
                                line = reader.ReadLine
                                writer.Write(",")
                            ElseIf line.StartsWith("Work Description") Then
                                ' Discard everything up to and including the item-table heading.
                                Do Until line.ToString = ("Qty ItemID Description")
                                    line = reader.ReadLine
                                Loop
                                line = reader.ReadLine
                            End If
                        Catch ex As Exception
                        End Try

                        Try
                            ' The search text is matched exactly as spelled here.
                            If line.StartsWith("Addtional") Then
                                Try
                                    ' Skip ahead to the line that reads exactly "Service Order".
                                    Do Until line.ToString = ("Service Order")
                                        line = reader.ReadLine
                                    Loop
                                Catch ex As Exception

                                End Try
                            End If
                        Catch ex As Exception
                        End Try

                        ' Remove the remaining label fragments from the current line.
                        line = Replace(line, " Status: OpenType:", "")
                        line = Replace(line, " Status: aMust DoType:", "")
                        line = Replace(line, ": Directv Call Ctr Cancelled Type: ", "")
                        line = Replace(line, "Qty ItemID Description ", "")

                        ' Write the field value.
                        Try
                            If line.StartsWith("Service Order") Then
                                ' Row boundary: nothing to write for this line.
                            ElseIf line.StartsWith("1 ") Then
                                ' Item line with quantity 1: keep the 8 characters after the first one.
                                writer.Write(Trim(line.Substring(1, 8)))
                            Else
                                writer.Write(Trim(line))
                            End If
                        Catch ex As Exception
                        End Try

                        Try
                            If line.StartsWith("Service Order") Then
                                ' Write a line terminator after skipping the next three lines.
                                line = reader.ReadLine
                                line = reader.ReadLine
                                line = reader.ReadLine
                                writer.WriteLine()
                            Else
                                ' Write a field terminator.
                                writer.Write(",")
                            End If
                        Catch ex As Exception
                        End Try
                    Loop
                End Using
            End Using

            MsgBox("Done")
        Catch ex As Exception
            ' Tell the user what went wrong rather than failing silently.
            MsgBox("Conversion failed: " & ex.Message)
        End Try
    End Sub

    ' Shows the open-file dialog (PDF filter first) and copies the dialog's
    ' current file name into txt1.
    Private Sub Btn2_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles btn2.Click
        With OpenFileDialog1
            .Filter = "pdf files (*.pdf)|*.pdf|All files (*.*)|*.*"
            .ShowDialog()
            txt1.Text = .FileName
        End With
    End Sub

    ' Shows the save-file dialog restricted to CSV files and copies the dialog's
    ' current file name into txt2.
    Private Sub btn3_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles btn3.Click
        With SaveFileDialog1
            .Filter = "csv files (*.csv)|*.csv"
            .ShowDialog()
            txt2.Text = .FileName
        End With
    End Sub
 
You should be able to set the OpenFileDialog's Multiselect property to True and then loop through its FileNames collection, adding each file's contents to your temp.txt document.

I don't have PDFBox installed but this should be close.

VB.NET:
		' Append the extracted text of every file selected in OpenFileDialog1 to one temp file.
		' The second constructor argument (False) overwrites the file instead of appending to it.
		' Using flushes and closes the writer even if one of the PDFs fails to load.
		Using write As New IO.StreamWriter("C:\Temp\Temp.txt", False)
			For Each fileName As String In Me.OpenFileDialog1.FileNames
				Dim doc As org.pdfbox.pdmodel.PDDocument = org.pdfbox.pdmodel.PDDocument.load(fileName)
				Try
					Dim stripper As New org.pdfbox.util.PDFTextStripper
					write.Write(stripper.getText(doc))
				Finally
					' Close each document once its text has been written (or extraction failed).
					doc.close()
				End Try
			Next
		End Using
 
Back
Top