Question iTextSharp generating corrupted PDF

Budius

Well-known member
Joined
Aug 6, 2010
Messages
137
Location
UK
Programming Experience
3-5
hi guys,

My application uses iTextSharp to fill in some form fields in a template PDF and convert the fields into static text (flatten the form). Depending on some checkbox selections, it then either removes the images from the PDF or leaves them in place, and finally shows the result.

I use on my computer FoxIt reader and I had no problems.
The application is deployed on a computer running Adobe Reader 9.0 and runs fine as long as the option to remove the images is not checked. But if the option to remove the images is checked, the resulting PDF opens and Adobe Reader pops up an error message stating:

An error exists on this page. Acrobat may not display the page correctly. Please contact the person who created the PDF document to correct the problem.

As it only happens when the images are removed, I guess it's safe to assume it has something to do with the way they are being removed, plus the fact that Adobe Reader seems to be much less forgiving than Foxit.

Here is the code to remove the images, it was adapted from this sourceforge archive

VB.NET:
    ''' <summary>
    ''' Strips the images out of the loaded PDF, either from a single page or from every page.
    ''' </summary>
    ''' <param name="PageNumber">1-based number of the page to process. When omitted (or 0), every page is processed; any other value outside 1..NumberOfPages is silently ignored.</param>
    Public Sub RemoveImages(Optional ByVal PageNumber As Integer = 0)

        ' 0 is the "all pages" sentinel.
        If PageNumber = 0 Then
            Dim lastPage As Integer = pdfReader.NumberOfPages
            For pageIndex As Integer = 1 To lastPage
                RemoveImageFromPage(pageIndex)
            Next
            Return
        End If

        ' A single page was requested: act only when that page exists in the document.
        Dim pageExists As Boolean = PageNumber > 0 And PageNumber <= pdfReader.NumberOfPages
        If pageExists Then
            RemoveImageFromPage(PageNumber)
        End If

    End Sub
    ''' <summary>
    ''' Kills every indirect image XObject listed in the resources of one page.
    ''' </summary>
    ''' <param name="PageNumber">Page number handed to <c>pdfReader.GetPageN</c>.</param>
    ''' <remarks>
    ''' NOTE(review): only the image objects are killed; the page content stream is left
    ''' untouched, so any "Do" operator that paints one of these images still names an
    ''' object that no longer exists. That is presumably what Adobe Reader reports as
    ''' "An error exists on this page" - confirm, and strip the matching operators too.
    ''' </remarks>
    Private Sub RemoveImageFromPage(ByVal PageNumber As Integer)

        Dim pg As PdfDictionary = pdfReader.GetPageN(PageNumber)
        If pg Is Nothing Then Return

        ' A page without /Resources or without /XObject has no images to remove.
        Dim res As PdfDictionary = TryCast(pdfReader.GetPdfObject(pg.Get(PdfName.RESOURCES)), PdfDictionary)
        If res Is Nothing Then Return

        Dim xobj As PdfDictionary = TryCast(pdfReader.GetPdfObject(res.Get(PdfName.XOBJECT)), PdfDictionary)
        If xobj Is Nothing Then Return

        ' Walk the keys directly instead of indexing them by position.
        For Each key As PdfName In xobj.Keys
            Dim obj As PdfObject = xobj.Get(key)
            If obj Is Nothing OrElse Not obj.IsIndirect Then Continue For

            ' Entries that do not resolve to a dictionary cannot be images; skip them.
            Dim tg As PdfDictionary = TryCast(pdfReader.GetPdfObject(obj), PdfDictionary)
            If tg Is Nothing Then Continue For

            Dim subtype As PdfObject = pdfReader.GetPdfObject(tg.Get(PdfName.SUBTYPE))
            If PdfName.IMAGE.Equals(subtype) Then
                pdfReader.KillIndirect(obj)
            End If
        Next

    End Sub

Any ideas on why Adobe Reader 9.0 would give me this error when Foxit doesn't?
 
In case anyone needs it or is curious, the solution came from here: iText - General - KillIndirect functions creates erroneous Pdf

and my final code is:

VB.NET:
    ''' <summary>
    ''' Strips the images out of the loaded PDF, either from a single page or from every page.
    ''' </summary>
    ''' <param name="PageNumber">1-based number of the page to process. When omitted (or 0), every page is processed; any other value outside 1..NumberOfPages is silently ignored.</param>
    Public Sub RemoveImages(Optional ByVal PageNumber As Integer = 0)

        ' 0 is the "all pages" sentinel.
        If PageNumber = 0 Then
            Dim lastPage As Integer = pdfReader.NumberOfPages
            For pageIndex As Integer = 1 To lastPage
                RemoveImageFromPage(pageIndex)
            Next
            Return
        End If

        ' A single page was requested: act only when that page exists in the document.
        Dim pageExists As Boolean = PageNumber > 0 And PageNumber <= pdfReader.NumberOfPages
        If pageExists Then
            RemoveImageFromPage(PageNumber)
        End If

    End Sub
    ''' <summary>
    ''' Removes the images of one page: kills the image XObjects listed in the page
    ''' resources and rewrites the page content without the "Do" operators that painted them.
    ''' </summary>
    ''' <param name="PageNumber">Page number handed to the <c>pdfReader</c> page accessors.</param>
    ''' <remarks>
    ''' Killing the objects alone leaves "Do" operators that name missing XObjects, which
    ''' Adobe Reader reports as an error on the page. The approach comes from
    ''' http://itext-general.2136553.n4.nabble.com/KillIndirect-functions-creates-erroneous-Pdf-td2529593.html
    ''' "Do" operators that paint anything other than a removed image are kept.
    ''' NOTE(review): KillIndirect presumably drops the object for the whole document, so an
    ''' image shared with a page that is NOT processed would still be painted there - confirm.
    ''' NOTE(review): inline images (BI/ID/EI) are not handled here - confirm whether needed.
    ''' </remarks>
    Private Sub RemoveImageFromPage(ByVal PageNumber As Integer)

        Dim pg As PdfDictionary = pdfReader.GetPageN(PageNumber)
        If pg Is Nothing Then Return

        Dim removedNames As HashSet(Of PdfName) = KillImageXObjects(pg)

        ' Nothing was removed, so the content stream can stay exactly as it is.
        If removedNames.Count = 0 Then Return

        Dim pdfByteContent() As Byte = pdfReader.GetPageContent(PageNumber)
        Dim cp As PdfContentParser = New PdfContentParser(New PRTokeniser(pdfByteContent))

        Using ms As MemoryStream = New MemoryStream
            While True
                ' Each Parse call yields one instruction: its operands followed by the operator.
                Dim ar As List(Of PdfObject) = cp.Parse(Nothing)
                If ar.Count = 0 Then
                    Exit While
                End If

                If PaintsRemovedImage(ar, removedNames) Then
                    Continue While
                End If

                For Each o As PdfObject In ar
                    o.ToPdf(Nothing, ms)
                    ms.WriteByte(CType(Asc(vbLf), Byte))
                Next
            End While

            ' ToArray copies only the bytes actually written; GetBuffer would also hand over
            ' the unused tail of the stream's internal buffer.
            pdfReader.SetPageContent(PageNumber, ms.ToArray())
        End Using

    End Sub

    ''' <summary>
    ''' Kills the indirect image XObjects of a page and returns the resource names they were listed under.
    ''' </summary>
    ''' <param name="pg">Page dictionary whose /Resources /XObject entries are inspected.</param>
    ''' <returns>The names to strip from the content stream; empty when the page has no XObject resources.</returns>
    Private Function KillImageXObjects(ByVal pg As PdfDictionary) As HashSet(Of PdfName)

        Dim removedNames As New HashSet(Of PdfName)

        Dim res As PdfDictionary = TryCast(pdfReader.GetPdfObject(pg.Get(PdfName.RESOURCES)), PdfDictionary)
        If res Is Nothing Then Return removedNames

        Dim xobj As PdfDictionary = TryCast(pdfReader.GetPdfObject(res.Get(PdfName.XOBJECT)), PdfDictionary)
        If xobj Is Nothing Then Return removedNames

        For Each key As PdfName In xobj.Keys
            Dim obj As PdfObject = xobj.Get(key)
            If obj Is Nothing OrElse Not obj.IsIndirect Then Continue For

            Dim target As PdfObject = pdfReader.GetPdfObject(obj)
            If target Is Nothing Then
                ' The reference no longer resolves - presumably the object was already killed
                ' while processing another page that shares it. Painting it would be an error
                ' either way, so its name is stripped from this page's content as well.
                removedNames.Add(key)
                Continue For
            End If

            Dim tg As PdfDictionary = TryCast(target, PdfDictionary)
            If tg Is Nothing Then Continue For

            Dim subtype As PdfObject = pdfReader.GetPdfObject(tg.Get(PdfName.SUBTYPE))
            If PdfName.IMAGE.Equals(subtype) Then
                pdfReader.KillIndirect(obj)
                removedNames.Add(key)
            End If
        Next

        Return removedNames

    End Function

    ''' <summary>
    ''' Tells whether a parsed content instruction is a "Do" whose operand names a removed image.
    ''' </summary>
    ''' <param name="instruction">Operands followed by the operator, as returned by <c>PdfContentParser.Parse</c>.</param>
    ''' <param name="removedNames">Resource names of the images removed from the page.</param>
    Private Function PaintsRemovedImage(ByVal instruction As List(Of PdfObject), ByVal removedNames As HashSet(Of PdfName)) As Boolean

        If instruction.Count < 2 Then Return False
        If Not "Do".Equals(instruction(instruction.Count - 1).ToString) Then Return False

        Dim operand As PdfName = TryCast(instruction(0), PdfName)
        Return operand IsNot Nothing AndAlso removedNames.Contains(operand)

    End Function
 
Back
Top