'
' PDFNet Copyright (c) 2001-2008 by PDFTron Systems Inc. All Rights Reserved.
'

Imports System

Imports pdftron
Imports pdftron.Common
Imports pdftron.Filters
Imports pdftron.SDF
Imports pdftron.PDF

''' <summary>
''' Sample that opens a PDF document, walks every page with an ElementReader
''' and prints the type of each path, text, image and form element it meets.
''' </summary>
Module Module1

    ''' <summary>
    ''' Reads elements from <paramref name="reader"/> until reader.Next()
    ''' returns Nothing, printing one line per path, text, image or form
    ''' element. Form XObjects are entered and processed recursively.
    ''' Elements of any other type are skipped silently.
    ''' </summary>
    ''' <param name="reader">
    ''' An ElementReader on which Begin() (or FormBegin(), for the recursive
    ''' call) has already been called. The caller is responsible for the
    ''' matching End().
    ''' </param>
    Sub ProcessElements(ByVal reader As ElementReader)
        ' The local is deliberately NOT called "element": Visual Basic is
        ' case-insensitive, so such a name would shadow the Element type and
        ' make expressions like Element.Type.e_path ambiguous to the reader.
        Dim elem As Element = reader.Next()

        While Not IsNothing(elem)   ' Read page contents
            ' Element.GetType() here is the PDFNet accessor returning an
            ' Element.Type value; it is queried once per element.
            Select Case elem.GetType()
                Case Element.Type.e_path
                    ' Process path data...
                    ' (values are fetched to show the API; the sample does not use them)
                    Dim data As Double() = elem.GetPathPoints()
                    Dim sz As Integer = elem.GetPointCount()
                    Console.WriteLine("Process Element.Type.e_path")

                Case Element.Type.e_text
                    ' Process text strings...
                    ' (values are fetched to show the API; the sample does not use them)
                    Dim sz As Integer = elem.GetTextDataSize()
                    Dim data As String = elem.GetTextString()
                    Console.WriteLine("Process Element.Type.e_text")
                    ' Console.WriteLine(data)

                Case Element.Type.e_image
                    ' Process images...
                    Console.WriteLine("Process Element.Type.e_image")

                Case Element.Type.e_form
                    ' Process form XObjects: FormBegin() switches the reader to
                    ' the form's content, End() returns to the enclosing content.
                    reader.FormBegin()
                    Console.WriteLine("Process Element.Type.e_form")
                    ProcessElements(reader)
                    reader.End()
            End Select

            elem = reader.Next()
        End While
    End Sub

    ''' <summary>
    ''' Entry point: initializes PDFNet, opens "newsletter.pdf" from the test
    ''' folder and runs ProcessElements over every page.
    ''' </summary>
    ''' <remarks>
    ''' The document is closed and PDFNet is terminated in Finally blocks so
    ''' that both happen even when opening or reading the document throws.
    ''' Exceptions are not swallowed; they still propagate to the caller.
    ''' </remarks>
    Sub Main()
        PDFNet.Initialize()
        Try
            PDFNet.SetResourcesPath("../../../../resources")

            ' Relative path to the folder containing test files.
            Dim input_path As String = "../../../TestFiles/"
            ' Dim output_path As String = "../../../TestFiles/Output/"

            Console.WriteLine("-------------------------------------------------")
            Console.WriteLine("Extract text data from all pages in the document.")

            ' Open the test file
            Console.WriteLine("Opening the input file...")
            Dim doc As PDFDoc = New PDFDoc(input_path + "newsletter.pdf")
            Try
                doc.InitSecurityHandler()

                Dim itr As PageIterator = doc.GetPageIterator()
                Dim page_reader As ElementReader = New ElementReader

                While itr.HasNext()   ' Read every page
                    Console.WriteLine("Page {0:d} ----------------------------------------", _
                                      itr.GetPageNumber())

                    page_reader.Begin(itr.Current())
                    ProcessElements(page_reader)
                    page_reader.End()

                    itr.Next()
                End While
            Finally
                ' Release the document even if processing failed part-way.
                doc.Close()
            End Try

            Console.WriteLine("Done.")
        Finally
            ' Always pair Initialize() with Terminate().
            PDFNet.Terminate()
        End Try
    End Sub

End Module