Some test text!

< Windows samples

Search PDF Files for Text in VB

Sample VB code for using the PDFTron SDK to search for text on PDF pages using regular expressions. The TextSearch utility class builds on functionality available in TextExtractor to simplify the most common search operations. Learn more about our PDF Indexed Search Library.

Step 1: Get your free trial license key, or sign in

Start Trial
Sign in

Step 2: Add the code:

'
' Copyright (c) 2001-2018 by PDFTron Systems Inc. All Rights Reserved.
'

Imports System
Imports pdftron
Imports pdftron.Common
Imports pdftron.Filters
Imports pdftron.SDF
Imports pdftron.PDF

''' <summary>
''' Console sample that runs a TextSearch over "credit card numbers.pdf" in four
''' steps, changing the search pattern and mode after each hit, and finally saves
''' a copy of the document with link annotations placed over the last match.
''' </summary>
Module TextSearchTestVB
    ' Reference to the PDFNetLoader instance obtained in the module constructor.
    ' NOTE(review): presumably held so the native PDFNet library gets loaded - confirm.
    Dim pdfNetLoader As PDFNetLoader

    ''' <summary>
    ''' Module constructor: fetches the PDFNetLoader instance.
    ''' </summary>
    Sub New()
        pdfNetLoader = pdftron.PDFNetLoader.Instance()
    End Sub

    ''' <summary>
    ''' Entry point. Opens the test document and loops over TextSearch.Run results:
    '''   step 0 - whole-word search for "joHn sMiTh";
    '''   step 1 - regular-expression search for a dddd-dddd-dddd-dddd number;
    '''   step 2 - regular-expression search for a dddd-dddddd-ddddd number;
    '''   step 3 - upward search for two words, then link the hit and save the file.
    ''' Any PDFNetException is caught and its message is written to the console.
    ''' </summary>
    Sub Main()
        PDFNet.Initialize()
        Dim input_path As String = "../../../../TestFiles/"

        Try

            Using doc As PDFDoc = New PDFDoc(input_path & "credit card numbers.pdf")
                doc.InitSecurityHandler()

                ' Values filled in by every call to txt_search.Run.
                Dim page_num As Int32 = 0
                Dim result_str As String = "", ambient_string As String = ""
                Dim hlts As Highlights = New Highlights()

                Dim txt_search As TextSearch = New TextSearch()
                Dim mode As Int32 = CInt((TextSearch.SearchMode.e_whole_word Or TextSearch.SearchMode.e_page_stop Or TextSearch.SearchMode.e_highlight))
                Dim pattern As String = "joHn sMiTh"
                txt_search.Begin(doc, pattern, mode, -1, -1)

                ' Index of the search stage; advanced after each successful hit.
                Dim step_ As Integer = 0

                While True
                    Dim code As TextSearch.ResultCode = txt_search.Run(page_num, result_str, ambient_string, hlts)

                    If code = TextSearch.ResultCode.e_found Then

                        Select Case step_
                            Case 0
                                ' Name found: switch to a regular-expression search.
                                Console.WriteLine(result_str & "'s credit card number is: ")
                                mode = txt_search.GetMode()
                                mode = mode Or CInt((TextSearch.SearchMode.e_reg_expression Or TextSearch.SearchMode.e_highlight))
                                txt_search.SetMode(mode)
                                pattern = "\d{4}-\d{4}-\d{4}-\d{4}"
                                txt_search.SetPattern(pattern)
                                step_ += 1

                            Case 1
                                ' First number found: print it and the page of each highlight.
                                Console.WriteLine("  " & result_str)
                                hlts.Begin(doc)

                                While hlts.HasNext()
                                    Console.WriteLine("The current highlight is from page: " & hlts.GetCurrentPageNumber())
                                    hlts.Next()
                                End While

                                pattern = "\d{4}-\d{6}-\d{5}"
                                txt_search.SetPattern(pattern)
                                step_ += 1

                            Case 2
                                ' Second number found: add e_search_up to the mode and look for two words.
                                Console.WriteLine(vbLf & "There is an AMEX card number:" & vbLf & "  " & result_str)
                                mode = txt_search.GetMode()
                                mode = mode Or CInt((TextSearch.SearchMode.e_search_up))
                                txt_search.SetMode(mode)
                                ' NOTE(review): in ASCII order [A-z] also covers the punctuation between
                                ' "Z" and "a"; [A-Za-z] may be what was intended - confirm before changing.
                                pattern = "[A-z]++ [A-z]++"
                                txt_search.SetPattern(pattern)
                                step_ += 1

                            Case 3
                                ' Final hit: link every highlighted quad, save, and stop searching.
                                Console.WriteLine("Is the owner's name:" & vbLf & "  " & result_str & "?")
                                AddLinksOverHighlights(doc, hlts)

                                Dim output_path As String = "../../../../TestFiles/Output/"
                                doc.Save(output_path & "credit card numbers_linked.pdf", SDFDoc.SaveOptions.e_linearized)
                                Exit While
                        End Select

                    ElseIf code = TextSearch.ResultCode.e_page Then
                        ' Intentionally empty: nothing to do for this result code, so the
                        ' loop simply calls Run again.
                    Else
                        ' Any other result code ends the search loop.
                        Exit While
                    End If
                End While
            End Using
        Catch e As PDFNetException
            Console.WriteLine(e.Message)
        End Try
    End Sub

    ''' <summary>
    ''' Walks the given highlights and, for every quad, pushes a URI link annotation
    ''' covering the quad's axis-aligned bounding rectangle onto the highlight's page.
    ''' </summary>
    ''' <param name="doc">Document that owns the pages and receives the annotations.</param>
    ''' <param name="hlts">Highlights produced by the most recent search hit.</param>
    Private Sub AddLinksOverHighlights(doc As PDFDoc, hlts As Highlights)
        hlts.Begin(doc)

        While hlts.HasNext()
            Dim cur_page As Page = doc.GetPage(hlts.GetCurrentPageNumber())
            Dim quads As Double() = hlts.GetCurrentQuads()

            ' Eight doubles are consumed per quad below (x at even offsets, y at odd).
            ' Integer division "\" is required here: "/" yields a Double in VB, which
            ' fails to compile under Option Strict On and is otherwise rounded
            ' implicitly, so a trailing partial quad could be counted and indexed
            ' past the end of the array.
            Dim quad_count As Integer = quads.Length \ 8

            For i As Integer = 0 To quad_count - 1
                Dim offset As Integer = 8 * i

                ' Bounding box of the four corner points of this quad.
                Dim x1 As Double = Math.Min(Math.Min(Math.Min(quads(offset + 0), quads(offset + 2)), quads(offset + 4)), quads(offset + 6))
                Dim x2 As Double = Math.Max(Math.Max(Math.Max(quads(offset + 0), quads(offset + 2)), quads(offset + 4)), quads(offset + 6))
                Dim y1 As Double = Math.Min(Math.Min(Math.Min(quads(offset + 1), quads(offset + 3)), quads(offset + 5)), quads(offset + 7))
                Dim y2 As Double = Math.Max(Math.Max(Math.Max(quads(offset + 1), quads(offset + 3)), quads(offset + 5)), quads(offset + 7))

                Dim hyper_link As pdftron.PDF.Annots.Link = pdftron.PDF.Annots.Link.Create(doc, New Rect(x1, y1, x2, y2), pdftron.PDF.Action.CreateURI(doc, "http://www.pdftron.com"))
                hyper_link.RefreshAppearance()
                cur_page.AnnotPushBack(hyper_link)
            Next

            hlts.Next()
        End While
    End Sub
End Module