Some test text!

menu

Search PDF files for text in VB

Switch language

chevron_right
Switch language
C#
C# (.NET Core)
C# (UWP)
VB
C# (Xamarin)

Sample VB code for using PDFTron SDK to search for text on PDF pages using regular expressions. The TextSearch utility class builds on functionality available in TextExtractor to simplify the most common search operations. Learn more about our VB PDF Library and PDF Indexed Search Library.

Get Started | Samples | Download

To run this sample, get started with a free trial of PDFTron SDK.

'
' Copyright (c) 2001-2019 by PDFTron Systems Inc. All Rights Reserved.
'

Imports System
Imports pdftron
Imports pdftron.Common
Imports pdftron.Filters
Imports pdftron.SDF
Imports pdftron.PDF

' Sample module that runs a four-step text search over "credit card numbers.pdf":
'   step 0 - find the name "joHn sMiTh" (whole-word search),
'   step 1 - switch to regular-expression mode and find a 4-4-4-4 digit number,
'   step 2 - find a 4-6-5 digit number,
'   step 3 - add e_search_up to the mode, find a two-word match, turn each of its
'            highlight quads into a link annotation and save the document.
Module TextSearchTestVB
    ' Loader instance obtained once in the module constructor below.
    Dim pdfNetLoader As PDFNetLoader
    Sub New()
        pdfNetLoader = pdftron.PDFNetLoader.Instance()
    End Sub

    ' Entry point. Any PDFNetException raised while searching or saving is
    ' caught and its message is written to the console.
    Sub Main()
        PDFNet.Initialize()
        Dim input_path As String = "../../../../TestFiles/"

        Try

            Using doc As PDFDoc = New PDFDoc(input_path & "credit card numbers.pdf")
                doc.InitSecurityHandler()

                ' Variables filled in by each call to txt_search.Run.
                Dim page_num As Int32 = 0
                Dim result_str As String = "", ambient_string As String = ""
                Dim hlts As Highlights = New Highlights()

                Dim txt_search As TextSearch = New TextSearch()
                Dim mode As Int32 = CInt((TextSearch.SearchMode.e_whole_word Or TextSearch.SearchMode.e_page_stop Or TextSearch.SearchMode.e_highlight))
                Dim pattern As String = "joHn sMiTh"

                ' NOTE(review): the two -1 arguments are presumably the start/end
                ' page with -1 meaning "no limit" - confirm against the TextSearch API.
                txt_search.Begin(doc, pattern, mode, -1, -1)

                ' Index of the search step currently being performed (0..3).
                Dim step_ As Integer = 0

                While True
                    Dim code As TextSearch.ResultCode = txt_search.Run(page_num, result_str, ambient_string, hlts)

                    If code = TextSearch.ResultCode.e_found Then

                        If step_ = 0 Then
                            ' Step 0: the name was found; continue with a regular expression.
                            Console.WriteLine(result_str & "'s credit card number is: ")
                            mode = txt_search.GetMode()
                            mode = mode Or CInt((TextSearch.SearchMode.e_reg_expression Or TextSearch.SearchMode.e_highlight))
                            txt_search.SetMode(mode)
                            pattern = "\d{4}-\d{4}-\d{4}-\d{4}"
                            txt_search.SetPattern(pattern)
                            step_ += 1
                        ElseIf step_ = 1 Then
                            ' Step 1: print the match and the page of every highlight it produced.
                            Console.WriteLine("  " & result_str)
                            hlts.Begin(doc)

                            While hlts.HasNext()
                                Console.WriteLine("The current highlight is from page: " & hlts.GetCurrentPageNumber())
                                hlts.Next()
                            End While

                            pattern = "\d{4}-\d{6}-\d{5}"
                            txt_search.SetPattern(pattern)
                            step_ += 1
                        ElseIf step_ = 2 Then
                            ' Step 2: print the 4-6-5 digit match, then add e_search_up to
                            ' the mode and look for two space-separated words.
                            Console.WriteLine(vbLf & "There is an AMEX card number:" & vbLf & "  " & result_str)
                            mode = txt_search.GetMode()
                            mode = mode Or CInt((TextSearch.SearchMode.e_search_up))
                            txt_search.SetMode(mode)
                            ' NOTE(review): in ASCII the range [A-z] also covers the
                            ' punctuation characters between "Z" and "a"; [A-Za-z] may
                            ' have been intended. Pattern deliberately left unchanged.
                            pattern = "[A-z]++ [A-z]++"
                            txt_search.SetPattern(pattern)
                            step_ += 1
                        ElseIf step_ = 3 Then
                            ' Step 3: print the match, link every highlighted quad, save and stop.
                            Console.WriteLine("Is the owner's name:" & vbLf & "  " & result_str & "?")
                            hlts.Begin(doc)
                            While hlts.HasNext()
                                Dim cur_page As Page = doc.GetPage(hlts.GetCurrentPageNumber())
                                Dim quads As Double() = hlts.GetCurrentQuads()

                                ' Each quad occupies 8 consecutive doubles. Use integer
                                ' division (\): the / operator yields a Double, which does
                                ' not compile under Option Strict On and is rounded to the
                                ' nearest integer under Option Strict Off.
                                Dim quad_count As Integer = quads.Length \ 8

                                For i As Integer = 0 To quad_count - 1
                                    Dim offset As Integer = 8 * i

                                    ' Bounding box of the quad: even offsets are used as x
                                    ' values and odd offsets as y values.
                                    Dim x1 As Double = Math.Min(Math.Min(Math.Min(quads(offset + 0), quads(offset + 2)), quads(offset + 4)), quads(offset + 6))
                                    Dim x2 As Double = Math.Max(Math.Max(Math.Max(quads(offset + 0), quads(offset + 2)), quads(offset + 4)), quads(offset + 6))
                                    Dim y1 As Double = Math.Min(Math.Min(Math.Min(quads(offset + 1), quads(offset + 3)), quads(offset + 5)), quads(offset + 7))
                                    Dim y2 As Double = Math.Max(Math.Max(Math.Max(quads(offset + 1), quads(offset + 3)), quads(offset + 5)), quads(offset + 7))

                                    ' Cover the bounding box with a link annotation that opens a URI.
                                    Dim hyper_link As pdftron.PDF.Annots.Link = pdftron.PDF.Annots.Link.Create(doc, New Rect(x1, y1, x2, y2), pdftron.PDF.Action.CreateURI(doc, "http://www.pdftron.com"))
                                    hyper_link.RefreshAppearance()
                                    cur_page.AnnotPushBack(hyper_link)
                                Next

                                hlts.Next()
                            End While

                            Dim output_path As String = "../../../../TestFiles/Output/"
                            doc.Save(output_path & "credit card numbers_linked.pdf", SDFDoc.SaveOptions.e_linearized)
                            Exit While
                        End If
                    ElseIf code = TextSearch.ResultCode.e_page Then
                        ' Intentionally empty: just call Run again.
                        ' NOTE(review): e_page is presumably reported at each page
                        ' boundary because e_page_stop is set in the mode - confirm.
                    Else
                        ' Any other result code ends the search loop.
                        Exit While
                    End If
                End While
            End Using
        Catch e As PDFNetException
            Console.WriteLine(e.Message)
        End Try
    End Sub
End Module
close

Free Trial

Get unlimited trial usage of PDFTron SDK to bring accurate, reliable, and fast document processing capabilities to any application or workflow.

Select a platform to get started with your free trial.

Unlimited usage. No email address required.