Some test text!

Search
Hamburger Icon

Search PDF files for text in VB

More languages

More languages
JavaScript
Java (Android)
C++
C#
C# (.NET Core)
Go
Java
Kotlin
Obj-C
JS (Node.js)
PHP
Python
Ruby
Swift
C# (UWP)
VB
C# (Xamarin)

Sample VB code that uses the Apryse SDK to search for text on PDF pages using regular expressions. The TextSearch utility class builds on the functionality available in TextExtractor to simplify the most common search operations. Learn more about our VB PDF Library and PDF Indexed Search Library.

Get Started Samples Download

To run this sample, get started with a free trial of Apryse SDK.

'
' Copyright (c) 2001-2023 by Apryse Software Inc. All Rights Reserved.
'

Imports System
Imports pdftron
Imports pdftron.Common
Imports pdftron.Filters
Imports pdftron.SDF
Imports pdftron.PDF

''' <summary>
''' Sample that runs a sequence of TextSearch queries over "credit card numbers.pdf":
''' a plain-text name, two regular-expression card-number patterns, and finally an
''' upward regular-expression search whose highlights are turned into link annotations
''' before the document is saved.
''' </summary>
Module TextSearchTestVB
    Dim pdfNetLoader As PDFNetLoader
    Sub New()
        pdfNetLoader = pdftron.PDFNetLoader.Instance()
    End Sub

    ''' <summary>
    ''' Entry point. Initializes PDFNet, performs the staged searches, prints each
    ''' match to the console and writes "credit card numbers_linked.pdf" to the
    ''' output folder. PDFNetException messages are printed rather than rethrown.
    ''' </summary>
    Sub Main()
        PDFNet.Initialize(PDFTronLicense.Key)
        Dim input_path As String = "../../../../TestFiles/"

        Try

            Using doc As PDFDoc = New PDFDoc(input_path & "credit card numbers.pdf")
                doc.InitSecurityHandler()
                Dim page_num As Int32 = 0
                Dim result_str As String = "", ambient_string As String = ""
                Dim hlts As Highlights = New Highlights()
                Dim txt_search As TextSearch = New TextSearch()
                Dim mode As Int32 = CInt((TextSearch.SearchMode.e_whole_word Or TextSearch.SearchMode.e_page_stop Or TextSearch.SearchMode.e_highlight))
                Dim pattern As String = "joHn sMiTh"
                txt_search.Begin(doc, pattern, mode, -1, -1)

                ' step_ tracks which of the four staged queries the next match belongs to.
                Dim step_ As Integer = 0

                While True
                    Dim code As TextSearch.ResultCode = txt_search.Run(page_num, result_str, ambient_string, hlts)

                    If code = TextSearch.ResultCode.e_found Then

                        If step_ = 0 Then
                            ' Stage 0: the plain-text name was found; switch to regular-expression
                            ' mode and look for a 4-4-4-4 digit card number next.
                            Console.WriteLine(result_str & "'s credit card number is: ")
                            mode = txt_search.GetMode()
                            mode = mode Or CInt((TextSearch.SearchMode.e_reg_expression Or TextSearch.SearchMode.e_highlight))
                            txt_search.SetMode(mode)
                            pattern = "\d{4}-\d{4}-\d{4}-\d{4}"
                            txt_search.SetPattern(pattern)
                            step_ += 1
                        ElseIf step_ = 1 Then
                            ' Stage 1: print the card number and the page of every highlight,
                            ' then look for a 4-6-5 digit number.
                            Console.WriteLine("  " & result_str)
                            hlts.Begin(doc)

                            While hlts.HasNext()
                                Console.WriteLine("The current highlight is from page: " & hlts.GetCurrentPageNumber())
                                hlts.Next()
                            End While

                            pattern = "\d{4}-\d{6}-\d{5}"
                            txt_search.SetPattern(pattern)
                            step_ += 1
                        ElseIf step_ = 2 Then
                            ' Stage 2: print the 4-6-5 number, then add e_search_up to the mode
                            ' and search for two space-separated words.
                            Console.WriteLine(vbLf & "There is an AMEX card number:" & vbLf & "  " & result_str)
                            mode = txt_search.GetMode()
                            mode = mode Or CInt((TextSearch.SearchMode.e_search_up))
                            txt_search.SetMode(mode)
                            ' NOTE(review): the range [A-z] also covers the ASCII punctuation
                            ' between 'Z' and 'a' ([ \ ] ^ _ `); confirm whether [A-Za-z] was intended.
                            pattern = "[A-z]++ [A-z]++"
                            txt_search.SetPattern(pattern)
                            step_ += 1
                        ElseIf step_ = 3 Then
                            ' Stage 3: print the candidate name, link every highlighted quad,
                            ' save the document and stop searching.
                            Console.WriteLine("Is the owner's name:" & vbLf & "  " & result_str & "?")
                            AddLinksOverHighlights(doc, hlts)

                            Dim output_path As String = "../../../../TestFiles/Output/"
                            doc.Save(output_path & "credit card numbers_linked.pdf", SDFDoc.SaveOptions.e_linearized)
                            Exit While
                        End If
                    ElseIf code = TextSearch.ResultCode.e_page Then
                        ' Nothing to do for e_page; keep calling Run.
                    Else
                        ' Any other result code ends the search.
                        Exit While
                    End If
                End While
            End Using
        Catch e As PDFNetException
            Console.WriteLine(e.Message)
        Finally
            ' Terminate in Finally so the library is shut down even when an exception
            ' other than PDFNetException leaves the Try block.
            PDFNet.Terminate()
        End Try
    End Sub

    ''' <summary>
    ''' Walks every entry in <paramref name="hlts"/> and, for each quad it reports,
    ''' appends a URI link annotation covering the quad's axis-aligned bounding box
    ''' to the page the highlight is on.
    ''' </summary>
    ''' <param name="doc">Document that owns the pages and the new annotations.</param>
    ''' <param name="hlts">Highlights to convert into link annotations.</param>
    Private Sub AddLinksOverHighlights(ByVal doc As PDFDoc, ByVal hlts As Highlights)
        hlts.Begin(doc)

        While hlts.HasNext()
            Dim cur_page As Page = doc.GetPage(hlts.GetCurrentPageNumber())
            Dim quads As Double() = hlts.GetCurrentQuads()

            ' Each quad is read as 8 consecutive doubles (x at even offsets, y at odd ones).
            ' Integer division (\) keeps the count an Integer without an implicit
            ' Double-to-Integer conversion.
            Dim quad_count As Integer = quads.Length \ 8

            For i As Integer = 0 To quad_count - 1
                Dim offset As Integer = 8 * i
                Dim x1 As Double = Math.Min(Math.Min(Math.Min(quads(offset + 0), quads(offset + 2)), quads(offset + 4)), quads(offset + 6))
                Dim x2 As Double = Math.Max(Math.Max(Math.Max(quads(offset + 0), quads(offset + 2)), quads(offset + 4)), quads(offset + 6))
                Dim y1 As Double = Math.Min(Math.Min(Math.Min(quads(offset + 1), quads(offset + 3)), quads(offset + 5)), quads(offset + 7))
                Dim y2 As Double = Math.Max(Math.Max(Math.Max(quads(offset + 1), quads(offset + 3)), quads(offset + 5)), quads(offset + 7))
                Dim hyper_link As pdftron.PDF.Annots.Link = pdftron.PDF.Annots.Link.Create(doc, New Rect(x1, y1, x2, y2), pdftron.PDF.Action.CreateURI(doc, "http://www.pdftron.com"))
                hyper_link.RefreshAppearance()
                cur_page.AnnotPushBack(hyper_link)
            Next

            hlts.Next()
        End While
    End Sub
End Module