Some test text!

Text search

Search for text in a PDF in Go

The following snippet searches a PDF for text using a regular expression and then applies a link annotation to each highlighted result.

In this example, we add a link annotation, but any other type of annotation can be applied here instead, such as a redaction annotation in a search-and-redact workflow.
// Open the document and run a regular-expression text search over it, then
// cover every highlighted match with a link annotation.
doc := NewPDFDoc(filename)
txtSearch := NewTextSearch()

// Combine all search flags up front. Per the PDFTron TextSearch docs,
// TextSearchE_reg_expression makes the pattern a regular expression and
// TextSearchE_highlight asks the search to compute highlight information,
// which GetHighlights() below relies on.
mode := TextSearchE_whole_word | TextSearchE_page_stop | TextSearchE_reg_expression | TextSearchE_highlight

// Declared exactly once: a second `pattern := ...` in the same scope does not compile.
pattern := "\\d{4}-\\d{4}-\\d{4}-\\d{4}" // or "(\\d{4}-){3}\\d{4}"

// Target of the created links. An empty URI would produce a link that goes
// nowhere; replace this with the destination your workflow needs.
linkURI := "http://www.pdftron.com"

// call Begin() method to initialize the text search.
txtSearch.Begin(doc, pattern, uint(mode))
searchResult := txtSearch.Run()

if searchResult.IsFound() {
	// add a link annotation based on the location of the found instance
	hlts := searchResult.GetHighlights()
	// Position the iterator on the first highlight before walking it.
	hlts.Begin(doc)
	for hlts.HasNext() {
		curPage := doc.GetPage(uint(hlts.GetCurrentPageNumber()))
		quadsInfo := hlts.GetCurrentQuads()
		for i := 0; i < int(quadsInfo.Size()); i++ {
			q := quadsInfo.Get(i)
			// assume each quad is an axis-aligned rectangle, so its bounding
			// box is the min/max of the four corner coordinates
			x1 := Min(Min(Min(q.GetP1().GetX(), q.GetP2().GetX()), q.GetP3().GetX()), q.GetP4().GetX())
			x2 := Max(Max(Max(q.GetP1().GetX(), q.GetP2().GetX()), q.GetP3().GetX()), q.GetP4().GetX())
			y1 := Min(Min(Min(q.GetP1().GetY(), q.GetP2().GetY()), q.GetP3().GetY()), q.GetP4().GetY())
			y2 := Max(Max(Max(q.GetP1().GetY(), q.GetP2().GetY()), q.GetP3().GetY()), q.GetP4().GetY())
			hyperLink := LinkCreate(doc.GetSDFDoc(), NewRect(x1, y1, x2, y2), ActionCreateURI(doc.GetSDFDoc(), linkURI))
			// Attach the annotation to the page; without this the link is
			// never added to the document (and hyperLink is an unused variable).
			curPage.AnnotPushBack(hyperLink)
		}
		// Advance to the next highlight; otherwise HasNext() never changes
		// and the loop does not terminate.
		hlts.Next()
	}
}

Search PDF files for text
Full code sample which shows how to use TextSearch to search text on PDF pages using regular expressions.

Get the answers you need: Support

Upcoming Webinar: PDFTron SDK Tech Review | Nov 29, 2022 at 2 pm ET


The Platform


© 2022 PDFTron Systems Inc. All rights reserved.


Terms of Use