Some test text!

menu
Text searchkeyboard_arrow_down

Search for text in a PDF in Node.js

The following sample searches for text in a PDF using a regular expression and then applies a link annotation to the highlighted result.

In this example, we add a link annotation, but any other type of annotation can be applied here, such as redaction annotations in a search-and-redact workflow.
// Searches a PDF for a credit-card-number pattern using a regular expression
// and places a link annotation over each quad of the first match found.
// `filename` and `PDFNet` must already be in scope.
const doc = await PDFNet.PDFDoc.createFromURL(filename);
const txtSearch = await PDFNet.TextSearch.create();

const { Mode, ResultCode } = PDFNet.TextSearch;

// Whole-word matching, stop at each page end, treat the pattern as a regular
// expression, and collect highlight information for every match.
const mode = Mode.e_whole_word | Mode.e_page_stop | Mode.e_reg_expression | Mode.e_highlight;
await txtSearch.setMode(mode);

// Regular expression that matches a credit card number.
const pattern = '\\d{4}-\\d{4}-\\d{4}-\\d{4}'; // or "(\\d{4}-){3}\\d{4}"
await txtSearch.setPattern(pattern);

// begin() initializes the text search; await it so failures surface here.
await txtSearch.begin(doc, pattern, mode);

// With e_page_stop set, run() is expected to return at every page boundary
// with an e_page result (per the PDFNet TextSearch docs), so keep running
// until a match is found or the search finishes.
let result = await txtSearch.run();
while (result.code === ResultCode.e_page) {
  result = await txtSearch.run();
}

if (result.code === ResultCode.e_found) {
  // Add a link annotation based on the location of the found instance.
  const hlts = result.highlights;
  await hlts.begin(doc);
  while (await hlts.hasNext()) {
    const curPage = await doc.getPage(await hlts.getCurrentPageNumber());
    const quadArr = await hlts.getCurrentQuads();
    for (const quad of quadArr) {
      // Axis-aligned bounding box of the quad's four corner points.
      const x1 = Math.min(quad.p1x, quad.p2x, quad.p3x, quad.p4x);
      const x2 = Math.max(quad.p1x, quad.p2x, quad.p3x, quad.p4x);
      const y1 = Math.min(quad.p1y, quad.p2y, quad.p3y, quad.p4y);
      const y2 = Math.max(quad.p1y, quad.p2y, quad.p3y, quad.p4y);

      const hyperLink = await PDFNet.LinkAnnot.create(doc, await PDFNet.Rect.init(x1, y1, x2, y2));
      await hyperLink.setAction(await PDFNet.Action.createURI(doc, 'http://www.pdftron.com'));
      await curPage.annotPushBack(hyperLink);
    }
    // Await the advance so the next hasNext() sees the updated position.
    await hlts.next();
  }
}

Search PDF files for text
Full code sample that shows how to use TextSearch to search for text on PDF pages using regular expressions.

Get the answers you need: Support

close

Free Trial

Get unlimited trial usage of PDFTron SDK to bring accurate, reliable, and fast document processing capabilities to any application or workflow.

Select a platform to get started with your free trial.

Unlimited usage. No email address required.