Some test text!

Text search

Search for text in a PDF in Node.js

The following example searches for text in a PDF using a regular expression and then applies a link annotation to each highlighted result.

In this example, we add a link annotation, but any other type of annotation can be applied here instead, such as a redaction annotation in a search-and-redact workflow.
/**
 * Searches a PDF for credit-card-like numbers with a regular expression and
 * places a link annotation over every highlighted quad of the first match.
 *
 * Relies on two names that must be in scope where this snippet is pasted:
 * `PDFNet` (the PDFNet SDK namespace) and `filename` (passed unchanged to
 * `PDFNet.PDFDoc.createFromURL`).
 *
 * @returns {Promise<void>} Resolves once the annotations have been added.
 */
async function main() {
  const doc = await PDFNet.PDFDoc.createFromURL(filename);
  const txtSearch = await PDFNet.TextSearch.create();

  const { Mode, ResultCode } = PDFNet.TextSearch;
  // Whole-word matching, stop at page boundaries, treat the pattern as a
  // regular expression, and ask the search to produce highlight info.
  const mode = Mode.e_whole_word | Mode.e_page_stop | Mode.e_reg_expression | Mode.e_highlight;

  // Regular expression for a credit card number; "(\\d{4}-){3}\\d{4}" is equivalent.
  const pattern = '\\d{4}-\\d{4}-\\d{4}-\\d{4}';

  // begin() initializes the text search; run() performs one search step.
  await txtSearch.begin(doc, pattern, mode);
  const result = await txtSearch.run();

  if (result.code !== ResultCode.e_found) {
    return;
  }

  // Add a link annotation based on the location of the found instance.
  const hlts = result.highlights;
  await hlts.begin(doc);
  while (await hlts.hasNext()) {
    const curPage = await doc.getPage(await hlts.getCurrentPageNumber());
    const quadArr = await hlts.getCurrentQuads();
    for (let i = 0; i < quadArr.length; ++i) {
      const quad = quadArr[i];
      // Axis-aligned bounding box of the quad's four corner points.
      const x1 = Math.min(quad.p1x, quad.p2x, quad.p3x, quad.p4x);
      const x2 = Math.max(quad.p1x, quad.p2x, quad.p3x, quad.p4x);
      const y1 = Math.min(quad.p1y, quad.p2y, quad.p3y, quad.p4y);
      const y2 = Math.max(quad.p1y, quad.p2y, quad.p3y, quad.p4y);

      const hyperLink = await PDFNet.LinkAnnot.create(doc, await PDFNet.Rect.init(x1, y1, x2, y2));
      await hyperLink.setAction(await PDFNet.Action.createURI(doc, ''));
      await curPage.annotPushBack(hyperLink);
    }
    // Advance to the next highlight; without this the loop never terminates.
    await hlts.next();
  }
}

Search PDF files for text
Full code sample which shows how to use TextSearch to search text on PDF pages using regular expressions.

Get the answers you need: Support

Upcoming Webinar: Live tech update & run-through: Redaction | March 31st at 11 am PT


The Platform


© 2022 PDFTron Systems Inc. All rights reserved.


Terms of Use