Some test text!

menu

PDF image extraction in JavaScript

More languages

chevron_right
More languages
Java (Android)
C++
C#
C# (.NET Core)
Java
Kotlin
Obj-C
JS (Node.js)
PHP
Python
Ruby
Swift
C# (UWP)
VB
C# (Xamarin)

Sample JavaScript code for using PDFTron SDK to extract images from PDF files, along with their positioning information and DPI. Instead of converting PDF images to a Bitmap, you can also extract uncompressed/compressed image data directly using element.GetImageData() (described in the PDF Data Extraction code sample). Learn more about our JavaScript PDF Library and PDF Parsing & Content Extraction Library.

Get Started · Samples · Download

To run this sample, get started with a free trial of PDFTron SDK.

//---------------------------------------------------------------------------------------
// Copyright (c) 2001-2019 by PDFTron Systems Inc. All Rights Reserved.
// Consult legal.txt regarding legal and license information.
//---------------------------------------------------------------------------------------

//-----------------------------------------------------------------------------------
// This sample illustrates one approach to PDF image extraction 
// using PDFNet.
// 
// Note: Besides direct image export, you can also convert PDF images 
// to GDI+ Bitmap, or extract uncompressed/compressed image data directly 
// using element.GetImageData() (e.g. as illustrated in ElementReaderAdv 
// sample project).
//-----------------------------------------------------------------------------------

const { PDFNet } = require('../../lib/pdfnet.js');

((exports) => {
  'use strict';

  exports.runImageExtractTest = () => {

    let image_counter = 0;
    const output_path = '../TestFiles/Output/';

    /**
     * Walks every element produced by `reader` and reports the images it finds.
     *
     * For each image or inline-image element this logs the running image number,
     * width, height, bits per component and coordinates derived from the element's
     * CTM. Image XObjects (not inline images) are additionally exported to
     * `output_path + 'image_extract1_' + image_counter`. Form XObjects are entered
     * and processed recursively with the same reader.
     *
     * @param {object} reader - Element reader already positioned on a page or form.
     * @returns {Promise<void>} Resolves once all elements have been processed and
     *   every export started here has finished.
     */
    const imageExtract = async (reader) => {
      let element;
      while ((element = await reader.next()) !== null) {
        const type = await element.getType();
        switch (type) {
          case PDFNet.Element.Type.e_image:
          case PDFNet.Element.Type.e_inline_image: {
            console.log('--> Image: ' + ++image_counter);
            console.log('    Width: ' + await element.getImageWidth());
            console.log('    Height: ' + await element.getImageHeight());
            console.log('    BPC: ' + await element.getBitsPerComponent());

            // x1/y1 are the translation components of the CTM; x2/y2 are where the
            // CTM maps the point (1, 1). mult() cannot modify its numeric arguments
            // (JS passes numbers by value), so the transformed point must be read
            // from the resolved result.
            const ctm = await element.getCTM();
            const corner = await ctm.mult(1, 1);
            console.log('    Coords: x1=' + ctm.m_h.toFixed(3) + ', y1=' + ctm.m_v.toFixed(3) + ', x2=' + corner.x.toFixed(3) + ', y2=' + corner.y.toFixed(3));

            // Only image XObjects are exported; inline images are just reported.
            if (type === PDFNet.Element.Type.e_image) {
              const image = await PDFNet.Image.createFromObj(await element.getXObject());
              // Await the export so failures reach the caller's try/catch.
              await image.export(output_path + 'image_extract1_' + image_counter);
            }
            break;
          }
          case PDFNet.Element.Type.e_form: {
            // Process form XObjects: enter the form, recurse, then leave it again.
            await reader.formBegin();
            await imageExtract(reader);
            await reader.end();
            break;
          }
          default:
            break;
        }
      }
    };

    /**
     * Runs both image-extraction examples against '../TestFiles/newsletter.pdf'.
     *
     * Example 1 traverses the display list of every page via `imageExtract`.
     * Example 2 scans every object of the low-level document and exports each
     * stream whose Type is "XObject" and Subtype is "Image".
     * Errors in either example are logged and do not stop the other example.
     *
     * @returns {Promise<void>} Resolves after 'Test Complete!' has been logged.
     */
    const main = async () => {

      // Example 1: 
      // Extract images by traversing the display list for 
      // every page. With this approach it is possible to obtain 
      // image positioning information and DPI.
      try {
        const doc = await PDFNet.PDFDoc.createFromFilePath('../TestFiles/newsletter.pdf');
        await doc.initSecurityHandler();

        const reader = await PDFNet.ElementReader.create();
        const itr = await doc.getPageIterator(1);
        // Read every page
        for (; await itr.hasNext(); await itr.next()) {
          const page = await itr.current();
          // Await each reader call so pages are processed strictly in order and
          // any rejection is caught by the surrounding try/catch.
          await reader.beginOnPage(page);
          await imageExtract(reader);
          await reader.end();
        }
      } catch (err) {
        console.log(err);
      }

      // Example 2: 
      // Extract images by scanning the low-level document.
      try {
        const doc = await PDFNet.PDFDoc.createFromFilePath('../TestFiles/newsletter.pdf');
        await doc.initSecurityHandler();
        // Restart numbering so this example's output files begin at 1.
        image_counter = 0;

        const cos_doc = await doc.getSDFDoc();
        const num_objs = await cos_doc.xRefSize();
        for (let i = 0; i < num_objs; i++) {
          const obj = await cos_doc.getObj(i);
          if (obj && !(await obj.isFree()) && await obj.isStream()) {
            // Process only images
            const typeItr = await obj.find("Type");
            if (!(await typeItr.hasNext()) || await (await typeItr.value()).getName() !== "XObject") {
              continue;
            }

            const subtypeItr = await obj.find("Subtype");
            if (!(await subtypeItr.hasNext()) || await (await subtypeItr.value()).getName() !== "Image") {
              continue;
            }

            const image = await PDFNet.Image.createFromObj(obj);
            console.log('--> Image: ' + ++image_counter);
            console.log('    Width: ' + await image.getImageWidth());
            console.log('    Height: ' + await image.getImageHeight());
            console.log('    BPC: ' + await image.getBitsPerComponent());

            // Await the export so it completes (or fails into the catch below)
            // before the next object is processed.
            await image.export(output_path + 'image_extract2_' + image_counter);
          }
        }
      } catch (err) {
        console.log(err);
      }

      console.log('Test Complete!');
    };
    // Run `main` through PDFNet.runWithCleanup and shut PDFNet down afterwards.
    // Replace the second argument (0) with your own license key.
    // The catch comes first so that shutdown still runs when runWithCleanup rejects.
    PDFNet.runWithCleanup(main, 0)
      .catch((err) => {
        console.log(err);
      })
      .then(() => PDFNet.shutdown());
  };
  exports.runImageExtractTest();
})(exports);
// eslint-disable-next-line spaced-comment
//# sourceURL=ImageExtractTest.js
close

Free Trial

Get unlimited trial usage of PDFTron SDK to bring accurate, reliable, and fast document processing capabilities to any application or workflow.

Select a platform to get started with your free trial.

Unlimited usage. No email address required.

Join our live demo to learn about use cases & capabilities for WebViewer

Learn more
close