Some test text!

platform
search
< iOS samples

PDF Image Extraction in Swift

Sample Swift code for using PDFTron SDK to extract images from PDF files, along with their positioning information and DPI. Instead of converting PDF images to a Bitmap, you can also extract uncompressed/compressed image data directly using element.GetImageData() (described in the PDF Data Extraction code sample). Learn more about our PDF Parsing & Content Extraction Library.

Step 1: Get your free trial license key, or sign in

Start Trial
Sign in

Step 2: Add the code:

//---------------------------------------------------------------------------------------
// Copyright (c) 2001-2017 by PDFTron Systems Inc. All Rights Reserved.
// Consult legal.txt regarding legal and license information.
//---------------------------------------------------------------------------------------

import PDFNet
import Foundation

//-----------------------------------------------------------------------------------
// This sample illustrates one approach to PDF image extraction
// using PDFNet.
//
// Note: Besides direct image export, you can also convert PDF images
// to a bitmap, or extract uncompressed/compressed image data directly
// using element.GetImageData() (e.g. as illustrated in the ElementReaderAdv
// sample project).
//-----------------------------------------------------------------------------------

/// Running count of the images reported so far; read and updated by the
/// extraction routines in this file to number log lines and exported files.
var image_counter = 0

/// Walks every element supplied by `reader`, reporting each image it finds and
/// exporting image XObjects to the app's Documents directory.
///
/// For every image or inline image the global `image_counter` is incremented and
/// the width, height, bits per component and CTM-derived coordinates are printed.
/// Image XObjects (but not inline images) are additionally exported to
/// `<Documents>/image_extract1_<counter>`. Form XObjects are entered with
/// `formBegin()` and processed recursively with the same reader.
///
/// - Parameter reader: An element reader that the caller has already started
///   (via `begin` or `formBegin`); the caller is responsible for the matching `end()`.
func ImageExtract(reader: PTElementReader) {
    while let element = reader.next() {
        switch element.getType() {
        case e_ptimage, e_ptinline_image:
            image_counter += 1
            print("--> Image: \(image_counter)")
            print("    Width: \(element.getImageWidth())")
            print("    Height: \(element.getImageHeight())")
            print("    BPC: \(element.getBitsPerComponent())")

            // Map the unit square's (1, 1) corner through the CTM. The transformed
            // point is taken from the value returned by `mult`; the previous code
            // dropped that value and always printed the untransformed constants.
            let ctm: PTMatrix2D = element.getCTM()
            let corner: PTPDFPoint = ctm.mult(PTPDFPoint(px: 1, py: 1))
            print("    Coords: x1=\(ctm.getM_h()), y1=\(ctm.getM_v()), x2=\(corner.getX()), y2=\(corner.getY())")

            // Only image XObjects can be wrapped in a PTImage and exported.
            if element.getType() == e_ptimage {
                let image: PTImage = PTImage(image_xobject: element.getXObject())
                // Build the path from the plain directory path. Passing a URL's
                // `absoluteString` ("file://...") to `URL(fileURLWithPath:)` would be
                // interpreted as a relative file name and yield an invalid location.
                let documentsPath = NSSearchPathForDirectoriesInDomains(.documentDirectory, .userDomainMask, true)[0]
                let path: String = URL(fileURLWithPath: documentsPath)
                    .appendingPathComponent("image_extract1_\(image_counter)")
                    .path
                image.export(toFile: path)
            }
        case e_ptform:
            // Descend into the form XObject, then return to the parent stream.
            reader.formBegin()
            ImageExtract(reader: reader)
            reader.end()
        default:
            break
        }
    }
}

/// Runs both image-extraction examples against the bundled `newsletter.pdf`.
///
/// Example 1 traverses the display list of every page with a `PTElementReader`
/// (see `ImageExtract(reader:)`); Example 2 scans the low-level object table for
/// image XObject streams and exports each one to
/// `<Documents>/image_extract2_<counter>`.
///
/// The global `image_counter` is reset at the start of each example so that
/// repeated calls produce the same numbering and file names.
///
/// - Returns: `0` when both examples complete, `1` if either one reported an error
///   through `PTPDFNet.catchException`.
func runImageExtractTest() -> Int {
    return autoreleasepool {
        var ret: Int = 0

        // No PDFNet initialization is performed here; it is assumed to have been
        // done by the host application before this function is called.

        // Example 1:
        // Extract images by traversing the display list for
        // every page. With this approach it is possible to obtain
        // image positioning information and DPI.
        do {
            try PTPDFNet.catchException {
                let doc: PTPDFDoc = PTPDFDoc(filepath: Bundle.main.path(forResource: "newsletter", ofType: "pdf"))
                doc.initSecurityHandler()

                // Start numbering from scratch so a second run does not continue
                // from the count left behind by a previous invocation.
                image_counter = 0

                let reader: PTElementReader = PTElementReader()
                // Read every page
                let itr: PTPageIterator = doc.getPageIterator(1)
                while itr.hasNext() {
                    reader.begin(itr.current())
                    ImageExtract(reader: reader)
                    reader.end()
                    itr.next()
                }

                print("Done...")
            }
        } catch let e as NSError {
            print("\(e)")
            ret = 1
        }

        print("----------------------------------------------------------------")

        // Example 2:
        // Extract images by scanning the low-level document.
        do {
            try PTPDFNet.catchException {
                let doc: PTPDFDoc = PTPDFDoc(filepath: Bundle.main.path(forResource: "newsletter", ofType: "pdf"))
                doc.initSecurityHandler()

                image_counter = 0

                let cos_doc: PTSDFDoc = doc.getSDFDoc()
                let num_objs = cos_doc.xRefSize()
                // Object numbers are visited starting at 1; entries that cannot be
                // fetched are skipped.
                for i in 1..<num_objs {
                    guard let obj: PTObj = cos_doc.getObj(i) else {
                        continue
                    }
                    if !obj.isFree() && obj.isStream() {
                        // Process only images: /Type must be XObject ...
                        var itr: PTDictIterator = obj.find("Type")
                        if !itr.hasNext() || !(itr.value().getName() == "XObject") {
                            continue
                        }

                        // ... and /Subtype must be Image.
                        itr = obj.find("Subtype")
                        if !itr.hasNext() || !(itr.value().getName() == "Image") {
                            continue
                        }

                        let image: PTImage = PTImage(image_xobject: obj)
                        image_counter += 1
                        // Same "--> Image" prefix as the Example 1 output.
                        print("--> Image: \(image_counter)")
                        print("    Width: \(image.getWidth())")
                        print("    Height: \(image.getHeight())")
                        print("    BPC: \(image.getBitsPerComponent())")

                        let path: String = URL(fileURLWithPath: NSSearchPathForDirectoriesInDomains(.documentDirectory, .userDomainMask, true)[0]).appendingPathComponent("image_extract2_\(image_counter)").path
                        image.export(toFile: path)
                    }
                }

                print("Done...")
            }
        } catch let e as NSError {
            print("\(e)")
            ret = 1
        }

        return ret
    }
}