コンテンツへスキップ

PowerShellでPDFを読み込む

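# Prerequisite: download the iTextSharp 5.5.10 release archive and obtain itextsharp.dll from it.
# -OutFile is required; without it Invoke-WebRequest only returns the response object and saves nothing.
# Invoke-WebRequest -Uri 'https://github.com/itext/itextsharp/releases/download/5.5.10/itextsharp-all-5.5.10.zip' -OutFile 'itextsharp-all-5.5.10.zip'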

# Extracts the text of every page of a PDF with iTextSharp and appends it,
# preceded by a per-page banner line, to a text file.
#
# Parameters:
#   ITextLibraryPath - path to itextsharp.dll (must exist).
#   InputFilePath    - path to the PDF to read (must exist).
#   OutputFilePath   - text file that receives the output; it is appended to,
#                      and created if it does not exist yet.
param(
    [parameter(mandatory=$false)][string]$ITextLibraryPath = "E:\tmp\itextsharp.dll",
    [parameter(mandatory=$false)][string]$InputFilePath    = "E:\tmp\input.pdf",
    [parameter(mandatory=$false)][string]$OutputFilePath   = "E:\tmp\output.txt"
)

# Stop at the first failure (missing DLL, unreadable PDF, ...) instead of
# carrying on with a null reader and silently producing nothing.
$ErrorActionPreference = 'Stop'

# .NET APIs resolve relative paths against the process working directory, which
# is not necessarily the current PowerShell location, so hand them full paths.
# Resolve-Path also fails early with a clear error when a file is missing.
$libraryFullPath = (Resolve-Path -LiteralPath $ITextLibraryPath).ProviderPath
$inputFullPath   = (Resolve-Path -LiteralPath $InputFilePath).ProviderPath

[System.Reflection.Assembly]::LoadFrom($libraryFullPath) | Out-Null
$reader = New-Object iTextSharp.text.pdf.PdfReader -ArgumentList $inputFullPath

try {
    $pages = $reader.NumberOfPages

    # Stream banner + page text through a single Out-File so the output file is
    # opened once rather than twice per page. `-Append` keeps the semantics of
    # the `>>` redirection (same default encoding, existing content preserved).
    & {
        for ($i = 1; $i -le $pages; $i++) {
            "=============== page=[{0}] ===============" -f $i
            [iTextSharp.text.pdf.parser.PdfTextExtractor]::GetTextFromPage($reader, $i)
        }
    } | Out-File -FilePath $OutputFilePath -Append
}
finally {
    # Always release the handle on the input PDF, even if extraction fails.
    $reader.Close()
}