#! /bin/bash
# vim: set filetype=bash:

# pdfquant: reduce the file size of PDFs composed of scanned images by
# extracting images, quantizing them, and creating a new PDF; relies on
# img2pdf, pdfimages, pdfseparate, pngquant, and tempname (and, optionally,
# on parallel and opt-png).

# Copyright (C) 2021-2026 by Brian Lindholm.  This file is part of the
# littleutils utility set.
#
# The pdfquant utility is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 3, or (at your option) any later version.
#
# The pdfquant utility is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with
# the littleutils.  If not, see <https://www.gnu.org/licenses/>.

# get command-line options

# Print the version banner and the usage summary on stdout.
pdfquant_usage() {
  echo 'pdfquant 1.4.0'
  echo 'usage: pdfquant [-f(loyd-steinberg)] [-h(elp)] [-n colors] [-o(ptimize)]'
  echo '         [-q(uiet)] [-s(ingle_threaded)] pdf_file...'
}

# Parse the option arguments passed as "$@" into the global settings.
#   Globals written: COLORS, DITHERING, OPTIMIZE, QUIET, SUFFIX, USE_PARALLEL,
#                    and OPTIND (index of the first non-option argument).
#   Exits 0 after -h; exits 1 on an unknown option or a non-numeric -n value.
pdfquant_parse_options() {
  local opts
  while getopts fhn:oqs opts ; do
    case $opts in
      f) DITHERING='--floyd=1'
         SUFFIX='-fs8' ;;
      h) pdfquant_usage
         exit 0 ;;
      n) # COLORS is an integer variable, so assigning raw text to it would be
         # evaluated as shell arithmetic ("08" and "16colors" are errors
         # there); accept plain decimal digits only and force base 10.
         if [[ ! $OPTARG =~ ^[0-9]+$ ]]; then
           echo "pdfquant error: color count '${OPTARG}' is not a non-negative integer" >&2
           pdfquant_usage >&2
           exit 1
         elif [[ $OPTARG =~ ^0*([0-9]{1,9})$ ]]; then
           COLORS=$((10#${BASH_REMATCH[1]}))
         else
           # more than nine significant digits: far beyond any usable value
           # and able to overflow shell arithmetic, so store 256 directly
           COLORS=256
         fi ;;
      o) OPTIMIZE='y' ;;
      q) QUIET='y' ;;
      s) USE_PARALLEL='n' ;;
      *) pdfquant_usage >&2
         exit 1 ;;
    esac
  done
}

# defaults: 8 colors, ordered dithering, no optimization, parallel, verbose
declare -i COLORS=8
DITHERING='--ordered'
OPTIMIZE='n'
USE_PARALLEL='y'
QUIET='n'
SUFFIX='-or8'
pdfquant_parse_options "$@"
shift $((OPTIND - 1))

# double-check parameters
# force the requested color count into the 2..256 range
(( COLORS >= 2 )) || COLORS=2
(( COLORS <= 256 )) || COLORS=256
# switch to serial processing when no "parallel" command can be found
command -v parallel > /dev/null || USE_PARALLEL='n'

# set up traps and create temporary folder
# on HUP, INT, QUIT, PIPE, or TERM: remove the temporary folder and give up
pdfquant_abort() {
  rm -rf "$TMPFOLDER"
  exit 1
}
trap pdfquant_abort HUP INT QUIT PIPE TERM
if ! TMPFOLDER=$(tempname -D pdfquant_$$); then
  exit 99
fi

# run through files

# Build a quantized copy of one PDF, using the current directory as scratch
# space (the caller is expected to have changed into an empty work folder).
#   $1 - absolute path of the input PDF; the result is written next to it as
#        <name>-<COLORS>.pdf
#   $2 - the input name as typed by the user (used in messages only)
#   Globals read: COLORS, DITHERING, OPTIMIZE, QUIET, SUFFIX, USE_PARALLEL
#   Returns 0 on success, non-zero as soon as a stage fails or yields no files.
pdfquant_convert() {
  local input=$1 shown=$2
  local page png
  local -a pages pngs quantized

  # explode original file into individual pages
  if [[ $QUIET = 'n' ]]; then
    echo "pdfquant message: splitting ${shown} into pages..."
  fi
  if ! pdfseparate "$input" page-%06d.pdf; then
    echo "pdfquant error: unable to split ${shown} into pages" >&2
    return 1
  fi
  # an unmatched glob stays literal, so confirm that the first entry exists
  pages=(page*.pdf)
  if [[ ! -e ${pages[0]} ]]; then
    echo "pdfquant error: no pages were found in ${shown}" >&2
    return 1
  fi

  # convert pages into PNG images
  if [[ $QUIET = 'n' ]]; then
    echo 'pdfquant message: converting pages to PNG...'
  fi
  if [[ $USE_PARALLEL = 'n' ]]; then
    for page in "${pages[@]}"; do
      pdfimages -png "$page" "${page%.pdf}"
    done
  else
    parallel -s 10000 --plus 'pdfimages -png {} {%.pdf}' ::: "${pages[@]}"
  fi
  pngs=(page*.png)
  if [[ ! -e ${pngs[0]} ]]; then
    echo "pdfquant error: no images could be extracted from ${shown}" >&2
    return 1
  fi

  # create quantized versions of PNG images
  if [[ $QUIET = 'n' ]]; then
    echo 'pdfquant message: quantizing PNG pages...'
  fi
  if [[ $USE_PARALLEL = 'n' ]]; then
    for png in "${pngs[@]}"; do
      pngquant -s 1 "$DITHERING" "$COLORS" "$png"
    done
  else
    parallel -s 10000 pngquant -s 1 "$DITHERING" "$COLORS" ::: "${pngs[@]}"
  fi
  # the quantized files are the ones whose names end in ${SUFFIX}.png
  quantized=(page*"${SUFFIX}".png)
  if [[ ! -e ${quantized[0]} ]]; then
    echo "pdfquant error: no quantized images were produced for ${shown}" >&2
    return 1
  fi

  # optimize if requested
  if [[ $OPTIMIZE = 'y' ]]; then
    if [[ $QUIET = 'n' ]]; then
      echo 'pdfquant message: optimizing PNG pages...'
    fi
    if [[ $USE_PARALLEL = 'n' ]]; then
      opt-png -q "${quantized[@]}"
    else
      parallel -s 10000 opt-png -q ::: "${quantized[@]}"
    fi
  fi

  # reassemble PNG images into new PDF
  if [[ $QUIET = 'n' ]]; then
    echo 'pdfquant message: restoring PNG pages to PDF...'
  fi
  if ! img2pdf -D --engine=internal -o "${input%.pdf}-${COLORS}.pdf" "${quantized[@]}"; then
    echo "pdfquant error: unable to assemble the new PDF for ${shown}" >&2
    return 1
  fi
}

while [ $# -gt 0 ]; do

  # make sure that input file is specified
  if [[ ! -f $1 || ! -r $1 ]]; then
    echo "pdfquant error: input file ${1} is not a readable file" >&2
    shift; continue
  fi
  # absolute path is needed because the work happens in another directory
  if ! INPUTFILE=$(realpath -- "$1"); then
    echo "pdfquant error: unable to resolve the path of ${1}" >&2
    shift; continue
  fi

  # Never carry on outside the temporary folder: the cleanup below removes
  # every page* file from the current directory.
  if [[ -z $TMPFOLDER ]] || ! pushd "$TMPFOLDER" > /dev/null; then
    echo "pdfquant error: unable to enter temporary folder ${TMPFOLDER}" >&2
    [[ -n $TMPFOLDER ]] && rm -rf -- "$TMPFOLDER"
    exit 1
  fi

  if [[ $QUIET = 'n' ]]; then
    echo 'pdfquant message:  -BEGIN-'
  fi
  # a failure is reported by the function itself; move on to the next file
  pdfquant_convert "$INPUTFILE" "$1"

  # clean up afterwards
  if [[ $QUIET = 'n' ]]; then
    echo 'pdfquant message:  -END-'
    echo ''
  fi
  rm -f page*
  popd > /dev/null
  shift

done
rmdir "$TMPFOLDER"
