From 27162a79266d88cce087a8ba881a6bcc6c78c276 Mon Sep 17 00:00:00 2001 From: Uwe Steinmann Date: Thu, 8 Aug 2019 07:47:12 +0200 Subject: [PATCH] add converters for fulltext search --- doc/README.Converters | 46 ++++++++++++++++++++++++++++++++++++++----- 1 file changed, 41 insertions(+), 5 deletions(-) diff --git a/doc/README.Converters b/doc/README.Converters index 5256f0f89..784922b4c 100644 --- a/doc/README.Converters +++ b/doc/README.Converters @@ -1,5 +1,33 @@ -Conversion to pdf -================= +Conversion to text for fulltext search +======================================= + +text/plain +text/csv + cat '%s' + +application/pdf + pdftotext -nopgbrk %s - | sed -e 's/ [a-zA-Z0-9.]\{1\} / /g' -e 's/[0-9.]//g' + +application/vnd.openxmlformats-officedocument.wordprocessingml.document + docx2txt '%s' - + +application/msword + catdoc %s + +application/vnd.openxmlformats-officedocument.spreadsheetml.sheet + xlsx2csv %s + +application/vnd.ms-excel + xls2csv %s + +text/html + html2text %s + +Many office formats + unoconv -d document -f txt --stdout '%s' + +Conversion to pdf for pdf preview +================================== text/plain text/csv @@ -22,8 +50,16 @@ application/vnd.ms-excel application/vnd.openxmlformats-officedocument.spreadsheetml.sheet unoconv -d spreadsheet -f pdf --stdout -v '%f' > '%o' -Preview -======== +Conversion to png for preview images +===================================== + +If you have problems running convert on PDF documents then read this page +https://askubuntu.com/questions/1081895/trouble-with-batch-conversion-of-png-to-pdf-using-convert +It basically instructs you to comment out the line + +<policy domain="coder" rights="none" pattern="PDF" /> + +in /etc/ImageMagick-6/policy.xml image/jpg image/jpeg @@ -46,5 +82,5 @@ application/rtf application/vnd.ms-powerpoint text/csv application/vnd.wordperfect - /usr/bin/unoconv -d document -e PageRange=1 -f pdf --stdout -v '%f' | gs -dBATCH -dNOPAUSE -sDEVICE=pngalpha -dPDFFitPage -r72x72 -sOutputFile=- -dFirstPage=1 -dLastPage=1 -q - | convert 
-resize %wx png:- '%o' + unoconv -d document -e PageRange=1 -f pdf --stdout -v '%f' | gs -dBATCH -dNOPAUSE -sDEVICE=pngalpha -dPDFFitPage -r72x72 -sOutputFile=- -dFirstPage=1 -dLastPage=1 -q - | convert -resize %wx png:- '%o'