mirror of
https://git.code.sf.net/p/seeddms/code
synced 2025-11-27 10:00:41 +00:00
Merge branch 'seeddms-5.1.x' into seeddms-6.0.x
This commit is contained in:
commit
febd5407ba
|
|
@ -350,6 +350,7 @@
|
||||||
- fix folder parameter passed to hook 'folderRowAction'
|
- fix folder parameter passed to hook 'folderRowAction'
|
||||||
- require unrestricted access on document/folder for deletion by rest api
|
- require unrestricted access on document/folder for deletion by rest api
|
||||||
- use php-cache instead of native memcached
|
- use php-cache instead of native memcached
|
||||||
|
- various updates of documentation
|
||||||
|
|
||||||
--------------------------------------------------------------------------------
|
--------------------------------------------------------------------------------
|
||||||
Changes in version 5.1.41
|
Changes in version 5.1.41
|
||||||
|
|
|
||||||
|
|
@ -1,206 +0,0 @@
|
||||||
Commands for converting documents
|
|
||||||
----------------------------------
|
|
||||||
|
|
||||||
This file contains commands for converting different document types
|
|
||||||
into
|
|
||||||
|
|
||||||
* text (for fulltext search)
|
|
||||||
* png (for preview images)
|
|
||||||
* pdf (for pdf documents)
|
|
||||||
|
|
||||||
Such conversions may not necessarily output an excact equivalent of
|
|
||||||
the input file, but outputs a suitable representation, e.g.
|
|
||||||
converting an mp3 file into text may output the metadata or even the
|
|
||||||
lyrics of the song. Converting it into a preview image may result
|
|
||||||
in a picture of the album cover.
|
|
||||||
|
|
||||||
Please note, that when ever a command outputs anything to stderr,
|
|
||||||
this will considered as a failure of the command. Most command line
|
|
||||||
programs have a parameter (.e.g. `-q`) to suppress such an output.
|
|
||||||
|
|
||||||
If you run php-fpm you may encounter problems with charsets based on
|
|
||||||
UTF-8. Programms like `catdoc` read LANG from the environment to
|
|
||||||
set the correct encoding of the output. php-fpm often clears the
|
|
||||||
environment and programms like `catdoc` will not longer output any
|
|
||||||
UTF-8 chars. In such a case you may want to set `clear_env=no` in
|
|
||||||
php-fpm's configuration. On Debian this is done in the file
|
|
||||||
`/etc/php/<php version>/fpm/pool.d/www.conf`. Search for `clear_env`.
|
|
||||||
|
|
||||||
Conversion to text for fulltext search
|
|
||||||
=======================================
|
|
||||||
|
|
||||||
text/plain
|
|
||||||
text/csv
|
|
||||||
application/csv
|
|
||||||
cat '%s'
|
|
||||||
|
|
||||||
application/pdf
|
|
||||||
pdftotext -q -nopgbrk %s - | sed -e 's/ [a-zA-Z0-9.]\{1\} / /g' -e 's/[0-9.]//g'
|
|
||||||
|
|
||||||
If pdftotext takes too long on large document you may want to pass parameter
|
|
||||||
-l to specify the last page to be converted. -q is for suppressing error/warnings
|
|
||||||
send to stderr
|
|
||||||
|
|
||||||
mutool draw -F txt -q -N -o - %s
|
|
||||||
|
|
||||||
application/vnd.openxmlformats-officedocument.wordprocessingml.document
|
|
||||||
docx2txt '%s' -
|
|
||||||
|
|
||||||
application/msword
|
|
||||||
catdoc %s
|
|
||||||
|
|
||||||
application/vnd.oasis.opendocument.text
|
|
||||||
odt2txt %s
|
|
||||||
|
|
||||||
application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
|
|
||||||
xlsx2csv -d tab %s
|
|
||||||
|
|
||||||
application/vnd.ms-excel
|
|
||||||
xls2csv -d tab %s
|
|
||||||
|
|
||||||
text/html
|
|
||||||
html2text %s
|
|
||||||
|
|
||||||
Many office formats
|
|
||||||
unoconv -d document -f txt --stdout '%s'
|
|
||||||
|
|
||||||
Apache Tika is another option for creating plain text from various document
|
|
||||||
types. Just use curl to send the document to your tika server and get the
|
|
||||||
plain text in return.
|
|
||||||
|
|
||||||
curl -s -T '%s' http://localhost:9998/tika --header 'Accept: text/plain'
|
|
||||||
|
|
||||||
Conversion to pdf for pdf preview
|
|
||||||
==================================
|
|
||||||
|
|
||||||
text/plain
|
|
||||||
text/csv
|
|
||||||
application/csv
|
|
||||||
application/vnd.oasis.opendocument.text
|
|
||||||
application/msword
|
|
||||||
application/vnd.wordperfect
|
|
||||||
text/rtf
|
|
||||||
unoconv -d document -f pdf --stdout -v '%f' > '%o'
|
|
||||||
|
|
||||||
image/png
|
|
||||||
image/jpg
|
|
||||||
image/jpeg
|
|
||||||
convert -density 300 '%f' 'pdf:%o'
|
|
||||||
|
|
||||||
image/svg+xml
|
|
||||||
cairosvg -f pdf -o '%o' '%f'
|
|
||||||
|
|
||||||
application/vnd.ms-powerpoint
|
|
||||||
application/vnd.openxmlformats-officedocument.presentationml.presentation
|
|
||||||
application/vnd.oasis.opendocument.presentation
|
|
||||||
unoconv -d presentation -f pdf --stdout -v '%f' > '%o'
|
|
||||||
|
|
||||||
application/vnd.ms-excel
|
|
||||||
application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
|
|
||||||
application/vnd.oasis.opendocument.spreadsheet
|
|
||||||
unoconv -d spreadsheet -f pdf --stdout -v '%f' > '%o'
|
|
||||||
|
|
||||||
message/rfc822
|
|
||||||
java -jar emailconverter-2.5.3-all.jar '%f' -o '%o'
|
|
||||||
|
|
||||||
The emailconverter can be obtained from https://github.com/nickrussler/email-to-pdf-converter
|
|
||||||
It requires wkhtmltopdf which is part of debian.
|
|
||||||
|
|
||||||
text/plain
|
|
||||||
iconv -c -f utf-8 -t latin1 '%f' | a2ps -1 -q -a1 -R -B -o - - | ps2pdf - -
|
|
||||||
|
|
||||||
The parameter `-q` is important because a2ps sends some statistical
|
|
||||||
data to stderr, which makes SeedDMS believe the command has failed.
|
|
||||||
|
|
||||||
application/x-xopp
|
|
||||||
|
|
||||||
xournalpp -p "%o" "%f"
|
|
||||||
|
|
||||||
Converting from application/x-xopp to pdf only works if the xopp file
|
|
||||||
does not use a pdf document as a background, because this pdf is not
|
|
||||||
stored in the xopp fіle.
|
|
||||||
|
|
||||||
Conversion to png for preview images
|
|
||||||
=====================================
|
|
||||||
|
|
||||||
If you have problems running convert on PDF documents then read this page
|
|
||||||
https://askubuntu.com/questions/1081895/trouble-with-batch-conversion-of-png-to-pdf-using-convert
|
|
||||||
It basically instructs you to comment out the line
|
|
||||||
|
|
||||||
<policy domain="coder" rights="none" pattern="PDF" />
|
|
||||||
|
|
||||||
in /etc/ImageMagick-6/policy.xml
|
|
||||||
|
|
||||||
convert determines the format of the converted image from the extension of
|
|
||||||
the output filename. SeedDMS usually sets a propper extension when running
|
|
||||||
the command, but nevertheless it is good practice to explicitly set the output
|
|
||||||
format by prefixing the output filename with 'png:'. This is of course always
|
|
||||||
needed if the output goes to stdout.
|
|
||||||
|
|
||||||
image/jpg
|
|
||||||
image/jpeg
|
|
||||||
image/png
|
|
||||||
convert -resize %wx '%f' 'png:%o'
|
|
||||||
|
|
||||||
image/svg+xml
|
|
||||||
cairosvg -f png --output-width %w -o '%o' '%f'
|
|
||||||
|
|
||||||
text/plain
|
|
||||||
convert -density 100 -resize %wx 'text:%f[0]' 'png:%o'
|
|
||||||
|
|
||||||
application/pdf
|
|
||||||
gs -dBATCH -dNOPAUSE -sDEVICE=png16m -dPDFFitPage -r72x72 -sOutputFile=- -dFirstPage=1 -dLastPage=1 -q '%f' | convert -resize %wx png:- '%o'
|
|
||||||
|
|
||||||
convert -density 100 -resize %wx '%f[0]' 'png:%o'
|
|
||||||
|
|
||||||
mutool draw -F png -w %w -q -N -o '%o' '%f' 1
|
|
||||||
|
|
||||||
pdftocairo '%f' -png -singlefile -scale-to-x %w -scale-to-y -1 - > '%o'
|
|
||||||
|
|
||||||
pdftocairo needs to output to stdout because the output file name passed
|
|
||||||
to pdftocairo will be suffixed with png
|
|
||||||
|
|
||||||
application/postscript
|
|
||||||
convert -density 100 -resize %wx '%f[0]' 'png:%o'
|
|
||||||
|
|
||||||
text/plain
|
|
||||||
iconv -c -f utf-8 -t latin1 '%f' | a2ps -1 -q -a1 -R -B -o - - | gs -dBATCH -dNOPAUSE -sDEVICE=png16m -dFirstPage=1 -dLastPage=1 -dPDFFitPage -r72x72 -sOutputFile=- -q - | convert -resize %wx png:- 'png:%o'
|
|
||||||
|
|
||||||
On Linux systems you will have to set the desired value in /etc/papersize for a2ps
|
|
||||||
e.g. a4, or letter. Unfortunately, a2ps cannot process utf-8 encoded files. That's
|
|
||||||
why the input needs to be recoded with iconv or recode.
|
|
||||||
|
|
||||||
application/msword
|
|
||||||
application/vnd.oasis.opendocument.spreadsheet
|
|
||||||
application/vnd.oasis.opendocument.text
|
|
||||||
application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
|
|
||||||
application/vnd.ms-excel
|
|
||||||
application/vnd.openxmlformats-officedocument.wordprocessingml.document
|
|
||||||
text/rtf
|
|
||||||
application/vnd.ms-powerpoint
|
|
||||||
text/csv
|
|
||||||
application/csv
|
|
||||||
application/vnd.wordperfect
|
|
||||||
unoconv -d document -e PageRange=1 -f pdf --stdout -v '%f' | gs -dBATCH -dNOPAUSE -sDEVICE=pngalpha -dPDFFitPage -r72x72 -sOutputFile=- -dFirstPage=1 -dLastPage=1 -q - | convert -resize %wx png:- 'png:%o'
|
|
||||||
|
|
||||||
video/webm
|
|
||||||
video/mp4
|
|
||||||
This will take 12th frame of a video and converts into a png. It requires
|
|
||||||
ffmpeg to be installed.
|
|
||||||
|
|
||||||
convert -resize %wx "%f[12]" "png:%o"
|
|
||||||
|
|
||||||
You may as well use ffmpeg right away
|
|
||||||
|
|
||||||
ffmpeg -i "%f" -ss 00:00:02 -frames:v 1 -loglevel quiet -vf scale=%w:-1 -f apng "%o"
|
|
||||||
|
|
||||||
audio/mpeg
|
|
||||||
|
|
||||||
sox "%f" -n spectrogram -x 600 -Y 550 -r -l -o - | convert -resize %wx png:- "png:%o"
|
|
||||||
|
|
||||||
application/x-xopp
|
|
||||||
xournalpp -i "%o" --export-png-width=%w "%f"
|
|
||||||
|
|
||||||
Converting from application/x-xopp to png only works if the xopp file
|
|
||||||
does not use a pdf document as a background, because this pdf is not
|
|
||||||
stored in the xopp fіle.
|
|
||||||
254
doc/README.Converters.md
Normal file
254
doc/README.Converters.md
Normal file
|
|
@ -0,0 +1,254 @@
|
||||||
|
# Commands for converting documents
|
||||||
|
|
||||||
|
SeedDMS has a very sophisticated file conversion process which could
|
||||||
|
be used to convert any format into any other format, if there is either
|
||||||
|
a command (on the command line) or a SeedDMS extension with php code
|
||||||
|
doing the conversion. This could of course use an external service
|
||||||
|
(e.g. Tika) for doing the conversion. There are already several
|
||||||
|
extensions for this purpose and SeedDMS provides some built-in
|
||||||
|
conversions as well. Traditionally, conversion was just used
|
||||||
|
internally by SeedDMS (and this is still the main purpose), but
|
||||||
|
this may not be the only use case.
|
||||||
|
|
||||||
|
This file only contains commands for converting different document
|
||||||
|
types into
|
||||||
|
|
||||||
|
* text (for fulltext search)
|
||||||
|
* png (for preview images)
|
||||||
|
* pdf (for pdf documents)
|
||||||
|
|
||||||
|
Most of the required commands can easily be installed on a Linux
|
||||||
|
server, which is the preferred platform anyway. Other operating
|
||||||
|
systems may work as well, but your mileage may vary.
|
||||||
|
|
||||||
|
The conversion commands can be configured in the settings of SeedDMS.
|
||||||
|
|
||||||
|
A conversion may not necessarily output an exact equivalent of
|
||||||
|
the input file, but outputs a suitable representation, e.g.
|
||||||
|
converting an mp3 file into text may output the metadata or even the
|
||||||
|
lyrics of the song. Converting it into a preview image may result
|
||||||
|
in a picture of the album cover, or a graphical representation
|
||||||
|
of the spectrum.
|
||||||
|
|
||||||
|
Please note, that whenever a command outputs anything to stderr,
|
||||||
|
this will be considered as a failure of the command. Most command line
|
||||||
|
programs have a parameter (e.g. `-q`) to suppress such an output.
|
||||||
|
|
||||||
|
If you run php-fpm you may encounter problems with charsets based on
|
||||||
|
UTF-8. Programs like `catdoc` read LANG from the environment to
|
||||||
|
set the correct encoding of the output. php-fpm often clears the
|
||||||
|
environment and programs like `catdoc` will no longer output any
|
||||||
|
UTF-8 chars. In such a case you may want to set `clear_env=no` in
|
||||||
|
php-fpm's configuration. On Debian this is done in the file
|
||||||
|
`/etc/php/<php version>/fpm/pool.d/www.conf`. Search for `clear_env`.
|
||||||
|
|
||||||
|
The following sections will list possible conversion commands for
|
||||||
|
extracting text, creating an image, and converting to pdf.
|
||||||
|
|
||||||
|
## Conversion to text for fulltext search
|
||||||
|
|
||||||
|
### text/plain, text/csv, application/csv
|
||||||
|
|
||||||
|
`cat '%s'`
|
||||||
|
|
||||||
|
Unless you run a very old version of SeedDMS, you will never need
|
||||||
|
this command for converting text files. SeedDMS has this trivial
|
||||||
|
converter built in.
|
||||||
|
|
||||||
|
### application/pdf
|
||||||
|
|
||||||
|
`pdftotext -q -nopgbrk %s - | sed -e 's/ [a-zA-Z0-9.]\{1\} / /g' -e 's/[0-9.]//g'`
|
||||||
|
|
||||||
|
If pdftotext takes too long on a large document, then you may want to
|
||||||
|
pass parameter `-l` to specify the last page to be converted. `-q` is
|
||||||
|
for suppressing errors/warnings sent to stderr.
|
||||||
|
|
||||||
|
`mutool draw -F txt -q -N -o - %s`
|
||||||
|
|
||||||
|
### application/vnd.openxmlformats-officedocument.wordprocessingml.document
|
||||||
|
|
||||||
|
`docx2txt '%s' -`
|
||||||
|
|
||||||
|
### application/msword
|
||||||
|
|
||||||
|
`catdoc %s`
|
||||||
|
|
||||||
|
### application/vnd.oasis.opendocument.text
|
||||||
|
|
||||||
|
`odt2txt %s`
|
||||||
|
|
||||||
|
### application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
|
||||||
|
|
||||||
|
`xlsx2csv -d tab %s`
|
||||||
|
|
||||||
|
### application/vnd.ms-excel
|
||||||
|
|
||||||
|
`xls2csv -d tab %s`
|
||||||
|
|
||||||
|
### text/html
|
||||||
|
|
||||||
|
`html2text %s`
|
||||||
|
|
||||||
|
### Many office formats
|
||||||
|
|
||||||
|
Many office formats can be converted with `unoconv`, though this turned
|
||||||
|
out in the past to sometimes crash or take a long time.
|
||||||
|
|
||||||
|
`unoconv -d document -f txt --stdout '%s'`
|
||||||
|
|
||||||
|
Apache Tika is another option for creating plain text from various document
|
||||||
|
types. Just use `curl` to send the document to your tika server and get the
|
||||||
|
plain text in return.
|
||||||
|
|
||||||
|
`curl -s -T '%s' http://localhost:9998/tika --header 'Accept: text/plain'`
|
||||||
|
|
||||||
|
Of course this requires to first install Apache Tika when using the docker
|
||||||
|
image.
|
||||||
|
|
||||||
|
Finally, there is a SeedDMS extension
|
||||||
|
[unoserver](https://codeberg.org/SeedDMS/unoserver) which is based
|
||||||
|
on a project also called
|
||||||
|
[unoserver](https://github.com/unoconv/unoserver) and which is
|
||||||
|
available as docker image, making it quite easy to set up. Read the
|
||||||
|
documentation of the extension for more information.
|
||||||
|
|
||||||
|
## Conversion to pdf for pdf preview
|
||||||
|
|
||||||
|
### text/plain, text/csv, application/csv, application/vnd.oasis.opendocument.text, application/msword, application/vnd.wordperfect, text/rtf
|
||||||
|
|
||||||
|
`unoconv -d document -f pdf --stdout -v '%f' > '%o'`
|
||||||
|
|
||||||
|
### image/png, image/jpg, image/jpeg
|
||||||
|
|
||||||
|
`convert -density 300 '%f' 'pdf:%o'`
|
||||||
|
|
||||||
|
Actually `convert` can be used for many other image formats. There is
|
||||||
|
also a SeedDMS extension called
|
||||||
|
[convert_image](https://codeberg.org/SeedDMS/convert_image) which
|
||||||
|
embeds the image into a pdf file.
|
||||||
|
|
||||||
|
### image/svg+xml
|
||||||
|
|
||||||
|
`cairosvg -f pdf -o '%o' '%f'`
|
||||||
|
|
||||||
|
### application/vnd.ms-powerpoint, application/vnd.openxmlformats-officedocument.presentationml.presentation, application/vnd.oasis.opendocument.presentation
|
||||||
|
|
||||||
|
`unoconv -d presentation -f pdf --stdout -v '%f' > '%o'`
|
||||||
|
|
||||||
|
### application/vnd.ms-excel, application/vnd.openxmlformats-officedocument.spreadsheetml.sheet, application/vnd.oasis.opendocument.spreadsheet
|
||||||
|
|
||||||
|
`unoconv -d spreadsheet -f pdf --stdout -v '%f' > '%o'`
|
||||||
|
|
||||||
|
### message/rfc822
|
||||||
|
|
||||||
|
`java -jar emailconverter-2.5.3-all.jar '%f' -o '%o'`
|
||||||
|
|
||||||
|
The emailconverter can be obtained from https://github.com/nickrussler/email-to-pdf-converter
|
||||||
|
It requires `wkhtmltopdf` which is part of debian.
|
||||||
|
|
||||||
|
### text/plain
|
||||||
|
|
||||||
|
`iconv -c -f utf-8 -t latin1 '%f' | a2ps -1 -q -a1 -R -B -o - - | ps2pdf - -`
|
||||||
|
|
||||||
|
The parameter `-q` is important because a2ps sends some statistical
|
||||||
|
data to stderr, which makes SeedDMS believe the command has failed.
|
||||||
|
|
||||||
|
### application/x-xopp
|
||||||
|
|
||||||
|
`xournalpp -p "%o" "%f"`
|
||||||
|
|
||||||
|
Converting from application/x-xopp to pdf only works if the xopp file
|
||||||
|
does not use a pdf document as a background, because this pdf is not
|
||||||
|
stored in the xopp file.
|
||||||
|
|
||||||
|
### Many office formats
|
||||||
|
|
||||||
|
As already mentioned above, `unoconv` has some disadvantages. It is
|
||||||
|
recommended to use the `unoserver` SeedDMS extension already described
|
||||||
|
above.
|
||||||
|
|
||||||
|
## Conversion to png for preview images
|
||||||
|
|
||||||
|
If you have problems running convert on PDF documents then read the page
|
||||||
|
https://askubuntu.com/questions/1081895/trouble-with-batch-conversion-of-png-to-pdf-using-convert
|
||||||
|
It basically instructs you to comment out the line
|
||||||
|
|
||||||
|
```
|
||||||
|
<policy domain="coder" rights="none" pattern="PDF" />
|
||||||
|
```
|
||||||
|
|
||||||
|
in `/etc/ImageMagick-6/policy.xml`
|
||||||
|
|
||||||
|
`convert` determines the format of the converted image from the extension of
|
||||||
|
the output filename. SeedDMS usually sets a proper extension when running
|
||||||
|
the command, but nevertheless it is good practice to explicitly set the output
|
||||||
|
format by prefixing the output filename with 'png:'. This is of course always
|
||||||
|
needed if the output goes to stdout.
|
||||||
|
|
||||||
|
### image/jpg, image/jpeg, image/png
|
||||||
|
|
||||||
|
`convert -resize %wx '%f' 'png:%o'`
|
||||||
|
|
||||||
|
### image/svg+xml
|
||||||
|
|
||||||
|
`cairosvg -f png --output-width %w -o '%o' '%f'`
|
||||||
|
|
||||||
|
### text/plain
|
||||||
|
|
||||||
|
`convert -density 100 -resize %wx 'text:%f[0]' 'png:%o'`
|
||||||
|
|
||||||
|
### application/pdf
|
||||||
|
|
||||||
|
`gs -dBATCH -dNOPAUSE -sDEVICE=png16m -dPDFFitPage -r72x72 -sOutputFile=- -dFirstPage=1 -dLastPage=1 -q '%f' | convert -resize %wx png:- '%o'`
|
||||||
|
|
||||||
|
`convert -density 100 -resize %wx '%f[0]' 'png:%o'`
|
||||||
|
|
||||||
|
`mutool draw -F png -w %w -q -N -o '%o' '%f' 1`
|
||||||
|
|
||||||
|
`pdftocairo '%f' -png -singlefile -scale-to-x %w -scale-to-y -1 - > '%o'`
|
||||||
|
|
||||||
|
`pdftocairo` needs to output to stdout because the output file name passed
|
||||||
|
to pdftocairo will be suffixed with `.png`
|
||||||
|
|
||||||
|
### application/postscript
|
||||||
|
|
||||||
|
`convert -density 100 -resize %wx '%f[0]' 'png:%o'`
|
||||||
|
|
||||||
|
### text/plain
|
||||||
|
|
||||||
|
`iconv -c -f utf-8 -t latin1 '%f' | a2ps -1 -q -a1 -R -B -o - - | gs -dBATCH -dNOPAUSE -sDEVICE=png16m -dFirstPage=1 -dLastPage=1 -dPDFFitPage -r72x72 -sOutputFile=- -q - | convert -resize %wx png:- 'png:%o'`
|
||||||
|
|
||||||
|
On Linux systems you will have to set the desired value in /etc/papersize for a2ps
|
||||||
|
e.g. a4, or letter. Unfortunately, a2ps cannot process utf-8 encoded files. That's
|
||||||
|
why the input needs to be recoded with iconv or recode.
|
||||||
|
|
||||||
|
### application/msword, application/vnd.oasis.opendocument.spreadsheet, application/vnd.oasis.opendocument.text, application/vnd.openxmlformats-officedocument.spreadsheetml.sheet, application/vnd.ms-excel, application/vnd.openxmlformats-officedocument.wordprocessingml.document, text/rtf, application/vnd.ms-powerpoint, text/csv, application/csv, application/vnd.wordperfect
|
||||||
|
|
||||||
|
`unoconv -d document -e PageRange=1 -f pdf --stdout -v '%f' | gs -dBATCH -dNOPAUSE -sDEVICE=pngalpha -dPDFFitPage -r72x72 -sOutputFile=- -dFirstPage=1 -dLastPage=1 -q - | convert -resize %wx png:- 'png:%o'`
|
||||||
|
|
||||||
|
If you are looking for an easier solution, you should consider to
|
||||||
|
install the `unoserver` SeedDMS extension which was already described
|
||||||
|
above.
|
||||||
|
|
||||||
|
### video/webm, video/mp4
|
||||||
|
|
||||||
|
This will take the 12th frame of a video and convert it into a png. It requires
|
||||||
|
ffmpeg to be installed.
|
||||||
|
|
||||||
|
`convert -resize %wx "%f[12]" "png:%o"`
|
||||||
|
|
||||||
|
You may as well use ffmpeg right away
|
||||||
|
|
||||||
|
`ffmpeg -i "%f" -ss 00:00:02 -frames:v 1 -loglevel quiet -vf scale=%w:-1 -f apng "%o"`
|
||||||
|
|
||||||
|
### audio/mpeg
|
||||||
|
|
||||||
|
`sox "%f" -n spectrogram -x 600 -Y 550 -r -l -o - | convert -resize %wx png:- "png:%o"`
|
||||||
|
|
||||||
|
### application/x-xopp
|
||||||
|
|
||||||
|
`xournalpp -i "%o" --export-png-width=%w "%f"`
|
||||||
|
|
||||||
|
Converting from application/x-xopp to png only works if the xopp file
|
||||||
|
does not use a pdf document as a background, because this pdf is not
|
||||||
|
stored in the xopp file.
|
||||||
|
|
@ -4,6 +4,7 @@ Layout of installation
|
||||||
SeedDMS allows various kinds of installations with very individual layouts
|
SeedDMS allows various kinds of installations with very individual layouts
|
||||||
on disc. The proposed layout till version 5.1.6 was as the following:
|
on disc. The proposed layout till version 5.1.6 was as the following:
|
||||||
|
|
||||||
|
```
|
||||||
seeddms51x ---+--- data
|
seeddms51x ---+--- data
|
||||||
|
|
|
|
||||||
+--- pear
|
+--- pear
|
||||||
|
|
@ -11,29 +12,31 @@ seeddms51x ---+--- data
|
||||||
+--- seeddms-5.1.x
|
+--- seeddms-5.1.x
|
||||||
|
|
|
|
||||||
+--- www -> seeddms-5.1.x
|
+--- www -> seeddms-5.1.x
|
||||||
|
```
|
||||||
|
|
||||||
'data' contains all document files, the sqlite database (if used), the full text
|
`data` contains all document files, the sqlite database (if used), the full text
|
||||||
data, the log files, and the cached preview images.
|
data, the log files, and the cached preview images.
|
||||||
|
|
||||||
'pear' contains all PEAR packages including the four SeedDMS packages SeedDMS_Core,
|
`pear` contains all third party packages including the four SeedDMS packages SeedDMS_Core,
|
||||||
SeedDMS_Lucene, SeedDMS_Preview, SeedDMS_SQLiteFTS.
|
SeedDMS_Lucene, SeedDMS_Preview, SeedDMS_SQLiteFTS.
|
||||||
|
|
||||||
'seeddms-5.1.x' are the sources of seeddms and 'www' being a link on it.
|
`seeddms-5.1.x` contains the sources of SeedDMS, with `www` being a link to it.
|
||||||
|
|
||||||
This layout has disadvantages when updating the source of seeddms, because
|
This layout has disadvantages when updating the source of seeddms, because
|
||||||
the directories 'conf' and 'ext' had to be moved from 'seeddms-5.1.x' to
|
the directories `conf` and `ext` had to be moved from `seeddms-5.1.x` to
|
||||||
'seeddms-5.1.(x+1)'. 'conf' was also visible over the web unless it was
|
`seeddms-5.1.(x+1)`. `conf` was also visible over the web unless it was
|
||||||
protected by an .htaccess file. The .htaccess file has been shipped, but it
|
protected by an .htaccess file. The .htaccess file has been shipped, but it
|
||||||
is far better to keep senѕitive data out of the document root in the first
|
is far better to keep sensitive data out of the document root in the first
|
||||||
place.
|
place.
|
||||||
|
|
||||||
The new layout mostly retains that structure but uses more soft links to place
|
The new layout mostly retains that structure but uses more soft links to place
|
||||||
the local data outside of 'seeddms-5.1.x' which makes updating a lot easier
|
the local data outside of `seeddms-5.1.x` which makes updating a lot easier
|
||||||
and moves the configuration out of the document root.
|
and moves the configuration out of the document root.
|
||||||
As MS Windows does not support soft links, this change will only apply to Linux/Unix
|
As MS Windows does not support soft links, this change will only apply to Linux/Unix
|
||||||
systems. MS Windows users just skip all the soft links and set seeddms-5.1.x
|
systems. MS Windows users just skip all the soft links and set seeddms-5.1.x
|
||||||
as the document root. The new layout is the following:
|
as the document root. The new layout is the following:
|
||||||
|
|
||||||
|
```
|
||||||
seeddms51x ---+--- data --+-- log
|
seeddms51x ---+--- data --+-- log
|
||||||
| |
|
| |
|
||||||
| +-- cache
|
| +-- cache
|
||||||
|
|
@ -73,10 +76,12 @@ seeddms51x ---+--- data --+-- log
|
||||||
+-- index.php -> ../seeddms/index.php
|
+-- index.php -> ../seeddms/index.php
|
||||||
|
|
|
|
||||||
+-- ext
|
+-- ext
|
||||||
|
```
|
||||||
|
|
||||||
In order to convert to this layout you need to do the following in the seeddms51x
|
In order to convert to this layout you need to do the following in the seeddms51x
|
||||||
directory (replace the 'x' in '5.1.x' with the correct number):
|
directory (replace the 'x' in '5.1.x' with the correct number):
|
||||||
|
|
||||||
|
```
|
||||||
ln -s seeddms-5.1.x seeddms
|
ln -s seeddms-5.1.x seeddms
|
||||||
mv www/conf .
|
mv www/conf .
|
||||||
mv seeddms-5.1.x/ext www
|
mv seeddms-5.1.x/ext www
|
||||||
|
|
@ -93,3 +98,8 @@ ln -s ../seeddms/webdav
|
||||||
ln -s ../seeddms/restapi
|
ln -s ../seeddms/restapi
|
||||||
ln -s ../seeddms/pdfviewer
|
ln -s ../seeddms/pdfviewer
|
||||||
ln -s ../seeddms/index.php
|
ln -s ../seeddms/index.php
|
||||||
|
```
|
||||||
|
|
||||||
|
Since version 5.1.42 the layout has changed slightly again. The directory
|
||||||
|
`pear` which had only a subdirectory `vendor` disappeared and the `vendor` has
|
||||||
|
moved one level up.
|
||||||
|
|
@ -1,12 +1,11 @@
|
||||||
Extensions in SeedDMS
|
# Extensions in SeedDMS
|
||||||
=====================
|
|
||||||
|
|
||||||
Since verson 5.0.0 SeedDMS can be extended by extensions. Extensions
|
Since version 5.0.0 SeedDMS can be extended by extensions. Extensions
|
||||||
can hook up functions into certain operations, e.g.
|
can hook up functions into certain operations, e.g.
|
||||||
uploading, removing or displaying a document. They can also be
|
uploading, removing or displaying a document. They can also be
|
||||||
used to modify some of the internal variables like the list of
|
used to modify some of the internal variables like the list of
|
||||||
translations and they can even replace classes in the core of
|
translations and they can even replace classes in the core of
|
||||||
seeddms and hook up functions into certain operations in the core.
|
SeedDMS and hook up functions into certain operations in the core.
|
||||||
|
|
||||||
All extensions are located in the folder 'ext'. Each extension
|
All extensions are located in the folder 'ext'. Each extension
|
||||||
has its own folder named by the name of the extension. The central
|
has its own folder named by the name of the extension. The central
|
||||||
|
|
@ -19,12 +18,13 @@ the extension manager if it was changed.
|
||||||
|
|
||||||
The integration into SeedDMS is done by hooks, class and file
|
The integration into SeedDMS is done by hooks, class and file
|
||||||
overloading. SeedDMS manages
|
overloading. SeedDMS manages
|
||||||
a globally available array of hooks ($GLOBALS['SEEDDMS_HOOKS']).
|
a globally available array of hooks (`$GLOBALS['SEEDDMS_HOOKS']`).
|
||||||
This array has the elements 'view' and 'controller'. All entries
|
This array has the elements `view` and `controller`. All entries
|
||||||
in those array elements contain instances of self defined classes
|
in those array elements contain instances of self defined classes
|
||||||
containing the hook methods. For setting up the hooks in the view
|
containing the hook methods. For setting up the hooks in the view
|
||||||
'viewFolder' the following code is needed.
|
`viewFolder` the following code is needed.
|
||||||
|
|
||||||
|
```
|
||||||
$GLOBALS['SEEDDMS_HOOKS']['view']['viewFolder'][] = new SeedDMS_ExtExample_ViewFolder;
|
$GLOBALS['SEEDDMS_HOOKS']['view']['viewFolder'][] = new SeedDMS_ExtExample_ViewFolder;
|
||||||
|
|
||||||
class SeedDMS_ExtExample_ViewFolder {
|
class SeedDMS_ExtExample_ViewFolder {
|
||||||
|
|
@ -39,15 +39,16 @@ $GLOBALS['SEEDDMS_HOOKS']['controller']['removeFolder'][] = new SeedDMS_ExtExamp
|
||||||
class SeedDMS_ExtExample_RemoveFolder {
|
class SeedDMS_ExtExample_RemoveFolder {
|
||||||
...
|
...
|
||||||
};
|
};
|
||||||
|
```
|
||||||
|
|
||||||
Based on these two variants of adding hooks to the seeddms application code,
|
Based on these two variants of adding hooks to the SeedDMS application code,
|
||||||
the seeddms core can be extended by implementing the controller hook 'initDMS'
|
the SeedDMS core can be extended by implementing the controller hook 'initDMS'
|
||||||
which is called right after the class SeedDMS_Core_DMS has been initiated.
|
which is called right after the class SeedDMS_Core_DMS has been initiated.
|
||||||
|
|
||||||
Beside hooks and callbacks another way of modifying seeddms is given
|
Beside hooks and callbacks another way of modifying SeedDMS is given
|
||||||
by overloading the files in the directory 'views' and 'controllers'. Both
|
by overloading the files in the directory 'views' and 'controllers'. Both
|
||||||
directories contain class files with a single class for either running
|
directories contain class files with a single class for either running
|
||||||
controller or view code. If an extension provides those file in its
|
controller or view code. If an extension provides those file in its
|
||||||
own extension dir, they will be used instead of the files shipped with
|
own extension dir, they will be used instead of the files shipped with
|
||||||
seeddms.
|
SeedDMS.
|
||||||
|
|
||||||
|
|
@ -1,18 +0,0 @@
|
||||||
Adding authentication failure check for fail2ban
|
|
||||||
=================================================
|
|
||||||
|
|
||||||
You will have to use 5.1.10 for this to work.
|
|
||||||
|
|
||||||
Add a filter /etc/fail2ban/filter.d/seeddms.conf with the content
|
|
||||||
|
|
||||||
[Definition]
|
|
||||||
|
|
||||||
failregex = \[error\] -- \(<HOST>\) op.Login login failed
|
|
||||||
|
|
||||||
then configure a new jail in /etc/fail2ban/jail.d/seeddms.conf
|
|
||||||
|
|
||||||
[seeddms]
|
|
||||||
enabled = yes
|
|
||||||
port = http,https
|
|
||||||
filter = seeddms
|
|
||||||
logpath = /home/www-data/seeddms-demo/data/log/*.log
|
|
||||||
33
doc/README.Fail2ban.md
Normal file
33
doc/README.Fail2ban.md
Normal file
|
|
@ -0,0 +1,33 @@
|
||||||
|
Adding authentication failure check for fail2ban
|
||||||
|
=================================================
|
||||||
|
|
||||||
|
Fail2ban is a very mature and sophisticated program to detect attacks on
|
||||||
|
a service by checking its log file. If such an attack is detected, an
|
||||||
|
action will be executed, which will mostly ban the IP of the attacker
|
||||||
|
for a configurable amount of time.
|
||||||
|
|
||||||
|
You will have to use at least SeedDMS 5.1.10 for this to work.
|
||||||
|
|
||||||
|
Add a filter `/etc/fail2ban/filter.d/seeddms.conf` with the content
|
||||||
|
|
||||||
|
```
|
||||||
|
[Definition]
|
||||||
|
|
||||||
|
failregex = \[error\] -- \(<HOST>\) op.Login login failed
|
||||||
|
```
|
||||||
|
|
||||||
|
This will tell fail2ban which lines in the log file are considered
|
||||||
|
to be an incident. Here it is a failed login.
|
||||||
|
|
||||||
|
Then configure a new jail in `/etc/fail2ban/jail.d/seeddms.conf`
|
||||||
|
|
||||||
|
```
|
||||||
|
[seeddms]
|
||||||
|
enabled = yes
|
||||||
|
port = http,https
|
||||||
|
filter = seeddms
|
||||||
|
logpath = /home/www-data/seeddms-demo/data/log/*.log
|
||||||
|
```
|
||||||
|
|
||||||
|
It tells fail2ban which log files shall be analysed, and which filter
|
||||||
|
has to be applied.
|
||||||
|
|
@ -6,7 +6,7 @@ session which is stored in a local file named `cookies.txt`.
|
||||||
The authentication is done with the user `admin`. You may use any other
|
The authentication is done with the user `admin`. You may use any other
|
||||||
user as well.
|
user as well.
|
||||||
|
|
||||||
You may as well pass `-H Authorization: <api key>` instead of `-b cookies.txt`
|
You can pass `-H Authorization: <api key>` instead of `-b cookies.txt`
|
||||||
to `curl` after setting the api key in the configuration of your SeedDMS.
|
to `curl` after setting the api key in the configuration of your SeedDMS.
|
||||||
Of course, in that case you will not need the initial call of the `login`
|
Of course, in that case you will not need the initial call of the `login`
|
||||||
endpoint.
|
endpoint.
|
||||||
|
|
@ -48,8 +48,16 @@ curl --silent -H "Authorization: <api key>" -X GET "${BASEURL}restapi/index.php/
|
||||||
## Notes
|
## Notes
|
||||||
|
|
||||||
Make sure to encode the data properly when using restapi functions which uses
|
Make sure to encode the data properly when using restapi functions which use
|
||||||
put. If you use curl with PHP, then encode the data as the following
|
`put`. If you use curl with PHP, then encode the data as shown in the following
|
||||||
|
lines of code:
|
||||||
|
|
||||||
curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query($data));
|
```
|
||||||
curl_setopt($ch, CURLOPT_HTTPHEADER, array('Content-Type: application/x-www-form-urlencoded'));
|
curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query($data));
|
||||||
|
curl_setopt($ch, CURLOPT_HTTPHEADER, array('Content-Type: application/x-www-form-urlencoded'));
|
||||||
|
```
|
||||||
|
|
||||||
|
## Bruno
|
||||||
|
|
||||||
|
[Bruno](https://www.usebruno.com/) is an application for testing and exploring
|
||||||
|
Rest APIs. This [git repository](https://codeberg.org/SeedDMS/bruno) contains
|
||||||
|
the configuration for SeedDMS.
|
||||||
|
|
|
||||||
|
|
@ -1,143 +0,0 @@
|
||||||
This README was written by Eric Smith
|
|
||||||
|
|
||||||
======================================================
|
|
||||||
Steps that I took to install SeedDMS on Ubuntu 12.10
|
|
||||||
- a personal account and not an authoritative guide.
|
|
||||||
======================================================
|
|
||||||
|
|
||||||
Download four tar balls from;
|
|
||||||
http://sourceforge.net/projects/seeddms/files/seeddms-4.0.0-pre5/
|
|
||||||
|
|
||||||
seeddms-4.0.0-pre5.tar.gz
|
|
||||||
SeedDMS_Preview-1.0.0.tgz
|
|
||||||
SeedDMS_Lucene-1.1.1.tgz
|
|
||||||
SeedDMS_Core-4.0.0pre5.tgz
|
|
||||||
|
|
||||||
Install as follows the pear components:
|
|
||||||
sudo pear install SeedDMS_Core-4.0.0pre5.tgz
|
|
||||||
sudo pear install SeedDMS_Preview-1.0.0.tgz
|
|
||||||
sudo pear install SeedDMS_Lucene-1.1.1.tgz
|
|
||||||
|
|
||||||
Download and install the pear Log application:
|
|
||||||
wget http://download.pear.php.net/package/Log-1.12.7.tgz
|
|
||||||
sudo pear install Log-1.12.7.tgz
|
|
||||||
|
|
||||||
And zend:
|
|
||||||
sudo pear channel-discover zend.googlecode.com/svn
|
|
||||||
sudo pear install zend/zend
|
|
||||||
|
|
||||||
I installed the following packages, not all of which may be required
|
|
||||||
and you may require other packages, please check the dependencies on
|
|
||||||
the README.md for example for full text search, you need pdftotext,
|
|
||||||
catdoc, xls2csv or scconvert, cat, id3
|
|
||||||
|
|
||||||
sudo apt-get install php5-mysql php5-mysqlnd libapache2-mod-php5
|
|
||||||
sudo apt-get install pdo_mysql php5-gd id3 scconvert
|
|
||||||
sudo apt-get install php-http-webdav-server
|
|
||||||
sudo apt-get install zend-framework zend-framework-bin
|
|
||||||
sudo apt-get install libzend-framework-zendx-php
|
|
||||||
sudo apt-get install libjs-dojo-core libjs-dojo-dijit libjs-dojo-dojox
|
|
||||||
sudo apt-get install libzend-framework-php (It kept bitching about Zend so I just kept piling on packages until it worked)
|
|
||||||
|
|
||||||
mbstring is already a part of libapache2-mod-php5
|
|
||||||
pepper:~> show libapache2-mod-php5|grep mbstring
|
|
||||||
mbstring mhash openssl pcre Phar posix Reflection session shmop SimpleXML
|
|
||||||
|
|
||||||
|
|
||||||
Define three locations:
|
|
||||||
[1] Some cosy place in yourfile system for the source files to which you
|
|
||||||
will link
|
|
||||||
I chose "/opt/seeddms-4.0.0-pre5/"
|
|
||||||
untar seeddms-4.0.0-pre5.tar.gz into this location
|
|
||||||
|
|
||||||
[2] Make a directory and three subdirectories for the data for your site;
|
|
||||||
I chose to do this under "/opt/dms/seeddms_multisite_test/data"
|
|
||||||
sudo mkdir -p /opt/dms/seeddms_multisite_test/data/lucene/
|
|
||||||
sudo mkdir /opt/dms/seeddms_multisite_test/data/staging/
|
|
||||||
sudo mkdir /opt/dms/seeddms_multisite_test/data/cache/
|
|
||||||
|
|
||||||
Give ownership (or write access) to your httpd process to those directories;
|
|
||||||
sudo chown -cvR www-data /opt/dms/seeddms_multisite_test/data/
|
|
||||||
|
|
||||||
[3] Somewhere under your www root, make a directory for the sources of
|
|
||||||
your site:
|
|
||||||
These can be of course under different virtual domains.
|
|
||||||
/var/www/www.mydomain.eu/seeddms_multisite_test
|
|
||||||
cd /var/www/www.mydomain.eu/seeddms_multisite_test;
|
|
||||||
sudo ln -s /opt/seeddms-4.0.0-pre5 src (README.md does not include the `src'!)
|
|
||||||
ln -s src/inc inc
|
|
||||||
ln -s src/op op
|
|
||||||
ln -s src/out out
|
|
||||||
ln -s src/js js
|
|
||||||
ln -s src/views views
|
|
||||||
ln -s src/languages languages
|
|
||||||
ln -s src/styles styles
|
|
||||||
ln -s src/themes themes
|
|
||||||
ln -s src/install install
|
|
||||||
ln -s src/index.php index.php
|
|
||||||
|
|
||||||
If need be;
|
|
||||||
sudo chown -cvR www-data /var/www/www.mydomain.eu/seeddms_multisite_test/
|
|
||||||
|
|
||||||
Create Dataabse;
|
|
||||||
Run the following sql commands to create your db and a user with
|
|
||||||
appropriate privileges.
|
|
||||||
|
|
||||||
mysql> create database seeddms_multisite_test;
|
|
||||||
mysql> grant all privileges on seeddms_multisite_test.* to seeddms@localhost identified by 'your_passwd';
|
|
||||||
|
|
||||||
|
|
||||||
Point your browser to the location of your instance as in [3] above
|
|
||||||
and /install
|
|
||||||
I resorted to a text browser on my server due to failure to access the
|
|
||||||
db from a remote browser;
|
|
||||||
|
|
||||||
pepper:~> elinks www.mydomain.eu/seeddms_multisite_test/install
|
|
||||||
|
|
||||||
This is how I filled it in;
|
|
||||||
SeedDMS: INSTALL
|
|
||||||
SeedDMS Installation for version 4.0.0
|
|
||||||
|
|
||||||
Server settings
|
|
||||||
Root directory: /opt/seeddms-4.0.0-pre5/_______________________
|
|
||||||
Http Root: /seeddms_multisite_test/_______________________
|
|
||||||
Content directory: /opt/dms/seeddms_multisite_test/data___________
|
|
||||||
Directory for full text index: /opt/dms/seeddms_multisite_test/data/lucene/___
|
|
||||||
Directory for partial uploads: /opt/dms/seeddms_multisite_test/data/staging/__
|
|
||||||
Core SeedDMS directory: _______________________________________________
|
|
||||||
Lucene SeedDMS directory: _______________________________________________
|
|
||||||
Extra PHP include Path: _______________________________________________
|
|
||||||
Database settings
|
|
||||||
Database Type: mysql________________
|
|
||||||
Server name: localhost____________
|
|
||||||
Database: seeddms_multisite_tes
|
|
||||||
Username: seeddms______________
|
|
||||||
Password: ********_____________
|
|
||||||
Create database tables: [X]
|
|
||||||
|
|
||||||
[ Apply ]
|
|
||||||
|
|
||||||
|
|
||||||
If all is okay (and I hope this happens more quickly for you than for me),
|
|
||||||
you should be notified accordingly and invited to login to your new site
|
|
||||||
with credentials admin/admin. (This password is cleverly set to expire
|
|
||||||
in a couple of days. So do not get a shock like I did when it suddenly
|
|
||||||
does not work).
|
|
||||||
|
|
||||||
-------------------------------------------------------------------------------
|
|
||||||
|
|
||||||
To make additional sites;
|
|
||||||
|
|
||||||
If you wish to make additional sites, you need to copy the data directories thusly;
|
|
||||||
sudo cp -avr /opt/dms/seeddms_multisite_test /opt/dms/seeddms_multisite_test_2
|
|
||||||
And the sources thusly;
|
|
||||||
sudo cp -avr /var/www/www.mydomain.eu/seeddms_multisite_test /var/www/www.mydomain.eu/seeddms_multisite_test_2
|
|
||||||
|
|
||||||
And of course make data directories for this site:
|
|
||||||
sudo mkdir -p /opt/dms/seeddms_multisite_test_2/data/lucene/
|
|
||||||
sudo mkdir /opt/dms/seeddms_multisite_test_2/data/staging/
|
|
||||||
sudo mkdir /opt/dms/seeddms_multisite_test_2/data/cache/
|
|
||||||
|
|
||||||
Then create another database as shown above but of course give the db
|
|
||||||
another name.
|
|
||||||
Run the install again from the new location.
|
|
||||||
|
|
@ -3,7 +3,7 @@ WebDAV
|
||||||
|
|
||||||
SeedDMS has support for WebDAV which allows to easily add, delete,
|
SeedDMS has support for WebDAV which allows to easily add, delete,
|
||||||
move, copy and modify documents. All operating systems have support
|
move, copy and modify documents. All operating systems have support
|
||||||
for WebDAV as well, but the implemtations and their behaviour varys
|
for WebDAV, but the implementations and their behaviour vary
|
||||||
and consequently you may run into various problems. If this happens
|
and consequently you may run into various problems. If this happens
|
||||||
just file a bug report at https://sourceforge.net/projects/seeddms
|
just file a bug report at https://sourceforge.net/projects/seeddms
|
||||||
|
|
||||||
|
|
@ -34,27 +34,35 @@ Configuring davfs2
|
||||||
On Linux it is quite simple to mount the SeedDMS WebDAV server with
|
On Linux it is quite simple to mount the SeedDMS WebDAV server with
|
||||||
davfs2. Just place a line like the following in your /etc/fstab
|
davfs2. Just place a line like the following in your /etc/fstab
|
||||||
|
|
||||||
|
```
|
||||||
http://seeddms.your-domain.com/webdav/index.php /media/webdav davfs noauto,user,rw,uid=1000,gid=1000
|
http://seeddms.your-domain.com/webdav/index.php /media/webdav davfs noauto,user,rw,uid=1000,gid=1000
|
||||||
|
```
|
||||||
|
|
||||||
and mount it as root with
|
and mount it as root with
|
||||||
|
|
||||||
|
```
|
||||||
mount /media/webdav davfs
|
mount /media/webdav davfs
|
||||||
|
```
|
||||||
|
|
||||||
You may as well want to configure davfs2 in /etc/davfs2/davfs2.conf by setting
|
You may as well want to configure davfs2 in /etc/davfs2/davfs2.conf by setting
|
||||||
|
|
||||||
|
```
|
||||||
[/media/webdav]
|
[/media/webdav]
|
||||||
use_locks 0
|
use_locks 0
|
||||||
gui_optimize 1
|
gui_optimize 1
|
||||||
|
```
|
||||||
|
|
||||||
and possibly add your login data to /etc/davfs2/secrets
|
and possibly add your login data to `/etc/davfs2/secrets`
|
||||||
|
|
||||||
|
```
|
||||||
/media/webdav admin secret
|
/media/webdav admin secret
|
||||||
|
```
|
||||||
|
|
||||||
Making applications work with WebDAV
|
Making applications work with WebDAV
|
||||||
-------------------------------------
|
-------------------------------------
|
||||||
|
|
||||||
Various programms have differnt strategies to save files to disc and
|
Various programs have different strategies to save files to disc and
|
||||||
prevent data lost under all circumstances. Those strategies often don't
|
to prevent data loss under all circumstances. Those strategies often don't
|
||||||
work very well an a WebDAV-Server. The following will list some of those
|
work very well on a WebDAV server. The following will list some of those
|
||||||
strategies.
|
strategies.
|
||||||
|
|
||||||
|
|
@ -79,19 +87,25 @@ the old document. If you don't want this behaviour, then tell vim
|
||||||
to not create the backup file. You can do that by either passing additional
|
to not create the backup file. You can do that by either passing additional
|
||||||
parameters to vim
|
parameters to vim
|
||||||
|
|
||||||
|
```
|
||||||
vi "+set nobackup" "+set nowritebackup" -n test.txt
|
vi "+set nobackup" "+set nowritebackup" -n test.txt
|
||||||
|
```
|
||||||
|
|
||||||
or by setting them in your .vimrc
|
or by setting them in your .vimrc
|
||||||
|
|
||||||
|
```
|
||||||
set nobackup
|
set nobackup
|
||||||
set nowritebackup
|
set nowritebackup
|
||||||
set noswapfile
|
set noswapfile
|
||||||
|
```
|
||||||
|
|
||||||
If you want to restrict the settings to the directory where the dms
|
If you want to restrict the settings to the directory where the dms
|
||||||
is mounted by webdav, e.g. /media/webdav, you can set an auto command
|
is mounted by webdav, e.g. /media/webdav, you can set an auto command
|
||||||
in .vimrc
|
in `.vimrc`
|
||||||
|
|
||||||
|
```
|
||||||
autocmd BufNewFile,BufRead /media/webdav/* set nobackup nowritebackup noswapfile
|
autocmd BufNewFile,BufRead /media/webdav/* set nobackup nowritebackup noswapfile
|
||||||
|
```
|
||||||
|
|
||||||
Creating the backup file in a directory outside of WebDAV doesn't help in
|
Creating the backup file in a directory outside of WebDAV doesn't help in
|
||||||
this case, because it still does the file renaming which is turned off by
|
this case, because it still does the file renaming which is turned off by
|
||||||
|
|
@ -107,7 +121,9 @@ If webdav access isn't working, this client is probably the best for testing.
|
||||||
|
|
||||||
Just run
|
Just run
|
||||||
|
|
||||||
|
```
|
||||||
cadaver https://<your-domain>/<your-basedir>/webdav/index.php
|
cadaver https://<your-domain>/<your-basedir>/webdav/index.php
|
||||||
|
```
|
||||||
|
|
||||||
It will ask for the user name and password. Once you are logged in just
|
It will ask for the user name and password. Once you are logged in just
|
||||||
type `help` for a list of commands.
|
type `help` for a list of commands.
|
||||||
|
|
@ -115,19 +131,27 @@ type `help` for a list of commands.
|
||||||
SeedDMS stores a lot more properties not covered by the webdav standard.
|
SeedDMS stores a lot more properties not covered by the webdav standard.
|
||||||
Those have its own namespace called 'SeedDMS:'. Just type
|
Those have their own namespace called 'SeedDMS:'. Just type
|
||||||
|
|
||||||
|
```
|
||||||
propget <resource>
|
propget <resource>
|
||||||
|
```
|
||||||
|
|
||||||
with `resource` being either the name of a folder or document. You will
|
with `resource` being either the name of a folder or document. You will
|
||||||
get a list of all properties stored for this resource. Setting a property
|
get a list of all properties stored for this resource. Setting a property
|
||||||
requires to set the namespace first
|
requires to set the namespace first
|
||||||
|
|
||||||
|
```
|
||||||
set namespace SeedDMS:
|
set namespace SeedDMS:
|
||||||
|
```
|
||||||
|
|
||||||
Afterwards, you may set a property, e.g. the comment, with
|
Afterwards, you may set a property, e.g. the comment, with
|
||||||
|
|
||||||
|
```
|
||||||
propset <resource> comment 'Just a comment'
|
propset <resource> comment 'Just a comment'
|
||||||
|
```
|
||||||
|
|
||||||
or even delete a property
|
or even delete a property
|
||||||
|
|
||||||
|
```
|
||||||
propdel <resource> comment
|
propdel <resource> comment
|
||||||
|
```
|
||||||
|
|
||||||
|
|
@ -1,59 +0,0 @@
|
||||||
OCR
|
|
||||||
====
|
|
||||||
|
|
||||||
SeedDMS itself has no support for optical character recognition (OCR)
|
|
||||||
because it does not care about the content of file. Though, external
|
|
||||||
OCR software can be used to convert an image into text and index it
|
|
||||||
by the full text search engine.
|
|
||||||
|
|
||||||
The following script can be use to convert a scanned image into pdf
|
|
||||||
with a text layer added. The script actually takes this file to
|
|
||||||
ran it through pdftotext. It was published in the seeddms forum
|
|
||||||
https://sourceforge.net/p/seeddms/discussion/general/thread/4ec5973d/
|
|
||||||
|
|
||||||
|
|
||||||
#!/bin/bash
|
|
||||||
inputpdf=$1
|
|
||||||
temp_folder=/tmp/seedinput/$(date +"%Y_%m_%d_%H%M%S")/
|
|
||||||
lockfile=/tmp/seed
|
|
||||||
protokolldatei=./tesser_syslog
|
|
||||||
cores=2
|
|
||||||
|
|
||||||
mkdir -p $lockfile
|
|
||||||
|
|
||||||
while [ -e "$lockfile"/"`basename $0`" ];
|
|
||||||
do
|
|
||||||
sleep 5
|
|
||||||
done
|
|
||||||
|
|
||||||
if ( set -o noclobber; echo "locked" > "$lockfile"/"`basename $0`"); then
|
|
||||||
|
|
||||||
trap 'rm -f "$lockfile"/"`basename $0`"; echo $(date) " Lockdatei wird geloescht: " $lockfile"/"`basename $0` Aufrufparameter: $* >> $protokolldatei ;rm -r $temp_folder; exit $?' INT TERM KILL EXIT
|
|
||||||
#das Datum mit dem Scriptnamen in die Protokolldatei schreiben
|
|
||||||
echo $(date) " Lockdatei erstellt: " $lockfile"/"`basename $0` >> $protokolldatei
|
|
||||||
|
|
||||||
else
|
|
||||||
#Script beenden falls Lockdatei nicht erstellt werden konnte
|
|
||||||
echo $(date) " Programm wird beendet, Lockdatei konnte nicht erstellt werden: $lockfile"/"`basename $0` Aufrufparameter: $* " >> $protokolldatei
|
|
||||||
exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
mkdir -p $temp_folder
|
|
||||||
|
|
||||||
$(pdftotext -raw $1 - 1> $temp_folder''tmp.txt )
|
|
||||||
pdf_contents=`cat $temp_folder''tmp.txt`
|
|
||||||
pdf_contents=`echo "$pdf_contents" | tr -dc '[:print:]'`
|
|
||||||
if [ -z "$pdf_contents" ]; then
|
|
||||||
convert -density 300 -quality 95 $inputpdf +adjoin $temp_folder''image%03d.jpg
|
|
||||||
find $temp_folder -name '*.jpg'| parallel --gnu -j $cores tesseract -l deu --psm 6 {} {} pdf
|
|
||||||
|
|
||||||
num=`find $temp_folder -name '*.pdf'| wc -l`
|
|
||||||
if [ "$num" -gt "1" ]; then
|
|
||||||
pdfunite $temp_folder*.pdf $temp_folder''tmp.pdf
|
|
||||||
else
|
|
||||||
mv $temp_folder*.pdf $temp_folder''tmp.pdf
|
|
||||||
fi
|
|
||||||
pdftotext $temp_folder''tmp.pdf $temp_folder''tmp.txt
|
|
||||||
mv $temp_folder''tmp.pdf $1
|
|
||||||
fi
|
|
||||||
cat $temp_folder''tmp.txt
|
|
||||||
63
doc/README.ocr.md
Normal file
63
doc/README.ocr.md
Normal file
|
|
@ -0,0 +1,63 @@
|
||||||
|
OCR
|
||||||
|
====
|
||||||
|
|
||||||
|
SeedDMS itself has no support for optical character recognition (OCR)
|
||||||
|
because it does not care about the content of a file. Though, external
|
||||||
|
OCR software can be used to convert an image into text and index it
|
||||||
|
by the full text search engine. From SeedDMS point of view, it would
|
||||||
|
be sufficient to have a conversion service which converts an image
|
||||||
|
into text. This can be implemented in any possible way, but most
|
||||||
|
likely as a SeedDMS extension.
|
||||||
|
|
||||||
|
The following script can be used to convert a pdf with scanned images
|
||||||
|
into text. The script converts each page into an image, runs it through
|
||||||
|
tesseract, which creates a pdf again containing a text layer. All those
|
||||||
|
pdf documents will be united into a single pdf and run through `pdftotext` again.
|
||||||
|
It was published in the SeedDMS forum
|
||||||
|
https://sourceforge.net/p/seeddms/discussion/general/thread/4ec5973d/
|
||||||
|
|
||||||
|
```
|
||||||
|
#!/bin/bash
|
||||||
|
inputpdf=$1
|
||||||
|
temp_folder=/tmp/seedinput/$(date +"%Y_%m_%d_%H%M%S")/
|
||||||
|
lockfile=/tmp/seed
|
||||||
|
protokolldatei=./tesser_syslog
|
||||||
|
cores=2
|
||||||
|
|
||||||
|
mkdir -p $lockfile
|
||||||
|
|
||||||
|
while [ -e "$lockfile"/"`basename $0`" ];
|
||||||
|
do
|
||||||
|
sleep 5
|
||||||
|
done
|
||||||
|
|
||||||
|
if ( set -o noclobber; echo "locked" > "$lockfile"/"`basename $0`"); then
|
||||||
|
trap 'rm -f "$lockfile"/"`basename $0`"; echo $(date) " Lock file will be deleted: " $lockfile"/"`basename $0` Aufrufparameter: $* >> $protokolldatei ;rm -r $temp_folder; exit $?' INT TERM KILL EXIT
|
||||||
|
# write date and script name into log file
|
||||||
|
echo $(date) " Lock file created: " $lockfile"/"`basename $0` >> $protokolldatei
|
||||||
|
else
|
||||||
|
# Exit script if lock file could not be created
|
||||||
|
echo $(date) " Script will exit, because lock file could not be created: $lockfile"/"`basename $0` Aufrufparameter: $* " >> $protokolldatei
|
||||||
|
exit 1
|
||||||
|
fi
|
||||||
|
|
||||||
|
mkdir -p $temp_folder
|
||||||
|
|
||||||
|
$(pdftotext -raw $1 - 1> $temp_folder''tmp.txt )
|
||||||
|
pdf_contents=`cat $temp_folder''tmp.txt`
|
||||||
|
pdf_contents=`echo "$pdf_contents" | tr -dc '[:print:]'`
|
||||||
|
if [ -z "$pdf_contents" ]; then
|
||||||
|
convert -density 300 -quality 95 $inputpdf +adjoin $temp_folder''image%03d.jpg
|
||||||
|
find $temp_folder -name '*.jpg'| parallel --gnu -j $cores tesseract -l deu --psm 6 {} {} pdf
|
||||||
|
|
||||||
|
num=`find $temp_folder -name '*.pdf'| wc -l`
|
||||||
|
if [ "$num" -gt "1" ]; then
|
||||||
|
pdfunite $temp_folder*.pdf $temp_folder''tmp.pdf
|
||||||
|
else
|
||||||
|
mv $temp_folder*.pdf $temp_folder''tmp.pdf
|
||||||
|
fi
|
||||||
|
pdftotext $temp_folder''tmp.pdf $temp_folder''tmp.txt
|
||||||
|
mv $temp_folder''tmp.pdf $1
|
||||||
|
fi
|
||||||
|
cat $temp_folder''tmp.txt
|
||||||
|
```
|
||||||
Loading…
Reference in New Issue
Block a user