initial version

This commit is contained in:
Niels Lippke 2020-01-17 14:44:07 +01:00
commit 8143adbaf5
7 changed files with 2229 additions and 0 deletions

33
Dockerfile Normal file
View File

@ -0,0 +1,33 @@
# SeedDMS (quickstart bundle) on PHP 7.4 / Apache, with OCR and text-extraction
# tools used by the full-text converters configured in sources/settings.xml.
FROM php:7.4-apache
LABEL maintainer="Niels Lippke<nlippke@gmx.de>"

# SeedDMS release to download; it is part of both the URL path and the tarball name.
ENV VER=5.1.13

# Run RUN steps with pipefail so a failed download aborts the `curl | tar` pipeline
# instead of being masked by tar's exit status.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Update and install necessary packages.
# The apt lists are removed in the same layer so they do not end up in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        catdoc \
        cron \
        docx2txt \
        dos2unix \
        gnumeric \
        id3 \
        imagemagick \
        libpng-dev \
        ocrmypdf \
        parallel \
        poppler-utils \
        tesseract-ocr \
        tesseract-ocr-deu \
        vim \
    && rm -rf /var/lib/apt/lists/*

# PHP extensions and the PEAR Log package
RUN docker-php-ext-install gd mysqli pdo pdo_mysql \
    && pear channel-update pear.php.net \
    && pear install Log

# Get seeddms: unpack the quickstart bundle, rename it to a version-independent
# path and enable the install tool. Done in one layer so the tree is stored once.
RUN curl -fsSL "https://downloads.sourceforge.net/project/seeddms/seeddms-${VER}/seeddms-quickstart-${VER}.tar.gz" \
        | tar -xzC /var/www \
    && mv /var/www/seeddms51x /var/www/seeddms \
    && touch /var/www/seeddms/data/conf/ENABLE_INSTALL_TOOL

# Copy settings-files
COPY sources/php.ini /usr/local/etc/php/
COPY sources/000-default.conf /etc/apache2/sites-available/
COPY sources/settings.xml /var/www/seeddms/data/conf/settings.xml
COPY sources/ocrmypdf.sh sources/seeddms-entrypoint /usr/local/bin/

# Fix ownership, normalise line endings of the scripts, enable mod_rewrite and
# keep a pristine copy of the data directory (data.bak) that the entrypoint
# uses to populate the data volume on first run.
RUN chown -R www-data:www-data /var/www/seeddms/ \
    && dos2unix /usr/local/bin/ocrmypdf.sh \
    && chmod a+rx /usr/local/bin/ocrmypdf.sh \
    && dos2unix /usr/local/bin/seeddms-entrypoint \
    && chmod a+rx /usr/local/bin/seeddms-entrypoint \
    && a2enmod rewrite \
    && cp -a /var/www/seeddms/data /var/www/seeddms/data.bak

# Volumes to mount (declared after all build-time writes to these paths)
VOLUME [ "/var/www/seeddms/data", "/var/www/seeddms/www/ext" ]

ENTRYPOINT [ "/usr/local/bin/seeddms-entrypoint" ]
CMD ["apache2-foreground"]

15
README.md Normal file
View File

@ -0,0 +1,15 @@
# Information
Builds a docker image for seeddms (https://www.seeddms.org).
This image supports OCR processing for images and PDFs.
## How to run
`docker run --name seeddms -d -v <local>:/var/www/seeddms/data -p 8080:80 seeddms`
## Additional information
The image is based on https://github.com/ludwigprager/docker-seeddms.

22
sources/000-default.conf Normal file
View File

@ -0,0 +1,22 @@
# Access rules for the SeedDMS web root.
<Directory "/var/www/seeddms/www/">
    DirectoryIndex index.php
    # Let .htaccess files inside the web root override the server configuration.
    AllowOverride All
    # Apache 2.4 syntax; replaces the 2.2-style "Order deny,allow / Deny from all /
    # Allow from all" triple, whose net effect was to allow every client.
    Require all granted
</Directory>

# Default virtual host serving SeedDMS on port 80.
<VirtualHost *:80>
    DocumentRoot "/var/www/seeddms/www/"
    ErrorLog ${APACHE_LOG_DIR}/error.log
    CustomLog ${APACHE_LOG_DIR}/access.log combined
</VirtualHost>

29
sources/ocrmypdf.sh Executable file
View File

@ -0,0 +1,29 @@
#!/bin/bash
# Prints the text content of a PDF on stdout.
#
# Usage: ocrmypdf.sh <file.pdf>
#
# If pdftotext finds no text in the file, the PDF is run through ocrmypdf
# (language: deu, output type: PDF/A), the original file is replaced by the
# OCRed version and the text of that version is printed instead.
# Only one instance works at a time; others wait for the lock file.

inputpdf=$1
tmpdir=/tmp/seed
lockfile=$tmpdir/$(basename "$0")
cores=2

# Print the text of the PDF given as $1, dropping single-character tokens
# surrounded by spaces and removing all digits and dots.
extract_text() {
    pdftotext -nopgbrk "$1" - | sed -e 's/ [a-zA-Z0-9.]\{1\} / /g' -e 's/[0-9.]//g'
}

# Without a writable work directory the lock could never be taken.
mkdir -p "$tmpdir" || exit 1
[ -w "$tmpdir" ] || exit 1

# Take the lock atomically: with noclobber the redirect fails if the file
# already exists. Retrying here (instead of testing for the file first and
# then creating it) avoids giving up when another process wins the race.
until ( set -o noclobber; echo "locked" > "$lockfile" ) 2> /dev/null; do
    sleep 5
done

# Release the lock on every exit path. INT/TERM are turned into a normal exit
# so the EXIT trap runs exactly once; SIGKILL cannot be trapped.
trap 'rm -f "$lockfile"' EXIT
trap 'exit 130' INT
trap 'exit 143' TERM

pdf_contents=$(extract_text "$inputpdf")
if [ -z "$pdf_contents" ]; then
    echo "ocrmypdf $inputpdf"
    tmpfile=$tmpdir/$(date +%s%N)
    ocrmypdf -l deu --rotate-pages --jobs "$cores" --output-type pdfa "$inputpdf" "$tmpfile" 2> /dev/null
    # Only replace the original when OCR actually produced a non-empty file.
    if [ -s "$tmpfile" ]; then
        pdf_contents=$(extract_text "$tmpfile")
        mv "$tmpfile" "$inputpdf"
    else
        rm -f "$tmpfile"
    fi
fi

# Collapse all whitespace runs to single spaces, as the unquoted echo did,
# but with globbing disabled so characters like '*' in the text are not
# expanded to file names.
set -f
# shellcheck disable=SC2206
words=( $pdf_contents )
set +f
printf '%s\n' "${words[*]}"

1949
sources/php.ini Normal file

File diff suppressed because it is too large Load Diff

20
sources/seeddms-entrypoint Executable file
View File

@ -0,0 +1,20 @@
#!/bin/sh
# Container entrypoint: prepares the SeedDMS data directory, starts cron and
# then hands over to the PHP image's own entrypoint with the original arguments.
set -e
echo "args $*"

# first run? No SQLite database yet, so populate the data directory from the
# copy kept next to it (data.bak).
if [ ! -f /var/www/seeddms/data/content.db ]; then
    mkdir -p /var/www/seeddms/data
    # "data.bak/." copies the directory contents including dotfiles and, unlike
    # a "*" glob, does not fail under `set -e` when there are no visible entries.
    cp -a /var/www/seeddms/data.bak/. /var/www/seeddms/data
fi

# Make /var/www/seeddms/conf a symlink to the configuration inside the data directory.
if [ ! -L "/var/www/seeddms/conf" ]; then
    rm -rf /var/www/seeddms/conf
    ln -s /var/www/seeddms/data/conf /var/www/seeddms/conf
fi

# Presumably clears a stale cron pid file left by a previous run of this
# container, so that cron starts again - TODO confirm.
rm -f /var/run/cron*
cron

# Sourced (not executed) so it sees this script's positional parameters.
. /usr/local/bin/docker-php-entrypoint

161
sources/settings.xml Normal file
View File

@ -0,0 +1,161 @@
<?xml version="1.0" encoding="UTF-8"?>
<configuration>
<site>
<!-- siteName: Name of site used in the page titles. Default: SeedDMS
- footNote: Message to display at the bottom of every page
- printDisclaimer: if true, the disclaimer message from the lang.inc files will be printed at the bottom of the page
- language: default language (name of a subfolder in folder "languages")
- theme: default style (name of a subfolder in folder "styles")
-->
<display siteName="SeedDMS" footNote="" printDisclaimer="false" language="de_DE" theme="bootstrap" previewWidthList="40" previewWidthDetail="100" availablelanguages="" showFullPreview="true" convertToPdf="true" onePageMode="on" previewWidthMenuList="40" previewWidthDropFolderList="100" maxItemsPerPage="0" incItemsPerPage="0">
</display>
<!-- strictFormCheck: Strict form checking. If set to true, then all fields in the form will be checked for a value. If set to false, then (most) comments and keyword fields become optional. Comments are always required when submitting a review or overriding document status.
- viewOnlineFileTypes: files with one of the following endings can be viewed online (USE ONLY LOWER CASE CHARACTERS)
- enableConverting: enable/disable converting of files
- enableEmail: enable/disable automatic email notification
- enableUsersView: enable/disable group and user view for all users
- enableFullSearch: false to disable fulltext search
- enableLanguageSelector: false to hide the language selector after login
- enableClipboard: false to hide the clipboard
- enableFolderTree: false to hide the folder tree
- expandFolderTree: 0 to start with tree hidden
- 1 to start with tree shown and first level expanded
- 2 to start with tree shown fully expanded
- stopWordsFile: path to stop word file for indexer
- sortUsersInList: how to sort users in lists ('fullname' or '' (default))
-->
<edition strictFormCheck="false" viewOnlineFileTypes=".txt;.text;.html;.htm;.xml;.pdf;.gif;.png;.jpg;.jpeg" enableConverting="true" enableEmail="false" enableUsersView="true" enableFullSearch="true" enableClipboard="true" enableFolderTree="true" expandFolderTree="1" enableLanguageSelector="true" stopWordsFile="/var/www/seeddms/data/conf/stopwords.txt" sortUsersInList="" enableDropUpload="true" enableRecursiveCount="false" maxRecursiveCount="0" enableThemeSelector="false" fullSearchEngine="sqlitefts" sortFoldersDefault="u" editOnlineFileTypes=".txt;.text" enableMenuTasks="false" enableHelp="false" defaultSearchMethod="fulltext" libraryFolder="0" maxSizeForFullText="0" showSingleSearchHit="false" enableSessionList="false" enableDropFolderList="true" enableMultiUpload="true" defaultDocPosition="end">
</edition>
<!-- enableCalendar: enable/disable calendar
- calendarDefaultView: calendar default view ("w" for week,"m" for month,"y" for year)
- firstDayOfWeek: first day of the week (0=sunday, 6=saturday)
-->
<calendar enableCalendar="false" calendarDefaultView="y" firstDayOfWeek="0">
</calendar>
<webdav enableWebdavReplaceDoc="true"/></site>
<system>
<!-- rootDir: Path to where SeedDMS is located
- httpRoot: The relative path in the URL, after the domain part. Do not include the
- http:// prefix or the web host name. e.g. If the full URL is
- http://www.example.com/seeddms/, set $_httpRoot = "/seeddms/".
- If the URL is http://www.example.com/, set $_httpRoot = "/".
- contentDir: Where the uploaded files are stored (best to choose a directory that
- is not accessible through your web-server)
- stagingDir: Where partial file uploads are saved
- luceneDir: Where the lucene fulltext index is saved
- logFileEnable: set false to disable log system
- logFileRotation: the log file rotation (h=hourly, d=daily, m=monthly)
- enableLargeFileUpload: support for jumploader
- partitionsize: size of chunk uploaded by jumploader
- dropFolderDir: where files for document upload are located
- cacheDir: where the preview images are saved
- backupDir: where the backups are saved
-->
<server rootDir="/var/www/seeddms/seeddms-5.1.13/" httpRoot="/" contentDir="/var/www/seeddms/data/" stagingDir="/var/www/seeddms/data/staging/" luceneDir="/var/www/seeddms/data/lucene/" logFileEnable="true" logFileRotation="d" enableLargeFileUpload="false" partitionSize="2000000" cacheDir="/var/www/seeddms/data/cache/" dropFolderDir="" backupDir="/var/www/seeddms/data/backup/" checkOutDir="" createCheckOutDir="false" repositoryUrl="https://repository.seeddms.org/dd278323b03fd2aa5510b8fa9addf5ea/" maxUploadSize="" enableXsendfile="false">
</server>
<!-- enableGuestLogin: If you want anybody to login as guest, set the following line to true
- note: guest login should be used only in a trusted environment
- enablePasswordForgotten: Allow users to reset their password
- restricted: Restricted access: only allow users to log in if they have an entry in the local database (irrespective of successful authentication with LDAP).
- enableUserImage: enable users images
- disableSelfEdit: if true user cannot edit his own profile
- passwordStrength: minimum strength of password, set to 0 to disable
- passwordExpiration: number of days after password expires
- passwordHistory: number of remembered passwords
- passwordStrengthAlgorithm: algorithm used to calculate password strength (simple or advanced)
- encryptionKey: arbitrary string used for creating identifiers
-->
<authentication enableGuestLogin="false" enablePasswordForgotten="true" restricted="true" enableUserImage="true" disableSelfEdit="false" passwordStrength="0" passwordStrengthAlgorithm="simple" passwordExpiration="10" passwordHistory="0" loginFailure="0" autoLoginUser="0" quota="0" undelUserIds="" encryptionKey="cfecb42d13f2e1666cddde56991a2cbf" cookieLifetime="0" enableGuestAutoLogin="false" defaultAccessDocs="0">
<connectors>
<!-- ***** CONNECTOR LDAP *****
- enable: enable/disable connector
- type: type of connector ldap / AD
- host: hostname of the authentification server
- URIs are supported, e.g.: ldaps://ldap.host.com
- port: port of the authentification server
- baseDN: top level of the LDAP directory tree
-->
<connector enable="false" type="ldap" host="ldaps://ldap.host.com" port="389" baseDN="" bindDN="" bindPw="">
</connector>
<!-- ***** CONNECTOR Microsoft Active Directory *****
- enable: enable/disable connector
- type: type of connector ldap / AD
- host: hostname of the authentification server
- port: port of the authentification server
- baseDN: top level of the LDAP directory tree
- accountDomainName: sample: example.com
-->
<connector enable="false" type="AD" host="ldap.example.com" port="389" baseDN="" accountDomainName="example.com" bindDN="" bindPw="">
</connector>
</connectors>
</authentication>
<!--
- dbDriver: DB-Driver used by adodb (see adodb-readme)
- dbHostname: DB-Server
- dbDatabase: database where the tables for seeddms are stored (optional - see adodb-readme)
- dbUser: username for database-access
- dbPass: password for database-access
-->
<database dbDriver="sqlite" dbHostname="localhost" dbDatabase="/var/www/seeddms/data/content.db" dbUser="seeddms" dbPass="seeddms" doNotCheckVersion="false">
</database>
<!-- smtpServer: SMTP Server hostname
- smtpPort: SMTP Server port
- smtpSendFrom: Send from
-->
<smtp smtpServer="localhost" smtpPort="25" smtpSendFrom="seeddms@localhost" smtpUser="" smtpPassword=""/>
</system>
<advanced>
<!-- siteDefaultPage: Default page on login. Defaults to out/out.ViewFolder.php
- rootFolderID: ID of root-folder (mostly no need to change)
- titleDisplayHack: Workaround for page titles that go over more than 2 lines.
-->
<display siteDefaultPage="" rootFolderID="1" titleDisplayHack="true" showMissingTranslations="false">
</display>
<!-- guestID: ID of guest-user used when logged in as guest (mostly no need to change)
- adminIP: if set, admin can log in only from the specified IP address; leave empty to disable this check
- NOTE: works only with local authentication (no LDAP)
-->
<authentication guestID="2" adminIP="">
</authentication>
<!-- enableAdminRevApp: false to not list the administrator as reviewer/approver
- versioningFileName: the name of the versioning info file created by the backup tool
- workflowMode: 'traditional' or 'advanced'
- enableVersionDeletion: allow to delete versions after approval
- enableVersionModification: allow to modify versions after approval
- enableDuplicateDocNames: allow duplicate names in a folder
-->
<edition enableAdminRevApp="false" versioningFileName="versioning_info.txt" workflowMode="traditional_only_approval" enableVersionDeletion="true" enableVersionModification="true" enableDuplicateDocNames="true" enableOwnerRevApp="false" enableSelfRevApp="false" presetExpirationDate="" overrideMimeType="false" initialDocumentStatus="0" enableAcknowledgeWorkflow="" enableRevisionWorkflow="" advancedAcl="false" enableUpdateRevApp="false" removeFromDropFolder="false" allowReviewerOnly="false" enableDuplicateSubFolderNames="false">
</edition>
<!-- enableNotificationAppRev: true to send a notification if a user is added as a reviewer or approver
-->
<notification enableNotificationAppRev="true" enableOwnerNotification="false" enableNotificationWorkflow="false">
</notification>
<!-- coreDir: Path to SeedDMS_Core (optional)
- luceneClassDir: Path to SeedDMS_Lucene (optional)
- contentOffsetDir: To work around limitations in the underlying file system, a new
- directory structure has been devised that exists within the content
- directory ($_contentDir). This requires a base directory from which
- to begin. Usually leave this to the default setting, 1048576, but can
- be any number or string that does not already exist within $_contentDir.
- maxDirID: Maximum number of sub-directories per parent directory. Default: 0, use 31998 (maximum number of dirs in ext3) for a multi level content directory.
- updateNotifyTime: users are notified about document-changes that took place within the last "updateNotifyTime" seconds
- extraPath: Path to additional software. This is the directory containing additional software like the adodb directory, or the pear Log package. This path will be added to the php include path
-->
<server coreDir="" luceneClassDir="" contentOffsetDir="1048576" maxDirID="0" updateNotifyTime="86400" extraPath="/var/www/seeddms/pear/" maxExecutionTime="30" cmdTimeout="30">
</server>
<converters target="fulltext">
<converter mimeType="application/pdf">ocrmypdf.sh %s</converter>
<converter mimeType="application/msword">catdoc %s</converter>
<converter mimeType="application/vnd.ms-excel">ssconvert -T Gnumeric_stf:stf_csv -S %s fd://1</converter>
<converter mimeType="audio/mp3">id3 -l -R %s | egrep '(Title|Artist|Album)' | sed 's/^[^:]*: //g'</converter>
<converter mimeType="audio/mpeg">id3 -l -R %s | egrep '(Title|Artist|Album)' | sed 's/^[^:]*: //g'</converter>
<converter mimeType="text/plain">cat %s</converter>
<converter mimeType="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet">xlsx2csv %s</converter><converter mimeType="application/vnd.openxmlformats-officedocument.wordprocessingml.document">docx2txt %s -</converter><converter mimeType="image/jpeg">tesseract %s - -l deu --psm 3 --oem 3 2&gt; /dev/null | tr '\n' ' '</converter></converters>
<converters target="preview"><converter mimeType="image/*">convert -resize %wx '%f' '%o'</converter><converter mimeType="application/pdf">convert -density 100 -resize %wx '%f[0]' '%o'</converter><converter mimeType="application/postscript">convert -density 100 -resize %wx '%f[0]' '%o'</converter><converter mimeType="text/plain">a2ps -1 -a1 -R -B -o - '%f' | gs -dBATCH -dNOPAUSE -sDEVICE=pngalpha -dFirstPage=1 -dLastPage=1 -dPDFFitPage -r72x72 -sOutputFile=- -q - | convert -resize %wx png:- '%o'</converter></converters></advanced>
<extensions><extension name="example"/></extensions></configuration>