initial version
This commit is contained in:
commit
8143adbaf5
33
Dockerfile
Normal file
33
Dockerfile
Normal file
|
@ -0,0 +1,33 @@
|
|||
# SeedDMS (quickstart distribution) served by Apache/PHP 7.4, bundled with the
# OCR and text-extraction tools used by the full-text converters.
FROM php:7.4-apache
LABEL maintainer="Niels Lippke<nlippke@gmx.de>"

# SeedDMS release to download; selects both the SourceForge directory and the
# quickstart tarball name.
ENV VER=5.1.13

# Run all RUN steps under bash with pipefail so a failure on the left-hand side
# of a pipe (e.g. the curl download below) aborts the build instead of being
# masked by the exit status of the last command in the pipe.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Update and install necessary packages. The apt lists are removed in the same
# layer so they never become part of the image.
# NOTE(review): sources/settings.xml configures an `xlsx2csv` converter, but no
# such package is installed here - confirm whether it should be added.
RUN apt-get update && apt-get install -y --no-install-recommends \
        catdoc \
        cron \
        docx2txt \
        dos2unix \
        gnumeric \
        id3 \
        imagemagick \
        libpng-dev \
        ocrmypdf \
        parallel \
        poppler-utils \
        tesseract-ocr \
        tesseract-ocr-deu \
        vim \
    && rm -rf /var/lib/apt/lists/*

# PHP extensions and the PEAR Log package
RUN docker-php-ext-install gd mysqli pdo pdo_mysql && \
    pear channel-update pear.php.net && pear install Log

# Get seeddms: unpack the quickstart tarball, move it to a version-independent
# path, enable the install tool and hand the tree to the web-server user. The
# chown happens in the same layer as the extraction so the files are not
# stored a second time in a later layer.
RUN curl -fsSL "https://downloads.sourceforge.net/project/seeddms/seeddms-${VER}/seeddms-quickstart-${VER}.tar.gz" \
        | tar -xzC /var/www \
    && mv /var/www/seeddms51x /var/www/seeddms \
    && touch /var/www/seeddms/data/conf/ENABLE_INSTALL_TOOL \
    && chown -R www-data:www-data /var/www/seeddms/

# Copy settings-files
COPY sources/php.ini /usr/local/etc/php/
COPY sources/000-default.conf /etc/apache2/sites-available/
COPY --chown=www-data:www-data sources/settings.xml /var/www/seeddms/data/conf/settings.xml
COPY sources/ocrmypdf.sh /usr/local/bin/
COPY sources/seeddms-entrypoint /usr/local/bin/

# Normalise line endings of the helper scripts (they may have been checked out
# with CRLF), make them executable and enable mod_rewrite.
RUN dos2unix /usr/local/bin/ocrmypdf.sh && chmod a+rx /usr/local/bin/ocrmypdf.sh && \
    dos2unix /usr/local/bin/seeddms-entrypoint && chmod a+rx /usr/local/bin/seeddms-entrypoint && \
    a2enmod rewrite

# Keep a pristine copy of the data directory; the entrypoint restores it into
# the data volume when no content.db is present.
RUN cp -a /var/www/seeddms/data /var/www/seeddms/data.bak

# Volumes to mount (declared after all build-time writes to these paths)
VOLUME [ "/var/www/seeddms/data", "/var/www/seeddms/www/ext" ]

ENTRYPOINT [ "/usr/local/bin/seeddms-entrypoint"]
CMD ["apache2-foreground"]
|
15
README.md
Normal file
15
README.md
Normal file
|
@ -0,0 +1,15 @@
|
|||
# Information
|
||||
|
||||
Builds a docker image for seeddms (https://www.seeddms.org).
|
||||
|
||||
This image supports OCR processing for images and PDFs.
|
||||
|
||||
## How to run
|
||||
|
||||
`docker run --name seeddms -d -v <local>:/var/www/seeddms/data -p 8080:80 seeddms`
|
||||
|
||||
## Additional information
|
||||
|
||||
The image is based on https://github.com/ludwigprager/docker-seeddms.
|
||||
|
||||
|
22
sources/000-default.conf
Normal file
22
sources/000-default.conf
Normal file
|
@ -0,0 +1,22 @@
|
|||
# Access rules for the SeedDMS web root. Everyone may access it, and .htaccess
# files inside the tree may override settings (AllowOverride All).
# "Require all granted" is the Apache 2.4 form of the former
# "Order deny,allow / Deny from all / Allow from all" triple, which also
# resulted in access for everyone.
<Directory "/var/www/seeddms/www/">
    DirectoryIndex index.php
    AllowOverride All
    Require all granted
</Directory>

# Default site on port 80, serving the SeedDMS web root.
<VirtualHost *:80>
    DocumentRoot "/var/www/seeddms/www/"
    ErrorLog ${APACHE_LOG_DIR}/error.log
    CustomLog ${APACHE_LOG_DIR}/access.log combined
</VirtualHost>
|
29
sources/ocrmypdf.sh
Executable file
29
sources/ocrmypdf.sh
Executable file
|
@ -0,0 +1,29 @@
|
|||
#!/bin/bash
#
# Usage: ocrmypdf.sh <pdf-file>
#
# Prints the text content of <pdf-file> on stdout. If pdftotext finds no text
# (after stripping single characters, digits and dots), the file is run
# through ocrmypdf (German language, PDF/A output), the original file is
# replaced by the OCRed version and the text of that version is printed.
#
# Only one instance runs at a time; further instances wait for the lock file.

inputpdf=$1
tmpdir=/tmp/seed
lockfile=$tmpdir/$(basename "$0")
cores=2

# Extract the text of the PDF given as $1, dropping isolated single
# characters as well as all digits and dots.
extract_text() {
    pdftotext -nopgbrk "$1" - | sed -e 's/ [a-zA-Z0-9.]\{1\} / /g' -e 's/[0-9.]//g'
}

mkdir -p "$tmpdir"

# Acquire the lock. With noclobber the redirection fails if the file already
# exists, so testing and creating the lock is a single step. Retrying here
# (instead of giving up) means a process that loses the race simply keeps
# waiting for its turn.
until ( set -o noclobber; echo "locked" > "$lockfile" ) 2> /dev/null; do
    sleep 5
done

# Release the lock on every exit path. INT/TERM are turned into a regular
# exit so that the EXIT trap runs for them as well (SIGKILL cannot be trapped).
trap 'rm -f "$lockfile"' EXIT
trap 'exit 130' INT
trap 'exit 143' TERM

pdf_contents=$(extract_text "$inputpdf")
if [ -z "$pdf_contents" ]; then
    # NOTE(review): this line goes to stdout and therefore becomes part of the
    # emitted text - kept as in the original.
    echo "ocrmypdf $inputpdf"
    tmpfile=$tmpdir/$(date +%s%N)
    if ocrmypdf -l deu --rotate-pages --jobs "$cores" --output-type pdfa "$inputpdf" "$tmpfile" 2> /dev/null; then
        pdf_contents=$(extract_text "$tmpfile")
        mv "$tmpfile" "$inputpdf"
    else
        # OCR failed: leave the original document untouched and drop any
        # partial output.
        rm -f "$tmpfile"
    fi
fi

# Deliberately unquoted so that runs of whitespace/newlines collapse into
# single spaces; globbing is switched off so that text such as "*" is not
# expanded to file names.
set -f
# shellcheck disable=SC2086
echo $pdf_contents
|
1949
sources/php.ini
Normal file
1949
sources/php.ini
Normal file
File diff suppressed because it is too large
Load Diff
20
sources/seeddms-entrypoint
Executable file
20
sources/seeddms-entrypoint
Executable file
|
@ -0,0 +1,20 @@
|
|||
#!/bin/sh
# Container entrypoint: seeds the data directory on first run, links the
# configuration directory into place, starts cron and then hands over to the
# PHP image's own entrypoint with the original arguments.
set -e

echo "args $*"

# first run? (no database in the data directory yet)
if [ ! -f /var/www/seeddms/data/content.db ]; then
    mkdir -p /var/www/seeddms/data
    # "data.bak/." copies the whole directory content, including dotfiles, and
    # does not fail when the directory happens to be empty.
    cp -a /var/www/seeddms/data.bak/. /var/www/seeddms/data/
fi

# Make /var/www/seeddms/conf a symlink to the conf directory inside the data
# directory, replacing whatever is there if it is not already a symlink.
if [ ! -L "/var/www/seeddms/conf" ]; then
    rm -rf /var/www/seeddms/conf
    ln -s /var/www/seeddms/data/conf /var/www/seeddms/conf
fi

# Remove leftover cron run files from a previous container run before
# starting cron again.
rm -f /var/run/cron*
cron

# Replace this shell with the PHP image entrypoint so that the final process
# is started with the arguments passed to this script.
exec /usr/local/bin/docker-php-entrypoint "$@"
|
161
sources/settings.xml
Normal file
161
sources/settings.xml
Normal file
|
@ -0,0 +1,161 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<configuration>
|
||||
<site>
|
||||
<!-- siteName: Name of site used in the page titles. Default: SeedDMS
|
||||
- footNote: Message to display at the bottom of every page
|
||||
- printDisclaimer: if true, the disclaimer message from the lang.inc files will be printed at the bottom of the page
|
||||
- language: default language (name of a subfolder in folder "languages")
|
||||
- theme: default style (name of a subfolder in folder "styles")
|
||||
-->
|
||||
<display siteName="SeedDMS" footNote="" printDisclaimer="false" language="de_DE" theme="bootstrap" previewWidthList="40" previewWidthDetail="100" availablelanguages="" showFullPreview="true" convertToPdf="true" onePageMode="on" previewWidthMenuList="40" previewWidthDropFolderList="100" maxItemsPerPage="0" incItemsPerPage="0">
|
||||
</display>
|
||||
<!-- strictFormCheck: Strict form checking. If set to true, then all fields in the form will be checked for a value. If set to false, then (most) comments and keyword fields become optional. Comments are always required when submitting a review or overriding document status.
|
||||
- viewOnlineFileTypes: files with one of the following endings can be viewed online (USE ONLY LOWER CASE CHARACTERS)
|
||||
- enableConverting: enable/disable converting of files
|
||||
- enableEmail: enable/disable automatic email notification
|
||||
- enableUsersView: enable/disable group and user view for all users
|
||||
- enableFullSearch: false to don't use fulltext search
|
||||
- enableLanguageSelector: false to don't show the language selector after login
|
||||
- enableClipboard: false to hide the clipboard
|
||||
- enableFolderTree: false to don't show the folder tree
|
||||
- expandFolderTree: 0 to start with tree hidden
|
||||
- 1 to start with tree shown and first level expanded
|
||||
- 2 to start with tree shown fully expanded
|
||||
- stopWordsFile: path to stop word file for indexer
|
||||
- sortUsersInList: how to sort users in lists ('fullname' or '' (default))
|
||||
-->
|
||||
<edition strictFormCheck="false" viewOnlineFileTypes=".txt;.text;.html;.htm;.xml;.pdf;.gif;.png;.jpg;.jpeg" enableConverting="true" enableEmail="false" enableUsersView="true" enableFullSearch="true" enableClipboard="true" enableFolderTree="true" expandFolderTree="1" enableLanguageSelector="true" stopWordsFile="/var/www/seeddms/data/conf/stopwords.txt" sortUsersInList="" enableDropUpload="true" enableRecursiveCount="false" maxRecursiveCount="0" enableThemeSelector="false" fullSearchEngine="sqlitefts" sortFoldersDefault="u" editOnlineFileTypes=".txt;.text" enableMenuTasks="false" enableHelp="false" defaultSearchMethod="fulltext" libraryFolder="0" maxSizeForFullText="0" showSingleSearchHit="false" enableSessionList="false" enableDropFolderList="true" enableMultiUpload="true" defaultDocPosition="end">
|
||||
</edition>
|
||||
<!-- enableCalendar: enable/disable calendar
|
||||
- calendarDefaultView: calendar default view ("w" for week,"m" for month,"y" for year)
|
||||
- firstDayOfWeek: first day of the week (0=sunday, 6=saturday)
|
||||
-->
|
||||
<calendar enableCalendar="false" calendarDefaultView="y" firstDayOfWeek="0">
|
||||
</calendar>
|
||||
<webdav enableWebdavReplaceDoc="true"/></site>
|
||||
|
||||
<system>
|
||||
<!-- rootDir: Path to where SeedDMS is located
|
||||
- httpRoot: The relative path in the URL, after the domain part. Do not include the
|
||||
- http:// prefix or the web host name. e.g. If the full URL is
|
||||
- http://www.example.com/seeddms/, set $_httpRoot = "/seeddms/".
|
||||
- If the URL is http://www.example.com/, set $_httpRoot = "/".
|
||||
- contentDir: Where the uploaded files are stored (best to choose a directory that
|
||||
- is not accessible through your web-server)
|
||||
- stagingDir: Where partial file uploads are saved
|
||||
- luceneDir: Where the lucene fulltext index is saved
|
||||
- logFileEnable: set false to disable log system
|
||||
- logFileRotation: the log file rotation (h=hourly, d=daily, m=monthly)
|
||||
- enableLargeFileUpload: support for jumploader
|
||||
- partitionsize: size of chunk uploaded by jumploader
|
||||
- dropFolderDir: where files for document upload are located
|
||||
- cacheDir: where the preview images are saved
|
||||
- backupDir: where the backups are saved
|
||||
-->
|
||||
<server rootDir="/var/www/seeddms/seeddms-5.1.13/" httpRoot="/" contentDir="/var/www/seeddms/data/" stagingDir="/var/www/seeddms/data/staging/" luceneDir="/var/www/seeddms/data/lucene/" logFileEnable="true" logFileRotation="d" enableLargeFileUpload="false" partitionSize="2000000" cacheDir="/var/www/seeddms/data/cache/" dropFolderDir="" backupDir="/var/www/seeddms/data/backup/" checkOutDir="" createCheckOutDir="false" repositoryUrl="https://repository.seeddms.org/dd278323b03fd2aa5510b8fa9addf5ea/" maxUploadSize="" enableXsendfile="false">
|
||||
</server>
|
||||
|
||||
<!-- enableGuestLogin: If you want anybody to login as guest, set the following line to true
|
||||
- note: guest login should be used only in a trusted environment
|
||||
- enablePasswordForgotten: Allow users to reset their password
|
||||
- restricted: Restricted access: only allow users to log in if they have an entry in the local database (irrespective of successful authentication with LDAP).
|
||||
- enableUserImage: enable users images
|
||||
- disableSelfEdit: if true user cannot edit his own profile
|
||||
- passwordStrength: minimum strength of password, set to 0 to disable
|
||||
- passwordExpiration: number of days after password expires
|
||||
- passwordHistory: number of remembered passwords
|
||||
- passwordStrengthAlgorithm: algorithm used to calculate password strength (simple or advanced)
|
||||
- encryptionKey: arbitrary string used for creating identifiers
|
||||
-->
|
||||
<authentication enableGuestLogin="false" enablePasswordForgotten="true" restricted="true" enableUserImage="true" disableSelfEdit="false" passwordStrength="0" passwordStrengthAlgorithm="simple" passwordExpiration="10" passwordHistory="0" loginFailure="0" autoLoginUser="0" quota="0" undelUserIds="" encryptionKey="cfecb42d13f2e1666cddde56991a2cbf" cookieLifetime="0" enableGuestAutoLogin="false" defaultAccessDocs="0">
|
||||
<connectors>
|
||||
<!-- ***** CONNECTOR LDAP *****
|
||||
- enable: enable/disable connector
|
||||
- type: type of connector ldap / AD
|
||||
- host: hostname of the authentification server
|
||||
- URIs are supported, e.g.: ldaps://ldap.host.com
|
||||
- port: port of the authentification server
|
||||
- baseDN: top level of the LDAP directory tree
|
||||
-->
|
||||
<connector enable="false" type="ldap" host="ldaps://ldap.host.com" port="389" baseDN="" bindDN="" bindPw="">
|
||||
</connector>
|
||||
<!-- ***** CONNECTOR Microsoft Active Directory *****
|
||||
- enable: enable/disable connector
|
||||
- type: type of connector ldap / AD
|
||||
- host: hostname of the authentification server
|
||||
- port: port of the authentification server
|
||||
- baseDN: top level of the LDAP directory tree
|
||||
- accountDomainName: sample: example.com
|
||||
-->
|
||||
<connector enable="false" type="AD" host="ldap.example.com" port="389" baseDN="" accountDomainName="example.com" bindDN="" bindPw="">
|
||||
</connector>
|
||||
</connectors>
|
||||
</authentication>
|
||||
<!--
|
||||
- dbDriver: DB-Driver used by adodb (see adodb-readme)
|
||||
- dbHostname: DB-Server
|
||||
- dbDatabase: database where the tables for seeddms are stored (optional - see adodb-readme)
|
||||
- dbUser: username for database-access
|
||||
- dbPass: password for database-access
|
||||
-->
|
||||
<database dbDriver="sqlite" dbHostname="localhost" dbDatabase="/var/www/seeddms/data/content.db" dbUser="seeddms" dbPass="seeddms" doNotCheckVersion="false">
|
||||
</database>
|
||||
<!-- smtpServer: SMTP Server hostname
|
||||
- smtpPort: SMTP Server port
|
||||
- smtpSendFrom: Send from
|
||||
-->
|
||||
<smtp smtpServer="localhost" smtpPort="25" smtpSendFrom="seeddms@localhost" smtpUser="" smtpPassword=""/>
|
||||
</system>
|
||||
|
||||
|
||||
<advanced>
|
||||
<!-- siteDefaultPage: Default page on login. Defaults to out/out.ViewFolder.php
|
||||
- rootFolderID: ID of root-folder (mostly no need to change)
|
||||
- titleDisplayHack: Workaround for page titles that go over more than 2 lines.
|
||||
-->
|
||||
<display siteDefaultPage="" rootFolderID="1" titleDisplayHack="true" showMissingTranslations="false">
|
||||
</display>
|
||||
<!-- guestID: ID of guest-user used when logged in as guest (mostly no need to change)
|
||||
- adminIP: if set, admin can log in only from the specified IP address; leave empty to disable this check
|
||||
- NOTE: works only with local authentication (no LDAP)
|
||||
-->
|
||||
<authentication guestID="2" adminIP="">
|
||||
</authentication>
|
||||
<!-- enableAdminRevApp: false to don't list administrator as reviewer/approver
|
||||
- versioningFileName: the name of the versioning info file created by the backup tool
|
||||
- workflowMode: 'traditional' or 'advanced'
|
||||
- enableVersionDeletion: allow to delete versions after approval
|
||||
- enableVersionModification: allow to modify versions after approval
|
||||
- enableDuplicateDocNames: allow duplicate names in a folder
|
||||
-->
|
||||
<edition enableAdminRevApp="false" versioningFileName="versioning_info.txt" workflowMode="traditional_only_approval" enableVersionDeletion="true" enableVersionModification="true" enableDuplicateDocNames="true" enableOwnerRevApp="false" enableSelfRevApp="false" presetExpirationDate="" overrideMimeType="false" initialDocumentStatus="0" enableAcknowledgeWorkflow="" enableRevisionWorkflow="" advancedAcl="false" enableUpdateRevApp="false" removeFromDropFolder="false" allowReviewerOnly="false" enableDuplicateSubFolderNames="false">
|
||||
</edition>
|
||||
<!-- enableNotificationAppRev: true to send a notification if a user is added as a reviewer or approver
|
||||
-->
|
||||
<notification enableNotificationAppRev="true" enableOwnerNotification="false" enableNotificationWorkflow="false">
|
||||
</notification>
|
||||
<!-- coreDir: Path to SeedDMS_Core (optional)
|
||||
- luceneClassDir: Path to SeedDMS_Lucene (optional)
|
||||
- contentOffsetDir: To work around limitations in the underlying file system, a new
|
||||
- directory structure has been devised that exists within the content
|
||||
- directory ($_contentDir). This requires a base directory from which
|
||||
- to begin. Usually leave this to the default setting, 1048576, but can
|
||||
- be any number or string that does not already exist within $_contentDir.
|
||||
- maxDirID: Maximum number of sub-directories per parent directory. Default: 0, use 31998 (maximum number of dirs in ext3) for a multi level content directory.
|
||||
- updateNotifyTime: users are notified about document-changes that took place within the last "updateNotifyTime" seconds
|
||||
- extraPath: Path to additional software. This is the directory containing additional software like the adodb directory, or the pear Log package. This path will be added to the php include path
|
||||
-->
|
||||
<server coreDir="" luceneClassDir="" contentOffsetDir="1048576" maxDirID="0" updateNotifyTime="86400" extraPath="/var/www/seeddms/pear/" maxExecutionTime="30" cmdTimeout="30">
|
||||
</server>
|
||||
<converters target="fulltext">
|
||||
<converter mimeType="application/pdf">ocrmypdf.sh %s</converter>
|
||||
<converter mimeType="application/msword">catdoc %s</converter>
|
||||
<converter mimeType="application/vnd.ms-excel">ssconvert -T Gnumeric_stf:stf_csv -S %s fd://1</converter>
|
||||
<converter mimeType="audio/mp3">id3 -l -R %s | egrep '(Title|Artist|Album)' | sed 's/^[^:]*: //g'</converter>
|
||||
<converter mimeType="audio/mpeg">id3 -l -R %s | egrep '(Title|Artist|Album)' | sed 's/^[^:]*: //g'</converter>
|
||||
<converter mimeType="text/plain">cat %s</converter>
|
||||
<converter mimeType="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet">xlsx2csv %s</converter><converter mimeType="application/vnd.openxmlformats-officedocument.wordprocessingml.document">docx2txt %s -</converter><converter mimeType="image/jpeg">tesseract %s - -l deu --psm 3 --oem 3 2> /dev/null | tr '\n' ' '</converter></converters>
|
||||
|
||||
<converters target="preview"><converter mimeType="image/*">convert -resize %wx '%f' '%o'</converter><converter mimeType="application/pdf">convert -density 100 -resize %wx '%f[0]' '%o'</converter><converter mimeType="application/postscript">convert -density 100 -resize %wx '%f[0]' '%o'</converter><converter mimeType="text/plain">a2ps -1 -a1 -R -B -o - '%f' | gs -dBATCH -dNOPAUSE -sDEVICE=pngalpha -dFirstPage=1 -dLastPage=1 -dPDFFitPage -r72x72 -sOutputFile=- -q - | convert -resize %wx png:- '%o'</converter></converters></advanced>
|
||||
|
||||
<extensions><extension name="example"/></extensions></configuration>
|
Loading…
Reference in New Issue
Block a user