forked from pulibrary/plum
-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy path.install_tesseract
executable file
·80 lines (65 loc) · 2.42 KB
/
.install_tesseract
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
#!/bin/bash
#
# Installs Tesseract 3.05.02 together with OpenJPEG 2.3.1 and Leptonica 1.79.0,
# plus the English and Italian trained language data.
#
# Everything is built under $HOME/dependencies/tesseract. The $HOME/dependencies
# directory is configured to be cached and persisted among builds, so when a
# complete build tree is already there the script only re-runs `make install`
# instead of downloading and compiling again.
#
# Needs on PATH: wget, curl, unzip, tar, sed, cmake, make, sudo (plus whatever
# the autotools-based builds of Leptonica and Tesseract require).
# Written to be executed (./.install_tesseract); it calls `exit` on failure.

# Stop at the first failing step instead of carrying on in the wrong directory.
set -euo pipefail

readonly DEPS_DIR="${HOME}/dependencies"
readonly BUILD_ROOT="${DEPS_DIR}/tesseract"
readonly LEPTONICA_DIR="leptonica-1.79.0"
readonly TESSERACT_DIR="tesseract-3.05.02"
readonly TESSDATA_DEST="/usr/local/share/tessdata"

# Set to 1 while a from-scratch build is running; read by the EXIT trap.
build_in_progress=0

#######################################
# Prints an error message to stderr and aborts the script.
# Arguments: message words
#######################################
die() {
  printf 'install_tesseract: %s\n' "$*" >&2
  exit 1
}

#######################################
# Prints the name of the unpacked OpenJPEG source directory found in the
# current working directory.
# GitHub tag archives normally drop the leading "v" of the tag from the
# top-level folder (openjpeg-2.3.1), so both spellings are accepted.
# Returns: 0 if a directory was found, 1 otherwise
#######################################
openjpeg_dir() {
  local candidate
  for candidate in openjpeg-v2.3.1 openjpeg-2.3.1; do
    if [[ -d "${candidate}" ]]; then
      printf '%s\n' "${candidate}"
      return 0
    fi
  done
  return 1
}

#######################################
# Checks that the cached tree contains every directory and data file the
# "install from cache" path relies on.
# Returns: 0 when the cache looks usable, 1 otherwise
#######################################
cache_is_complete() {
  local jp2_dir
  jp2_dir=$(cd "${BUILD_ROOT}" 2>/dev/null && openjpeg_dir) || return 1
  [[ -d "${BUILD_ROOT}/${jp2_dir}/build" &&
    -d "${BUILD_ROOT}/${LEPTONICA_DIR}" &&
    -d "${BUILD_ROOT}/${TESSERACT_DIR}" &&
    -f "${BUILD_ROOT}/tessdata/eng.traineddata" &&
    -f "${BUILD_ROOT}/tessdata/ita.traineddata" ]]
}

#######################################
# EXIT trap: removes a half-finished build tree so that the next run does not
# mistake it for a valid cache. sudo is used because `sudo make install`
# leaves root-owned files inside the build directories.
#######################################
remove_partial_build() {
  if ((build_in_progress)); then
    sudo rm -rf -- "${BUILD_ROOT:?}"
  fi
}

#######################################
# Copies the cached English and Italian trained data into the system-wide
# tessdata directory, creating that directory if it is missing.
#######################################
install_tessdata() {
  sudo mkdir -p "${TESSDATA_DEST}"
  sudo cp "${BUILD_ROOT}"/tessdata/eng.* "${BUILD_ROOT}"/tessdata/ita* \
    "${TESSDATA_DEST}/"
}

#######################################
# Downloads, compiles and installs OpenJPEG, Leptonica and Tesseract, then
# fetches and installs the trained language data.
#######################################
build_from_source() {
  local jp2_dir

  mkdir -p "${BUILD_ROOT}"
  cd "${BUILD_ROOT}"
  echo "Compiling and installing Tesseract and its library dependencies"

  # --- OpenJPEG -------------------------------------------------------------
  wget -O v2.3.1.zip "https://github.com/uclouvain/openjpeg/archive/v2.3.1.zip"
  unzip -q v2.3.1.zip
  jp2_dir=$(openjpeg_dir) \
    || die "OpenJPEG sources not found after unpacking v2.3.1.zip"
  # Prints the cmake path for the build log and fails early if it is missing.
  command -v cmake || die "cmake is required to build OpenJPEG"
  mkdir -p "${jp2_dir}/build"
  cd "${jp2_dir}/build"
  cmake ..
  make
  sudo make install
  # NOTE(review): cleaning here means the cached-path `sudo make install`
  # has to recompile OpenJPEG; kept as in the original - confirm it is wanted.
  sudo make clean

  # --- Leptonica ------------------------------------------------------------
  cd "${BUILD_ROOT}"
  wget -O "${LEPTONICA_DIR}.tar.gz" \
    "https://github.com/DanBloomberg/leptonica/releases/download/1.79.0/leptonica-1.79.0.tar.gz"
  tar xzf "${LEPTONICA_DIR}.tar.gz"
  cd "${LEPTONICA_DIR}"
  # Switch on JPEG 2000 support and link the static programs against openjp2.
  sed -i 's/#define HAVE_LIBJP2K 0/#define HAVE_LIBJP2K 1/g' ./src/environ.h
  sed -i 's/-ltiff -ljpeg -lpng -lz -lm/-ltiff -ljpeg -lpng -lz -lm -lopenjp2/g' ./prog/makefile.static
  ./configure
  make
  sudo make install

  # --- Tesseract ------------------------------------------------------------
  cd "${BUILD_ROOT}"
  wget -O 3.05.02.zip "https://github.com/tesseract-ocr/tesseract/archive/3.05.02.zip"
  unzip -q 3.05.02.zip
  cd "${TESSERACT_DIR}"
  ./autogen.sh
  ./configure
  make
  sudo make install
  sudo ldconfig

  # --- Trained language data ------------------------------------------------
  cd "${BUILD_ROOT}"
  echo "Obtaining Tesseract trained language data"
  mkdir -p ./tessdata
  # --fail: an HTTP error must abort the build rather than be saved to disk
  # as if it were a .traineddata file.
  # NOTE(review): these come from the master branch of tessdata; verify that
  # they are readable by Tesseract 3.05 or pin a matching tag.
  curl --fail -L --output ./tessdata/eng.traineddata https://github.com/tesseract-ocr/tessdata/raw/master/eng.traineddata
  curl --fail -L --output ./tessdata/ita.traineddata https://github.com/tesseract-ocr/tessdata/raw/master/ita.traineddata
  # git clone https://github.com/tesseract-ocr/tessdata.git
  install_tessdata
}

#######################################
# Re-installs the already compiled libraries and the trained data from the
# cached build tree.
#######################################
install_from_cache() {
  local jp2_dir

  echo "Installing Tesseract library dependencies"
  cd "${BUILD_ROOT}"
  jp2_dir=$(openjpeg_dir) || die "cached OpenJPEG sources are missing"
  sudo make -C "${jp2_dir}/build" install
  sudo make -C "${LEPTONICA_DIR}" install
  sudo make -C "${TESSERACT_DIR}" install
  sudo ldconfig
  echo "Obtaining Tesseract trained language data"
  install_tessdata
}

trap remove_partial_build EXIT

mkdir -p "${DEPS_DIR}"
cd "${DEPS_DIR}"

# A tree left behind by an interrupted or failed build cannot be installed
# from; throw it away so the full build below runs again.
if [[ -d "${BUILD_ROOT}" ]] && ! cache_is_complete; then
  echo "Cached Tesseract build is incomplete; rebuilding from scratch" >&2
  sudo rm -rf -- "${BUILD_ROOT:?}"
fi

if [[ -d "${BUILD_ROOT}" ]]; then
  # We've already done the build before so just install libraries
  install_from_cache
else
  # The lack of directory means we need to download and build everything
  build_in_progress=1
  build_from_source
  build_in_progress=0
fi

# Final sanity check: fails the script if Tesseract did not end up on PATH.
tesseract -v