Merge pull request #6 from VikParuchuri/dev

Fix image resizing, py3.9
VikParuchuri · Jan 2, 2024 · 6c9f43a · 6c9f43a
2 parents fdc40d2 + ef13eeb
commit 6c9f43a
Show file tree

Hide file tree

Showing 7 changed files with 586 additions and 523 deletions.
diff --git a/README.md b/README.md
@@ -21,15 +21,11 @@ See more details in the [benchmarks](#benchmarks) section.
 
 ## Examples
 
-**Note** I added spaces after _ symbols because [Github math formatting is broken](https://github.com/github/markup/issues/1575).
+**Note** I added spaces after `_` symbols and removed `\,`, because [GitHub math formatting is broken](https://github.com/github/markup/issues/1575).
 
 ![Example 0](data/examples/0.png)
 
-**Detected Text** The potential $V_{i}$ of cell $\mathcal{C}_ {j}$ centred at position $\mathbf{r}_ {i}$ is related to the surface charge densities $\sigma_ {j}$ of cells $\mathcal{E}_ {j}$ $j\in[1,N]$ through the superposition principle as:
-
-$$V_ {i}\,=\,\sum_ {j=0}^{N}\,\frac{\sigma_ {j}}{4\pi\varepsilon_ {0}}\,\int_{\mathcal{E}_ {j}}\frac{1}{\left|\mathbf{r}_ {i}-\mathbf{r}^{\prime}\right|}\,\mathrm{d}^{2}\mathbf{r}^{\prime}\,=\,\sum_{j=0}^{N}\,Q_ {ij}\,\sigma_{j},$$
-
-where the integral over the surface of cell $\mathcal{C}_ {j}$ only depends on $\mathcal{C}{j}$ shape and on the relative position of the target point $\mathbf{r}_ {i}$ with respect to $\mathcal{C}_ {j}$ location, as $\sigma_ {j}$ is assumed constant over the whole surface of cell $\mathcal{C}_ {j}$.
+**Detected Text** The potential $V_ i$ of cell $\mathcal{C}_ i$ centred at position $\mathbf{r}_ i$ is related to the surface charge densities $\sigma_ j$ of cells $\mathcal{C}_ j$ $j\in[1,N]$ through the superposition principle as: $$V_ i = \sum_ {j=0}^{N} \frac{\sigma_ j}{4\pi\varepsilon_ 0} \int_ {\mathcal{C}_ j} \frac{1}{|\mathbf{r}_ i-\mathbf{r}'|} \mathrm{d}^2\mathbf{r}' = \sum_{j=0}^{N} Q_ {ij} \sigma_ j,$$ where the integral over the surface of cell $\mathcal{C}_ j$ only depends on $\mathcal{C}_ j$ shape and on the relative position of the target point $\mathbf{r}_ i$ with respect to $\mathcal{C}_ j$ location, as $\sigma_ j$ is assumed constant over the whole surface of cell $\mathcal{C}_ j$.
 
 | Image                      | OCR Markdown              |
 |----------------------------|---------------------------|
@@ -39,7 +35,7 @@ where the integral over the surface of cell $\mathcal{C}_ {j}$ only depends on $
 
 # Installation
 
-You'll need python 3.10+ and PyTorch. You may need to install the CPU version of torch first if you're not using a Mac or a GPU machine.  See [here](https://pytorch.org/get-started/locally/) for more details.
+You'll need Python 3.9+ and PyTorch. You may need to install the CPU version of torch first if you're not using a Mac or a GPU machine. See [here](https://pytorch.org/get-started/locally/) for more details.
 
 Install with:
 
@@ -80,6 +76,7 @@ texify /path/to/folder_or_file --max 8 --json_path results.json
 
 - `--max` is how many images in the folder to convert at most.  Omit this to convert all images in the folder.
 - `--json_path` is an optional path to a json file where the results will be saved.  If you omit this, the results will be saved to `data/results.json`.
+- `--katex_compatible` will make the output more compatible with KaTeX.
 
 ## Import and run
 
@@ -97,6 +94,8 @@ img = Image.open("test.png") # Your image name here
 results = batch_inference([img], model, processor)
 ```
 
+See `texify/output.py:replace_katex_invalid` if you want to make the output more compatible with KaTeX.
+
 # Manual install
 
 If you want to develop texify, you can install it manually:

diff --git a/data/examples/0.md b/data/examples/0.md
@@ -1,5 +1 @@
-The potential $V_{i}$ of cell $\mathcal{C}_ {j}$ centred at position $\mathbf{r}_ {i}$ is related to the surface charge densities $\sigma_ {j}$ of cells $\mathcal{E}_ {j}$ $j\in[1,N]$ through the superposition principle as:
-
-$$V_ {i}\,=\,\sum_ {j=0}^{N}\,\frac{\sigma_ {j}}{4\pi\varepsilon_ {0}}\,\int_{\mathcal{E}_ {j}}\frac{1}{\left|\mathbf{r}_ {i}-\mathbf{r}^{\prime}\right|}\,\mathrm{d}^{2}\mathbf{r}^{\prime}\,=\,\sum_{j=0}^{N}\,Q_ {ij}\,\sigma_{j},$$
-
-where the integral over the surface of cell $\mathcal{C}_ {j}$ only depends on $\mathcal{C}{j}$ shape and on the relative position of the target point $\mathbf{r}_ {i}$ with respect to $\mathcal{C}_ {j}$ location, as $\sigma_ {j}$ is assumed constant over the whole surface of cell $\mathcal{C}_ {j}$.
+The potential $V_ i$ of cell $\mathcal{C}_ i$ centred at position $\mathbf{r}_ i$ is related to the surface charge densities $\sigma_ j$ of cells $\mathcal{C}_ j$ $j\in[1,N]$ through the superposition principle as: $$V_ i = \sum_ {j=0}^{N} \frac{\sigma_ j}{4\pi\varepsilon_ 0} \int_ {\mathcal{C}_ j} \frac{1}{|\mathbf{r}_ i-\mathbf{r}'|} \mathrm{d}^2\mathbf{r}' = \sum_{j=0}^{N} Q_ {ij} \sigma_ j,$$ where the integral over the surface of cell $\mathcal{C}_ j$ only depends on $\mathcal{C}_ j$ shape and on the relative position of the target point $\mathbf{r}_ i$ with respect to $\mathcal{C}_ j$ location, as $\sigma_ j$ is assumed constant over the whole surface of cell $\mathcal{C}_ j$.
diff --git a/ocr_app.py b/ocr_app.py
@@ -9,20 +9,13 @@
 from texify.inference import batch_inference
 from texify.model.model import load_model
 from texify.model.processor import load_processor
-from texify.settings import settings
-import subprocess
-import re
+from texify.output import replace_katex_invalid
 from PIL import Image
 
-MAX_WIDTH = 1000
+MAX_WIDTH = 800
+MAX_HEIGHT = 1000
 
 
-def replace_katex_invalid(string):
-    # KaTeX cannot render all LaTeX, so we need to replace some things
-    string = re.sub(r'\\tag\{.*?\}', '', string)
-    string = re.sub(r'\\Big\{(.*?)\}|\\big\{(.*?)\}', r'\1\2', string)
-    return string
-
 @st.cache_resource()
 def load_model_cached():
     return load_model()
@@ -63,6 +56,12 @@ def get_uploaded_image(in_file):
     return Image.open(in_file).convert("RGB")
 
 
+def resize_image(pil_image):
+    if pil_image is None:
+        return
+    pil_image.thumbnail((MAX_WIDTH, MAX_HEIGHT), Image.Resampling.LANCZOS)
+
+
 @st.cache_data()
 def page_count(pdf_file):
     doc = open_pdf(pdf_file)
@@ -76,12 +75,8 @@ def get_canvas_hash(pil_image):
 @st.cache_data()
 def get_image_size(pil_image):
     if pil_image is None:
-        return 800, 600
+        return MAX_HEIGHT, MAX_WIDTH
     height, width = pil_image.height, pil_image.width
-    if width > MAX_WIDTH:
-        scale = MAX_WIDTH / width
-        height = int(height * scale)
-        width = MAX_WIDTH
     return height, width
 
 
@@ -115,6 +110,9 @@ def get_image_size(pil_image):
     pil_image = get_uploaded_image(in_file)
     whole_image = st.sidebar.button("OCR image")
 
+# Resize to max bounds
+resize_image(pil_image)
+
 temperature = st.sidebar.slider("Generation temperature:", min_value=0.0, max_value=1.0, value=0.0, step=0.05)
 
 canvas_hash = get_canvas_hash(pil_image) if pil_image else "canvas"

diff --git a/ocr_image.py b/ocr_image.py
@@ -5,21 +5,25 @@
 from texify.model.model import load_model
 from texify.model.processor import load_processor
 from PIL import Image
+
+from texify.output import replace_katex_invalid
 from texify.settings import settings
 from texify.util import is_valid_image
 import json
 
 
-def inference_single_image(image_path, json_path, model, processor):
+def inference_single_image(image_path, json_path, model, processor, katex_compatible=False):
     image = Image.open(image_path)
     text = batch_inference([image], model, processor)
+    if katex_compatible:
+        text = [replace_katex_invalid(t) for t in text]
     write_data = [{"image_path": image_path, "text": text[0]}]
     with open(json_path, "w+") as f:
         json_repr = json.dumps(write_data, indent=4)
         f.write(json_repr)
 
 
-def inference_image_dir(image_dir, json_path, model, processor, max=None):
+def inference_image_dir(image_dir, json_path, model, processor, max=None, katex_compatible=False):
     image_paths = [os.path.join(image_dir, image_name) for image_name in os.listdir(image_dir)]
     image_paths = [ip for ip in image_paths if is_valid_image(ip)]
     if max:
@@ -31,6 +35,8 @@ def inference_image_dir(image_dir, json_path, model, processor, max=None):
         images = [Image.open(image_path) for image_path in batch]
         text = batch_inference(images, model, processor)
         for image_path, t in zip(batch, text):
+            if katex_compatible:
+                t = replace_katex_invalid(t)
             write_data.append({"image_path": image_path, "text": t})
 
     with open(json_path, "w+") as f:
@@ -43,6 +49,7 @@ def main():
     parser.add_argument("image", type=str, help="Path to image or folder of images to OCR.")
     parser.add_argument("--max", type=int, help="Maximum number of images to OCR if a folder is passes.", default=None)
     parser.add_argument("--json_path", type=str, help="Path to JSON file to save results to.", default=os.path.join(settings.DATA_DIR, "results.json"))
+    parser.add_argument("--katex_compatible", action="store_true", help="Make output KaTeX compatible.", default=False)
     args = parser.parse_args()
 
     image_path = args.image
@@ -53,9 +60,9 @@ def main():
     os.makedirs(os.path.dirname(json_path), exist_ok=True)
 
     if os.path.isfile(image_path):
-        inference_single_image(image_path, json_path, model, processor)
+        inference_single_image(image_path, json_path, model, processor, args.katex_compatible)
     else:
-        inference_image_dir(image_path, json_path, model, processor, args.max)
+        inference_image_dir(image_path, json_path, model, processor, args.max, args.katex_compatible)
 
     print(f"Wrote results to {json_path}")