diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..0ff0546 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,28 @@ +# Use an official Python runtime as a parent image +FROM python:3.10-slim-buster + +# Set the working directory in the container to /app +WORKDIR /app + +# Copy the current directory contents into the container at /app +ADD . /app + +# Update and install system libraries +RUN apt-get update && apt-get install -y \ + ghostscript \ + --no-install-recommends && \ + rm -rf /var/lib/apt/lists/* + + +# Install Poetry +RUN pip install poetry + +# Install project dependencies +RUN poetry config virtualenvs.create false \ + && poetry install --no-interaction --no-ansi + +# Make port 80 available to the world outside this container +EXPOSE 8000 + +# Run app.py when the container launches +CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"] diff --git a/constant.py b/constant.py new file mode 100644 index 0000000..8205280 --- /dev/null +++ b/constant.py @@ -0,0 +1,44 @@ +ps_content_template = '''% Define an ICC profile : +/ICCProfile (/usr/share/ghostscript/{gs_version}/iccprofiles/default_rgb.icc) def + +% Define an OutputConditionIdentifier : +/OutputConditionIdentifier (default_rgb) def + +% Create a PDF stream object to hold the XML invoice : +[ /_objdef {{InvoiceStream}} /type /stream /OBJ pdfmark + +% Add the required entries to that stream dictionary : +[ {{InvoiceStream}} << /Type /EmbeddedFile /Subtype (text/xml) cvn /Params << /ModDate (D:20130121081433+01'00') >> >> /PUT pdfmark + +% Read the XML invoice data from the file and store it in the PDF stream : +[ {{InvoiceStream}} ({xml_path}) (r) file /PUT pdfmark + +% Close the PDF stream : +[ {{InvoiceStream}} /CLOSE pdfmark + +% Create a PDF file specification dictionary to hold the file info : +[ /_objdef {{InvoiceFile}} /type /dict /OBJ pdfmark + +% Add the required entries to that file specification dictionary : +[ {{InvoiceFile}} << + /Type /Filespec + /F ({xml_filename}) + /EF << /F {{InvoiceStream}} >> +>> /PUT pdfmark + + +% Associate the file specification with the PDF document : +[/Root << + /Names << /EmbeddedFiles << /Names [ ({xml_filename}) {{InvoiceFile}} ] >> >> +>> /pdfmark + + +% Create an annotation to display the attachment in the PDF viewer: +[{{Catalog}} << + /Names << /EmbeddedFiles << /Names [ ({xml_filename}) {{InvoiceFile}} ] >> >> + /ViewerPreferences << + /DisplayDocTitle true + >> + /PageLayout /SinglePage + /PageMode /UseAttachments +>> /PUT pdfmark''' diff --git a/main.py b/main.py new file mode 100644 index 0000000..805f51d --- /dev/null +++ b/main.py @@ -0,0 +1,25 @@ +from fastapi import FastAPI, UploadFile, File +from fastapi.responses import FileResponse +import tempfile +import secrets +import subprocess +import os + +from utils import save_temp_file, create_post_script_file + +app = FastAPI() + + + +@app.post("/upload") +async def upload_files(pdf_file: UploadFile = File(...), xml_file: UploadFile = File(...)): + session_id = secrets.token_hex(8) + pdf_file_path = await save_temp_file(await pdf_file.read(), '.pdf', session_id) + xml_file_path = await save_temp_file(await xml_file.read(), '.xml', session_id) + ps_file = await create_post_script_file(xml_file_path, session_id) + result_file_path = os.path.join(tempfile.mkdtemp(), session_id) + '.pdf' + command = f"gs -sDEVICE=pdfwrite -dNOSAFER -dPDFA=3 -sColorConversionStrategy=RGB -dPDFACompatibilityPolicy=2 -o {result_file_path} {pdf_file_path} {ps_file} -dDEBUG" + subprocess.run(command, shell=True) + + print(result_file_path) + return FileResponse(result_file_path, filename="result.pdf") \ No newline at end of file diff --git a/poetry.lock b/poetry.lock new file mode 100644 index 0000000..2e0b7e5 --- /dev/null +++ b/poetry.lock @@ -0,0 +1,229 @@ +# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand. + +[[package]] +name = "anyio" +version = "3.7.0" +description = "High level compatibility layer for multiple asynchronous event loop implementations" +optional = false +python-versions = ">=3.7" +files = [ + {file = "anyio-3.7.0-py3-none-any.whl", hash = "sha256:eddca883c4175f14df8aedce21054bfca3adb70ffe76a9f607aef9d7fa2ea7f0"}, + {file = "anyio-3.7.0.tar.gz", hash = "sha256:275d9973793619a5374e1c89a4f4ad3f4b0a5510a2b5b939444bee8f4c4d37ce"}, +] + +[package.dependencies] +exceptiongroup = {version = "*", markers = "python_version < \"3.11\""} +idna = ">=2.8" +sniffio = ">=1.1" + +[package.extras] +doc = ["Sphinx (>=6.1.0)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd-theme", "sphinxcontrib-jquery"] +test = ["anyio[trio]", "coverage[toml] (>=4.5)", "hypothesis (>=4.0)", "mock (>=4)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "uvloop (>=0.17)"] +trio = ["trio (<0.22)"] + +[[package]] +name = "click" +version = "8.1.3" +description = "Composable command line interface toolkit" +optional = false +python-versions = ">=3.7" +files = [ + {file = "click-8.1.3-py3-none-any.whl", hash = "sha256:bb4d8133cb15a609f44e8213d9b391b0809795062913b383c62be0ee95b1db48"}, + {file = "click-8.1.3.tar.gz", hash = "sha256:7682dc8afb30297001674575ea00d1814d808d6a36af415a82bd481d37ba7b8e"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "platform_system == \"Windows\""} + +[[package]] +name = "colorama" +version = "0.4.6" +description = "Cross-platform colored terminal text." +optional = false +python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +files = [ + {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, + {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, +] + +[[package]] +name = "exceptiongroup" +version = "1.1.1" +description = "Backport of PEP 654 (exception groups)" +optional = false +python-versions = ">=3.7" +files = [ + {file = "exceptiongroup-1.1.1-py3-none-any.whl", hash = "sha256:232c37c63e4f682982c8b6459f33a8981039e5fb8756b2074364e5055c498c9e"}, + {file = "exceptiongroup-1.1.1.tar.gz", hash = "sha256:d484c3090ba2889ae2928419117447a14daf3c1231d5e30d0aae34f354f01785"}, +] + +[package.extras] +test = ["pytest (>=6)"] + +[[package]] +name = "fastapi" +version = "0.98.0" +description = "FastAPI framework, high performance, easy to learn, fast to code, ready for production" +optional = false +python-versions = ">=3.7" +files = [ + {file = "fastapi-0.98.0-py3-none-any.whl", hash = "sha256:f4165fb1fe3610c52cb1b8282c1480de9c34bc270f56a965aa93a884c350d605"}, + {file = "fastapi-0.98.0.tar.gz", hash = "sha256:0d3c18886f652038262b5898fec6b09f4ca92ee23e9d9b1d1d24e429f84bf27b"}, +] + +[package.dependencies] +pydantic = ">=1.7.4,<1.8 || >1.8,<1.8.1 || >1.8.1,<2.0.0" +starlette = ">=0.27.0,<0.28.0" + +[package.extras] +all = ["email-validator (>=1.1.1)", "httpx (>=0.23.0)", "itsdangerous (>=1.1.0)", "jinja2 (>=2.11.2)", "orjson (>=3.2.1)", "python-multipart (>=0.0.5)", "pyyaml (>=5.3.1)", "ujson (>=4.0.1,!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0)", "uvicorn[standard] (>=0.12.0)"] + +[[package]] +name = "h11" +version = "0.14.0" +description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1" +optional = false +python-versions = ">=3.7" +files = [ + {file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"}, + {file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"}, +] + +[[package]] +name = "idna" +version = "3.4" +description = "Internationalized Domain Names in Applications (IDNA)" +optional = false +python-versions = ">=3.5" +files = [ + {file = "idna-3.4-py3-none-any.whl", hash = "sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2"}, + {file = "idna-3.4.tar.gz", hash = "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4"}, +] + +[[package]] +name = "pydantic" +version = "1.10.10" +description = "Data validation and settings management using python type hints" +optional = false +python-versions = ">=3.7" +files = [ + {file = "pydantic-1.10.10-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:adad1ee4ab9888f12dac2529276704e719efcf472e38df7813f5284db699b4ec"}, + {file = "pydantic-1.10.10-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:7a7db03339893feef2092ff7b1afc9497beed15ebd4af84c3042a74abce02d48"}, + {file = "pydantic-1.10.10-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:67b3714b97ff84b2689654851c2426389bcabfac9080617bcf4306c69db606f6"}, + {file = "pydantic-1.10.10-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:edfdf0a5abc5c9bf2052ebaec20e67abd52e92d257e4f2d30e02c354ed3e6030"}, + {file = "pydantic-1.10.10-cp310-cp310-musllinux_1_1_i686.whl", hash = "sha256:20a3b30fd255eeeb63caa9483502ba96b7795ce5bf895c6a179b3d909d9f53a6"}, + {file = "pydantic-1.10.10-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:db4c7f7e60ca6f7d6c1785070f3e5771fcb9b2d88546e334d2f2c3934d949028"}, + {file = "pydantic-1.10.10-cp310-cp310-win_amd64.whl", hash = "sha256:a2d5be50ac4a0976817144c7d653e34df2f9436d15555189f5b6f61161d64183"}, + {file = "pydantic-1.10.10-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:566a04ba755e8f701b074ffb134ddb4d429f75d5dced3fbd829a527aafe74c71"}, + {file = "pydantic-1.10.10-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:f79db3652ed743309f116ba863dae0c974a41b688242482638b892246b7db21d"}, + {file = "pydantic-1.10.10-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c62376890b819bebe3c717a9ac841a532988372b7e600e76f75c9f7c128219d5"}, + {file = "pydantic-1.10.10-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4870f13a4fafd5bc3e93cff3169222534fad867918b188e83ee0496452978437"}, + {file = "pydantic-1.10.10-cp311-cp311-musllinux_1_1_i686.whl", hash = "sha256:990027e77cda6072a566e433b6962ca3b96b4f3ae8bd54748e9d62a58284d9d7"}, + {file = "pydantic-1.10.10-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:8c40964596809eb616d94f9c7944511f620a1103d63d5510440ed2908fc410af"}, + {file = "pydantic-1.10.10-cp311-cp311-win_amd64.whl", hash = "sha256:ea9eebc2ebcba3717e77cdeee3f6203ffc0e78db5f7482c68b1293e8cc156e5e"}, + {file = "pydantic-1.10.10-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:762aa598f79b4cac2f275d13336b2dd8662febee2a9c450a49a2ab3bec4b385f"}, + {file = "pydantic-1.10.10-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6dab5219659f95e357d98d70577b361383057fb4414cfdb587014a5f5c595f7b"}, + {file = "pydantic-1.10.10-cp37-cp37m-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:f3d4ee957a727ccb5a36f1b0a6dbd9fad5dedd2a41eada99a8df55c12896e18d"}, + {file = "pydantic-1.10.10-cp37-cp37m-musllinux_1_1_i686.whl", hash = "sha256:b69f9138dec566962ec65623c9d57bee44412d2fc71065a5f3ebb3820bdeee96"}, + {file = "pydantic-1.10.10-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:7aa75d1bd9cc275cf9782f50f60cddaf74cbaae19b6ada2a28e737edac420312"}, + {file = "pydantic-1.10.10-cp37-cp37m-win_amd64.whl", hash = "sha256:9f62a727f5c590c78c2d12fda302d1895141b767c6488fe623098f8792255fe5"}, + {file = "pydantic-1.10.10-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:aac218feb4af73db8417ca7518fb3bade4534fcca6e3fb00f84966811dd94450"}, + {file = "pydantic-1.10.10-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:88546dc10a40b5b52cae87d64666787aeb2878f9a9b37825aedc2f362e7ae1da"}, + {file = "pydantic-1.10.10-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c41bbaae89e32fc582448e71974de738c055aef5ab474fb25692981a08df808a"}, + {file = "pydantic-1.10.10-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2b71bd504d1573b0b722ae536e8ffb796bedeef978979d076bf206e77dcc55a5"}, + {file = "pydantic-1.10.10-cp38-cp38-musllinux_1_1_i686.whl", hash = "sha256:e088e3865a2270ecbc369924cd7d9fbc565667d9158e7f304e4097ebb9cf98dd"}, + {file = "pydantic-1.10.10-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:3403a090db45d4027d2344859d86eb797484dfda0706cf87af79ace6a35274ef"}, + {file = "pydantic-1.10.10-cp38-cp38-win_amd64.whl", hash = "sha256:e0014e29637125f4997c174dd6167407162d7af0da73414a9340461ea8573252"}, + {file = "pydantic-1.10.10-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:9965e49c6905840e526e5429b09e4c154355b6ecc0a2f05492eda2928190311d"}, + {file = "pydantic-1.10.10-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:748d10ab6089c5d196e1c8be9de48274f71457b01e59736f7a09c9dc34f51887"}, + {file = "pydantic-1.10.10-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:86936c383f7c38fd26d35107eb669c85d8f46dfceae873264d9bab46fe1c7dde"}, + {file = "pydantic-1.10.10-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7a26841be620309a9697f5b1ffc47dce74909e350c5315ccdac7a853484d468a"}, + {file = "pydantic-1.10.10-cp39-cp39-musllinux_1_1_i686.whl", hash = "sha256:409b810f387610cc7405ab2fa6f62bdf7ea485311845a242ebc0bd0496e7e5ac"}, + {file = "pydantic-1.10.10-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:ce937a2a2c020bcad1c9fde02892392a1123de6dda906ddba62bfe8f3e5989a2"}, + {file = "pydantic-1.10.10-cp39-cp39-win_amd64.whl", hash = "sha256:37ebddef68370e6f26243acc94de56d291e01227a67b2ace26ea3543cf53dd5f"}, + {file = "pydantic-1.10.10-py3-none-any.whl", hash = "sha256:a5939ec826f7faec434e2d406ff5e4eaf1716eb1f247d68cd3d0b3612f7b4c8a"}, + {file = "pydantic-1.10.10.tar.gz", hash = "sha256:3b8d5bd97886f9eb59260594207c9f57dce14a6f869c6ceea90188715d29921a"}, +] + +[package.dependencies] +typing-extensions = ">=4.2.0" + +[package.extras] +dotenv = ["python-dotenv (>=0.10.4)"] +email = ["email-validator (>=1.0.3)"] + +[[package]] +name = "python-multipart" +version = "0.0.6" +description = "A streaming multipart parser for Python" +optional = false +python-versions = ">=3.7" +files = [ + {file = "python_multipart-0.0.6-py3-none-any.whl", hash = "sha256:ee698bab5ef148b0a760751c261902cd096e57e10558e11aca17646b74ee1c18"}, + {file = "python_multipart-0.0.6.tar.gz", hash = "sha256:e9925a80bb668529f1b67c7fdb0a5dacdd7cbfc6fb0bff3ea443fe22bdd62132"}, +] + +[package.extras] +dev = ["atomicwrites (==1.2.1)", "attrs (==19.2.0)", "coverage (==6.5.0)", "hatch", "invoke (==1.7.3)", "more-itertools (==4.3.0)", "pbr (==4.3.0)", "pluggy (==1.0.0)", "py (==1.11.0)", "pytest (==7.2.0)", "pytest-cov (==4.0.0)", "pytest-timeout (==2.1.0)", "pyyaml (==5.1)"] + +[[package]] +name = "sniffio" +version = "1.3.0" +description = "Sniff out which async library your code is running under" +optional = false +python-versions = ">=3.7" +files = [ + {file = "sniffio-1.3.0-py3-none-any.whl", hash = "sha256:eecefdce1e5bbfb7ad2eeaabf7c1eeb404d7757c379bd1f7e5cce9d8bf425384"}, + {file = "sniffio-1.3.0.tar.gz", hash = "sha256:e60305c5e5d314f5389259b7f22aaa33d8f7dee49763119234af3755c55b9101"}, +] + +[[package]] +name = "starlette" +version = "0.27.0" +description = "The little ASGI library that shines." +optional = false +python-versions = ">=3.7" +files = [ + {file = "starlette-0.27.0-py3-none-any.whl", hash = "sha256:918416370e846586541235ccd38a474c08b80443ed31c578a418e2209b3eef91"}, + {file = "starlette-0.27.0.tar.gz", hash = "sha256:6a6b0d042acb8d469a01eba54e9cda6cbd24ac602c4cd016723117d6a7e73b75"}, +] + +[package.dependencies] +anyio = ">=3.4.0,<5" + +[package.extras] +full = ["httpx (>=0.22.0)", "itsdangerous", "jinja2", "python-multipart", "pyyaml"] + +[[package]] +name = "typing-extensions" +version = "4.7.0" +description = "Backported and Experimental Type Hints for Python 3.7+" +optional = false +python-versions = ">=3.7" +files = [ + {file = "typing_extensions-4.7.0-py3-none-any.whl", hash = "sha256:5d8c9dac95c27d20df12fb1d97b9793ab8b2af8a3a525e68c80e21060c161771"}, + {file = "typing_extensions-4.7.0.tar.gz", hash = "sha256:935ccf31549830cda708b42289d44b6f74084d616a00be651601a4f968e77c82"}, +] + +[[package]] +name = "uvicorn" +version = "0.22.0" +description = "The lightning-fast ASGI server." +optional = false +python-versions = ">=3.7" +files = [ + {file = "uvicorn-0.22.0-py3-none-any.whl", hash = "sha256:e9434d3bbf05f310e762147f769c9f21235ee118ba2d2bf1155a7196448bd996"}, + {file = "uvicorn-0.22.0.tar.gz", hash = "sha256:79277ae03db57ce7d9aa0567830bbb51d7a612f54d6e1e3e92da3ef24c2c8ed8"}, +] + +[package.dependencies] +click = ">=7.0" +h11 = ">=0.8" + +[package.extras] +standard = ["colorama (>=0.4)", "httptools (>=0.5.0)", "python-dotenv (>=0.13)", "pyyaml (>=5.1)", "uvloop (>=0.14.0,!=0.15.0,!=0.15.1)", "watchfiles (>=0.13)", "websockets (>=10.4)"] + +[metadata] +lock-version = "2.0" +python-versions = "^3.10" +content-hash = "289475586c529d75b54af2f3adb56a909cb386dbcfb26d767e2283e1e1c7c17a" diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..4a85c6d --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,18 @@ +[tool.poetry] +name = "pypdfa3-zugferd" +version = "0.1.0" +description = "Pythonfastapi to convert pdf to zugferd format" +authors = ["Mayur Rawte "] +license = "MIT" + +[tool.poetry.dependencies] +python = "^3.10" +fastapi = "^0.98.0" +uvicorn = "^0.22.0" +python-multipart = "^0.0.6" + +[tool.poetry.dev-dependencies] + +[build-system] +requires = ["poetry-core>=1.0.0"] +build-backend = "poetry.core.masonry.api" diff --git a/utils.py b/utils.py new file mode 100644 index 0000000..69d5f4a --- /dev/null +++ b/utils.py @@ -0,0 +1,40 @@ +import tempfile +from constant import ps_content_template +from fastapi import UploadFile +import subprocess + + +xml_path = '/home/thepsygeek/zugferd/sample.xml' +xml_filename = 'sample.xml' + + + + +async def save_temp_file(upload_file: UploadFile, file_extension: str) -> str: + with tempfile.NamedTemporaryFile(suffix=file_extension, delete=False) as temp_file: + # Save the file + temp_file.write(await upload_file.read()) + temp_file_path = temp_file.name + return temp_file_path + + +import os + +async def save_temp_file(file_data: bytes, file_extension: str, folder: str) -> str: + os.makedirs(folder, exist_ok=True) + with tempfile.NamedTemporaryFile(suffix=file_extension, delete=False, dir=folder) as temp_file: + temp_file.write(file_data) + temp_file_path = temp_file.name + return temp_file_path + + +async def create_post_script_file(xml_file_path: str, session_id): + content = ps_content_template.format(xml_path=xml_file_path, xml_filename='invoice.xml', gs_version=get_gs_version()) + return await save_temp_file(content.encode('utf-8'), '.ps', session_id) + +def get_gs_version(): + try: + gs_version = subprocess.check_output(["gs", "--version"]).strip().decode() + return gs_version + except subprocess.CalledProcessError: + return "Ghostscript not found" \ No newline at end of file