diff --git a/.github/workflows/test-pr.yml b/.github/workflows/test-pr.yml new file mode 100644 index 0000000..774f7c5 --- /dev/null +++ b/.github/workflows/test-pr.yml @@ -0,0 +1,35 @@ +name: PR QA + +on: + pull_request: + types: [opened, synchronize, reopened] + +jobs: + build: + name: Checkout and Build + runs-on: ubuntu-20.04 + + strategy: + matrix: + java-version: [8, 11, 16, 17] + + steps: + - uses: actions/checkout@v2 + - name: JDK setup + uses: actions/setup-java@v2 + with: + java-version: ${{ matrix.java-version }} + distribution: 'temurin' + cache: maven + - name: Build with Maven + run: mvn --batch-mode --update-snapshots verify + + coverage: + name: Quality Assurance + runs-on: ubuntu-20.04 + needs: [ build ] + + steps: + - uses: actions/checkout@v2 + - name: Codacy analysis reporting + uses: codacy/codacy-analysis-cli-action@master diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..72a05c9 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,23 @@ +name: Test + +on: + workflow_call: + +jobs: + build: + name: Checkout and Build + runs-on: ubuntu-20.04 + + steps: + - uses: actions/checkout@v2 + - name: JDK setup + uses: actions/setup-java@v2 + with: + java-version: 11 + distribution: 'temurin' + cache: maven + - name: Test + run: cd policy-generator + mvn clean package + cd target + java -jar policy-generator.jar diff --git a/corpus-wiki-generation/pom.xml b/corpus-wiki-generation/pom.xml new file mode 100644 index 0000000..d55c995 --- /dev/null +++ b/corpus-wiki-generation/pom.xml @@ -0,0 +1,72 @@ + + + 4.0.0 + + + verapdf-tools + org.verapdf + 1.0-SNAPSHOT + + + org.verapdf + corpus-wiki-generator + 1.0-SNAPSHOT + + + 11 + 11 + + + + + org.verapdf + validation-model + ${verapdf.version} + + + + org.apache.pdfbox + pdfbox + 2.0.26 + compile + + + + + + + + maven-compiler-plugin + + + + org.apache.maven.plugins + maven-assembly-plugin + + + + org.verapdf.tools.CorpusWikiGenerator + + + + jar-with-dependencies + + false + + + + make-assembly + package + + single + + + + + + + + + diff --git a/corpus-wiki-generation/src/main/java/org/verapdf/tools/CorpusDownload.java b/corpus-wiki-generation/src/main/java/org/verapdf/tools/CorpusDownload.java new file mode 100644 index 0000000..b827ccb --- /dev/null +++ b/corpus-wiki-generation/src/main/java/org/verapdf/tools/CorpusDownload.java @@ -0,0 +1,78 @@ +package org.verapdf.tools; + + +import java.io.*; +import java.net.HttpURLConnection; +import java.net.URL; +import java.net.URLDecoder; +import java.util.HashMap; +import java.util.Map; + +//code from integration-tests +public class CorpusDownload { + + public static File createTempFileFromCorpus(final URL downloadLoc, final String prefix) throws IOException { + File tempFile = File.createTempFile(prefix, ".zip"); + System.out.println("Downloading: " + downloadLoc + ", to temp:" + tempFile); + int totalBytes = 0; + try (OutputStream output = new FileOutputStream(tempFile); + InputStream corpusInput = handleRedirects(downloadLoc)) { + byte[] buffer = new byte[8 * 1024]; + int bytesRead; + while ((bytesRead = corpusInput.read(buffer)) != -1) { + output.write(buffer, 0, bytesRead); + totalBytes += bytesRead; + } + } + System.out.println("Downloaded: " + totalBytes + " bytes"); + tempFile.deleteOnExit(); + return tempFile; + } + + static InputStream handleRedirects(URL url) throws IOException { + if (!url.getProtocol().startsWith("http")) { + return url.openStream(); + } + System.err.println("Prot:" + url.getProtocol()); + URL resourceUrl; + URL base; + URL next; + Map visited; + HttpURLConnection conn; + String location; + String urlString = url.toExternalForm(); + int times; + + visited = new HashMap<>(); + + while (true) { + times = visited.compute(urlString, (key, count) -> count == null ? 1 : count + 1); + + if (times > 3) + throw new IOException("Stuck in redirect loop"); + + resourceUrl = new URL(urlString); + conn = (HttpURLConnection) resourceUrl.openConnection(); + + conn.setConnectTimeout(15000); + conn.setReadTimeout(15000); + conn.setInstanceFollowRedirects(false); // Make the logic below easier to detect redirections + conn.setRequestProperty("User-Agent", "Mozilla/5.0..."); + + switch (conn.getResponseCode()) { + case HttpURLConnection.HTTP_MOVED_PERM: + case HttpURLConnection.HTTP_MOVED_TEMP: + location = conn.getHeaderField("Location"); + location = URLDecoder.decode(location, "UTF-8"); + base = new URL(urlString); + next = new URL(base, location); // Deal with relative URLs + urlString = next.toExternalForm(); + continue; + } + + break; + } + + return conn.getInputStream(); + } +} diff --git a/corpus-wiki-generation/src/main/java/org/verapdf/tools/CorpusWikiGenerator.java b/corpus-wiki-generation/src/main/java/org/verapdf/tools/CorpusWikiGenerator.java new file mode 100644 index 0000000..28fc9cd --- /dev/null +++ b/corpus-wiki-generation/src/main/java/org/verapdf/tools/CorpusWikiGenerator.java @@ -0,0 +1,212 @@ +package org.verapdf.tools; + +import java.io.*; +import java.net.URI; +import java.util.*; +import java.util.zip.ZipEntry; +import java.util.zip.ZipFile; + +import org.apache.pdfbox.text.PDFTextStripper; +import org.verapdf.exceptions.InvalidPasswordException; +import org.verapdf.pd.PDDocument; +import org.verapdf.pd.PDOutlineDictionary; +import org.verapdf.pd.PDOutlineItem; + +/** + * @author Maxim Plushchou + */ +public class CorpusWikiGenerator { + + private static final String veraUrl = "https://github.com/veraPDF/veraPDF-corpus/archive/staging.zip"; + private static final String LINK_START = "https://raw.githubusercontent.com/veraPDF/veraPDF-corpus/staging/"; + private static final String STAGING = "staging"; + private static final String PDF_UA_1 = "PDF_UA-1"; + private static final String EXPECTED_MESSAGE = "expected message"; + private static final String PDF_EXTENSION = ".pdf"; + private static PrintWriter writer; + private static String corpusPart; + + public static void main(String[] args) throws IOException { + writer = new PrintWriter(new FileOutputStream("test.md")); + File zipFile; + try { + zipFile = CorpusDownload.createTempFileFromCorpus(URI.create(veraUrl).toURL(), "corpusWiki"); + } catch (IOException excep) { + throw new IllegalStateException(excep); + } + ZipFile zipSource = new ZipFile(zipFile); + Enumeration entries = zipSource.entries(); + SortedSet entriesSet = new TreeSet<>(new ZipEntryComparator()); + while (entries.hasMoreElements()) { + entriesSet.add(entries.nextElement()); + } + for (ZipEntry entry : entriesSet) { + if (entry.isDirectory()) { + printDirectory(entry); + } else if (entry.getName().endsWith(PDF_EXTENSION)) { + try { + printFileDescription(zipSource, entry); + } catch (InvalidPasswordException e) { + writer.println("Encrypted pdf"); + System.out.println(entry.getName() + ": Encrypted pdf"); + } + } + } + } + + private static int getHeadingLevel(String directoryName) { + return directoryName.length() - directoryName.replace("/","").length(); + } + + private static void printDirectory(ZipEntry entry) throws FileNotFoundException { + String directoryName = entry.getName(); + directoryName = directoryName.substring(directoryName.indexOf(STAGING) + STAGING.length() + 1); + int headingLevel = getHeadingLevel(directoryName); + if (!directoryName.isEmpty()) { + directoryName = directoryName.substring(0, directoryName.length() - 1); + } + if (!directoryName.isEmpty() && headingLevel > 0) { + directoryName = directoryName.substring(directoryName.lastIndexOf("/") + 1); + } + if (directoryName.isEmpty()) { + return; + } + if (headingLevel == 1) { + corpusPart = directoryName; + writer.flush(); + writer.close(); + writer = new PrintWriter(new FileOutputStream(directoryName + ".md")); + } else { + printHeading(directoryName, headingLevel); + } + } + + private static void printHeading(String directoryName, int headingLevel) { + for (int i = 1; i < headingLevel; i++) { + writer.print("#"); + } + writer.println(" " + directoryName); + writer.println(); + } + + private static void printFileDescription(ZipFile zipSource, ZipEntry entry) throws IOException { + PDDocument document = new PDDocument(zipSource.getInputStream(entry)); + printFileName(entry); + printLinkToFile(entry); + PDOutlineDictionary outlines = document.getOutlines(); + if (outlines != null) { + PDOutlineItem outlineItem = outlines.getFirst(); + if (outlineItem != null) { + if (outlineItem.getTitle() == null) { + writer.println(" null title"); + } + if (PDF_UA_1.equals(corpusPart)) { + printTextFromPDFUAOutlines(outlineItem); + } else { + printTextFromOutlines(outlineItem); + } + } else { + printTextFromPagesContents(zipSource, entry); + } + } else { + printTextFromPagesContents(zipSource, entry); + } + writer.println(); + document.close(); + } + + private static void printTextFromPagesContents(ZipFile zipSource, ZipEntry entry) throws IOException { + org.apache.pdfbox.pdmodel.PDDocument pdDocument = org.apache.pdfbox.pdmodel.PDDocument.load(zipSource.getInputStream(entry)); + PDFTextStripper pdfStripper = new PDFTextStripper(); + String text = pdfStripper.getText(pdDocument); + String[] messages = text.split("\n"); + int outlinesIndex = -1; + for (int i = 0; i < messages.length; i++) { + if (messages[i].contains("Outlines:")) { + outlinesIndex = i; + } + } + writer.print(": "); + for (int i = outlinesIndex + 2; i < messages.length - 1; i++) { + messages[i] = messages[i].replace("\r",""); + if (stringStartsWithLabel(messages[i])) { + messages[i] = messages[i].substring(2); + } + if (i == messages.length - 2 || stringStartsWithLabel(messages[i + 1])) { + messages[i] = messages[i] + "."; + writer.println(messages[i]); + } else { + writer.print(messages[i] + " "); + } + } + pdDocument.close(); + } + + private static boolean stringStartsWithLabel(String str) { + return str.startsWith("- ") || str.startsWith("• "); + } + + private static void printFileName(ZipEntry entry) { + String fileName = entry.getName(); + fileName = fileName.substring(fileName.lastIndexOf("/") + 1); + writer.print("[" + fileName + "]"); + } + + private static void printLinkToFile(ZipEntry entry) { + String fileLink = entry.getName(); + fileLink = fileLink.substring(fileLink.indexOf(STAGING) + STAGING.length() + 1).replace(" ", "%20"); + writer.print("(" + LINK_START + fileLink + ")"); + } + + private static void printTextFromPDFUAOutlines(PDOutlineItem outlineItem) { + writer.print(": "); + outlineItem = outlineItem.getNext(); + while (outlineItem != null && outlineItem.getNext() != null) { + if (outlineItem.getTitle() != null && outlineItem.getTitle().length() < 15_000) { + String title = outlineItem.getTitle(); + title = getCorrectMDString(title); + title = title.replace("\n", ""); + if (!title.endsWith(".")) { + title = title + "."; + } + writer.println(title); + printChildrenOutlines(outlineItem.getFirst()); + } + outlineItem = outlineItem.getNext(); + } + } + + private static String getCorrectMDString(String str) { + return str.replace("<", "\\<").replace(">","\\>"); + } + + private static void printChildrenOutlines(PDOutlineItem outlineItem) { + while (outlineItem != null) { + if (outlineItem.getTitle() != null) { + writer.println(outlineItem.getTitle()); + } + printChildrenOutlines(outlineItem.getFirst()); + outlineItem = outlineItem.getNext(); + } + } + + private static void printTextFromOutlines(PDOutlineItem outlineItem) { + boolean isPrinted = false; + while (outlineItem != null) { + isPrinted = printTitle(outlineItem.getTitle(), isPrinted); + printTextFromOutlines(outlineItem.getFirst()); + outlineItem = outlineItem.getNext(); + } + } + + private static boolean printTitle(String string, boolean isPrinted) { + if (string.contains(EXPECTED_MESSAGE) || isPrinted) { + String message = string.replace(EXPECTED_MESSAGE, ""); + if (message.length() < 15_000) { + writer.println(message); + } + return true; + } + return false; + } +} diff --git a/corpus-wiki-generation/src/main/java/org/verapdf/tools/ZipEntryComparator.java b/corpus-wiki-generation/src/main/java/org/verapdf/tools/ZipEntryComparator.java new file mode 100644 index 0000000..519b1d8 --- /dev/null +++ b/corpus-wiki-generation/src/main/java/org/verapdf/tools/ZipEntryComparator.java @@ -0,0 +1,79 @@ +package org.verapdf.tools; + +import java.util.Comparator; +import java.util.zip.ZipEntry; + +public class ZipEntryComparator implements Comparator { + + @Override + public int compare(ZipEntry o1, ZipEntry o2) { + String name1 = o1.getName(); + String name2 = o2.getName(); + return compare(name1, name2); + } + + public int compare(String name1, String name2) { + int commonLength = getCommonStartLength(name1, name2); + int start = getNotNumberStartLength(name1, commonLength); + String substring1 = name1.substring(start); + String substring2 = name2.substring(start); + substring1 = substring1.substring(0, getNumberStartLength(substring1)); + substring2 = substring2.substring(0, getNumberStartLength(substring2)); + Integer int1 = getIntegerFromString(substring1); + Integer int2 = getIntegerFromString(substring2); + if (int1 != null && int2 != null && !int1.equals(int2)) { + return int1 - int2; + } + return name1.compareTo(name2); + } + + public static int getCommonStartLength(String s1, String s2) { + return getCommonStartLength(s1, s2, Math.min(s1.length(), s2.length())); + } + + private static int getCommonStartLength(String s1, String s2, int length) { + for (int i = 0; i < length; i++) { + if (s1.charAt(i) != s2.charAt(i)) { + return i; + } + } + return length; + } + + protected static int getNotNumberStartLength(String string, int commonStartLength) { + return getNotRegexStartLength(string, commonStartLength, "\\d+"); + } + + private static int getNotRegexStartLength(String string, int commonStartLength, String regex) { + if (commonStartLength == 0) { + return 0; + } + for (int i = commonStartLength; i > 0; i--) { + if (!string.substring(i - 1, i).matches(regex)) { + return i; + } + } + return 0; + } + + protected static int getNumberStartLength(String string) { + return getRegexStartLength(string, "\\d+"); + } + + public static int getRegexStartLength(String string, String regex) { + for (int i = 0; i < string.length(); i++) { + if (!string.substring(i, i + 1).matches(regex)) { + return i; + } + } + return string.length(); + } + + public static Integer getIntegerFromString(String string) { + try { + return Integer.parseUnsignedInt(string); + } catch (NumberFormatException ignored) { + } + return null; + } +} diff --git a/fix-metadata/pom.xml b/fix-metadata/pom.xml new file mode 100644 index 0000000..4fe800d --- /dev/null +++ b/fix-metadata/pom.xml @@ -0,0 +1,77 @@ + + + + verapdf-tools + org.verapdf + 1.0-SNAPSHOT + + 4.0.0 + + fix-metadata + + + + + org.apache.pdfbox + pdfbox + 2.0.24 + + + + org.verapdf + validation-model + ${verapdf.version} + + + + + + + + + src/main/resources + + pdf-a.xmp + pdf-a4.xmp + pdf-ua1.xmp + + + + + + + + maven-compiler-plugin + + + + org.apache.maven.plugins + maven-assembly-plugin + + + + org.verapdf.tools.FixMetadataTool + + + + jar-with-dependencies + + false + + + + make-assembly + package + + single + + + + + + + + + diff --git a/fix-metadata/src/main/java/org/verapdf/tools/FixMetadataTool.java b/fix-metadata/src/main/java/org/verapdf/tools/FixMetadataTool.java new file mode 100644 index 0000000..838511b --- /dev/null +++ b/fix-metadata/src/main/java/org/verapdf/tools/FixMetadataTool.java @@ -0,0 +1,106 @@ +package org.verapdf.tools; + +import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDDocumentInformation; +import org.apache.pdfbox.pdmodel.common.PDMetadata; +import org.verapdf.metadata.fixer.gf.utils.DateConverter; +import org.verapdf.pdfa.flavours.PDFAFlavour; +import org.verapdf.xmp.XMPDateTimeFactory; + +import java.io.ByteArrayInputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.InputStream; +import java.util.*; + +public class FixMetadataTool { + public static void main(String[] args) throws Exception { + if (args.length < 3) { + System.out.println("arguments: inputFile outputFile flavourId/xmpFileName"); + System.out.println("possible flavourIds: " + + Arrays.toString(PDFAFlavour.values()) + .replaceFirst("0, ", "") + .replace(", wcag2", "")); + return; + } + PDDocument pdDocument = PDDocument.load(new File(args[0])); + PDFAFlavour flavour = PDFAFlavour.byFlavourId(args[2]); + if (flavour == PDFAFlavour.NO_FLAVOUR) { + PDMetadata newMetadata = new PDMetadata(pdDocument, new FileInputStream(args[2])); + pdDocument.getDocumentCatalog().setMetadata(newMetadata); + } else { + PDDocumentInformation pdInfo = pdDocument.getDocumentInformation(); + Calendar time = Calendar.getInstance(TimeZone.getTimeZone("UTC")); + setInfoEntries(pdInfo, time); + setDocumentVersion(pdDocument, flavour); + setMetadata(pdDocument, flavour, pdInfo.getCreationDate(), time); + } + pdDocument.save(args[1]); + pdDocument.close(); + } + + private static void setInfoEntries(PDDocumentInformation pdInfo, Calendar time) { + pdInfo.setProducer("veraPDF Test Builder 1.0"); + pdInfo.setCreator("veraPDF Test Builder"); + pdInfo.setAuthor("veraPDF Consortium"); + pdInfo.setKeywords(null); + pdInfo.setTitle(null); + pdInfo.setSubject(null); + pdInfo.setModificationDate(time); + Calendar creationDate = pdInfo.getCreationDate(); + if (creationDate == null) { + pdInfo.setCreationDate(time); + } + + } + + private static void setDocumentVersion(PDDocument pdDocument, PDFAFlavour flavour) { + if (flavour.getPart() == PDFAFlavour.Specification.ISO_19005_4) { + pdDocument.getDocument().getTrailer().removeItem(COSName.INFO); + pdDocument.setVersion(2.0f); + pdDocument.getDocument().setVersion(2.0f); + } else if (flavour.getPart() == PDFAFlavour.Specification.ISO_19005_1) { + pdDocument.getDocument().setVersion(1.4f); + } else { + pdDocument.getDocument().setVersion(1.7f); + } + } + + private static String getResourceName(PDFAFlavour flavour) { + if (flavour.getPart() == PDFAFlavour.Specification.ISO_19005_4) { + return "pdf-a4.xmp"; + } + if (flavour.getPart() == PDFAFlavour.Specification.ISO_19005_1) { + return "pdf-a.xmp"; + } + if (flavour.getPart() == PDFAFlavour.Specification.ISO_14289_1) { + return "pdf-ua1.xmp"; + } + return "pdf-a.xmp"; + } + + private static void setMetadata(PDDocument pdDocument, PDFAFlavour flavour, Calendar creationDate, Calendar time) { + String resourceName = getResourceName(flavour); + try (InputStream newXMPData = FixMetadataTool.class.getClassLoader().getResourceAsStream(resourceName)) { + Scanner s = new Scanner(newXMPData).useDelimiter("\\A"); + String meta = s.hasNext() ? s.next() : ""; + meta = meta.replace("CREATION_DATE", getXMPDate(creationDate)); + meta = meta.replace("MOD_DATE", getXMPDate(time)); + + if (flavour != PDFAFlavour.PDFUA_1) { + meta = meta.replace("FLAVOUR_PART", String.valueOf(flavour.getPart().getPartNumber())); + meta = meta.replace("FLAVOUR_LEVEL", PDFAFlavour.PDFA_4 != flavour ? + "pdfaid:conformance=\"" + flavour.getLevel().getCode().toUpperCase() + "\" " : ""); + } + PDMetadata newMetadata = new PDMetadata(pdDocument, new ByteArrayInputStream(meta.getBytes())); + pdDocument.getDocumentCatalog().setMetadata(newMetadata); + } catch (Exception e) { + e.printStackTrace(); + } + } + + private static String getXMPDate(Calendar date) { + return XMPDateTimeFactory.createFromCalendar(DateConverter.toCalendar(DateConverter.toPDFDateFormat(date))).getISO8601String(); + } +} diff --git a/fix-metadata/src/main/resources/pdf-a.xmp b/fix-metadata/src/main/resources/pdf-a.xmp new file mode 100644 index 0000000..08576ac --- /dev/null +++ b/fix-metadata/src/main/resources/pdf-a.xmp @@ -0,0 +1,16 @@ + + + + + + + veraPDF Consortium + + + + + + + + + diff --git a/fix-metadata/src/main/resources/pdf-a4.xmp b/fix-metadata/src/main/resources/pdf-a4.xmp new file mode 100644 index 0000000..a8ce89e --- /dev/null +++ b/fix-metadata/src/main/resources/pdf-a4.xmp @@ -0,0 +1,16 @@ + + + + + + + veraPDF Consortium + + + + + + + + + diff --git a/fix-metadata/src/main/resources/pdf-ua1.xmp b/fix-metadata/src/main/resources/pdf-ua1.xmp new file mode 100644 index 0000000..377b6ab --- /dev/null +++ b/fix-metadata/src/main/resources/pdf-ua1.xmp @@ -0,0 +1,54 @@ + + + + + CREATION_DATE + veraPDF Test Builder + MOD_DATE + 2020-08-10T16:14:45+03:00 + veraPDF Test Builder 1.0 + + application/pdf + + + Outlines-fail + + + + + veraPDF Consortium + + + uuid:9058faed-2a9e-433a-b076-ca9403522301 + uuid:75d77e04-b092-41c1-bee5-d5eacefc1d46 + 1 + + + + PDF/UA Universal Accessibility Schema + http://www.aiim.org/pdfua/ns/id/ + pdfuaid + + + + part + Integer + internal + Indicates, which part of ISO 14289 standard is followed + + + + + + + + + diff --git a/jpeg-files-generation/pom.xml b/jpeg-files-generation/pom.xml new file mode 100644 index 0000000..5e16473 --- /dev/null +++ b/jpeg-files-generation/pom.xml @@ -0,0 +1,69 @@ + + + 4.0.0 + + + verapdf-tools + org.verapdf + 1.0-SNAPSHOT + + + org.verapdf + jpeg-files-generation + 1.0-SNAPSHOT + jpeg-files-generation + + + 8 + 8 + + + + + org.apache.pdfbox + pdfbox + 2.0.26 + + + + com.github.jai-imageio + jai-imageio-jpeg2000 + 1.3.0 + + + + + + + org.apache.maven.plugins + maven-assembly-plugin + + + + org.verapdf.tools.JpegFilesGenerationApplication + + + + jar-with-dependencies + + false + + + + make-assembly + package + + single + + + + + + org.apache.maven.plugins + maven-compiler-plugin + 3.8.1 + + + + diff --git a/jpeg-files-generation/src/main/java/org/verapdf/tools/JpegFilesGenerationApplication.java b/jpeg-files-generation/src/main/java/org/verapdf/tools/JpegFilesGenerationApplication.java new file mode 100644 index 0000000..bbe7399 --- /dev/null +++ b/jpeg-files-generation/src/main/java/org/verapdf/tools/JpegFilesGenerationApplication.java @@ -0,0 +1,110 @@ +package org.verapdf.tools; + +import com.github.jaiimageio.jpeg2000.J2KImageWriteParam; +import org.apache.pdfbox.cos.COSName; +import org.apache.pdfbox.io.IOUtils; +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.PDPage; +import org.apache.pdfbox.pdmodel.PDPageContentStream; +import org.apache.pdfbox.pdmodel.graphics.color.PDColorSpace; +import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceCMYK; +import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceGray; +import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceRGB; +import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject; + +import javax.imageio.IIOImage; +import javax.imageio.ImageIO; +import javax.imageio.ImageTypeSpecifier; +import javax.imageio.ImageWriter; +import javax.imageio.metadata.IIOMetadata; +import javax.imageio.stream.ImageOutputStream; +import java.awt.color.ColorSpace; +import java.awt.color.ICC_ColorSpace; +import java.awt.image.BufferedImage; +import java.io.*; + +public class JpegFilesGenerationApplication { + + public static void main(String[] args) throws IOException { + BufferedImage image = ImageIO.read(new File(args[0])); + + File myFile = new File(args[1]); + PDDocument doc = PDDocument.load(myFile); + + PDPage page = doc.getPage(0); + PDPageContentStream contentStream = new PDPageContentStream(doc, page, PDPageContentStream.AppendMode.APPEND, + false); + + ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); + encodeImageToJPEGStream(image, 1, byteArrayOutputStream); + + PDImageXObject pdImage = new PDImageXObject(doc, new ByteArrayInputStream(byteArrayOutputStream.toByteArray()), + COSName.JPX_DECODE, image.getWidth(), image.getHeight(), + image.getColorModel().getComponentSize(0), + getColorSpaceFromAWT(image)); + contentStream.drawImage(pdImage, 0, 0); + + contentStream.close(); + doc.save("result.pdf"); + doc.close(); + } + + private static PDColorSpace getColorSpaceFromAWT(BufferedImage awtImage) { // returns a PDColorSpace for a given BufferedImage + if (awtImage.getColorModel().getNumComponents() == 1) { + return PDDeviceGray.INSTANCE; // 256 color (gray) JPEG + } + ColorSpace awtColorSpace = awtImage.getColorModel().getColorSpace(); + if (awtColorSpace instanceof ICC_ColorSpace && !awtColorSpace.isCS_sRGB()) { + throw new UnsupportedOperationException("ICC color spaces not implemented"); + } + + switch (awtColorSpace.getType()) { + case ColorSpace.TYPE_RGB: + return PDDeviceRGB.INSTANCE; + case ColorSpace.TYPE_GRAY: + return PDDeviceGray.INSTANCE; + case ColorSpace.TYPE_CMYK: + return PDDeviceCMYK.INSTANCE; + default: + throw new UnsupportedOperationException("color space not implemented: " + awtColorSpace.getType()); + } + } + + private static void encodeImageToJPEGStream(BufferedImage image, float quality, OutputStream out) throws IOException { + ImageOutputStream ios = null; // encode to JPEG + ImageWriter imageWriter = null; + try { + imageWriter = ImageIO.getImageWritersBySuffix("jp2").next(); // find JAI writer + ios = ImageIO.createImageOutputStream(out); + imageWriter.setOutput(ios); + // add compression + J2KImageWriteParam param = (J2KImageWriteParam) imageWriter.getDefaultWriteParam(); + param.setSOP(true); + param.setEPH(true); + param.setWriteCodeStreamOnly(true); + if (quality == 1.0f) { + param.setLossless(true); + //param.setFilter(J2KImageWriteParam.FILTER_53); + } else { + param.setProgressionType("res"); + param.setCompressionMode(J2KImageWriteParam.MODE_EXPLICIT); + param.setCompressionType("JPEG2000"); + param.setLossless(false); + param.setCompressionQuality(quality); + param.setEncodingRate(1.01); + param.setFilter(J2KImageWriteParam.FILTER_97); + } + ImageTypeSpecifier imageTypeSpecifier = new ImageTypeSpecifier(image); + IIOMetadata data = imageWriter.getDefaultImageMetadata(imageTypeSpecifier, param); + imageWriter.write(data, new IIOImage(image, null, null), param); // write + } finally { + IOUtils.closeQuietly(out); // clean up + if (ios != null) { + ios.close(); + } + if (imageWriter != null) { + imageWriter.dispose(); + } + } + } +} diff --git a/outlines-editor/pom.xml b/outlines-editor/pom.xml new file mode 100644 index 0000000..a98cd79 --- /dev/null +++ b/outlines-editor/pom.xml @@ -0,0 +1,60 @@ + + + 4.0.0 + + org.verapdf + outlines-editor + 1.0-SNAPSHOT + + + 8 + 8 + + + + outlines-editor + + + org.apache.maven.plugins + maven-assembly-plugin + + + + org.verapdf.tools.OutlinesEditor + + + + jar-with-dependencies + + false + + + + make-assembly + package + + single + + + + + + org.apache.maven.plugins + maven-compiler-plugin + 3.8.1 + + + + + + + org.apache.pdfbox + pdfbox + 2.0.26 + compile + + + + diff --git a/outlines-editor/src/main/java/org/verapdf/tools/OutlinesEditor.java b/outlines-editor/src/main/java/org/verapdf/tools/OutlinesEditor.java new file mode 100644 index 0000000..a507e13 --- /dev/null +++ b/outlines-editor/src/main/java/org/verapdf/tools/OutlinesEditor.java @@ -0,0 +1,36 @@ +package org.verapdf.tools; + +import org.apache.pdfbox.pdmodel.PDDocument; +import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDDocumentOutline; +import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItem; + +import java.io.*; + +public class OutlinesEditor { + + public static void main(String[] args) throws IOException { + if (args.length < 3) { + System.out.println("Parameters: input pdf file, input text file, output pdf file"); + return; + } + PDDocument document = PDDocument.load(new File(args[0])); + PDDocumentOutline outlines = new PDDocumentOutline(); + document.getDocumentCatalog().setDocumentOutline(outlines); + PDOutlineItem outline = new PDOutlineItem(); + outlines.addFirst(outline); + try (BufferedReader reader = new BufferedReader(new FileReader(args[1]))) { + String line = reader.readLine(); + outline.setTitle(line); + line = reader.readLine(); + while(line != null && !line.isEmpty()) { + PDOutlineItem newOutline = new PDOutlineItem(); + newOutline.setTitle(line); + outline.insertSiblingAfter(newOutline); + outline = newOutline; + line = reader.readLine(); + } + } + document.save(new File(args[2])); + document.close(); + } +} diff --git a/pdf-deprecated-finder/pom.xml b/pdf-deprecated-finder/pom.xml index d745a55..82fcf81 100644 --- a/pdf-deprecated-finder/pom.xml +++ b/pdf-deprecated-finder/pom.xml @@ -17,7 +17,7 @@ org.verapdf.pdfbox pdfbox - [2.0.0,2.1.0) + [2.0.62,2.1.0) @@ -28,7 +28,7 @@ org.verapdf.pdfbox xmpbox - 2.0.44 + [2.0.62,2.1.0) diff --git a/pdf-stream-change/src/main/java/org/verapdf/tools/Cli.java b/pdf-stream-change/src/main/java/org/verapdf/tools/Cli.java index 6e893a4..3eaff91 100644 --- a/pdf-stream-change/src/main/java/org/verapdf/tools/Cli.java +++ b/pdf-stream-change/src/main/java/org/verapdf/tools/Cli.java @@ -5,6 +5,7 @@ import org.apache.pdfbox.pdmodel.PDDocument; import java.io.*; +import java.util.Map; /** * @author Maxim Plushchov @@ -25,16 +26,26 @@ public static void main(String[] args) throws Exception { String textFileName = args[2]; Integer objectNumber = Integer.decode(args[3]); PDDocument document = PDDocument.load(new File(pdfFileName)); - COSObject object = document.getDocument().getObjectFromPool(new COSObjectKey(objectNumber, 0)); + COSObjectKey key = new COSObjectKey(objectNumber, 0); + COSObject object = document.getDocument().getObjectFromPool(key); if (object == null) { System.out.println("Object with number " + objectNumber + " not found."); return; } COSBase base = object.getObject(); - if (!(base instanceof COSStream)) { + if (!(base instanceof COSDictionary)) { System.out.println("Object with number " + objectNumber + " not a stream."); return; } + if (!(base instanceof COSStream)) { + COSStream newBase = new COSStream(); + for (Map.Entry entry : ((COSDictionary)base).entrySet()) { + newBase.setItem(entry.getKey(), entry.getValue()); + } + base = newBase; + object.setObject(base); + System.out.println("Stream added to dictionary " + objectNumber + "."); + } COSStream stream = (COSStream)base; if (READ.equals(args[0])) { try (InputStream in = stream.createInputStream(); OutputStream out = new FileOutputStream(textFileName)) { diff --git a/policy-generator/src/main/java/org/verapdf/tools/policy/generator/PolicyGenerator.java b/policy-generator/src/main/java/org/verapdf/tools/policy/generator/PolicyGenerator.java index 8ff582c..e0c152d 100644 --- a/policy-generator/src/main/java/org/verapdf/tools/policy/generator/PolicyGenerator.java +++ b/policy-generator/src/main/java/org/verapdf/tools/policy/generator/PolicyGenerator.java @@ -4,7 +4,7 @@ import org.verapdf.core.VeraPDFException; import org.verapdf.metadata.fixer.FixerFactory; import org.verapdf.metadata.fixer.MetadataFixerConfig; -import org.verapdf.pdfa.VeraGreenfieldFoundryProvider; +import org.verapdf.gf.foundry.VeraGreenfieldFoundryProvider; import org.verapdf.pdfa.flavours.PDFAFlavour; import org.verapdf.pdfa.validation.profiles.Profiles; import org.verapdf.pdfa.validation.profiles.ValidationProfile; @@ -26,10 +26,13 @@ import java.nio.file.Paths; import java.util.ArrayList; import java.util.EnumSet; +import java.util.LinkedList; import java.util.List; import java.util.logging.Level; import java.util.logging.Logger; +import static java.util.stream.Collectors.toList; + public class PolicyGenerator { private static final String HELP = "[options] \n Options:"; private static final Logger logger = Logger.getLogger(PolicyGenerator.class.getCanonicalName()); @@ -59,31 +62,33 @@ public static void main(String[] args) { try { PolicyGenerator generator = new PolicyGenerator(); + if (commandLine.getArgs().length < 1) { + formatter.printHelp(HELP, options); + return; + } if (commandLine.hasOption("n")) { generator.isLogsEnabled = false; } - if (commandLine.hasOption("p")) { - String profilePath = commandLine.getOptionValue("profile"); - if (profilePath != null) { - try (InputStream is = new FileInputStream(Paths.get(profilePath).toFile())) { - generator.customProfile = Profiles.profileFromXml(is); - } catch (JAXBException | FileNotFoundException e) { - generator.customProfile = null; - logger.log(Level.WARNING, "Error while getting profile from xml file. The profile will be selected automatically"); - } catch (IOException e) { - e.printStackTrace(); + generator.fileName = String.join(" ", commandLine.getArgs()); + if (commandLine.hasOption("v")) { + generator.validate(commandLine.getOptionValue("v"), commandLine.getOptionValue("profile")); + } else { + if (commandLine.hasOption("p")) { + String profilePath = commandLine.getOptionValue("profile"); + if (profilePath != null) { + try (InputStream is = new FileInputStream(Paths.get(profilePath).toFile())) { + generator.customProfile = Profiles.profileFromXml(is); + } catch (JAXBException | FileNotFoundException e) { + generator.customProfile = null; + logger.log(Level.WARNING, "Error while getting profile from xml file. The profile will be selected automatically"); + } catch (IOException e) { + e.printStackTrace(); + } } } + generator.validate(); } - if (commandLine.getArgs().length < 1) { - formatter.printHelp(HELP, options); - return; - } - generator.fileName = String.join(" ", commandLine.getArgs()); - - generator.validate(); generator.generate(); - } catch (IOException e) { e.printStackTrace(); } @@ -97,18 +102,61 @@ private static Options defineOptions() { Option profile = new Option("p", "profile", true, "Specifies path to custom profile"); profile.setRequired(false); options.addOption(profile); + Option verapdfPath = new Option("v", "verapdf_path", true, "path to verapdf"); + verapdfPath.setRequired(false); + options.addOption(verapdfPath); return options; } + private void validate(String verapdfPath, String profilePath) throws IOException { + List command = new LinkedList<>(); + List veraPDFParameters = new LinkedList<>(); + if (isLogsEnabled) { + veraPDFParameters.add("--addlogs"); + } + if (profilePath != null) { + veraPDFParameters.add("--profile"); + veraPDFParameters.add(profilePath); + } + + File tempMrrFile = File.createTempFile("veraPDF", ".mrr"); + tempMrrFile.deleteOnExit(); + veraPDFParameters.add("1>" + tempMrrFile.getAbsolutePath()); + command.add(verapdfPath); + command.addAll(veraPDFParameters); + command.add(fileName); + + command = command.stream().map(parameter -> { + if (parameter.isEmpty()) { + return "\"\""; + } + return parameter; + }).collect(toList()); + + try { + ProcessBuilder pb = new ProcessBuilder(); + pb.command(command); + pb.redirectError(ProcessBuilder.Redirect.INHERIT); + + Process process = pb.start(); + process.waitFor(); + report = new FileInputStream(tempMrrFile); + } catch (IOException | InterruptedException exception) { + exception.printStackTrace(); + } + } + private void validate() throws IOException { - MetadataFixerConfig fixConf = FixerFactory.configFromValues("test", true); + MetadataFixerConfig fixConf = FixerFactory.configFromValues("test"); ProcessorConfig processorConfig = this.customProfile == null ? ProcessorFactory.fromValues( - ValidatorFactory.createConfig(PDFAFlavour.NO_FLAVOUR, PDFAFlavour.PDFA_1_B, true, 0, false, isLogsEnabled, Level.WARNING, BaseValidator.DEFAULT_MAX_NUMBER_OF_DISPLAYED_FAILED_CHECKS), + ValidatorFactory.createConfig(PDFAFlavour.NO_FLAVOUR, PDFAFlavour.PDFA_1_B, true, + 0, false, isLogsEnabled, Level.WARNING, BaseValidator.DEFAULT_MAX_NUMBER_OF_DISPLAYED_FAILED_CHECKS, false, "", false, false), null, null, fixConf, EnumSet.of(TaskType.VALIDATE), (String) null) - : ProcessorFactory.fromValues( - ValidatorFactory.createConfig(PDFAFlavour.NO_FLAVOUR, PDFAFlavour.NO_FLAVOUR, true, 0, false, isLogsEnabled, Level.WARNING, BaseValidator.DEFAULT_MAX_NUMBER_OF_DISPLAYED_FAILED_CHECKS), + : ProcessorFactory.fromValues(ValidatorFactory.createConfig(PDFAFlavour.NO_FLAVOUR, + PDFAFlavour.NO_FLAVOUR, true, 0, false, isLogsEnabled, Level.WARNING, + BaseValidator.DEFAULT_MAX_NUMBER_OF_DISPLAYED_FAILED_CHECKS, false, "", false, false), null, null, fixConf, EnumSet.of(TaskType.VALIDATE), this.customProfile, null); BatchProcessor processor = ProcessorFactory.fileBatchProcessor(processorConfig); @@ -259,7 +307,8 @@ private void appendLogs() { String occurrencesToBeReplaced = node.getAttributes().getNamedItem("occurrences").getNodeValue(); String levelToBeReplaced = node.getAttributes().getNamedItem("level").getNodeValue(); content.append(PolicyHelper.LOG - .replace("{logToBeReplaced}", logToBeReplaced.replace("'", "'")) + .replace("{logToBeReplaced}", logToBeReplaced.replace("'", "'") + .replace(shortFilePath, ".pdf")) .replace("{occurrencesToBeReplaced}", occurrencesToBeReplaced) .replace("{levelToBeReplaced}", levelToBeReplaced)); diff --git a/policy-generator/src/main/java/org/verapdf/tools/policy/generator/PolicyHelper.java b/policy-generator/src/main/java/org/verapdf/tools/policy/generator/PolicyHelper.java index 355fddf..e9840f8 100644 --- a/policy-generator/src/main/java/org/verapdf/tools/policy/generator/PolicyHelper.java +++ b/policy-generator/src/main/java/org/verapdf/tools/policy/generator/PolicyHelper.java @@ -10,7 +10,7 @@ public class PolicyHelper { " \n" + " \n" + "\n" + - " name = \"Checking the validationReport: profile\"\n" + + " \n" + " \n" + " Failed check, Expected: isCompliant=true\n" + " \n" + @@ -25,13 +25,13 @@ public class PolicyHelper { " \n" + "\n" + "\n" + - " name = \"Checking the validationReport: document is not compliant\"\n" + + " \n" + " \n" + " Failed check, Expected: isCompliant=false\n" + " \n" + " \n" + "\n" + - " name = \"Checking the validationReport: rules\"\n" + + " \n" + " \n" + " Failed check, Expected: {failedRulesToBeReplaced}\t\n" + " \n" + @@ -47,18 +47,22 @@ public class PolicyHelper { "\n" + " \n"; public static final String LOGS_REPORT = "\n" + - " name = \"Checking the logs\"\n" + + " \n" + + " \n" + + " Failed check, Expected: contains logs\n" + + " \n" + + "\n" + " \n" + " Failed check, Expected: {logsCountToBeReplaced}\t\n" + " \n"; - public static final String NO_LOGS = "\n name = \"Checking for the absence of logs\"\n" + + public static final String NO_LOGS = "\n \n" + " \n" + " Failed check, Expected: no logs\n" + " \n"; public static final String LOGS = "\n" + " \n" + " \n" + " \n" + "\n" + - " name = \"Checking the taskResult\"\n" + - " \n" + + " \n" + + " \n" + " \n" + " Failed check, Expected Error: {exceptionToBeReplaced}\n" + " \n" + " \n" + " \n" + "\n" + - " name = \"Checking the batchSummary\"\n" + + " \n" + " \n" + " \n" + diff --git a/pom.xml b/pom.xml index 1e7b341..91b4b2a 100644 --- a/pom.xml +++ b/pom.xml @@ -7,7 +7,7 @@ verapdf-parent org.verapdf - 1.21.1 + 1.25.1 org.verapdf @@ -15,7 +15,11 @@ pom 1.0-SNAPSHOT + corpus-wiki-generation + fix-metadata generation-json-from-profile + jpeg-files-generation + outlines-editor pdf-decode-fonts pdf-decode-streams pdf-deprecated-finder @@ -23,12 +27,14 @@ pdf-stream-dump policy-generator preforma-classification + profiles-wiki-generation tagged-structure-merger + verapdf-examples veraPDF-parsers-performance - [1.21.0,1.22.0-RC) + [1.25.0,1.26.0-RC) diff --git a/preforma-classification/src/main/java/org/verapdf/tools/classification/CliProcessor.java b/preforma-classification/src/main/java/org/verapdf/tools/classification/CliProcessor.java index 3c222d1..ba505cb 100644 --- a/preforma-classification/src/main/java/org/verapdf/tools/classification/CliProcessor.java +++ b/preforma-classification/src/main/java/org/verapdf/tools/classification/CliProcessor.java @@ -4,6 +4,7 @@ import javax.xml.transform.TransformerException; import java.io.*; +import java.nio.charset.StandardCharsets; import java.util.*; /** @@ -99,7 +100,6 @@ private void processFiles(final List toProcess) throws FileNotFoundExcepti System.out.println(out.getAbsolutePath()); os = new FileOutputStream(out); } - XsltTransformer.transform(source, xsltIS, - os, arguments); + XsltTransformer.transform(source, xsltIS, new PrintWriter(new OutputStreamWriter(os, StandardCharsets.UTF_8)), arguments); } } diff --git a/profiles-wiki-generation/pom.xml b/profiles-wiki-generation/pom.xml new file mode 100644 index 0000000..ee8b498 --- /dev/null +++ b/profiles-wiki-generation/pom.xml @@ -0,0 +1,29 @@ + + + + verapdf-tools + org.verapdf + 1.0-SNAPSHOT + + 4.0.0 + + profiles-wiki-generation + + + 11 + 11 + + + + + + org.verapdf + core + ${verapdf.version} + + + + + diff --git a/profiles-wiki-generation/src/main/java/ProfilesWikiGenerator.java b/profiles-wiki-generation/src/main/java/ProfilesWikiGenerator.java new file mode 100644 index 0000000..95d8639 --- /dev/null +++ b/profiles-wiki-generation/src/main/java/ProfilesWikiGenerator.java @@ -0,0 +1,88 @@ +import org.verapdf.pdfa.validation.profiles.Profiles; +import org.verapdf.pdfa.validation.profiles.Reference; +import org.verapdf.pdfa.validation.profiles.Rule; +import org.verapdf.pdfa.validation.profiles.ValidationProfile; + +import java.io.*; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.SortedSet; +import java.util.TreeSet; + +public class ProfilesWikiGenerator { + + public static String inputFileName = "PDFA-4.xml"; + public static String pdfa1_flavour = "PDF/A-1"; + public static String pdfa2_flavour = "PDF/A-2"; + public static String pdfua1_flavour = "PDF/UA-1"; + public static String pdfa4_flavour = "PDF/A-4"; + public static String flavour = pdfa4_flavour; + public static String outputFileName = "wiki_" + flavour.replace("/","") + ".md"; + + public static void main(String[] args) { + try (InputStream inputStream = Files.newInputStream(Paths.get(inputFileName)); + PrintWriter out = new PrintWriter(outputFileName)) { + ValidationProfile profile = Profiles.profileFromXml(inputStream); + SortedSet rules = new TreeSet<>(new Profiles.RuleComparator()); + rules.addAll(profile.getRules()); + out.println("# " + flavour + " validation rules"); + for (Rule rule : rules) { + out.println("## Rule " + rule.getRuleId().getClause() + "-" + rule.getRuleId().getTestNumber()); + out.println(); + out.println("### Requirement"); + out.println(); + String description = rule.getDescription().replace(" (*) ", "*\n\n>- *"); + out.println(">*" + description + "*"); + out.println(); + out.println("### Error details"); + out.println(); + out.println(rule.getError().getMessage()); + out.println(); + out.println("* Object type: `" + rule.getObject() + "`"); + out.println("* Test condition: `" + rule.getTest() + "`"); + out.println("* Specification: " + getSpecification()); + String levels = getLevels(); + if (levels != null) { + out.println("* Levels: " + levels); + } + if (!rule.getReferences().isEmpty()) { + out.println("* Additional references:"); + for (Reference reference : rule.getReferences()) { + if (reference.getClause().isEmpty()) { + out.println(" * " + reference.getSpecification()); + } else { + out.println(" * " + reference.getSpecification() + ", " + reference.getClause()); + } + } + } + out.println(); + } + } catch (Exception e) { + e.printStackTrace(); + } + } + + private static String getLevels() { + if (pdfa4_flavour.equals(flavour)) { + return "4, 4E, 4F"; + } else if (pdfa1_flavour.equals(flavour)) { + return "A, B"; + } else if (pdfa2_flavour.equals(flavour)) { + return "A, B, E"; + } + return null; + } + + private static String getSpecification() { + if (pdfa4_flavour.equals(flavour)) { + return "ISO 19005-4:2020"; + } else if (pdfa1_flavour.equals(flavour)) { + return "ISO 19005-1:2005"; + } else if (pdfa2_flavour.equals(flavour)) { + return "ISO 19005-2:2011, ISO 19005-3:2012"; + } else if (pdfua1_flavour.equals(flavour)) { + return "ISO 14289-1:2014"; + } + return null; + } +} diff --git a/veraPDF-parsers-performance/pom.xml b/veraPDF-parsers-performance/pom.xml index 2840ca7..6b7c956 100644 --- a/veraPDF-parsers-performance/pom.xml +++ b/veraPDF-parsers-performance/pom.xml @@ -54,12 +54,6 @@ jcommander - - log4j - log4j - 1.2.17 - - junit junit diff --git a/verapdf-examples/pom.xml b/verapdf-examples/pom.xml new file mode 100644 index 0000000..031d7ef --- /dev/null +++ b/verapdf-examples/pom.xml @@ -0,0 +1,25 @@ + + + + verapdf-tools + org.verapdf + 1.0-SNAPSHOT + + 4.0.0 + + org.verapdf + verapdf-example + 1.0-SNAPSHOT + + + + org.verapdf.apps + greenfield-apps + ${verapdf.version} + + + + + diff --git a/verapdf-examples/src/main/java/IOTest.java b/verapdf-examples/src/main/java/IOTest.java new file mode 100644 index 0000000..b2dfc4d --- /dev/null +++ b/verapdf-examples/src/main/java/IOTest.java @@ -0,0 +1,62 @@ +import org.verapdf.core.VeraPDFException; +import org.verapdf.features.FeatureExtractorConfig; +import org.verapdf.features.FeatureFactory; +import org.verapdf.gf.foundry.VeraGreenfieldFoundryProvider; +import org.verapdf.metadata.fixer.FixerFactory; +import org.verapdf.metadata.fixer.MetadataFixerConfig; +import org.verapdf.pdfa.validation.validators.ValidatorConfig; +import org.verapdf.pdfa.validation.validators.ValidatorFactory; +import org.verapdf.processor.*; +import org.verapdf.processor.plugins.PluginsCollectionConfig; + +import java.io.File; +import java.io.IOException; +import java.io.OutputStream; +import java.util.ArrayList; +import java.util.EnumSet; +import java.util.List; + +// example from https://docs.verapdf.org/develop/processor/ (veraPDF.github.io\develop\processor\index.md) +public class IOTest { + + public static void main(String[] args) { + // Foundry initialising. Can be changed into PDFBox based one + VeraGreenfieldFoundryProvider.initialise(); + // Default validator config + ValidatorConfig validatorConfig = ValidatorFactory.defaultConfig(); + // or it is possible to select the needed parameters using ValidatorConfigBuilder, for example flavour + // ValidatorConfig validatorConfig = new ValidatorConfigBuilder().flavour(PDFAFlavour.PDFA_4).build(); + FormatOption format = FormatOption.MRR; + //create builder + //add VeraAppConfig + // Default features config + FeatureExtractorConfig featureConfig = FeatureFactory.defaultConfig(); + // Default plugins config + PluginsCollectionConfig pluginsConfig = PluginsCollectionConfig.defaultConfig(); + // Default fixer config + MetadataFixerConfig fixerConfig = FixerFactory.defaultConfig(); + // Tasks configuring + EnumSet tasks = EnumSet.noneOf(TaskType.class); + tasks.add(TaskType.VALIDATE); + tasks.add(TaskType.EXTRACT_FEATURES); + tasks.add(TaskType.FIX_METADATA); + // Creating processor config + ProcessorConfig processorConfig = ProcessorFactory.fromValues(validatorConfig, featureConfig, pluginsConfig, fixerConfig, tasks); + // Creating processor and output stream. In this example output stream is System.out + try (BatchProcessor processor = ProcessorFactory.fileBatchProcessor(processorConfig); + OutputStream reportStream = System.out) { + // Generating list of files for processing + List files = new ArrayList<>(); + files.add(new File("fail.pdf")); + // starting the processor + processor.process(files, ProcessorFactory.getHandler(format, true, reportStream, + processorConfig.getValidatorConfig().isRecordPasses())); + } catch (VeraPDFException e) { + System.err.println("Exception raised while processing batch"); + e.printStackTrace(); + } catch (IOException excep) { + System.err.println("Exception raised closing MRR temp file."); + excep.printStackTrace(); + } + } +} diff --git a/verapdf-examples/src/main/java/IOTest2.java b/verapdf-examples/src/main/java/IOTest2.java new file mode 100644 index 0000000..7758594 --- /dev/null +++ b/verapdf-examples/src/main/java/IOTest2.java @@ -0,0 +1,32 @@ +import org.verapdf.core.EncryptedPdfException; +import org.verapdf.core.ModelParsingException; +import org.verapdf.core.ValidationException; +import org.verapdf.gf.foundry.VeraGreenfieldFoundryProvider; +import org.verapdf.pdfa.Foundries; +import org.verapdf.pdfa.PDFAParser; +import org.verapdf.pdfa.PDFAValidator; +import org.verapdf.pdfa.results.ValidationResult; + +import java.io.FileInputStream; +import java.io.IOException; + +// example from https://docs.verapdf.org/develop/ (veraPDF.github.io\develop\index.md) +public class IOTest2 { + + + public static void main(String[] args) { + VeraGreenfieldFoundryProvider.initialise(); + try (PDFAParser parser = Foundries.defaultInstance().createParser(new FileInputStream("mydoc.pdf"))) { + PDFAValidator validator = Foundries.defaultInstance().createValidator(parser.getFlavour(), false); + ValidationResult result = validator.validate(parser); + if (result.isCompliant()) { + // File is a valid PDF/A 1b + } else { + // it isn't + } + } catch (IOException | ValidationException | ModelParsingException | EncryptedPdfException exception) { + // Exception during validation + } + } + +} diff --git a/verapdf-examples/src/main/java/IOTest3.java b/verapdf-examples/src/main/java/IOTest3.java new file mode 100644 index 0000000..97732d9 --- /dev/null +++ b/verapdf-examples/src/main/java/IOTest3.java @@ -0,0 +1,33 @@ +import org.verapdf.core.EncryptedPdfException; +import org.verapdf.core.ModelParsingException; +import org.verapdf.core.ValidationException; +import org.verapdf.gf.foundry.VeraGreenfieldFoundryProvider; +import org.verapdf.pdfa.Foundries; +import org.verapdf.pdfa.PDFAParser; +import org.verapdf.pdfa.PDFAValidator; +import org.verapdf.pdfa.flavours.PDFAFlavour; +import org.verapdf.pdfa.results.ValidationResult; + +import java.io.FileInputStream; +import java.io.IOException; + +// example from https://docs.verapdf.org/develop/ (veraPDF.github.io\develop\index.md) +public class IOTest3 { + + public static void main(String[] args) { + VeraGreenfieldFoundryProvider.initialise(); + PDFAFlavour flavour = PDFAFlavour.fromString("1b"); + try (PDFAParser parser = Foundries.defaultInstance().createParser(new FileInputStream("mydoc.pdf"), flavour)) { + PDFAValidator validator = Foundries.defaultInstance().createValidator(flavour, false); + ValidationResult result = validator.validate(parser); + if (result.isCompliant()) { + // File is a valid PDF/A 1b + } else { + // it isn't + } + } catch (IOException | ValidationException | ModelParsingException | EncryptedPdfException exception) { + // Exception during validation + } + } + +} diff --git a/verapdf-examples/src/main/java/PluginExample.java b/verapdf-examples/src/main/java/PluginExample.java new file mode 100644 index 0000000..698fe94 --- /dev/null +++ b/verapdf-examples/src/main/java/PluginExample.java @@ -0,0 +1,29 @@ +import java.util.ArrayList; +import java.util.List; +import java.util.logging.Level; +import java.util.logging.Logger; +// and from VeraPDF: +import org.verapdf.core.FeatureParsingException; +import org.verapdf.features.AbstractEmbeddedFileFeaturesExtractor; +import org.verapdf.features.EmbeddedFileFeaturesData; +import org.verapdf.features.tools.FeatureTreeNode; + +// example from https://docs.verapdf.org/plugins/ (veraPDF.github.io\plugins\index.md) +public class PluginExample extends AbstractEmbeddedFileFeaturesExtractor { + + private static final Logger LOGGER = Logger.getLogger(PluginExample.class.getCanonicalName()); + + @Override + public List getEmbeddedFileFeatures(EmbeddedFileFeaturesData embeddedFileFeaturesData) { + List res = new ArrayList<>(); + try { + FeatureTreeNode node = FeatureTreeNode.createRootNode("Hello"); + node.setValue("World"); + res.add(node); + } catch (FeatureParsingException e) { + LOGGER.log(Level.SEVERE, e.getMessage(), e); + } + return res; + } + +}