diff --git a/.github/workflows/test-pr.yml b/.github/workflows/test-pr.yml
new file mode 100644
index 0000000..774f7c5
--- /dev/null
+++ b/.github/workflows/test-pr.yml
@@ -0,0 +1,35 @@
+name: PR QA
+
+on:
+ pull_request:
+ types: [opened, synchronize, reopened]
+
+jobs:
+ build:
+ name: Checkout and Build
+ runs-on: ubuntu-20.04
+
+ strategy:
+ matrix:
+ java-version: [8, 11, 16, 17]
+
+ steps:
+ - uses: actions/checkout@v2
+ - name: JDK setup
+ uses: actions/setup-java@v2
+ with:
+ java-version: ${{ matrix.java-version }}
+ distribution: 'temurin'
+ cache: maven
+ - name: Build with Maven
+ run: mvn --batch-mode --update-snapshots verify
+
+ coverage:
+ name: Quality Assurance
+ runs-on: ubuntu-20.04
+ needs: [ build ]
+
+ steps:
+ - uses: actions/checkout@v2
+ - name: Codacy analysis reporting
+ uses: codacy/codacy-analysis-cli-action@master
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
new file mode 100644
index 0000000..72a05c9
--- /dev/null
+++ b/.github/workflows/test.yml
@@ -0,0 +1,23 @@
+name: Test
+
+on:
+ workflow_call:
+
+jobs:
+ build:
+ name: Checkout and Build
+ runs-on: ubuntu-20.04
+
+ steps:
+ - uses: actions/checkout@v2
+ - name: JDK setup
+ uses: actions/setup-java@v2
+ with:
+ java-version: 11
+ distribution: 'temurin'
+ cache: maven
+ - name: Test
+   run: cd policy-generator &&
+     mvn clean package &&
+     cd target &&
+     java -jar policy-generator.jar
diff --git a/corpus-wiki-generation/pom.xml b/corpus-wiki-generation/pom.xml
new file mode 100644
index 0000000..d55c995
--- /dev/null
+++ b/corpus-wiki-generation/pom.xml
@@ -0,0 +1,72 @@
+
+
+ 4.0.0
+
+
+ verapdf-tools
+ org.verapdf
+ 1.0-SNAPSHOT
+
+
+ org.verapdf
+ corpus-wiki-generator
+ 1.0-SNAPSHOT
+
+
+ 11
+ 11
+
+
+
+
+ org.verapdf
+ validation-model
+ ${verapdf.version}
+
+
+
+ org.apache.pdfbox
+ pdfbox
+ 2.0.26
+ compile
+
+
+
+
+
+
+
+ maven-compiler-plugin
+
+
+
+ org.apache.maven.plugins
+ maven-assembly-plugin
+
+
+
+ org.verapdf.tools.CorpusWikiGenerator
+
+
+
+ jar-with-dependencies
+
+ false
+
+
+
+ make-assembly
+ package
+
+ single
+
+
+
+
+
+
+
+
+
diff --git a/corpus-wiki-generation/src/main/java/org/verapdf/tools/CorpusDownload.java b/corpus-wiki-generation/src/main/java/org/verapdf/tools/CorpusDownload.java
new file mode 100644
index 0000000..b827ccb
--- /dev/null
+++ b/corpus-wiki-generation/src/main/java/org/verapdf/tools/CorpusDownload.java
@@ -0,0 +1,102 @@
+package org.verapdf.tools;
+
+
+import java.io.*;
+import java.net.HttpURLConnection;
+import java.net.URL;
+import java.net.URLDecoder;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Downloads a corpus archive into a temporary file, following HTTP redirects by hand.
+ * Adapted from the integration-tests code.
+ */
+public class CorpusDownload {
+
+    private static final int TIMEOUT_MILLIS = 15000;
+    private static final int MAX_VISITS_PER_URL = 3;
+    // java.net.HttpURLConnection has no constants for these two status codes.
+    private static final int HTTP_TEMPORARY_REDIRECT = 307;
+    private static final int HTTP_PERMANENT_REDIRECT = 308;
+
+    /**
+     * Copies the resource at {@code downloadLoc} into a new temporary {@code .zip} file.
+     *
+     * @param downloadLoc location of the archive; non-HTTP URLs are opened directly
+     * @param prefix      prefix of the temporary file name
+     * @return the temporary file, registered for deletion when the JVM exits
+     * @throws IOException if the temporary file cannot be created or the transfer fails
+     */
+    public static File createTempFileFromCorpus(final URL downloadLoc, final String prefix) throws IOException {
+        File tempFile = File.createTempFile(prefix, ".zip");
+        // Register for deletion up front so that a failed download does not leave the file behind.
+        tempFile.deleteOnExit();
+        System.out.println("Downloading: " + downloadLoc + ", to temp:" + tempFile);
+        // long, not int: an archive larger than 2 GiB must not overflow the counter.
+        long totalBytes = 0;
+        try (OutputStream output = new FileOutputStream(tempFile);
+             InputStream corpusInput = handleRedirects(downloadLoc)) {
+            byte[] buffer = new byte[8 * 1024];
+            int bytesRead;
+            while ((bytesRead = corpusInput.read(buffer)) != -1) {
+                output.write(buffer, 0, bytesRead);
+                totalBytes += bytesRead;
+            }
+        }
+        System.out.println("Downloaded: " + totalBytes + " bytes");
+        return tempFile;
+    }
+
+    /**
+     * Opens {@code url} for reading. For HTTP(S) URLs redirects are followed manually, because
+     * automatic following is switched off on the connection so that each hop can be inspected.
+     *
+     * @param url the location to open
+     * @return a stream over the body of the first non-redirect response
+     * @throws IOException on connection failure, when a redirect carries no {@code Location}
+     *                     header, or when the same URL is visited more than three times
+     */
+    static InputStream handleRedirects(URL url) throws IOException {
+        if (!url.getProtocol().startsWith("http")) {
+            return url.openStream();
+        }
+        System.err.println("Prot:" + url.getProtocol());
+        Map<String, Integer> visited = new HashMap<>();
+        String urlString = url.toExternalForm();
+
+        while (true) {
+            int times = visited.merge(urlString, 1, Integer::sum);
+            if (times > MAX_VISITS_PER_URL) {
+                throw new IOException("Stuck in redirect loop");
+            }
+
+            URL resourceUrl = new URL(urlString);
+            HttpURLConnection conn = (HttpURLConnection) resourceUrl.openConnection();
+            conn.setConnectTimeout(TIMEOUT_MILLIS);
+            conn.setReadTimeout(TIMEOUT_MILLIS);
+            conn.setInstanceFollowRedirects(false); // Make the logic below easier to detect redirections
+            conn.setRequestProperty("User-Agent", "Mozilla/5.0...");
+
+            if (!isRedirect(conn.getResponseCode())) {
+                return conn.getInputStream();
+            }
+            String location = conn.getHeaderField("Location");
+            conn.disconnect();
+            if (location == null) {
+                throw new IOException("Redirect without Location header from " + urlString);
+            }
+            // The header is used verbatim: URL-decoding it would corrupt percent-encoded characters.
+            urlString = new URL(resourceUrl, location).toExternalForm(); // Deal with relative URLs
+        }
+    }
+
+    /** Returns whether {@code status} is one of the HTTP redirect codes this class follows. */
+    private static boolean isRedirect(int status) {
+        return status == HttpURLConnection.HTTP_MOVED_PERM
+                || status == HttpURLConnection.HTTP_MOVED_TEMP
+                || status == HttpURLConnection.HTTP_SEE_OTHER
+                || status == HTTP_TEMPORARY_REDIRECT
+                || status == HTTP_PERMANENT_REDIRECT;
+    }
+}
diff --git a/corpus-wiki-generation/src/main/java/org/verapdf/tools/CorpusWikiGenerator.java b/corpus-wiki-generation/src/main/java/org/verapdf/tools/CorpusWikiGenerator.java
new file mode 100644
index 0000000..28fc9cd
--- /dev/null
+++ b/corpus-wiki-generation/src/main/java/org/verapdf/tools/CorpusWikiGenerator.java
@@ -0,0 +1,278 @@
+package org.verapdf.tools;
+
+import java.io.*;
+import java.net.URI;
+import java.nio.charset.StandardCharsets;
+import java.util.*;
+import java.util.zip.ZipEntry;
+import java.util.zip.ZipFile;
+
+import org.apache.pdfbox.text.PDFTextStripper;
+import org.verapdf.exceptions.InvalidPasswordException;
+import org.verapdf.pd.PDDocument;
+import org.verapdf.pd.PDOutlineDictionary;
+import org.verapdf.pd.PDOutlineItem;
+
+/**
+ * Generates Markdown wiki pages that describe the files of the veraPDF corpus.
+ * <p>
+ * The staging archive is downloaded, its entries are visited in {@link ZipEntryComparator} order,
+ * and a new {@code .md} file is started in the working directory for every first-level directory.
+ *
+ * @author Maxim Plushchou
+ */
+public class CorpusWikiGenerator {
+
+    private static final String veraUrl = "https://github.com/veraPDF/veraPDF-corpus/archive/staging.zip";
+    private static final String LINK_START = "https://raw.githubusercontent.com/veraPDF/veraPDF-corpus/staging/";
+    private static final String STAGING = "staging";
+    private static final String PDF_UA_1 = "PDF_UA-1";
+    private static final String EXPECTED_MESSAGE = "expected message";
+    private static final String PDF_EXTENSION = ".pdf";
+    // Titles and messages of this length or longer are not written to the page.
+    private static final int MAX_MESSAGE_LENGTH = 15_000;
+    // Page currently being written; replaced whenever a first-level directory is entered.
+    private static PrintWriter writer;
+    // Name of the first-level directory currently being processed.
+    private static String corpusPart;
+
+    /**
+     * Downloads the corpus archive and writes the wiki pages.
+     *
+     * @param args not used
+     * @throws IOException           if the archive cannot be read or a page cannot be written
+     * @throws IllegalStateException if the archive cannot be downloaded
+     */
+    public static void main(String[] args) throws IOException {
+        File zipFile;
+        try {
+            zipFile = CorpusDownload.createTempFileFromCorpus(URI.create(veraUrl).toURL(), "corpusWiki");
+        } catch (IOException excep) {
+            throw new IllegalStateException(excep);
+        }
+        writer = openWriter("test.md");
+        try (ZipFile zipSource = new ZipFile(zipFile)) {
+            SortedSet<ZipEntry> entriesSet = new TreeSet<>(new ZipEntryComparator());
+            Enumeration<? extends ZipEntry> entries = zipSource.entries();
+            while (entries.hasMoreElements()) {
+                entriesSet.add(entries.nextElement());
+            }
+            for (ZipEntry entry : entriesSet) {
+                if (entry.isDirectory()) {
+                    printDirectory(entry);
+                } else if (entry.getName().endsWith(PDF_EXTENSION)) {
+                    try {
+                        printFileDescription(zipSource, entry);
+                    } catch (InvalidPasswordException e) {
+                        writer.println("Encrypted pdf");
+                        System.out.println(entry.getName() + ": Encrypted pdf");
+                    }
+                }
+            }
+        } finally {
+            // Closing flushes the buffered tail of the last page, which would otherwise be lost.
+            writer.close();
+        }
+    }
+
+    /** Opens a buffered UTF-8 writer so that page text does not depend on the platform charset. */
+    private static PrintWriter openWriter(String fileName) throws FileNotFoundException {
+        return new PrintWriter(new BufferedWriter(
+                new OutputStreamWriter(new FileOutputStream(fileName), StandardCharsets.UTF_8)));
+    }
+
+    /** Counts the {@code /} separators in {@code directoryName}. */
+    private static int getHeadingLevel(String directoryName) {
+        return directoryName.length() - directoryName.replace("/","").length();
+    }
+
+    /**
+     * Handles a directory entry: a first-level directory starts a new page named after it, a deeper
+     * directory becomes a heading on the current page.
+     */
+    private static void printDirectory(ZipEntry entry) throws FileNotFoundException {
+        String directoryName = entry.getName();
+        // Drop everything up to and including the "staging" segment and the separator after it.
+        directoryName = directoryName.substring(directoryName.indexOf(STAGING) + STAGING.length() + 1);
+        int headingLevel = getHeadingLevel(directoryName);
+        if (!directoryName.isEmpty()) {
+            directoryName = directoryName.substring(0, directoryName.length() - 1);
+        }
+        if (!directoryName.isEmpty() && headingLevel > 0) {
+            directoryName = directoryName.substring(directoryName.lastIndexOf("/") + 1);
+        }
+        if (directoryName.isEmpty()) {
+            return;
+        }
+        if (headingLevel == 1) {
+            corpusPart = directoryName;
+            writer.close();
+            writer = openWriter(directoryName + ".md");
+        } else {
+            printHeading(directoryName, headingLevel);
+        }
+    }
+
+    /** Writes a heading with one {@code #} fewer than {@code headingLevel}. */
+    private static void printHeading(String directoryName, int headingLevel) {
+        for (int i = 1; i < headingLevel; i++) {
+            writer.print("#");
+        }
+        writer.println(" " + directoryName);
+        writer.println();
+    }
+
+    /**
+     * Writes the link to one PDF file followed by its description, taken from the document outlines
+     * or, when there is no first outline item, from the page text.
+     */
+    private static void printFileDescription(ZipFile zipSource, ZipEntry entry) throws IOException {
+        PDDocument document = new PDDocument(zipSource.getInputStream(entry));
+        try {
+            printFileName(entry);
+            printLinkToFile(entry);
+            PDOutlineDictionary outlines = document.getOutlines();
+            PDOutlineItem outlineItem = outlines == null ? null : outlines.getFirst();
+            if (outlineItem == null) {
+                printTextFromPagesContents(zipSource, entry);
+            } else {
+                if (outlineItem.getTitle() == null) {
+                    writer.println(" null title");
+                }
+                if (PDF_UA_1.equals(corpusPart)) {
+                    printTextFromPDFUAOutlines(outlineItem);
+                } else {
+                    printTextFromOutlines(outlineItem);
+                }
+            }
+            writer.println();
+        } finally {
+            // Release the document even when parsing or printing fails part-way.
+            document.close();
+        }
+    }
+
+    /**
+     * Writes the description found in the page text. Output starts two lines after the last line
+     * containing {@code Outlines:} (at the second line when there is none), stops before the
+     * final line, and ends a sentence wherever the next line starts with a list label.
+     */
+    private static void printTextFromPagesContents(ZipFile zipSource, ZipEntry entry) throws IOException {
+        String text;
+        try (org.apache.pdfbox.pdmodel.PDDocument pdDocument =
+                     org.apache.pdfbox.pdmodel.PDDocument.load(zipSource.getInputStream(entry))) {
+            text = new PDFTextStripper().getText(pdDocument);
+        }
+        String[] messages = text.split("\n");
+        int outlinesIndex = -1;
+        for (int i = 0; i < messages.length; i++) {
+            if (messages[i].contains("Outlines:")) {
+                outlinesIndex = i;
+            }
+        }
+        writer.print(": ");
+        for (int i = outlinesIndex + 2; i < messages.length - 1; i++) {
+            messages[i] = messages[i].replace("\r","");
+            if (stringStartsWithLabel(messages[i])) {
+                messages[i] = messages[i].substring(2);
+            }
+            if (i == messages.length - 2 || stringStartsWithLabel(messages[i + 1])) {
+                messages[i] = messages[i] + ".";
+                writer.println(messages[i]);
+            } else {
+                writer.print(messages[i] + " ");
+            }
+        }
+    }
+
+    /** Returns whether {@code str} starts with a {@code "- "} or {@code "• "} list label. */
+    private static boolean stringStartsWithLabel(String str) {
+        return str.startsWith("- ") || str.startsWith("• ");
+    }
+
+    /** Writes the bare file name of {@code entry} as Markdown link text. */
+    private static void printFileName(ZipEntry entry) {
+        String fileName = entry.getName();
+        fileName = fileName.substring(fileName.lastIndexOf("/") + 1);
+        writer.print("[" + fileName + "]");
+    }
+
+    /** Writes the Markdown link target: the raw GitHub URL of {@code entry} with spaces escaped. */
+    private static void printLinkToFile(ZipEntry entry) {
+        String fileLink = entry.getName();
+        fileLink = fileLink.substring(fileLink.indexOf(STAGING) + STAGING.length() + 1).replace(" ", "%20");
+        writer.print("(" + LINK_START + fileLink + ")");
+    }
+
+    /**
+     * Writes the titles of the PDF/UA outline items that lie between the first and the last sibling
+     * (both excluded), each followed by the titles of its descendants.
+     */
+    private static void printTextFromPDFUAOutlines(PDOutlineItem outlineItem) {
+        writer.print(": ");
+        outlineItem = outlineItem.getNext();
+        while (outlineItem != null && outlineItem.getNext() != null) {
+            String title = outlineItem.getTitle();
+            if (title != null && title.length() < MAX_MESSAGE_LENGTH) {
+                title = getCorrectMDString(title);
+                title = title.replace("\n", "");
+                if (!title.endsWith(".")) {
+                    title = title + ".";
+                }
+                writer.println(title);
+                printChildrenOutlines(outlineItem.getFirst());
+            }
+            outlineItem = outlineItem.getNext();
+        }
+    }
+
+    /** Escapes {@code <} and {@code >} with a backslash. */
+    private static String getCorrectMDString(String str) {
+        return str.replace("<", "\\<").replace(">","\\>");
+    }
+
+    /** Writes the non-null titles of {@code outlineItem}, its following siblings and all their descendants. */
+    private static void printChildrenOutlines(PDOutlineItem outlineItem) {
+        while (outlineItem != null) {
+            if (outlineItem.getTitle() != null) {
+                writer.println(outlineItem.getTitle());
+            }
+            printChildrenOutlines(outlineItem.getFirst());
+            outlineItem = outlineItem.getNext();
+        }
+    }
+
+    /**
+     * Walks {@code outlineItem} and its following siblings, printing each title from the first
+     * one that contains the expected-message marker onwards, and recurses into their children.
+     */
+    private static void printTextFromOutlines(PDOutlineItem outlineItem) {
+        boolean isPrinted = false;
+        while (outlineItem != null) {
+            isPrinted = printTitle(outlineItem.getTitle(), isPrinted);
+            printTextFromOutlines(outlineItem.getFirst());
+            outlineItem = outlineItem.getNext();
+        }
+    }
+
+    /**
+     * Prints {@code string} without the expected-message marker when it contains the marker or
+     * when a sibling has already been printed.
+     *
+     * @return whether titles of following siblings should be printed
+     */
+    private static boolean printTitle(String string, boolean isPrinted) {
+        if (string == null) {
+            // An outline item may have no title; skip it instead of failing on string.contains.
+            return isPrinted;
+        }
+        if (string.contains(EXPECTED_MESSAGE) || isPrinted) {
+            String message = string.replace(EXPECTED_MESSAGE, "");
+            if (message.length() < MAX_MESSAGE_LENGTH) {
+                writer.println(message);
+            }
+            return true;
+        }
+        return false;
+    }
+}
diff --git a/corpus-wiki-generation/src/main/java/org/verapdf/tools/ZipEntryComparator.java b/corpus-wiki-generation/src/main/java/org/verapdf/tools/ZipEntryComparator.java
new file mode 100644
index 0000000..519b1d8
--- /dev/null
+++ b/corpus-wiki-generation/src/main/java/org/verapdf/tools/ZipEntryComparator.java
@@ -0,0 +1,115 @@
+package org.verapdf.tools;
+
+import java.util.Comparator;
+import java.util.regex.Pattern;
+import java.util.zip.ZipEntry;
+
+/**
+ * Orders zip entries by name, comparing the first run of decimal digits in which two names differ
+ * by numeric value, so that for example {@code a/2.pdf} sorts before {@code a/10.pdf}.
+ */
+public class ZipEntryComparator implements Comparator<ZipEntry> {
+
+    private static final String DIGITS_REGEX = "\\d+";
+
+    @Override
+    public int compare(ZipEntry o1, ZipEntry o2) {
+        return compare(o1.getName(), o2.getName());
+    }
+
+    /**
+     * Compares two entry names.
+     *
+     * @param name1 the first name
+     * @param name2 the second name
+     * @return the unsigned numeric order of the digit runs at the first difference when both
+     *         names have one and the values differ, otherwise {@code name1.compareTo(name2)}
+     */
+    public int compare(String name1, String name2) {
+        int commonLength = getCommonStartLength(name1, name2);
+        // Step back to the start of a digit run that the common prefix may have cut in two.
+        int start = getNotNumberStartLength(name1, commonLength);
+        String tail1 = name1.substring(start);
+        String tail2 = name2.substring(start);
+        Integer int1 = getIntegerFromString(tail1.substring(0, getNumberStartLength(tail1)));
+        Integer int2 = getIntegerFromString(tail2.substring(0, getNumberStartLength(tail2)));
+        if (int1 != null && int2 != null && !int1.equals(int2)) {
+            // The values come from parseUnsignedInt, so anything above Integer.MAX_VALUE is stored
+            // as a negative int; subtracting would also overflow. Compare as unsigned instead.
+            return Integer.compareUnsigned(int1, int2);
+        }
+        return name1.compareTo(name2);
+    }
+
+    /** Returns the length of the longest common prefix of {@code s1} and {@code s2}. */
+    public static int getCommonStartLength(String s1, String s2) {
+        return getCommonStartLength(s1, s2, Math.min(s1.length(), s2.length()));
+    }
+
+    private static int getCommonStartLength(String s1, String s2, int length) {
+        for (int i = 0; i < length; i++) {
+            if (s1.charAt(i) != s2.charAt(i)) {
+                return i;
+            }
+        }
+        return length;
+    }
+
+    /**
+     * Returns the index at which the run of digits ending just before {@code commonStartLength}
+     * begins, or {@code commonStartLength} itself when the preceding character is not a digit.
+     */
+    protected static int getNotNumberStartLength(String string, int commonStartLength) {
+        return getNotRegexStartLength(string, commonStartLength, DIGITS_REGEX);
+    }
+
+    private static int getNotRegexStartLength(String string, int commonStartLength, String regex) {
+        if (commonStartLength == 0) {
+            return 0;
+        }
+        // Compile once instead of once per character, as String.matches would.
+        Pattern pattern = Pattern.compile(regex);
+        for (int i = commonStartLength; i > 0; i--) {
+            if (!pattern.matcher(string.substring(i - 1, i)).matches()) {
+                return i;
+            }
+        }
+        return 0;
+    }
+
+    /** Returns the length of the run of digits at the start of {@code string}. */
+    protected static int getNumberStartLength(String string) {
+        return getRegexStartLength(string, DIGITS_REGEX);
+    }
+
+    /**
+     * Returns the number of leading characters of {@code string} that each match {@code regex}
+     * on their own.
+     */
+    public static int getRegexStartLength(String string, String regex) {
+        if (string.isEmpty()) {
+            return 0;
+        }
+        Pattern pattern = Pattern.compile(regex);
+        for (int i = 0; i < string.length(); i++) {
+            if (!pattern.matcher(string.substring(i, i + 1)).matches()) {
+                return i;
+            }
+        }
+        return string.length();
+    }
+
+    /**
+     * Parses {@code string} as an unsigned decimal integer.
+     *
+     * @return the parsed value, or {@code null} when {@code string} is not such a number
+     */
+    public static Integer getIntegerFromString(String string) {
+        try {
+            return Integer.parseUnsignedInt(string);
+        } catch (NumberFormatException ignored) {
+            // Not a number (for example an empty digit run): reported to the caller as null.
+            return null;
+        }
+    }
+}
diff --git a/fix-metadata/pom.xml b/fix-metadata/pom.xml
new file mode 100644
index 0000000..4fe800d
--- /dev/null
+++ b/fix-metadata/pom.xml
@@ -0,0 +1,77 @@
+
+
+
+ verapdf-tools
+ org.verapdf
+ 1.0-SNAPSHOT
+
+ 4.0.0
+
+ fix-metadata
+
+
+
+
+ org.apache.pdfbox
+ pdfbox
+ 2.0.24
+
+
+
+ org.verapdf
+ validation-model
+ ${verapdf.version}
+
+
+
+
+
+
+
+
+ src/main/resources
+
+ pdf-a.xmp
+ pdf-a4.xmp
+ pdf-ua1.xmp
+
+
+
+
+
+
+
+ maven-compiler-plugin
+
+
+
+ org.apache.maven.plugins
+ maven-assembly-plugin
+
+
+
+ org.verapdf.tools.FixMetadataTool
+
+
+
+ jar-with-dependencies
+
+ false
+
+
+
+ make-assembly
+ package
+
+ single
+
+
+
+
+
+
+
+
+
diff --git a/fix-metadata/src/main/java/org/verapdf/tools/FixMetadataTool.java b/fix-metadata/src/main/java/org/verapdf/tools/FixMetadataTool.java
new file mode 100644
index 0000000..838511b
--- /dev/null
+++ b/fix-metadata/src/main/java/org/verapdf/tools/FixMetadataTool.java
@@ -0,0 +1,131 @@
+package org.verapdf.tools;
+
+import org.apache.pdfbox.cos.COSName;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.pdmodel.PDDocumentInformation;
+import org.apache.pdfbox.pdmodel.common.PDMetadata;
+import org.verapdf.metadata.fixer.gf.utils.DateConverter;
+import org.verapdf.pdfa.flavours.PDFAFlavour;
+import org.verapdf.xmp.XMPDateTimeFactory;
+
+import java.io.ByteArrayInputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
+import java.util.*;
+
+/**
+ * Command-line tool that replaces the metadata of a PDF file, either with an XMP file supplied by
+ * the user or with a bundled template filled in for a given flavour.
+ */
+public class FixMetadataTool {
+
+    /**
+     * @param args input file, output file, and either a flavour id or the name of an XMP file
+     * @throws Exception if the document cannot be read, updated or saved
+     */
+    public static void main(String[] args) throws Exception {
+        if (args.length < 3) {
+            System.out.println("arguments: inputFile outputFile flavourId/xmpFileName");
+            System.out.println("possible flavourIds: " +
+                    Arrays.toString(PDFAFlavour.values())
+                            .replaceFirst("0, ", "")
+                            .replace(", wcag2", ""));
+            return;
+        }
+        // try-with-resources releases the document even when an update step or the save fails.
+        try (PDDocument pdDocument = PDDocument.load(new File(args[0]))) {
+            PDFAFlavour flavour = PDFAFlavour.byFlavourId(args[2]);
+            if (flavour == PDFAFlavour.NO_FLAVOUR) {
+                // With NO_FLAVOUR the third argument is used as the name of an XMP file instead.
+                try (InputStream xmpFile = new FileInputStream(args[2])) {
+                    PDMetadata newMetadata = new PDMetadata(pdDocument, xmpFile);
+                    pdDocument.getDocumentCatalog().setMetadata(newMetadata);
+                }
+            } else {
+                PDDocumentInformation pdInfo = pdDocument.getDocumentInformation();
+                Calendar time = Calendar.getInstance(TimeZone.getTimeZone("UTC"));
+                setInfoEntries(pdInfo, time);
+                setDocumentVersion(pdDocument, flavour);
+                setMetadata(pdDocument, flavour, pdInfo.getCreationDate(), time);
+            }
+            pdDocument.save(args[1]);
+        }
+    }
+
+    /** Overwrites the descriptive Info entries and dates; an existing creation date is kept. */
+    private static void setInfoEntries(PDDocumentInformation pdInfo, Calendar time) {
+        pdInfo.setProducer("veraPDF Test Builder 1.0");
+        pdInfo.setCreator("veraPDF Test Builder");
+        pdInfo.setAuthor("veraPDF Consortium");
+        pdInfo.setKeywords(null);
+        pdInfo.setTitle(null);
+        pdInfo.setSubject(null);
+        pdInfo.setModificationDate(time);
+        if (pdInfo.getCreationDate() == null) {
+            pdInfo.setCreationDate(time);
+        }
+    }
+
+    /** Sets the PDF version by flavour part: 2.0 for ISO 19005-4, 1.4 for ISO 19005-1, else 1.7. */
+    private static void setDocumentVersion(PDDocument pdDocument, PDFAFlavour flavour) {
+        if (flavour.getPart() == PDFAFlavour.Specification.ISO_19005_4) {
+            pdDocument.getDocument().getTrailer().removeItem(COSName.INFO);
+            pdDocument.setVersion(2.0f);
+            pdDocument.getDocument().setVersion(2.0f);
+        } else if (flavour.getPart() == PDFAFlavour.Specification.ISO_19005_1) {
+            pdDocument.getDocument().setVersion(1.4f);
+        } else {
+            pdDocument.getDocument().setVersion(1.7f);
+        }
+    }
+
+    /** Picks the bundled XMP template for the flavour; {@code pdf-a.xmp} is the fallback. */
+    private static String getResourceName(PDFAFlavour flavour) {
+        if (flavour.getPart() == PDFAFlavour.Specification.ISO_19005_4) {
+            return "pdf-a4.xmp";
+        }
+        if (flavour.getPart() == PDFAFlavour.Specification.ISO_14289_1) {
+            return "pdf-ua1.xmp";
+        }
+        return "pdf-a.xmp";
+    }
+
+    /**
+     * Fills the template for {@code flavour} with the given dates and installs it as the catalog metadata.
+     *
+     * @throws IOException if the template is missing or the metadata stream cannot be created
+     */
+    private static void setMetadata(PDDocument pdDocument, PDFAFlavour flavour, Calendar creationDate, Calendar time)
+            throws IOException {
+        String resourceName = getResourceName(flavour);
+        String meta;
+        try (InputStream newXMPData = FixMetadataTool.class.getClassLoader().getResourceAsStream(resourceName)) {
+            if (newXMPData == null) {
+                throw new IOException("XMP template not found on the classpath: " + resourceName);
+            }
+            // "\\A" makes the scanner return the whole stream as a single token.
+            Scanner s = new Scanner(newXMPData, "UTF-8").useDelimiter("\\A");
+            meta = s.hasNext() ? s.next() : "";
+        }
+        meta = meta.replace("CREATION_DATE", getXMPDate(creationDate));
+        meta = meta.replace("MOD_DATE", getXMPDate(time));
+
+        if (flavour != PDFAFlavour.PDFUA_1) {
+            meta = meta.replace("FLAVOUR_PART", String.valueOf(flavour.getPart().getPartNumber()));
+            meta = meta.replace("FLAVOUR_LEVEL", PDFAFlavour.PDFA_4 != flavour ?
+                    "pdfaid:conformance=\"" + flavour.getLevel().getCode().toUpperCase(Locale.ROOT) + "\" " : "");
+        }
+        // Encode explicitly as UTF-8 so the packet does not depend on the platform charset.
+        PDMetadata newMetadata = new PDMetadata(pdDocument,
+                new ByteArrayInputStream(meta.getBytes(StandardCharsets.UTF_8)));
+        pdDocument.getDocumentCatalog().setMetadata(newMetadata);
+    }
+
+    /** Formats {@code date} as an ISO 8601 string after a round trip through the PDF date format. */
+    private static String getXMPDate(Calendar date) {
+        return XMPDateTimeFactory.createFromCalendar(DateConverter.toCalendar(DateConverter.toPDFDateFormat(date))).getISO8601String();
+    }
+}
diff --git a/fix-metadata/src/main/resources/pdf-a.xmp b/fix-metadata/src/main/resources/pdf-a.xmp
new file mode 100644
index 0000000..08576ac
--- /dev/null
+++ b/fix-metadata/src/main/resources/pdf-a.xmp
@@ -0,0 +1,16 @@
+
+
+
+
+
+
+ veraPDF Consortium
+
+
+
+
+
+
+
+
+
diff --git a/fix-metadata/src/main/resources/pdf-a4.xmp b/fix-metadata/src/main/resources/pdf-a4.xmp
new file mode 100644
index 0000000..a8ce89e
--- /dev/null
+++ b/fix-metadata/src/main/resources/pdf-a4.xmp
@@ -0,0 +1,16 @@
+
+
+
+
+
+
+ veraPDF Consortium
+
+
+
+
+
+
+
+
+
diff --git a/fix-metadata/src/main/resources/pdf-ua1.xmp b/fix-metadata/src/main/resources/pdf-ua1.xmp
new file mode 100644
index 0000000..377b6ab
--- /dev/null
+++ b/fix-metadata/src/main/resources/pdf-ua1.xmp
@@ -0,0 +1,54 @@
+
+
+
+
+ CREATION_DATE
+ veraPDF Test Builder
+ MOD_DATE
+ 2020-08-10T16:14:45+03:00
+ veraPDF Test Builder 1.0
+
+ application/pdf
+
+
+ Outlines-fail
+
+
+
+
+ veraPDF Consortium
+
+
+ uuid:9058faed-2a9e-433a-b076-ca9403522301
+ uuid:75d77e04-b092-41c1-bee5-d5eacefc1d46
+ 1
+
+
+
+ PDF/UA Universal Accessibility Schema
+ http://www.aiim.org/pdfua/ns/id/
+ pdfuaid
+
+
+
+ part
+ Integer
+ internal
+ Indicates, which part of ISO 14289 standard is followed
+
+
+
+
+
+
+
+
+
diff --git a/jpeg-files-generation/pom.xml b/jpeg-files-generation/pom.xml
new file mode 100644
index 0000000..5e16473
--- /dev/null
+++ b/jpeg-files-generation/pom.xml
@@ -0,0 +1,69 @@
+
+
+ 4.0.0
+
+
+ verapdf-tools
+ org.verapdf
+ 1.0-SNAPSHOT
+
+
+ org.verapdf
+ jpeg-files-generation
+ 1.0-SNAPSHOT
+ jpeg-files-generation
+
+
+ 8
+ 8
+
+
+
+
+ org.apache.pdfbox
+ pdfbox
+ 2.0.26
+
+
+
+ com.github.jai-imageio
+ jai-imageio-jpeg2000
+ 1.3.0
+
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-assembly-plugin
+
+
+
+ org.verapdf.tools.JpegFilesGenerationApplication
+
+
+
+ jar-with-dependencies
+
+ false
+
+
+
+ make-assembly
+ package
+
+ single
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-compiler-plugin
+ 3.8.1
+
+
+
+
diff --git a/jpeg-files-generation/src/main/java/org/verapdf/tools/JpegFilesGenerationApplication.java b/jpeg-files-generation/src/main/java/org/verapdf/tools/JpegFilesGenerationApplication.java
new file mode 100644
index 0000000..bbe7399
--- /dev/null
+++ b/jpeg-files-generation/src/main/java/org/verapdf/tools/JpegFilesGenerationApplication.java
@@ -0,0 +1,141 @@
+package org.verapdf.tools;
+
+import com.github.jaiimageio.jpeg2000.J2KImageWriteParam;
+import org.apache.pdfbox.cos.COSName;
+import org.apache.pdfbox.io.IOUtils;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.pdmodel.PDPage;
+import org.apache.pdfbox.pdmodel.PDPageContentStream;
+import org.apache.pdfbox.pdmodel.graphics.color.PDColorSpace;
+import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceCMYK;
+import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceGray;
+import org.apache.pdfbox.pdmodel.graphics.color.PDDeviceRGB;
+import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
+
+import javax.imageio.IIOImage;
+import javax.imageio.ImageIO;
+import javax.imageio.ImageTypeSpecifier;
+import javax.imageio.ImageWriter;
+import javax.imageio.metadata.IIOMetadata;
+import javax.imageio.stream.ImageOutputStream;
+import java.awt.color.ColorSpace;
+import java.awt.color.ICC_ColorSpace;
+import java.awt.image.BufferedImage;
+import java.io.*;
+import java.util.Iterator;
+
+/**
+ * Command-line tool that encodes an image as a JPEG 2000 code stream and draws it on the first
+ * page of an existing PDF document.
+ */
+public class JpegFilesGenerationApplication {
+
+    private static final String DEFAULT_OUTPUT = "result.pdf";
+
+    /**
+     * @param args input image file, input pdf file and, optionally, the output pdf file
+     *             ({@code result.pdf} in the working directory when omitted)
+     * @throws IOException if a file cannot be read, encoded or written
+     */
+    public static void main(String[] args) throws IOException {
+        if (args.length < 2) {
+            System.out.println("Parameters: input image file, input pdf file, [output pdf file]");
+            return;
+        }
+        BufferedImage image = ImageIO.read(new File(args[0]));
+        if (image == null) {
+            // ImageIO.read returns null when no registered reader can decode the file.
+            throw new IOException("Unsupported image format: " + args[0]);
+        }
+        String outputFile = args.length > 2 ? args[2] : DEFAULT_OUTPUT;
+
+        try (PDDocument doc = PDDocument.load(new File(args[1]))) {
+            PDPage page = doc.getPage(0);
+            try (PDPageContentStream contentStream = new PDPageContentStream(doc, page,
+                    PDPageContentStream.AppendMode.APPEND, false)) {
+                ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
+                encodeImageToJPEGStream(image, 1, byteArrayOutputStream);
+
+                PDImageXObject pdImage = new PDImageXObject(doc,
+                        new ByteArrayInputStream(byteArrayOutputStream.toByteArray()),
+                        COSName.JPX_DECODE, image.getWidth(), image.getHeight(),
+                        image.getColorModel().getComponentSize(0),
+                        getColorSpaceFromAWT(image));
+                contentStream.drawImage(pdImage, 0, 0);
+            }
+            doc.save(outputFile);
+        }
+    }
+
+    /** Returns the PDF device colour space matching the colour model of {@code awtImage}. */
+    private static PDColorSpace getColorSpaceFromAWT(BufferedImage awtImage) {
+        if (awtImage.getColorModel().getNumComponents() == 1) {
+            return PDDeviceGray.INSTANCE; // 256 color (gray) JPEG
+        }
+        ColorSpace awtColorSpace = awtImage.getColorModel().getColorSpace();
+        if (awtColorSpace instanceof ICC_ColorSpace && !awtColorSpace.isCS_sRGB()) {
+            throw new UnsupportedOperationException("ICC color spaces not implemented");
+        }
+
+        switch (awtColorSpace.getType()) {
+            case ColorSpace.TYPE_RGB:
+                return PDDeviceRGB.INSTANCE;
+            case ColorSpace.TYPE_GRAY:
+                return PDDeviceGray.INSTANCE;
+            case ColorSpace.TYPE_CMYK:
+                return PDDeviceCMYK.INSTANCE;
+            default:
+                throw new UnsupportedOperationException("color space not implemented: " + awtColorSpace.getType());
+        }
+    }
+
+    /**
+     * Writes {@code image} to {@code out} as a JPEG 2000 code stream and closes {@code out}.
+     *
+     * @param image   the image to encode
+     * @param quality {@code 1.0f} selects lossless encoding, any other value lossy encoding
+     * @param out     destination stream; always closed before this method returns
+     * @throws IOException if no JPEG 2000 writer is available or the encoding fails
+     */
+    private static void encodeImageToJPEGStream(BufferedImage image, float quality, OutputStream out) throws IOException {
+        ImageWriter imageWriter = null;
+        try {
+            Iterator<ImageWriter> writers = ImageIO.getImageWritersBySuffix("jp2"); // find JAI writer
+            if (!writers.hasNext()) {
+                throw new IOException("No JPEG 2000 image writer is registered");
+            }
+            imageWriter = writers.next();
+            // The image stream is closed first, flushing into out; out itself is closed afterwards.
+            try (ImageOutputStream ios = ImageIO.createImageOutputStream(out)) {
+                if (ios == null) {
+                    throw new IOException("Cannot create an image output stream");
+                }
+                imageWriter.setOutput(ios);
+                // add compression
+                J2KImageWriteParam param = (J2KImageWriteParam) imageWriter.getDefaultWriteParam();
+                param.setSOP(true);
+                param.setEPH(true);
+                param.setWriteCodeStreamOnly(true);
+                if (quality == 1.0f) {
+                    param.setLossless(true);
+                } else {
+                    param.setProgressionType("res");
+                    param.setCompressionMode(J2KImageWriteParam.MODE_EXPLICIT);
+                    param.setCompressionType("JPEG2000");
+                    param.setLossless(false);
+                    param.setCompressionQuality(quality);
+                    param.setEncodingRate(1.01);
+                    param.setFilter(J2KImageWriteParam.FILTER_97);
+                }
+                ImageTypeSpecifier imageTypeSpecifier = new ImageTypeSpecifier(image);
+                IIOMetadata data = imageWriter.getDefaultImageMetadata(imageTypeSpecifier, param);
+                imageWriter.write(data, new IIOImage(image, null, null), param); // write
+            }
+        } finally {
+            if (imageWriter != null) {
+                imageWriter.dispose();
+            }
+            IOUtils.closeQuietly(out); // clean up
+        }
+    }
+}
diff --git a/outlines-editor/pom.xml b/outlines-editor/pom.xml
new file mode 100644
index 0000000..a98cd79
--- /dev/null
+++ b/outlines-editor/pom.xml
@@ -0,0 +1,60 @@
+
+
+ 4.0.0
+
+ org.verapdf
+ outlines-editor
+ 1.0-SNAPSHOT
+
+
+ 8
+ 8
+
+
+
+ outlines-editor
+
+
+ org.apache.maven.plugins
+ maven-assembly-plugin
+
+
+
+ org.verapdf.tools.OutlinesEditor
+
+
+
+ jar-with-dependencies
+
+ false
+
+
+
+ make-assembly
+ package
+
+ single
+
+
+
+
+
+ org.apache.maven.plugins
+ maven-compiler-plugin
+ 3.8.1
+
+
+
+
+
+
+ org.apache.pdfbox
+ pdfbox
+ 2.0.26
+ compile
+
+
+
+
diff --git a/outlines-editor/src/main/java/org/verapdf/tools/OutlinesEditor.java b/outlines-editor/src/main/java/org/verapdf/tools/OutlinesEditor.java
new file mode 100644
index 0000000..a507e13
--- /dev/null
+++ b/outlines-editor/src/main/java/org/verapdf/tools/OutlinesEditor.java
@@ -0,0 +1,46 @@
+package org.verapdf.tools;
+
+import org.apache.pdfbox.pdmodel.PDDocument;
+import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDDocumentOutline;
+import org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItem;
+
+import java.io.*;
+
+/**
+ * Command-line tool that replaces the outline of a PDF document with a flat list of items whose
+ * titles are the lines of a text file.
+ */
+public class OutlinesEditor {
+
+    /**
+     * @param args input pdf file, input text file, output pdf file
+     * @throws IOException if a file cannot be read or the document cannot be saved
+     */
+    public static void main(String[] args) throws IOException {
+        if (args.length < 3) {
+            System.out.println("Parameters: input pdf file, input text file, output pdf file");
+            return;
+        }
+        // try-with-resources releases the document even when reading titles or saving fails.
+        try (PDDocument document = PDDocument.load(new File(args[0]))) {
+            PDDocumentOutline outlines = new PDDocumentOutline();
+            document.getDocumentCatalog().setDocumentOutline(outlines);
+            PDOutlineItem outline = new PDOutlineItem();
+            outlines.addFirst(outline);
+            try (BufferedReader reader = new BufferedReader(new FileReader(args[1]))) {
+                // The first line always titles the first item, even when it is empty or absent.
+                outline.setTitle(reader.readLine());
+                // Every further line adds a sibling; the first empty line ends the list.
+                String line = reader.readLine();
+                while (line != null && !line.isEmpty()) {
+                    PDOutlineItem newOutline = new PDOutlineItem();
+                    newOutline.setTitle(line);
+                    outline.insertSiblingAfter(newOutline);
+                    outline = newOutline;
+                    line = reader.readLine();
+                }
+            }
+            document.save(new File(args[2]));
+        }
+    }
+}
diff --git a/pdf-deprecated-finder/pom.xml b/pdf-deprecated-finder/pom.xml
index d745a55..82fcf81 100644
--- a/pdf-deprecated-finder/pom.xml
+++ b/pdf-deprecated-finder/pom.xml
@@ -17,7 +17,7 @@
org.verapdf.pdfbox
pdfbox
- [2.0.0,2.1.0)
+ [2.0.62,2.1.0)
@@ -28,7 +28,7 @@
org.verapdf.pdfbox
xmpbox
- 2.0.44
+ [2.0.62,2.1.0)
diff --git a/pdf-stream-change/src/main/java/org/verapdf/tools/Cli.java b/pdf-stream-change/src/main/java/org/verapdf/tools/Cli.java
index 6e893a4..3eaff91 100644
--- a/pdf-stream-change/src/main/java/org/verapdf/tools/Cli.java
+++ b/pdf-stream-change/src/main/java/org/verapdf/tools/Cli.java
@@ -5,6 +5,7 @@
import org.apache.pdfbox.pdmodel.PDDocument;
import java.io.*;
+import java.util.Map;
/**
* @author Maxim Plushchov
@@ -25,16 +26,26 @@ public static void main(String[] args) throws Exception {
String textFileName = args[2];
Integer objectNumber = Integer.decode(args[3]);
PDDocument document = PDDocument.load(new File(pdfFileName));
- COSObject object = document.getDocument().getObjectFromPool(new COSObjectKey(objectNumber, 0));
+ COSObjectKey key = new COSObjectKey(objectNumber, 0);
+ COSObject object = document.getDocument().getObjectFromPool(key);
if (object == null) {
System.out.println("Object with number " + objectNumber + " not found.");
return;
}
COSBase base = object.getObject();
- if (!(base instanceof COSStream)) {
+ if (!(base instanceof COSDictionary)) {
System.out.println("Object with number " + objectNumber + " not a stream.");
return;
}
+ if (!(base instanceof COSStream)) {
+ COSStream newBase = new COSStream();
+ for (Map.Entry entry : ((COSDictionary)base).entrySet()) {
+ newBase.setItem(entry.getKey(), entry.getValue());
+ }
+ base = newBase;
+ object.setObject(base);
+ System.out.println("Stream added to dictionary " + objectNumber + ".");
+ }
COSStream stream = (COSStream)base;
if (READ.equals(args[0])) {
try (InputStream in = stream.createInputStream(); OutputStream out = new FileOutputStream(textFileName)) {
diff --git a/policy-generator/src/main/java/org/verapdf/tools/policy/generator/PolicyGenerator.java b/policy-generator/src/main/java/org/verapdf/tools/policy/generator/PolicyGenerator.java
index 8ff582c..e0c152d 100644
--- a/policy-generator/src/main/java/org/verapdf/tools/policy/generator/PolicyGenerator.java
+++ b/policy-generator/src/main/java/org/verapdf/tools/policy/generator/PolicyGenerator.java
@@ -4,7 +4,7 @@
import org.verapdf.core.VeraPDFException;
import org.verapdf.metadata.fixer.FixerFactory;
import org.verapdf.metadata.fixer.MetadataFixerConfig;
-import org.verapdf.pdfa.VeraGreenfieldFoundryProvider;
+import org.verapdf.gf.foundry.VeraGreenfieldFoundryProvider;
import org.verapdf.pdfa.flavours.PDFAFlavour;
import org.verapdf.pdfa.validation.profiles.Profiles;
import org.verapdf.pdfa.validation.profiles.ValidationProfile;
@@ -26,10 +26,13 @@
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.EnumSet;
+import java.util.LinkedList;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
+import static java.util.stream.Collectors.toList;
+
public class PolicyGenerator {
private static final String HELP = "[options] \n Options:";
private static final Logger logger = Logger.getLogger(PolicyGenerator.class.getCanonicalName());
@@ -59,31 +62,33 @@ public static void main(String[] args) {
try {
PolicyGenerator generator = new PolicyGenerator();
+ if (commandLine.getArgs().length < 1) {
+ formatter.printHelp(HELP, options);
+ return;
+ }
if (commandLine.hasOption("n")) {
generator.isLogsEnabled = false;
}
- if (commandLine.hasOption("p")) {
- String profilePath = commandLine.getOptionValue("profile");
- if (profilePath != null) {
- try (InputStream is = new FileInputStream(Paths.get(profilePath).toFile())) {
- generator.customProfile = Profiles.profileFromXml(is);
- } catch (JAXBException | FileNotFoundException e) {
- generator.customProfile = null;
- logger.log(Level.WARNING, "Error while getting profile from xml file. The profile will be selected automatically");
- } catch (IOException e) {
- e.printStackTrace();
+ generator.fileName = String.join(" ", commandLine.getArgs());
+ if (commandLine.hasOption("v")) {
+ generator.validate(commandLine.getOptionValue("v"), commandLine.getOptionValue("profile"));
+ } else {
+ if (commandLine.hasOption("p")) {
+ String profilePath = commandLine.getOptionValue("profile");
+ if (profilePath != null) {
+ try (InputStream is = new FileInputStream(Paths.get(profilePath).toFile())) {
+ generator.customProfile = Profiles.profileFromXml(is);
+ } catch (JAXBException | FileNotFoundException e) {
+ generator.customProfile = null;
+ logger.log(Level.WARNING, "Error while getting profile from xml file. The profile will be selected automatically");
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
}
}
+ generator.validate();
}
- if (commandLine.getArgs().length < 1) {
- formatter.printHelp(HELP, options);
- return;
- }
- generator.fileName = String.join(" ", commandLine.getArgs());
-
- generator.validate();
generator.generate();
-
} catch (IOException e) {
e.printStackTrace();
}
@@ -97,18 +102,61 @@ private static Options defineOptions() {
Option profile = new Option("p", "profile", true, "Specifies path to custom profile");
profile.setRequired(false);
options.addOption(profile);
+ Option verapdfPath = new Option("v", "verapdf_path", true, "path to verapdf");
+ verapdfPath.setRequired(false);
+ options.addOption(verapdfPath);
return options;
}
+    /**
+     * Validates {@code fileName} with an external veraPDF executable; its standard output
+     * is captured in a temporary ".mrr" file that is then opened as {@code report}.
+     * @param verapdfPath path of the veraPDF executable to start
+     * @param profilePath value passed after {@code --profile}; the option is omitted when {@code null}
+     * @throws IOException if the temporary report file cannot be created
+     */
+    private void validate(String verapdfPath, String profilePath) throws IOException {
+        List<String> command = new LinkedList<>();
+        command.add(verapdfPath);
+        if (isLogsEnabled) {
+            command.add("--addlogs");
+        }
+        if (profilePath != null) {
+            command.add("--profile");
+            command.add(profilePath);
+        }
+        command.add(fileName);
+        // As before, an empty argument is passed as a pair of double quotes.
+        command = command.stream().map(parameter -> parameter.isEmpty() ? "\"\"" : parameter).collect(toList());
+        File tempMrrFile = File.createTempFile("veraPDF", ".mrr");
+        tempMrrFile.deleteOnExit();
+        try {
+            ProcessBuilder pb = new ProcessBuilder(command);
+            // ProcessBuilder does not run the command through a shell, so a "1>file" argument is
+            // not a portable redirection; standard output is redirected explicitly instead.
+            pb.redirectOutput(tempMrrFile);
+            pb.redirectError(ProcessBuilder.Redirect.INHERIT);
+            pb.start().waitFor();
+            report = new FileInputStream(tempMrrFile);
+        } catch (InterruptedException exception) {
+            Thread.currentThread().interrupt();
+            exception.printStackTrace();
+        } catch (IOException exception) {
+            exception.printStackTrace();
+        }
+    }
+
private void validate() throws IOException {
- MetadataFixerConfig fixConf = FixerFactory.configFromValues("test", true);
+ MetadataFixerConfig fixConf = FixerFactory.configFromValues("test");
ProcessorConfig processorConfig = this.customProfile == null
? ProcessorFactory.fromValues(
- ValidatorFactory.createConfig(PDFAFlavour.NO_FLAVOUR, PDFAFlavour.PDFA_1_B, true, 0, false, isLogsEnabled, Level.WARNING, BaseValidator.DEFAULT_MAX_NUMBER_OF_DISPLAYED_FAILED_CHECKS),
+ ValidatorFactory.createConfig(PDFAFlavour.NO_FLAVOUR, PDFAFlavour.PDFA_1_B, true,
+ 0, false, isLogsEnabled, Level.WARNING, BaseValidator.DEFAULT_MAX_NUMBER_OF_DISPLAYED_FAILED_CHECKS, false, "", false, false),
null, null, fixConf, EnumSet.of(TaskType.VALIDATE), (String) null)
- : ProcessorFactory.fromValues(
- ValidatorFactory.createConfig(PDFAFlavour.NO_FLAVOUR, PDFAFlavour.NO_FLAVOUR, true, 0, false, isLogsEnabled, Level.WARNING, BaseValidator.DEFAULT_MAX_NUMBER_OF_DISPLAYED_FAILED_CHECKS),
+ : ProcessorFactory.fromValues(ValidatorFactory.createConfig(PDFAFlavour.NO_FLAVOUR,
+ PDFAFlavour.NO_FLAVOUR, true, 0, false, isLogsEnabled, Level.WARNING,
+ BaseValidator.DEFAULT_MAX_NUMBER_OF_DISPLAYED_FAILED_CHECKS, false, "", false, false),
null, null, fixConf, EnumSet.of(TaskType.VALIDATE), this.customProfile, null);
BatchProcessor processor = ProcessorFactory.fileBatchProcessor(processorConfig);
@@ -259,7 +307,8 @@ private void appendLogs() {
String occurrencesToBeReplaced = node.getAttributes().getNamedItem("occurrences").getNodeValue();
String levelToBeReplaced = node.getAttributes().getNamedItem("level").getNodeValue();
content.append(PolicyHelper.LOG
- .replace("{logToBeReplaced}", logToBeReplaced.replace("'", "'"))
+ .replace("{logToBeReplaced}", logToBeReplaced.replace("'", "'")
+ .replace(shortFilePath, ".pdf"))
.replace("{occurrencesToBeReplaced}", occurrencesToBeReplaced)
.replace("{levelToBeReplaced}", levelToBeReplaced));
diff --git a/policy-generator/src/main/java/org/verapdf/tools/policy/generator/PolicyHelper.java b/policy-generator/src/main/java/org/verapdf/tools/policy/generator/PolicyHelper.java
index 355fddf..e9840f8 100644
--- a/policy-generator/src/main/java/org/verapdf/tools/policy/generator/PolicyHelper.java
+++ b/policy-generator/src/main/java/org/verapdf/tools/policy/generator/PolicyHelper.java
@@ -10,7 +10,7 @@ public class PolicyHelper {
" \n" +
" \n" +
"\n" +
- " name = \"Checking the validationReport: profile\"\n" +
+ " \n" +
" \n" +
" Failed check, Expected: isCompliant=true\n" +
" \n" +
@@ -25,13 +25,13 @@ public class PolicyHelper {
" \n" +
"\n" +
"\n" +
- " name = \"Checking the validationReport: document is not compliant\"\n" +
+ " \n" +
" \n" +
" Failed check, Expected: isCompliant=false\n" +
" \n" +
" \n" +
"\n" +
- " name = \"Checking the validationReport: rules\"\n" +
+ " \n" +
" \n" +
" Failed check, Expected: {failedRulesToBeReplaced}\t\n" +
" \n" +
@@ -47,18 +47,22 @@ public class PolicyHelper {
"\n" +
" \n";
public static final String LOGS_REPORT = "\n" +
- " name = \"Checking the logs\"\n" +
+ " \n" +
+ " \n" +
+ " Failed check, Expected: contains logs\n" +
+ " \n" +
+ "\n" +
" \n" +
" Failed check, Expected: {logsCountToBeReplaced}\t\n" +
" \n";
- public static final String NO_LOGS = "\n name = \"Checking for the absence of logs\"\n" +
+ public static final String NO_LOGS = "\n \n" +
" \n" +
" Failed check, Expected: no logs\n" +
" \n";
public static final String LOGS = "\n" +
" \n" +
" \n" +
" \n" +
"\n" +
- " name = \"Checking the taskResult\"\n" +
- " \n" +
+ " \n" +
+ " \n" +
" \n" +
" Failed check, Expected Error: {exceptionToBeReplaced}\n" +
" \n" +
" \n" +
" \n" +
"\n" +
- " name = \"Checking the batchSummary\"\n" +
+ " \n" +
" \n" +
" \n" +
diff --git a/pom.xml b/pom.xml
index 1e7b341..91b4b2a 100644
--- a/pom.xml
+++ b/pom.xml
@@ -7,7 +7,7 @@
verapdf-parent
org.verapdf
- 1.21.1
+ 1.25.1
org.verapdf
@@ -15,7 +15,11 @@
pom
1.0-SNAPSHOT
+ corpus-wiki-generation
+ fix-metadata
generation-json-from-profile
+ jpeg-files-generation
+ outlines-editor
pdf-decode-fonts
pdf-decode-streams
pdf-deprecated-finder
@@ -23,12 +27,14 @@
pdf-stream-dump
policy-generator
preforma-classification
+ profiles-wiki-generation
tagged-structure-merger
+ verapdf-examples
veraPDF-parsers-performance
- [1.21.0,1.22.0-RC)
+ [1.25.0,1.26.0-RC)
diff --git a/preforma-classification/src/main/java/org/verapdf/tools/classification/CliProcessor.java b/preforma-classification/src/main/java/org/verapdf/tools/classification/CliProcessor.java
index 3c222d1..ba505cb 100644
--- a/preforma-classification/src/main/java/org/verapdf/tools/classification/CliProcessor.java
+++ b/preforma-classification/src/main/java/org/verapdf/tools/classification/CliProcessor.java
@@ -4,6 +4,7 @@
import javax.xml.transform.TransformerException;
import java.io.*;
+import java.nio.charset.StandardCharsets;
import java.util.*;
/**
@@ -99,7 +100,6 @@ private void processFiles(final List toProcess) throws FileNotFoundExcepti
System.out.println(out.getAbsolutePath());
os = new FileOutputStream(out);
}
- XsltTransformer.transform(source, xsltIS,
- os, arguments);
+ XsltTransformer.transform(source, xsltIS, new PrintWriter(new OutputStreamWriter(os, StandardCharsets.UTF_8)), arguments);
}
}
diff --git a/profiles-wiki-generation/pom.xml b/profiles-wiki-generation/pom.xml
new file mode 100644
index 0000000..ee8b498
--- /dev/null
+++ b/profiles-wiki-generation/pom.xml
@@ -0,0 +1,29 @@
+
+
+
+ verapdf-tools
+ org.verapdf
+ 1.0-SNAPSHOT
+
+ 4.0.0
+
+ profiles-wiki-generation
+
+
+ 11
+ 11
+
+
+
+
+
+ org.verapdf
+ core
+ ${verapdf.version}
+
+
+
+
+
diff --git a/profiles-wiki-generation/src/main/java/ProfilesWikiGenerator.java b/profiles-wiki-generation/src/main/java/ProfilesWikiGenerator.java
new file mode 100644
index 0000000..95d8639
--- /dev/null
+++ b/profiles-wiki-generation/src/main/java/ProfilesWikiGenerator.java
@@ -0,0 +1,125 @@
+import org.verapdf.pdfa.validation.profiles.Profiles;
+import org.verapdf.pdfa.validation.profiles.Reference;
+import org.verapdf.pdfa.validation.profiles.Rule;
+import org.verapdf.pdfa.validation.profiles.ValidationProfile;
+
+import java.io.*;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.util.SortedSet;
+import java.util.TreeSet;
+
+/**
+ * Generates a Markdown wiki page listing the rules of a veraPDF validation profile.
+ *
+ * The profile is read from {@link #inputFileName} and the page is written to
+ * {@link #outputFileName}; {@link #flavour} selects the page title and the
+ * specification and level labels printed for every rule.
+ */
+public class ProfilesWikiGenerator {
+
+    public static String inputFileName = "PDFA-4.xml";
+    public static String pdfa1_flavour = "PDF/A-1";
+    public static String pdfa2_flavour = "PDF/A-2";
+    public static String pdfua1_flavour = "PDF/UA-1";
+    public static String pdfa4_flavour = "PDF/A-4";
+    public static String flavour = pdfa4_flavour;
+    public static String outputFileName = "wiki_" + flavour.replace("/","") + ".md";
+
+    /**
+     * Loads the profile and writes one Markdown section per rule, ordered by
+     * {@code Profiles.RuleComparator}. Failures are printed to standard error.
+     *
+     * @param args not used
+     */
+    public static void main(String[] args) {
+        // The charset is named explicitly so the page does not depend on the platform default encoding.
+        try (InputStream inputStream = Files.newInputStream(Paths.get(inputFileName));
+             PrintWriter out = new PrintWriter(outputFileName, "UTF-8")) {
+            ValidationProfile profile = Profiles.profileFromXml(inputStream);
+            SortedSet<Rule> rules = new TreeSet<>(new Profiles.RuleComparator());
+            rules.addAll(profile.getRules());
+            out.println("# " + flavour + " validation rules");
+            for (Rule rule : rules) {
+                printRule(out, rule);
+            }
+        } catch (Exception e) {
+            e.printStackTrace();
+        }
+    }
+
+    /**
+     * Writes the Markdown section of a single rule.
+     *
+     * @param out  writer of the wiki page
+     * @param rule rule to describe
+     */
+    private static void printRule(PrintWriter out, Rule rule) {
+        out.println("## Rule " + rule.getRuleId().getClause() + "-" + rule.getRuleId().getTestNumber());
+        out.println();
+        out.println("### Requirement");
+        out.println();
+        // Every " (*) " marker in the description starts a new quoted bullet.
+        String description = rule.getDescription().replace(" (*) ", "*\n\n>- *");
+        out.println(">*" + description + "*");
+        out.println();
+        out.println("### Error details");
+        out.println();
+        out.println(rule.getError().getMessage());
+        out.println();
+        out.println("* Object type: `" + rule.getObject() + "`");
+        out.println("* Test condition: `" + rule.getTest() + "`");
+        out.println("* Specification: " + getSpecification());
+        String levels = getLevels();
+        if (levels != null) {
+            out.println("* Levels: " + levels);
+        }
+        if (!rule.getReferences().isEmpty()) {
+            out.println("* Additional references:");
+            for (Reference reference : rule.getReferences()) {
+                String clause = reference.getClause();
+                // A reference without a clause is listed by its specification only.
+                if (clause == null || clause.isEmpty()) {
+                    out.println(" * " + reference.getSpecification());
+                } else {
+                    out.println(" * " + reference.getSpecification() + ", " + clause);
+                }
+            }
+        }
+        out.println();
+    }
+
+    /**
+     * Returns the conformance levels listed for the selected flavour.
+     *
+     * @return the level list, or {@code null} when none is defined for the flavour (e.g. PDF/UA-1)
+     */
+    private static String getLevels() {
+        if (pdfa4_flavour.equals(flavour)) {
+            return "4, 4E, 4F";
+        } else if (pdfa1_flavour.equals(flavour)) {
+            return "A, B";
+        } else if (pdfa2_flavour.equals(flavour)) {
+            return "A, B, E";
+        }
+        return null;
+    }
+
+    /**
+     * Returns the ISO specification label for the selected flavour.
+     *
+     * @return the specification label, or {@code null} for an unknown flavour
+     */
+    private static String getSpecification() {
+        if (pdfa4_flavour.equals(flavour)) {
+            return "ISO 19005-4:2020";
+        } else if (pdfa1_flavour.equals(flavour)) {
+            return "ISO 19005-1:2005";
+        } else if (pdfa2_flavour.equals(flavour)) {
+            return "ISO 19005-2:2011, ISO 19005-3:2012";
+        } else if (pdfua1_flavour.equals(flavour)) {
+            return "ISO 14289-1:2014";
+        }
+        return null;
+    }
+}
diff --git a/veraPDF-parsers-performance/pom.xml b/veraPDF-parsers-performance/pom.xml
index 2840ca7..6b7c956 100644
--- a/veraPDF-parsers-performance/pom.xml
+++ b/veraPDF-parsers-performance/pom.xml
@@ -54,12 +54,6 @@
jcommander
-
- log4j
- log4j
- 1.2.17
-
-
junit
junit
diff --git a/verapdf-examples/pom.xml b/verapdf-examples/pom.xml
new file mode 100644
index 0000000..031d7ef
--- /dev/null
+++ b/verapdf-examples/pom.xml
@@ -0,0 +1,25 @@
+
+
+
+ verapdf-tools
+ org.verapdf
+ 1.0-SNAPSHOT
+
+ 4.0.0
+
+ org.verapdf
+ verapdf-example
+ 1.0-SNAPSHOT
+
+
+
+ org.verapdf.apps
+ greenfield-apps
+ ${verapdf.version}
+
+
+
+
+
diff --git a/verapdf-examples/src/main/java/IOTest.java b/verapdf-examples/src/main/java/IOTest.java
new file mode 100644
index 0000000..b2dfc4d
--- /dev/null
+++ b/verapdf-examples/src/main/java/IOTest.java
@@ -0,0 +1,69 @@
+import org.verapdf.core.VeraPDFException;
+import org.verapdf.features.FeatureExtractorConfig;
+import org.verapdf.features.FeatureFactory;
+import org.verapdf.gf.foundry.VeraGreenfieldFoundryProvider;
+import org.verapdf.metadata.fixer.FixerFactory;
+import org.verapdf.metadata.fixer.MetadataFixerConfig;
+import org.verapdf.pdfa.validation.validators.ValidatorConfig;
+import org.verapdf.pdfa.validation.validators.ValidatorFactory;
+import org.verapdf.processor.*;
+import org.verapdf.processor.plugins.PluginsCollectionConfig;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.ArrayList;
+import java.util.EnumSet;
+import java.util.List;
+
+// example from https://docs.verapdf.org/develop/processor/ (veraPDF.github.io\develop\processor\index.md)
+/**
+ * Batch processing example: runs the validation, feature extraction and metadata
+ * fixing tasks on "fail.pdf" and writes the MRR report to standard output.
+ */
+public class IOTest {
+
+    /**
+     * Configures a batch processor with default settings and processes "fail.pdf".
+     *
+     * @param args not used
+     */
+    public static void main(String[] args) {
+        // Foundry initialising. Can be changed into PDFBox based one
+        VeraGreenfieldFoundryProvider.initialise();
+        // Default validator config
+        ValidatorConfig validatorConfig = ValidatorFactory.defaultConfig();
+        // or it is possible to select the needed parameters using ValidatorConfigBuilder, for example flavour
+        // ValidatorConfig validatorConfig = new ValidatorConfigBuilder().flavour(PDFAFlavour.PDFA_4).build();
+        FormatOption format = FormatOption.MRR;
+        // Default features config
+        FeatureExtractorConfig featureConfig = FeatureFactory.defaultConfig();
+        // Default plugins config
+        PluginsCollectionConfig pluginsConfig = PluginsCollectionConfig.defaultConfig();
+        // Default fixer config
+        MetadataFixerConfig fixerConfig = FixerFactory.defaultConfig();
+        // Tasks configuring
+        EnumSet<TaskType> tasks = EnumSet.of(TaskType.VALIDATE, TaskType.EXTRACT_FEATURES, TaskType.FIX_METADATA);
+        // Creating processor config
+        ProcessorConfig processorConfig = ProcessorFactory.fromValues(validatorConfig, featureConfig, pluginsConfig, fixerConfig, tasks);
+        // The report goes to System.out. It is deliberately not a try-with-resources
+        // resource: closing System.out would discard all later System.out output.
+        OutputStream reportStream = System.out;
+        try (BatchProcessor processor = ProcessorFactory.fileBatchProcessor(processorConfig)) {
+            // Generating list of files for processing
+            List<File> files = new ArrayList<>();
+            files.add(new File("fail.pdf"));
+            // starting the processor
+            processor.process(files, ProcessorFactory.getHandler(format, true, reportStream,
+                    processorConfig.getValidatorConfig().isRecordPasses()));
+            // Push out anything still buffered, since the stream is no longer closed here.
+            reportStream.flush();
+        } catch (VeraPDFException e) {
+            System.err.println("Exception raised while processing batch");
+            e.printStackTrace();
+        } catch (IOException excep) {
+            System.err.println("Exception raised closing MRR temp file.");
+            excep.printStackTrace();
+        }
+    }
+}
diff --git a/verapdf-examples/src/main/java/IOTest2.java b/verapdf-examples/src/main/java/IOTest2.java
new file mode 100644
index 0000000..7758594
--- /dev/null
+++ b/verapdf-examples/src/main/java/IOTest2.java
@@ -0,0 +1,46 @@
+import org.verapdf.core.EncryptedPdfException;
+import org.verapdf.core.ModelParsingException;
+import org.verapdf.core.ValidationException;
+import org.verapdf.gf.foundry.VeraGreenfieldFoundryProvider;
+import org.verapdf.pdfa.Foundries;
+import org.verapdf.pdfa.PDFAParser;
+import org.verapdf.pdfa.PDFAValidator;
+import org.verapdf.pdfa.results.ValidationResult;
+
+import java.io.FileInputStream;
+import java.io.IOException;
+
+// example from https://docs.verapdf.org/develop/ (veraPDF.github.io\develop\index.md)
+/**
+ * Validation example: validates "mydoc.pdf" against the flavour reported by the
+ * parser and prints the outcome.
+ */
+public class IOTest2 {
+
+    /**
+     * Validates "mydoc.pdf" and reports whether it is compliant.
+     *
+     * @param args not used
+     */
+    public static void main(String[] args) {
+        VeraGreenfieldFoundryProvider.initialise();
+        // The file stream is a resource of its own so it is closed even when the parser cannot be created.
+        try (FileInputStream pdf = new FileInputStream("mydoc.pdf");
+             PDFAParser parser = Foundries.defaultInstance().createParser(pdf)) {
+            PDFAValidator validator = Foundries.defaultInstance().createValidator(parser.getFlavour(), false);
+            ValidationResult result = validator.validate(parser);
+            if (result.isCompliant()) {
+                // File is valid for the flavour reported by the parser
+                System.out.println("mydoc.pdf is compliant with " + parser.getFlavour());
+            } else {
+                // it isn't
+                System.out.println("mydoc.pdf is not compliant with " + parser.getFlavour());
+            }
+        } catch (IOException | ValidationException | ModelParsingException | EncryptedPdfException exception) {
+            // Exception during validation: report it instead of silently dropping it
+            System.err.println("Exception raised while validating mydoc.pdf");
+            exception.printStackTrace();
+        }
+    }
+
+}
diff --git a/verapdf-examples/src/main/java/IOTest3.java b/verapdf-examples/src/main/java/IOTest3.java
new file mode 100644
index 0000000..97732d9
--- /dev/null
+++ b/verapdf-examples/src/main/java/IOTest3.java
@@ -0,0 +1,48 @@
+import org.verapdf.core.EncryptedPdfException;
+import org.verapdf.core.ModelParsingException;
+import org.verapdf.core.ValidationException;
+import org.verapdf.gf.foundry.VeraGreenfieldFoundryProvider;
+import org.verapdf.pdfa.Foundries;
+import org.verapdf.pdfa.PDFAParser;
+import org.verapdf.pdfa.PDFAValidator;
+import org.verapdf.pdfa.flavours.PDFAFlavour;
+import org.verapdf.pdfa.results.ValidationResult;
+
+import java.io.FileInputStream;
+import java.io.IOException;
+
+// example from https://docs.verapdf.org/develop/ (veraPDF.github.io\develop\index.md)
+/**
+ * Validation example: validates "mydoc.pdf" against an explicitly chosen flavour
+ * (the one obtained from the string "1b") and prints the outcome.
+ */
+public class IOTest3 {
+
+    /**
+     * Validates "mydoc.pdf" against the chosen flavour and reports whether it is compliant.
+     *
+     * @param args not used
+     */
+    public static void main(String[] args) {
+        VeraGreenfieldFoundryProvider.initialise();
+        PDFAFlavour flavour = PDFAFlavour.fromString("1b");
+        // The file stream is a resource of its own so it is closed even when the parser cannot be created.
+        try (FileInputStream pdf = new FileInputStream("mydoc.pdf");
+             PDFAParser parser = Foundries.defaultInstance().createParser(pdf, flavour)) {
+            PDFAValidator validator = Foundries.defaultInstance().createValidator(flavour, false);
+            ValidationResult result = validator.validate(parser);
+            if (result.isCompliant()) {
+                // File is a valid PDF/A 1b
+                System.out.println("mydoc.pdf is compliant with " + flavour);
+            } else {
+                // it isn't
+                System.out.println("mydoc.pdf is not compliant with " + flavour);
+            }
+        } catch (IOException | ValidationException | ModelParsingException | EncryptedPdfException exception) {
+            // Exception during validation: report it instead of silently dropping it
+            System.err.println("Exception raised while validating mydoc.pdf");
+            exception.printStackTrace();
+        }
+    }
+
+}
diff --git a/verapdf-examples/src/main/java/PluginExample.java b/verapdf-examples/src/main/java/PluginExample.java
new file mode 100644
index 0000000..698fe94
--- /dev/null
+++ b/verapdf-examples/src/main/java/PluginExample.java
@@ -0,0 +1,44 @@
+import java.util.ArrayList;
+import java.util.List;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+// and from VeraPDF:
+import org.verapdf.core.FeatureParsingException;
+import org.verapdf.features.AbstractEmbeddedFileFeaturesExtractor;
+import org.verapdf.features.EmbeddedFileFeaturesData;
+import org.verapdf.features.tools.FeatureTreeNode;
+
+// example from https://docs.verapdf.org/plugins/ (veraPDF.github.io\plugins\index.md)
+/**
+ * Minimal embedded-file features extractor plugin: whatever the embedded file is,
+ * it reports a single "Hello" node whose value is "World".
+ */
+public class PluginExample extends AbstractEmbeddedFileFeaturesExtractor {
+
+    private static final Logger LOGGER = Logger.getLogger(PluginExample.class.getCanonicalName());
+
+    /**
+     * Builds the feature list for an embedded file.
+     *
+     * @param embeddedFileFeaturesData data of the embedded file; not inspected by this example
+     * @return a list holding the "Hello" node, or an empty list when the node could not be built
+     */
+    @Override
+    public List<FeatureTreeNode> getEmbeddedFileFeatures(EmbeddedFileFeaturesData embeddedFileFeaturesData) {
+        final List<FeatureTreeNode> features = new ArrayList<>();
+        try {
+            features.add(helloWorldNode());
+        } catch (FeatureParsingException parsingFailure) {
+            LOGGER.log(Level.SEVERE, parsingFailure.getMessage(), parsingFailure);
+        }
+        return features;
+    }
+
+    /** Creates the root node named "Hello" and sets its value to "World". */
+    private static FeatureTreeNode helloWorldNode() throws FeatureParsingException {
+        final FeatureTreeNode greeting = FeatureTreeNode.createRootNode("Hello");
+        greeting.setValue("World");
+        return greeting;
+    }
+
+}