From 273f698ad4a259b5518f45ef3f3bf43370139e6a Mon Sep 17 00:00:00 2001
From: Tushar
Date: Sun, 29 Dec 2019 16:25:14 +0530
Subject: [PATCH] Fixed luscious ripper.

---
Review notes (this section sits between the "---" separator and the diff,
where git-am treats text as commentary rather than commit message):

 * Patch line structure was restored from a whitespace-collapsed copy.
   Indentation is assumed to be 4 spaces, and blank context lines were
   placed from the hunk line counts; their exact position in the first
   hunk of each file is a best guess. Verify against the target tree, or
   apply with "git apply --ignore-whitespace --recount" if a hunk is
   rejected.
 * sanitizeURL() and getGID() now use Matcher.lookingAt() instead of
   matches(). P no longer ends in ".*$", so with matches() any URL that
   still carried GET parameters or extra path text was rejected and was
   never sanitized.
 * The dot in "luscious.net" is escaped again in P and in both
   replaceFirst() patterns.
 * sanitizeURL() no longer throws a raw Exception just to catch it a few
   lines later; it logs and falls back to super.sanitizeURL(url).
 * Hunk headers and the diffstat were recounted for the edited lines. The
   post-image blob ids on the "index" lines were left untouched and no
   longer correspond to the edited content.

 .../ripme/ripper/rippers/LusciousRipper.java  | 38 ++++++++++++++++++--
 .../ripper/rippers/LusciousRipperTest.java    |  6 ++-
 2 files changed, 39 insertions(+), 5 deletions(-)

diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/LusciousRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/LusciousRipper.java
index e56f8dbcc..7eabfc6f0 100644
--- a/src/main/java/com/rarchives/ripme/ripper/rippers/LusciousRipper.java
+++ b/src/main/java/com/rarchives/ripme/ripper/rippers/LusciousRipper.java
@@ -19,7 +19,7 @@ public class LusciousRipper extends AbstractHTMLRipper {
 
     private static final int RETRY_COUNT = 5; // Keeping it high for read timeout exception.
 
-    private Pattern p = Pattern.compile("^https?://(?:members.)?luscious\\.net/albums/([-_.0-9a-zA-Z]+).*$");
+    private static final Pattern P = Pattern.compile("^https?:\\/\\/(?:members\\.|old\\.|www\\.)?luscious\\.net\\/albums\\/([-_.0-9a-zA-Z]+)\\/?");
     private DownloadThreadPool lusciousThreadPool = new DownloadThreadPool("lusciousThreadPool");
 
     public LusciousRipper(URL url) throws IOException {
@@ -69,7 +69,8 @@ public Document getNextPage(Document doc) throws IOException {
 
     @Override
     public String getGID(URL url) throws MalformedURLException {
-        Matcher m = p.matcher(url.toExternalForm());
-        if (m.matches()) {
+        Matcher m = P.matcher(url.toExternalForm());
+        // lookingAt(): P stops at the album name, so trailing path/query text must not block the match.
+        if (m.lookingAt()) {
             return m.group(1);
         }
@@ -87,6 +88,37 @@ public DownloadThreadPool getThreadPool() {
         return lusciousThreadPool;
     }
 
+    @Override
+    public URL sanitizeURL(URL url) throws MalformedURLException {
+        // Sanitizes the url by removing GET parameters and converting it to the old api url.
+        // "https://old.luscious.net/albums/albumname"
+        Matcher m = P.matcher(url.toString());
+        // lookingAt() instead of matches(): P ends at the album name, so a url that
+        // still carries GET parameters can never match as a whole.
+        if (!m.lookingAt()) {
+            LOGGER.info("Error sanitizing the url.");
+            LOGGER.error("ERROR: Unable to sanitize url.");
+            return super.sanitizeURL(url);
+        }
+
+        String sanitizedUrl = m.group().replaceFirst(
+                "^https?:\\/\\/(?:members\\.|old\\.|www\\.)?luscious\\.net",
+                "https://old.luscious.net");
+        return new URL(sanitizedUrl);
+    }
+
+    @Override
+    public String normalizeUrl(String url) {
+        try {
+            return url.replaceFirst(
+                    "^https?:\\/\\/(?:members\\.|old\\.)?luscious\\.net", "https://www.luscious.net");
+        } catch (Exception e) {
+            LOGGER.info("Error normalizing the url.");
+            LOGGER.error(e);
+            return super.normalizeUrl(url);
+        }
+    }
+
     public class LusciousDownloadThread extends Thread {
         private URL url;
         private int index;
diff --git a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/LusciousRipperTest.java b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/LusciousRipperTest.java
index 6362dcc6e..c6febd32b 100644
--- a/src/test/java/com/rarchives/ripme/tst/ripper/rippers/LusciousRipperTest.java
+++ b/src/test/java/com/rarchives/ripme/tst/ripper/rippers/LusciousRipperTest.java
@@ -8,7 +8,7 @@ import org.junit.jupiter.api.Test;
 
 public class LusciousRipperTest extends RippersTest {
 
-    @Test @Disabled("Flaky in the CI")
+    @Test
     public void testPahealRipper() throws IOException {
         // a photo set
         LusciousRipper ripper = new LusciousRipper(
@@ -16,12 +16,14 @@ public void testPahealRipper() throws IOException {
         testRipper(ripper);
     }
 
+    @Test
     public void testGetGID() throws IOException {
         URL url = new URL("https://luscious.net/albums/h-na-alice-wa-suki-desu-ka-do-you-like-alice-when_321609/");
         LusciousRipper ripper = new LusciousRipper(url);
         assertEquals("h-na-alice-wa-suki-desu-ka-do-you-like-alice-when_321609", ripper.getGID(url));
     }
-    @Test @Disabled("Flaky in the CI")
+
+    @Test
     public void testGetNextPage() throws IOException {
         URL multiPageAlbumUrl = new URL("https://luscious.net/albums/women-of-color_58/");
         LusciousRipper multiPageRipper = new LusciousRipper(multiPageAlbumUrl);