Skip to content

Commit

Permalink
Merge pull request #1532 from Tush-r/lucius-fix
Browse files Browse the repository at this point in the history
Fixed LusciousRipper.
  • Loading branch information
cyian-1756 authored Jan 17, 2020
2 parents ff1ffb7 + 273f698 commit 414310a
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
public class LusciousRipper extends AbstractHTMLRipper {
private static final int RETRY_COUNT = 5; // Keeping it high for read timeout exception.

private Pattern p = Pattern.compile("^https?://(?:members.)?luscious\\.net/albums/([-_.0-9a-zA-Z]+).*$");
private static final Pattern P = Pattern.compile("^https?:\\/\\/(?:members\\.|old\\.|www\\.)?luscious.net\\/albums\\/([-_.0-9a-zA-Z]+)\\/?");
private DownloadThreadPool lusciousThreadPool = new DownloadThreadPool("lusciousThreadPool");

public LusciousRipper(URL url) throws IOException {
Expand Down Expand Up @@ -69,7 +69,7 @@ public Document getNextPage(Document doc) throws IOException {

@Override
public String getGID(URL url) throws MalformedURLException {
Matcher m = p.matcher(url.toExternalForm());
Matcher m = P.matcher(url.toExternalForm());
if (m.matches()) {
return m.group(1);
}
Expand All @@ -87,6 +87,40 @@ public DownloadThreadPool getThreadPool() {
return lusciousThreadPool;
}

/**
 * Sanitizes an album URL by dropping anything after the album name (such as
 * GET parameters) and pointing it at the old site, e.g.
 * {@code https://old.luscious.net/albums/albumname}.
 *
 * <p>If the URL does not start with a recognised album address, or the
 * rewritten address cannot be parsed, the problem is logged and the parent
 * implementation's result is returned instead.
 *
 * @param url the URL supplied by the user
 * @return the album URL on {@code old.luscious.net}, or the parent's sanitized URL on failure
 * @throws MalformedURLException if the parent implementation rejects the URL
 */
@Override
public URL sanitizeURL(URL url) throws MalformedURLException {
    Matcher m = P.matcher(url.toString());
    // lookingAt() anchors at the start of the input but tolerates trailing
    // text. matches() would require P to consume the entire URL, so any URL
    // carrying GET parameters would never be sanitized.
    if (!m.lookingAt()) {
        LOGGER.info("Error sanitizing the url.");
        LOGGER.error("ERROR: Unable to sanitize url.");
        return super.sanitizeURL(url);
    }

    // m.group() is only the matched album prefix; swap its host for the old site.
    String sanitizedUrl = m.group().replaceFirst(
            "^https?:\\/\\/(?:members\\.|old\\.|www\\.)?luscious.net",
            "https://old.luscious.net");
    try {
        return new URL(sanitizedUrl);
    } catch (MalformedURLException e) {
        LOGGER.info("Error sanitizing the url.");
        LOGGER.error(e);
        return super.sanitizeURL(url);
    }
}

/**
 * Normalizes an album URL by rewriting a leading {@code members.}, {@code old.}
 * or bare {@code luscious.net} host to {@code https://www.luscious.net}.
 *
 * <p>If the rewrite throws, the error is logged and the parent
 * implementation's result is returned instead.
 *
 * @param url the URL string to normalize
 * @return the URL with its host prefix replaced, or the parent's normalized URL on failure
 */
@Override
public String normalizeUrl(String url) {
    String normalized;
    try {
        normalized = url.replaceFirst(
                "^https?:\\/\\/(?:members\\.|old\\.)?luscious.net", "https://www.luscious.net");
    } catch (Exception e) {
        LOGGER.info("Error normalizing the url.");
        LOGGER.error(e);
        normalized = super.normalizeUrl(url);
    }
    return normalized;
}

public class LusciousDownloadThread extends Thread {
private URL url;
private int index;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,20 +8,22 @@
import org.junit.jupiter.api.Test;

public class LusciousRipperTest extends RippersTest {
@Test @Disabled("Flaky in the CI")
@Test
public void testPahealRipper() throws IOException {
    // Runs the shared ripper test against a single photo-set album.
    URL photoSetUrl =
            new URL("https://luscious.net/albums/h-na-alice-wa-suki-desu-ka-do-you-like-alice-when_321609/");
    testRipper(new LusciousRipper(photoSetUrl));
}

@Test
public void testGetGID() throws IOException {
    // The GID is expected to be the album name segment of the URL, without the trailing slash.
    final URL albumUrl =
            new URL("https://luscious.net/albums/h-na-alice-wa-suki-desu-ka-do-you-like-alice-when_321609/");
    final String gid = new LusciousRipper(albumUrl).getGID(albumUrl);
    assertEquals("h-na-alice-wa-suki-desu-ka-do-you-like-alice-when_321609", gid);
}
@Test @Disabled("Flaky in the CI")

@Test
public void testGetNextPage() throws IOException {
URL multiPageAlbumUrl = new URL("https://luscious.net/albums/women-of-color_58/");
LusciousRipper multiPageRipper = new LusciousRipper(multiPageAlbumUrl);
Expand Down

0 comments on commit 414310a

Please sign in to comment.