-
-
Notifications
You must be signed in to change notification settings - Fork 15
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Feat(extractor): Export Perplexity Pages (#120)
* feat: extract Perplexity Pages paragraphs * fix: removing useless rule * feat: exporting Pages title * fix: too many newlines * Perplexity refactoring source extraction function * refactor: Perplexity source extraction genericized (bug: tile list extraction, only modal works) * fix: selectors to get correct titles and not exporting intro sources tiles * feat: extract Perplexity Pages sources * fix: correctly exporting sources (no duplication or missing sources) * docs: jsdoc * fix: extraction isolation (safeExecute) + sleep 10s instead of 100s * refactor: selector and newlines * refactor: cleaning code * fix: correcting conditions * docs: fix jsdoc
- Loading branch information
1 parent
7745296
commit a6033dd
Showing
7 changed files
with
197 additions
and
51 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
import {safeExecute} from "../../../shared/utils/jsShorteners"; | ||
import {interactAndCatch} from "./Perplexity"; | ||
|
||
/**
 * Builds the markdown for one Perplexity Pages section: an optional `##` title,
 * the section body converted through `format`, and the section's sources.
 *
 * @param {Element} content - DOM node of the section; it is queried with
 *   `hasChildNodes()` and `querySelector()`.
 * @param {function(string): string} format - Called with the body's `innerHTML`;
 *   its return value is appended to the markdown (presumably an HTML-to-markdown
 *   converter — confirm against the caller).
 * @returns {Promise<string>} The assembled markdown, or `''` when `content` has
 *   no child nodes.
 */
export async function processMessage(content, format) {
  if (!content.hasChildNodes())
    return '';

  let markdown = '';

  const title = content.querySelector('h2 > span');
  if (title)
    markdown += `## ${title.innerText}\n`;

  // first alternative selects the intro, second one the other article parts
  const answer = content.querySelector('.flex-col > div > .relative > :first-child, [class="group/section"] .prose');
  const answerHtml = answer?.innerHTML;
  if (answerHtml)
    markdown += format(answerHtml) + '\n\n';

  // Display sources.
  // The pending promise is handed to safeExecute un-awaited: awaiting it first
  // would raise any extraction error here, before safeExecute is even called,
  // so the failure would escape this function instead of being isolated.
  // NOTE(review): assumes safeExecute awaits the value it receives — confirm.
  const src = await safeExecute(extractSources(content, format));
  if (src)
    markdown += src + "\n";

  return markdown;
}
|
||
/**
 * Collects the sources of a section, prefixed by a "Sources" header.
 *
 * Delegates to `interactAndCatch`, passing it the header as the initial result,
 * one step description (elements to open, nothing to close, placeholder
 * selector) and the selector of the modal's close button.
 * NOTE(review): how `interactAndCatch` uses these arguments is not visible
 * here — confirm against its implementation.
 *
 * @param {Element} content - DOM node of the section to inspect.
 * @param {function(string): string} format - Forwarded to `interactAndCatch`.
 * @returns {Promise<string>} The header followed by the extracted sources, or
 *   `""` when the result still equals the bare header.
 */
async function extractSources(content, format) {
  const header = "---\n**Sources:**\n";

  // Open sources modal
  const steps = [
    {open: ['div.grid > div.flex:nth-last-of-type(1), .group\\/source'], close: [], selector: 'TODO'},
  ];
  const collected = await interactAndCatch(content, steps, header, format, '[data-testid="close-modal"]');

  // Don't export header if no sources
  if (collected === header)
    return "";

  return collected;
}
19 changes: 19 additions & 0 deletions
19
src/scripts/content/extractor/domains/PerplexityPages.json
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
{ | ||
"domainName": "Perplexity Pages", | ||
"contentSelector": "main .mx-auto > div > div > div > div > div.flex-col", | ||
"turndown": { | ||
"init": { | ||
"blankReplacement": "getBlankReplacement_PerplexityPages" | ||
}, | ||
"rules": { | ||
"preserveLineBreaksInPre": { | ||
"filter": "filter_PreserveLineBreaksInPre_Perplexity", | ||
"replacement": "replacement_PreserveLineBreaksInPre_Perplexity" | ||
}, | ||
"formatCitationsInAnswer": { | ||
"filter": "filter_formatCitationsInAnswer_Perplexity", | ||
"replacement": "replacement_formatCitationsInAnswer_Perplexity" | ||
} | ||
} | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters