Skip to content

Commit

Permalink
make gpt content easier to parse
Browse files Browse the repository at this point in the history
  • Loading branch information
gcharang committed Aug 6, 2024
1 parent 60f2870 commit 064e899
Show file tree
Hide file tree
Showing 11 changed files with 85,810 additions and 95,670 deletions.
61,548 changes: 28,825 additions & 32,723 deletions data-for-gpts/all-content.txt

Large diffs are not rendered by default.

15,974 changes: 7,418 additions & 8,556 deletions data-for-gpts/antara-content.txt

Large diffs are not rendered by default.

84 changes: 25 additions & 59 deletions data-for-gpts/historical-content.txt

Large diffs are not rendered by default.

30,290 changes: 14,499 additions & 15,791 deletions data-for-gpts/komodefi-api/all-api-content.txt

Large diffs are not rendered by default.

12,040 changes: 5,808 additions & 6,232 deletions data-for-gpts/komodefi-api/legacy-api-content.txt

Large diffs are not rendered by default.

8,162 changes: 3,945 additions & 4,217 deletions data-for-gpts/komodefi-api/v20-api-content.txt

Large diffs are not rendered by default.

7,882 changes: 3,731 additions & 4,151 deletions data-for-gpts/komodefi-api/v20-dev-api-content.txt

Large diffs are not rendered by default.

32,463 changes: 15,586 additions & 16,877 deletions data-for-gpts/komodo-defi-framework-content.txt

Large diffs are not rendered by default.

12,536 changes: 5,727 additions & 6,809 deletions data-for-gpts/smart-chains-content.txt

Large diffs are not rendered by default.

373 changes: 129 additions & 244 deletions data-for-gpts/start-here-content.txt

Large diffs are not rendered by default.

128 changes: 117 additions & 11 deletions utils/js/create_data_for_gpts.js
Original file line number Diff line number Diff line change
@@ -1,30 +1,136 @@
import * as fs from "fs";
import * as path from "path";
//import { EXIT, visitParents } from 'unist-util-visit-parents'
import { visit, EXIT } from 'unist-util-visit'
import path from 'path'
import { remark } from 'remark'
import remarkGfm from "remark-gfm";
import remarkMdx from "remark-mdx";


function walkDir(dirPath, callback, ...callbackArgs) {
fs.readdirSync(dirPath).forEach((file) => {
const filePath = path.join(dirPath, file);
async function walkDir(dir, callback, ...callbackArgs) {
let files = fs.readdirSync(dir);

for (const file of files) {
const filePath = path.join(dir, file);
const stat = fs.statSync(filePath);

if (stat.isDirectory()) {
walkDir(filePath, callback, ...callbackArgs);
} else {
callback(filePath, ...callbackArgs);
await walkDir(filePath, callback, ...callbackArgs);
} else if (stat.isFile() && !filePath.toLowerCase().includes(".ds_store")) {
await callback(filePath, ...callbackArgs);
}
});
}
}

function readFileAndAddContentToFIle(filePath, contentHolder) {
async function readFileProcessItAndAddContentToFile(filePath, contentHolder) {
const fileContent = fs.readFileSync(filePath, 'utf8')
contentHolder.content += fileContent;
const processedFileContent = await convertMdxToMd(fileContent, filePath)
contentHolder.content += processedFileContent;
}

const pathsNames = [["", "all"], ["komodo-defi-framework", "komodo-defi-framework"], ["historical", "historical"], ["smart-chains", "smart-chains"], ["antara", "antara"], ["start-here", "start-here"], ["komodo-defi-framework/api", "komodefi-api/all-api"], ["komodo-defi-framework/api/legacy", "komodefi-api/legacy-api"], ["komodo-defi-framework/api/v20", "komodefi-api/v20-api"], ["komodo-defi-framework/api/v20-dev", "komodefi-api/v20-dev-api"]]

for (let index = 0; index < pathsNames.length; index++) {
const element = pathsNames[index];
let contentHolder = { content: "" };
walkDir(`./src/pages/${element[0]}`, readFileAndAddContentToFIle, contentHolder)
await walkDir(`./src/pages/${element[0]}`, readFileProcessItAndAddContentToFile, contentHolder)
fs.writeFileSync(`./data-for-gpts/${element[1]}-content.txt`, contentHolder.content)
}

/**
 * Replaces the node at `parent.children[index]` with `replacements`
 * (or simply removes it when `replacements` is empty).
 *
 * @param {{ children: object[] }} parent - Parent of the node being replaced.
 * @param {number} index - Position of the node inside `parent.children`.
 * @param {object[]} [replacements] - Nodes to insert in place of the removed node.
 * @returns {[string, number]} Visitor result for unist-util-visit: do not descend
 *   into the removed node, and resume traversal at `index` so the freshly
 *   inserted nodes are visited next.
 */
function replaceNodeInParent(parent, index, replacements = []) {
  parent.children.splice(index, 1, ...replacements);
  // 'skip' is the documented value of unist-util-visit's SKIP action. The
  // named-export API imported by this file does not expose `visit.SKIP`
  // (it evaluates to undefined), so the literal is used here.
  return ['skip', index];
}

/**
 * Builds the Markdown nodes that stand in for a `<BulletPoints>` JSX element:
 * a level-3 heading from its `title` attribute and a paragraph from its `desc`
 * expression attribute (converted with `convertHtmlToMd`).
 *
 * @param {object} node - The `BulletPoints` JSX flow element.
 * @param {string} filePath - Source file path, used only in error messages.
 * @returns {object[]} Zero, one or two replacement nodes.
 * @throws {Error} When converting the `desc` expression fails; the original
 *   error is attached as `cause`.
 */
function buildBulletPointsNodes(node, filePath) {
  const findAttribute = (name) => node.attributes.find((attr) => attr.name === name);
  const titleProp = findAttribute('title');
  const descProp = findAttribute('desc');
  const replacements = [];

  if (titleProp?.value) {
    replacements.push({
      type: 'heading',
      depth: 3,
      children: [{ type: 'text', value: titleProp.value }],
    });
  }

  // Only `desc={...}` expression attributes are converted; plain string
  // attributes are ignored, as before.
  if (descProp?.value?.type === 'mdxJsxAttributeValueExpression') {
    try {
      const markdown = convertHtmlToMd(descProp.value.value);
      replacements.push({
        type: 'paragraph',
        children: [{ type: 'text', value: markdown }],
      });
    } catch (error) {
      console.error(error);
      throw new Error(`Error parsing descProp in file: ${filePath}`, { cause: error });
    }
  }

  return replacements;
}

/**
 * Converts MDX source to Markdown by stripping JSX elements.
 *
 * - `BulletPoints` flow elements are replaced by a heading/paragraph pair
 *   built from their `title` and `desc` attributes.
 * - Every other JSX flow element and every JSX text element is replaced by
 *   its children, or removed when it has none.
 *
 * @param {string} fileContent - Raw MDX text to process.
 * @param {string} filePath - Path of the file being processed; used only in
 *   error messages.
 * @returns {Promise<object>} The processed file object produced by remark's
 *   `process()`; it is turned into text when concatenated to a string.
 * @throws {Error} Wraps any processing failure with the file path in the
 *   message and the original error as `cause`.
 */
async function convertMdxToMd(fileContent, filePath) {
  try {
    return await remark()
      .use(remarkGfm)
      .use(remarkMdx)
      .use(() => (tree) => {
        visit(tree, 'mdxJsxFlowElement', (node, index, parent) => {
          const replacements = node.name === 'BulletPoints'
            ? buildBulletPointsNodes(node, filePath)
            : node.children ?? [];
          return replaceNodeInParent(parent, index, replacements);
        });

        visit(tree, 'mdxJsxTextElement', (node, index, parent) =>
          replaceNodeInParent(parent, index, node.children ?? []));
      })
      .process(fileContent);
  } catch (error) {
    // Always rethrow (even for falsy thrown values) so a failure can never
    // make this function silently resolve to undefined.
    throw new Error(`Error in file: ${filePath} \n ${error}`, { cause: error });
  }
}

/**
 * Converts a small HTML list fragment (`<ul>` / `<li>` tags without
 * attributes) into Markdown bullet lines.
 *
 * `<ul>` and `</ul>` are dropped, every `<li>` starts a new `- ` line, and
 * `</li>` is dropped. Each resulting line is trimmed and blank lines are
 * removed. Any other markup is left untouched.
 *
 * @param {string} html - HTML fragment to convert.
 * @returns {string} Markdown text with one list item per line.
 */
function convertHtmlToMd(html) {
  return html
    .replace(/<\/?ul>/g, '')
    // Start each item on its own line so that several <li> elements written
    // on one physical line do not collapse into a single Markdown line.
    .replace(/<li>/g, '\n- ')
    .replace(/<\/li>/g, '')
    .split('\n')
    .map((line) => line.trim())
    .filter((line) => line !== '')
    .join('\n');
}

0 comments on commit 064e899

Please sign in to comment.