This is a private note that has been shared.
Convert URLs to md Links | Jihad
Updated 05092025-152551
- Action Directory
- Source Gist
- WTF
- ID:
AE188DDE-78B3-42D7-81B8-0983E8D98D12
- Simplenote Local
- Draft
Description
Convert all plain URLs in the note to Markdown links that use each website's actual page title, including YouTube links.
Source
/**
 * Find every plain http/https URL in a piece of text.
 *
 * A URL is "plain" when it is not immediately preceded by "(", i.e. it is not
 * already the target of a Markdown link such as [title](https://…).
 * Trailing sentence punctuation and an unmatched closing parenthesis are not
 * treated as part of the URL, so "see https://example.com." yields
 * "https://example.com".
 *
 * @param {string} text - Text to scan.
 * @returns {string[]|null} URLs in order of appearance (duplicates kept), or
 *   null when the text is not a string or contains no plain URL.
 */
function extractUrls(text) {
  if (typeof text !== "string" || text.length === 0) {
    return null;
  }
  // Match anything that looks like a URL up to the first whitespace.
  const urlRegex = /(?<!\()https?:\/\/\S+/g;
  const rawMatches = text.match(urlRegex);
  if (!rawMatches) {
    return null;
  }
  const urls = rawMatches
    .map(trimTrailingUrlPunctuation)
    // Trimming can leave only the scheme (e.g. "https://..."); drop those.
    .filter((url) => !/^https?:\/\/$/.test(url));
  return urls.length > 0 ? urls : null;
}

/**
 * Remove characters from the end of a matched URL that belong to the
 * surrounding sentence rather than to the URL itself.
 *
 * @param {string} url - Raw regex match.
 * @returns {string} The URL without trailing punctuation.
 */
function trimTrailingUrlPunctuation(url) {
  const trailingPunctuation = ".,;:!?\"'>";
  let end = url.length;
  while (end > 0) {
    const last = url[end - 1];
    if (trailingPunctuation.includes(last)) {
      end -= 1;
      continue;
    }
    if (last === ")") {
      // Keep balanced parentheses (e.g. …/Foo_(bar)); strip only a ")" that
      // has no matching "(" inside the URL.
      const candidate = url.slice(0, end);
      const opens = candidate.split("(").length - 1;
      const closes = candidate.split(")").length - 1;
      if (closes > opens) {
        end -= 1;
        continue;
      }
    }
    break;
  }
  return url.slice(0, end);
}
/**
 * Extract the text of the first <title> element from an HTML document.
 *
 * The opening tag may carry attributes (e.g. <title data-rh="true">) and the
 * title text may span several lines; whitespace runs are collapsed to single
 * spaces and the result is trimmed.
 *
 * @param {string} html - Raw HTML source.
 * @returns {string|null} The normalised title, or null when the input is not
 *   a string, has no <title> element, or the title is empty.
 */
function extractTitleFromHtml(html) {
  if (typeof html !== "string") {
    return null;
  }
  // [\s\S] instead of "." so that line breaks inside the title are matched.
  const match = html.match(/<title\b[^>]*>([\s\S]*?)<\/title\s*>/i);
  if (!match) {
    return null;
  }
  const cleaned = match[1].trim().replace(/\s+/g, " ");
  return cleaned.length > 0 ? cleaned : null;
}
/**
 * Extract a video title from JSON embedded in a YouTube page.
 *
 * Looks for the string value of a "title" key that is immediately followed by
 * a "lengthSeconds" key. The value is matched as a JSON string, so escaped
 * quotes (\") inside the title do not end the match, and JSON escapes such as
 * \u0026 are decoded.
 *
 * @param {string} html - Raw page source.
 * @returns {string|null} The decoded title, or null when the input is not a
 *   string or the pattern is absent.
 */
function extractYouTubeTitleFromJson(html) {
  if (typeof html !== "string") {
    return null;
  }
  // (?:[^"\\]|\\.)+ = any run of non-quote characters or backslash escapes.
  const match = html.match(/"title":"((?:[^"\\]|\\.)+)","lengthSeconds":/);
  if (!match) {
    return null;
  }
  const rawTitle = match[1];
  try {
    return JSON.parse(`"${rawTitle}"`);
  } catch (err) {
    // Not a valid JSON string body (e.g. a stray control character): keep the
    // undecoded text rather than losing the title altogether.
    return rawTitle;
  }
}
/**
 * Read the post title from a Reddit page.
 *
 * @param {string} html - Raw page source.
 * @returns {string|null} The value of a title="…" attribute found inside a
 *   <shreddit-title …> tag, or null when no such tag/attribute is present.
 */
function extractRedditTitleFromHtml(html) {
  // The title lives in the `title` attribute of the <shreddit-title> element.
  const found = html.match(/<shreddit-title[^>]*title="([^"]+)"/);
  if (found === null) {
    return null;
  }
  const [, titleAttribute] = found;
  return titleAttribute;
}
/**
 * Extract the Open Graph title (og:title) from an HTML document.
 *
 * Scans each <meta> tag and returns the `content` value of the first one
 * whose `property` is "og:title". The two attributes may appear in either
 * order and may use single or double quotes.
 *
 * @param {string} html - Raw page source.
 * @returns {string|null} The non-empty og:title content, or null when the
 *   input is not a string or no such tag exists.
 */
function extractMediumTitleFromHtml(html) {
  if (typeof html !== "string") {
    return null;
  }
  // Quoted attribute values are consumed as a unit so a ">" inside a value
  // does not end the tag early.
  const metaTags = html.match(/<meta\b(?:[^>"']|"[^"]*"|'[^']*')*>/gi) || [];
  for (const tag of metaTags) {
    if (!/\sproperty\s*=\s*(["'])og:title\1/i.test(tag)) {
      continue;
    }
    const content = tag.match(/\scontent\s*=\s*(?:"([^"]*)"|'([^']*)')/i);
    if (!content) {
      continue;
    }
    const value = content[1] !== undefined ? content[1] : content[2];
    if (value) {
      return value;
    }
  }
  return null;
}
/**
 * Replace plain URLs in a text with Markdown links titled after the page.
 *
 * Each distinct URL is fetched once; the title comes from the site-specific
 * extractor (YouTube, Reddit, Medium) when the URL mentions that site, then
 * from the generic <title> extractor, and finally defaults to "Link".
 * Every occurrence of a listed URL that is not directly preceded by "(" is
 * replaced, so URLs that already are Markdown link targets stay untouched.
 *
 * @param {string[]} urls - URLs to convert (duplicates allowed).
 * @param {string} originalText - Text containing the URLs.
 * @returns {string} The text with the URLs rewritten as [title](url).
 */
function processUrls(urls, originalText) {
  if (!urls || urls.length === 0) {
    return originalText;
  }
  const http = HTTP.create();

  // Fetch each distinct URL only once, however often it appears.
  const titlesByUrl = new Map();
  for (const url of urls) {
    if (url && !titlesByUrl.has(url)) {
      titlesByUrl.set(url, fetchLinkTitle(http, url));
    }
  }
  if (titlesByUrl.size === 0) {
    return originalText;
  }

  // Longest URL first, so "https://a.com/page" is never rewritten as the
  // shorter "https://a.com" followed by a dangling "/page".
  const alternatives = [...titlesByUrl.keys()]
    .sort((a, b) => b.length - a.length)
    .map(escapeForRegExp);
  const plainUrlPattern = new RegExp(`(?<!\\()(?:${alternatives.join("|")})`, "g");

  // A replacer function (not a replacement string) keeps "$&", "$$" etc. in
  // titles and URLs literal.
  return originalText.replace(
    plainUrlPattern,
    (url) => `[${escapeLinkText(titlesByUrl.get(url))}](${url})`
  );
}

/**
 * Download a page and work out the best available title for it.
 * Returns "Link" when the request fails or no title can be extracted.
 */
function fetchLinkTitle(http, url) {
  const resp = http.request({ url: url, method: "GET" });
  if (!resp || !resp.success) {
    return "Link";
  }
  const html = resp.responseText || "";
  let title = null;
  if (url.includes("youtu.be") || url.includes("youtube.com")) {
    title = extractYouTubeTitleFromJson(html);
  } else if (url.includes("redd.it") || url.includes("reddit.com")) {
    title = extractRedditTitleFromHtml(html);
  } else if (url.includes("medium.com")) {
    title = extractMediumTitleFromHtml(html);
  }
  // Fallback to the normal <title> tag, reusing the page already downloaded.
  if (!title) {
    title = extractTitleFromHtml(html);
  }
  return title || "Link";
}

/** Escape a literal string for safe use inside a RegExp source. */
function escapeForRegExp(literal) {
  return literal.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}

/** Backslash-escape square brackets so a title cannot end the link text early. */
function escapeLinkText(title) {
  return String(title).replace(/[\[\]]/g, "\\$&");
}
/**
 * Action entry point: convert the plain URLs of the current draft into
 * Markdown links and store the result back on the draft.
 *
 * When the draft contains no plain URL, an alert is shown and the action is
 * cancelled through `context.cancel()`; the draft is left untouched.
 */
function main() {
  const foundUrls = extractUrls(draft.content);

  if (foundUrls) {
    draft.content = processUrls(foundUrls, draft.content);
    draft.update();
    return;
  }

  // Nothing to convert: tell the user and stop the action.
  alert("No Plain URLs found.");
  context.cancel();
}
// Run the action: main() is invoked as soon as the script is evaluated.
main();
Babylon (ChatGPT)
This JavaScript script is intended to extract URLs from a given text (presumably within the Drafts app), retrieve the page titles for those URLs, and replace the URLs in the text with markdown-style links, using the title as the link text. The script specifically handles titles from YouTube, Reddit, Medium, and general HTML pages. Let’s break down each part of the script.
1.
Extracting URLs:
// Returns every http/https URL found in `text` as an array of strings, or
// null when String.prototype.match finds nothing.
function extractUrls(text) {
// Match anything that looks like a URL up to the first whitespace
// The (?<!\() lookbehind rejects a URL that directly follows "(".
let urlRegex = /(?<!\()https?:\/\/\S+/g;
return text.match(urlRegex);
}
Explanation:
-
Uses a regular expression to find URLs in the given text:
-
The pattern https?://\S+ matches:
-
http or https (https?)
-
://
-
One or more non-whitespace characters (\S+)
-
-
The negative lookbehind (?<!\() ensures that a URL immediately preceded by an opening parenthesis is not matched (to avoid capturing URLs that are already the target of a Markdown link).
-
-
The function returns an array of URLs or null if no match is found.
2.
Extracting Titles from HTML:
General HTML Title:
// Returns the text between the first <title> and </title> in `html`, trimmed
// and with whitespace runs collapsed to one space, or null when not found.
function extractTitleFromHtml(html) {
// "." does not match line breaks, and the opening tag must be exactly <title>
// (matched case-insensitively).
let match = html.match(/<title>(.*?)<\/title>/i);
return match ? match[1].trim().replace(/\s+/g, " ") : null;
}
- Uses a regular expression to capture the content inside the <title> tags.
- The i flag makes the match case-insensitive.
- Trims and normalizes whitespace within the title.
YouTube JSON Title:
// Returns the text captured between `"title":"` and `","lengthSeconds":` in
// `html`, or null when that sequence is absent. The captured text is returned
// as found; no unescaping is applied.
function extractYouTubeTitleFromJson(html) {
// [^"]+? stops at the first double quote, so the title itself cannot contain one.
let match = html.match(/"title":"([^"]+?)","lengthSeconds":/);
return match ? match[1] : null;
}
-
Uses a regex pattern to find the YouTube video title from JSON-encoded data on the page.
-
Matches:
-
"title":" followed by one or more non-quote characters, matched lazily ([^"]+?)
-
","lengthSeconds": to ensure it’s within the expected JSON structure.
-
Reddit Title:
// Returns the value of a title="…" attribute located inside a
// <shreddit-title …> tag of `html`, or null when there is no match.
function extractRedditTitleFromHtml(html) {
// [^>]* keeps the search inside the opening tag; the value must be non-empty.
let match = html.match(/<shreddit-title[^>]*title="([^"]+)"/);
return match ? match[1] : null;
}
- Targets the <shreddit-title> tag found on Reddit pages.
- Captures the value of the title attribute from the tag.
Medium Title:
// Returns the content="…" value of a <meta> tag carrying property="og:title",
// or null when there is no match.
function extractMediumTitleFromHtml(html) {
// The pattern only matches when property="og:title" comes before content="…"
// within the same tag, and both use double quotes.
let match = html.match(/<meta[^>]*property="og:title"[^>]*content="([^"]+)"/);
return match ? match[1] : null;
}
- Uses Open Graph metadata (property="og:title") to extract the title from Medium articles.
- Finds the content attribute containing the actual title text.
3.
Processing URLs and Fetching Titles:
// For each URL in `urls`, downloads the page, picks a title and replaces the
// URL inside a copy of `originalText` with [title](url). Returns the new text.
function processUrls(urls, originalText) {
let http = HTTP.create();
let updatedText = originalText;
for (let url of urls) {
let title = null;
// Site-specific extractors are chosen by substring match on the URL.
if (url.includes("youtu.be") || url.includes("youtube.com")) {
let resp = http.request({ url: url, method: "GET" });
if (resp.success) {
title = extractYouTubeTitleFromJson(resp.responseText);
}
} else if (url.includes("redd.it") || url.includes("reddit.com")) {
let resp = http.request({ url: url, method: "GET" });
if (resp.success) {
title = extractRedditTitleFromHtml(resp.responseText);
}
} else if (url.includes("medium.com")) {
let resp = http.request({ url: url, method: "GET" });
if (resp.success) {
title = extractMediumTitleFromHtml(resp.responseText);
}
}
// Fallback to normal <title> tag
// Note: this issues a new GET even if the page was already requested above.
if (!title) {
let resp = http.request({ url: url, method: "GET" });
if (resp.success) {
title = extractTitleFromHtml(resp.responseText);
}
}
// Last resort when the request failed or no title was extracted.
if (!title) title = "Link";
// With a string pattern, replace() rewrites only the first occurrence of
// `url` and interprets "$" sequences in the replacement string.
updatedText = updatedText.replace(url, `[${title}](${url})`);
}
return updatedText;
}
Explanation:
-
Creates an HTTP object using the Drafts-specific HTTP.create() method.
-
Loops through each extracted URL and attempts to determine the page title:
-
YouTube URLs: Uses extractYouTubeTitleFromJson.
-
Reddit URLs: Uses extractRedditTitleFromHtml.
-
Medium URLs: Uses extractMediumTitleFromHtml.
-
Fallback: If no specialized title extraction works, it attempts to use the generic <title> tag.
-
-
If no title can be determined, it defaults to “Link”.
-
Replaces the URL in the text with the Markdown link format: [title](url).
4.
Main Function:
// Entry point: extracts the URLs of the current draft, rewrites them through
// processUrls and saves the draft; alerts and cancels when no URL is found.
function main() {
let urls = extractUrls(draft.content);
// extractUrls returns null when nothing matched.
if (!urls) {
alert("No Plain URLs found.");
context.cancel();
return;
}
draft.content = processUrls(urls, draft.content);
draft.update();
}
// Run the action when the script is evaluated.
main();
Explanation:
- Attempts to extract URLs from the current draft’s content.
- If no URLs are found, it shows an alert and cancels the operation.
- If URLs are found, it updates the draft’s content with the processed text (URLs replaced by markdown links).
- Finally, it saves the updated content back to the draft.
Contextual Notes:
- The script is designed for use within the Drafts app, which has a specific JavaScript API.
- It uses HTTP.create() to initiate HTTP requests and draft.content to access the text content of the current draft.
- The function draft.update() commits the changes.
Potential Issues:
-
Error Handling:
- The script only checks resp.success: when a request fails it silently falls back to the generic title "Link" and never reports the network error or HTTP status to the user.
-
Dynamic Content:
- If a page has dynamically loaded content (like many modern websites), the static HTML obtained via GET may not contain the desired title.
-
Performance:
- Fetching multiple URLs sequentially can be slow, especially if there are many links. Implementing asynchronous requests could improve performance.
-
Title Extraction Robustness:
- The YouTube JSON extraction method may break if YouTube changes its page structure.