gradio / node_modules /tldts-core /src /extract-hostname.ts
reisarod's picture
Upload folder using huggingface_hub
5fae594 verified
/**
 * Tells whether `code` is a UTF-16 code unit allowed inside a URL scheme:
 * an ASCII letter (either case), an ASCII digit, '+', '-' or '.'.
 *
 * The raw code unit is compared (no `| 32` case-folding trick) because setting
 * bit 5 would also map control characters onto digits and punctuation
 * (e.g. 0x10 -> '0', 0x0D -> '-'), letting them pass as valid scheme chars.
 */
function isSchemeCharCode(code: number): boolean {
  return (
    (code >= 97 && code <= 122) || // [a, z]
    (code >= 65 && code <= 90) || // [A, Z]
    (code >= 48 && code <= 57) || // [0, 9]
    code === 46 || // '.'
    code === 45 || // '-'
    code === 43 // '+'
  );
}

/**
 * Extracts the hostname part of `url` using a single pass over char codes.
 *
 * When `urlIsValidHostname` is false the function skips surrounding
 * whitespace/control characters, an optional scheme (or a leading `//`), any
 * `user[:password]@` prefix and any `:port` suffix, and stops at the first
 * '/', '?' or '#'. For a bracketed IPv6 host the text between '[' and ']' is
 * returned. Upper-case ASCII letters seen while scanning cause the result to
 * be lower-cased.
 *
 * When `urlIsValidHostname` is true none of the above runs: only trailing dots
 * are trimmed and the value is returned otherwise unchanged (no lower-casing).
 *
 * @param url - URL we want to extract a hostname from.
 * @param urlIsValidHostname - hint from caller; true if `url` is already a valid hostname.
 * @returns The extracted hostname (possibly an empty string), or `null` if
 * `url` is a `data:` URL, has a scheme containing an invalid character, or
 * starts an IPv6 literal with '[' without a closing ']'.
 */
export default function extractHostname(
  url: string,
  urlIsValidHostname: boolean,
): string | null {
  let start = 0;
  let end = url.length;
  let hasUpper = false;

  // If url is not already a valid hostname, then try to extract hostname.
  if (!urlIsValidHostname) {
    // Special handling of data URLs
    if (url.startsWith('data:')) {
      return null;
    }

    // Trim leading spaces (any code unit <= 32)
    while (start < url.length && url.charCodeAt(start) <= 32) {
      start += 1;
    }

    // Trim trailing spaces (any code unit <= 32)
    while (end > start + 1 && url.charCodeAt(end - 1) <= 32) {
      end -= 1;
    }

    // Skip scheme.
    if (
      url.charCodeAt(start) === 47 /* '/' */ &&
      url.charCodeAt(start + 1) === 47 /* '/' */
    ) {
      // Scheme-less URL starting with '//'
      start += 2;
    } else {
      const indexOfProtocol = url.indexOf(':/', start);
      if (indexOfProtocol !== -1) {
        // Implement fast-path for common protocols. We expect most protocols
        // should be one of these 4 and thus we will not need to perform the
        // more expensive validity check most of the time.
        const protocolSize = indexOfProtocol - start;
        const c0 = url.charCodeAt(start);
        const c1 = url.charCodeAt(start + 1);
        const c2 = url.charCodeAt(start + 2);
        const c3 = url.charCodeAt(start + 3);
        const c4 = url.charCodeAt(start + 4);

        const isCommonScheme =
          // https
          (protocolSize === 5 &&
            c0 === 104 /* 'h' */ &&
            c1 === 116 /* 't' */ &&
            c2 === 116 /* 't' */ &&
            c3 === 112 /* 'p' */ &&
            c4 === 115) /* 's' */ ||
          // http
          (protocolSize === 4 &&
            c0 === 104 /* 'h' */ &&
            c1 === 116 /* 't' */ &&
            c2 === 116 /* 't' */ &&
            c3 === 112) /* 'p' */ ||
          // wss
          (protocolSize === 3 &&
            c0 === 119 /* 'w' */ &&
            c1 === 115 /* 's' */ &&
            c2 === 115) /* 's' */ ||
          // ws
          (protocolSize === 2 &&
            c0 === 119 /* 'w' */ &&
            c1 === 115) /* 's' */;

        if (!isCommonScheme) {
          // Check that scheme is valid: every char before ':/' must be an
          // ASCII letter, a digit, '+', '-' or '.'.
          for (let i = start; i < indexOfProtocol; i += 1) {
            if (!isSchemeCharCode(url.charCodeAt(i))) {
              return null;
            }
          }
        }

        // Skip 0, 1 or more '/' after ':/'
        start = indexOfProtocol + 2;
        while (url.charCodeAt(start) === 47 /* '/' */) {
          start += 1;
        }
      }
    }

    // Detect first occurrence of '/', '?' or '#'. We also keep track of the
    // last occurrence of '@', ']' or ':' to speed-up subsequent parsing of
    // (respectively), identifier, ipv6 or port.
    let indexOfIdentifier = -1;
    let indexOfClosingBracket = -1;
    let indexOfPort = -1;
    for (let i = start; i < end; i += 1) {
      const code = url.charCodeAt(i);
      if (
        code === 35 || // '#'
        code === 47 || // '/'
        code === 63 // '?'
      ) {
        // Hostname cannot extend past the path, query or fragment.
        end = i;
        break;
      } else if (code === 64) {
        // '@'
        indexOfIdentifier = i;
      } else if (code === 93) {
        // ']'
        indexOfClosingBracket = i;
      } else if (code === 58) {
        // ':'
        indexOfPort = i;
      } else if (code >= 65 && code <= 90) {
        // [A, Z]: result will need to be lower-cased
        hasUpper = true;
      }
    }

    // Detect identifier: '@' (hostname starts right after the last one)
    if (
      indexOfIdentifier !== -1 &&
      indexOfIdentifier > start &&
      indexOfIdentifier < end
    ) {
      start = indexOfIdentifier + 1;
    }

    // Handle ipv6 addresses
    if (url.charCodeAt(start) === 91 /* '[' */) {
      if (indexOfClosingBracket !== -1) {
        return url.slice(start + 1, indexOfClosingBracket).toLowerCase();
      }
      // Opening '[' without a closing ']'
      return null;
    } else if (indexOfPort !== -1 && indexOfPort > start && indexOfPort < end) {
      // Detect port: ':' (only counts if located after the identifier)
      end = indexOfPort;
    }
  }

  // Trim trailing dots (always keeps at least one character)
  while (end > start + 1 && url.charCodeAt(end - 1) === 46 /* '.' */) {
    end -= 1;
  }

  // Avoid allocating a new string when the whole input is the hostname.
  const hostname =
    start !== 0 || end !== url.length ? url.slice(start, end) : url;

  if (hasUpper) {
    return hostname.toLowerCase();
  }

  return hostname;
}