Commit 6bc5f6c0 authored by duanjinfei

fix crawl data

parent ba1ac7ea
...@@ -37,13 +37,30 @@ async function fetchAllData(supabase, table: string, pageSize: number = 1000) { ...@@ -37,13 +37,30 @@ async function fetchAllData(supabase, table: string, pageSize: number = 1000) {
// 缓存机制:减少重复数据库查询 // 缓存机制:减少重复数据库查询
class DatabaseCache { class DatabaseCache {
private categoryCache = new Map<string, { id: string }>(); private categoryCache = new Map<string, { id: string }>();
private appNameIdCache = new Map<number, string>(); private appNameIdCache = new Map<string, number>();
private validColumns = [];
private fileNamesSet;
async init(supabase, table: string) { async init(supabase, table: string) {
const appAllData = await fetchAllData(supabase, table) const appAllData = await fetchAllData(supabase, table)
for (const app of appAllData) { for (const app of appAllData) {
this.appNameIdCache.set(app.app_id, app.name); this.appNameIdCache.set(app.name, app.app_id);
} }
const { data: app, error } = await supabase
.from(table)
.select("*")
.limit(1)
.single();
if (error) throw error;
delete app.id
// 提取列名
this.validColumns = this.getFirstLevelKeys(app)
}
async initFileData(supabase, bucket: string) {
const fileDatas = await this.getAllFiles(supabase, bucket);
// 创建一个 Set 用于快速查找文件名
this.fileNamesSet = new Set(fileDatas.map((file) => file.name));
} }
async getCategoryByPrettyUrl(supabase, pretty_url: string) { async getCategoryByPrettyUrl(supabase, pretty_url: string) {
...@@ -60,7 +77,7 @@ class DatabaseCache { ...@@ -60,7 +77,7 @@ class DatabaseCache {
} }
async checkAppExists(appId: number, appName: string) { async checkAppExists(appId: number, appName: string) {
return this.appNameIdCache.get(appId) === appName; // 通过 this.appNameIdCache 获取缓存值 return this.appNameIdCache.get(appName) === appId; // 通过 this.appNameIdCache 获取缓存值
} }
async batchInsertApps(supabase, apps: any[]) { async batchInsertApps(supabase, apps: any[]) {
...@@ -70,29 +87,11 @@ class DatabaseCache { ...@@ -70,29 +87,11 @@ class DatabaseCache {
if (error) throw error; if (error) throw error;
} }
} cleanAppData(app) {
function getFirstLevelKeys(obj) {
return Object.keys(obj);
}
async function cleanAppData(app, supabase, tableName) {
try { try {
// 一次性获取所有列信息
const { data: app, error } = await supabase
.from(tableName)
.select("*")
.limit(1)
.single();
if (error) throw error;
delete app.id
// 提取列名
const validColumns = getFirstLevelKeys(app)
// 动态清理对象 // 动态清理对象
return Object.keys(app) return Object.keys(app)
.filter(key => validColumns.includes(key)) .filter(key => this.validColumns.includes(key))
.reduce((obj, key) => { .reduce((obj, key) => {
obj.app_id = app.ID; obj.app_id = app.ID;
obj[key] = app[key]; obj[key] = app[key];
...@@ -102,6 +101,129 @@ async function cleanAppData(app, supabase, tableName) { ...@@ -102,6 +101,129 @@ async function cleanAppData(app, supabase, tableName) {
console.error('Error cleaning app data:', error); console.error('Error cleaning app data:', error);
return null; return null;
} }
}
/**
 * Lists the top-level property names of an object.
 *
 * @param obj - Object whose keys are read.
 * @returns The array produced by `Object.keys(obj)`.
 */
getFirstLevelKeys(obj) {
  const topLevelKeys = Object.keys(obj);
  return topLevelKeys;
}
/**
 * Lists the entries at the root of a storage bucket, requesting the listing
 * page by page (100 entries per request).
 *
 * A failed page request or a thrown exception is logged and ends the scan;
 * the entries collected up to that point are returned. The result is always
 * an array (never `false`), so callers can iterate or `.map` it directly.
 *
 * @param supabase - Client exposing `storage.from(bucket).list(path, options)`.
 * @param bucket - Name of the bucket to list.
 * @returns The file entries gathered so far; empty when nothing could be read.
 */
async getAllFiles(supabase, bucket) {
  const limit = 100; // maximum number of entries requested per page
  const allFiles = [];
  let offset = 0;
  try {
    while (true) {
      // Fetch one page of the listing.
      const { data, error } = await supabase.storage.from(bucket).list("", {
        limit,
        offset,
      });
      if (error) {
        console.error("获取文件列表失败:", error.message);
        break;
      }
      // Treat a missing page as empty instead of crashing on `.length`.
      const page = Array.isArray(data) ? data : [];
      allFiles.push(...page);
      // A short page means the last page has been reached.
      if (page.length < limit) {
        break;
      }
      offset += limit;
    }
  } catch (err) {
    console.error("发生错误:", err instanceof Error ? err.message : err);
  }
  return allFiles;
}
/**
 * Downloads an element's icon from ton.app and stores it through
 * `uploadFileToStorage`, unless a file with the same name is already known.
 *
 * Does nothing when the element has no `icon` or the icon has no `url`.
 * The stored file name is the last path segment of the icon URL. The
 * existence check runs before the download so already-stored icons are not
 * fetched again.
 *
 * @param supabase - Client forwarded to `uploadFileToStorage`.
 * @param element - Record that may carry `icon.url` (a path relative to ton.app).
 */
async uploadIcon(supabase, element) {
  const iconPath = element && element.icon ? element.icon.url : undefined;
  if (!iconPath) {
    return;
  }

  const fileName = iconPath.split("/").pop();
  // Skip the network round trip when the file is already stored.
  if (await this.checkIsFileExist(fileName)) {
    return;
  }

  // Drop leading slashes so the joined URL has exactly one separator.
  const imageUrl = `https://ton.app/${iconPath.replace(/^\/+/, "")}`;
  const response = await fetch(imageUrl);
  if (!response.ok) {
    console.error("Failed to fetch image:", response.statusText, imageUrl);
    return;
  }

  // Wrap the downloaded bytes in a File so the upload carries name and type.
  const blob = await response.blob();
  const file = new File([blob], fileName, { type: blob.type });
  await this.uploadFileToStorage(supabase, file, fileName);
}
/**
 * Downloads every image listed on an element from ton.app and stores each
 * one through `uploadFileToStorage`, skipping files whose name is already
 * known.
 *
 * Images are processed one after another. Entries without a `url` are
 * ignored. The stored file name is the last path segment of the image URL.
 * The `supabase` client is passed to `uploadFileToStorage` as its first
 * argument, matching that method's `(supabase, file, fileName)` signature.
 *
 * @param supabase - Client forwarded to `uploadFileToStorage`.
 * @param element - Record that may carry an `images` array of `{ url }` entries.
 */
async uploadImages(supabase, element) {
  const images = element.images;
  if (!images || images.length === 0) {
    return;
  }

  for (const image of images) {
    if (!image || !image.url) {
      continue;
    }

    const fileName = image.url.split("/").pop();
    // Skip the network round trip when the file is already stored.
    if (await this.checkIsFileExist(fileName)) {
      continue;
    }

    // Drop leading slashes so the joined URL has exactly one separator.
    const imageUrl = `https://ton.app/${image.url.replace(/^\/+/, "")}`;
    const response = await fetch(imageUrl);
    if (!response.ok) {
      console.error(
        "Failed to fetch image:",
        response.statusText,
        imageUrl
      );
      continue;
    }

    // Wrap the downloaded bytes in a File so the upload carries name and type.
    const blob = await response.blob();
    const file = new File([blob], fileName, { type: blob.type });
    await this.uploadFileToStorage(supabase, file, fileName);
  }
}
/**
 * Uploads a file to the "media" bucket under the given name, without
 * overwriting an existing object (`upsert: false`).
 *
 * Failures are logged, not thrown: both an error reported by the upload call
 * and an exception raised while uploading end with `undefined` returned.
 *
 * @param supabase - Client exposing `storage.from(bucket).upload(path, body, options)`.
 * @param file - File body to upload.
 * @param fileName - Object path inside the bucket.
 * @returns The public URL built from the returned `fullPath`, or `undefined` on failure.
 */
async uploadFileToStorage(supabase, file, fileName) {
  const publicBase =
    "https://jokqrcagutpmvpilhcfq.supabase.co/storage/v1/object/public";
  const uploadOptions = {
    cacheControl: "120",
    contentType: "image/png",
    upsert: false,
  };

  try {
    const mediaBucket = supabase.storage.from("media");
    const { data, error } = await mediaBucket.upload(
      `${fileName}`,
      file,
      uploadOptions
    );
    console.log("data:", data);

    // Success: derive the public URL from the path reported by the upload.
    if (data != null) {
      const publicUrl = `${publicBase}/${data.fullPath}`;
      console.log("upload file success:", fileName);
      return publicUrl;
    }

    if (error != null) {
      console.log("upload file error:", fileName);
    }
  } catch (caught) {
    console.log("error:", caught);
  }
}
/**
 * Reports whether a file name is present in `this.fileNamesSet`, logging
 * the outcome either way.
 *
 * @param fileName - File name to look up.
 * @returns `true` when the name is in the set, otherwise `false`.
 */
async checkIsFileExist(fileName) {
  const found = this.fileNamesSet.has(fileName);
  // Same message for both outcomes apart from the trailing status word.
  const status = found ? "已存在" : "不存在";
  console.log(`文件 "${fileName} ${found}" ${status}`);
  return found;
}
} }
async function getTonAppInfo(supabase) { async function getTonAppInfo(supabase) {
...@@ -141,7 +263,7 @@ async function getTonAppInfo(supabase) { ...@@ -141,7 +263,7 @@ async function getTonAppInfo(supabase) {
// 处理应用 // 处理应用
for (let app of apps) { for (let app of apps) {
// 清理应用数据 // 清理应用数据
const cleanedApp = await cleanAppData(app, supabase, 'app'); const cleanedApp = databaseCache.cleanAppData(app);
if (!cleanedApp) continue; if (!cleanedApp) continue;
// 检查应用是否已存在 // 检查应用是否已存在
...@@ -167,7 +289,12 @@ async function getTonAppInfo(supabase) { ...@@ -167,7 +289,12 @@ async function getTonAppInfo(supabase) {
// 批量插入应用 // 批量插入应用
console.log("await app to insert length", appsToInsert.length); console.log("await app to insert length", appsToInsert.length);
if (appsToInsert.length > 0) { if (appsToInsert.length > 0) {
await databaseache.batchInsertApps(supabase, appsToInsert); await databaseCache.batchInsertApps(supabase, appsToInsert);
await databaseCache.initFileData(supabase, "media");
for (const element of appsToInsert) {
await databaseCache.uploadIcon(supabase, element);
await databaseCache.uploadImages(supabase, element);
}
} }
return appsToInsert.length; return appsToInsert.length;
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment