Commit 0511b03a authored by duanjinfei

update crawl-data

parent 76d44e66
......@@ -43,6 +43,7 @@ class DatabaseCache {
async init(supabase, table: string) {
const appAllData = await fetchAllData(supabase, table)
console.log("appAllData:", appAllData.length);
for (const app of appAllData) {
this.appNameIdCache.set(app.name, app.app_id);
}
......@@ -165,7 +166,9 @@ class DatabaseCache {
if (element.images && element.images.length > 0) {
for (const image of element.images) {
if (image.url) {
let imageUrl = `https://ton.app/${image.url}`;
const imageUrlData = image.url.replace(/^\/+/, "");
let imageUrl = `https://ton.app/${imageUrlData}`;
console.log("imageUrl:", imageUrl);
const response = await fetch(imageUrl);
if (!response.ok) {
console.error(
......@@ -181,7 +184,7 @@ class DatabaseCache {
fileName
);
if (!fileExists) {
await this.uploadFileToStorage(file, fileName);
await this.uploadFileToStorage(supabase, file, fileName);
}
}
}
......@@ -207,7 +210,7 @@ class DatabaseCache {
return url;
}
if (error != null) {
console.log("upload file error:", fileName);
console.log("upload file error:", error);
}
} catch (error) {
console.log("error:", error);
......@@ -243,40 +246,59 @@ async function getTonAppInfo(supabase) {
try {
const url = `${baseUrl}${category}.json?category_slug=${category}`;
console.log("handle category:", category);
console.log("url:", url);
const response = await fetch(url);
const data = await response.json();
// 获取分类信息
const categoryInfo = await databaseCache.getCategoryByPrettyUrl(supabase, category);
const {
apps
items
} = data.pageProps.category;
console.log(
"crawl app data length:", apps.length
"crawl app data length:", items.length
);
if (!apps || apps.length == 0) {
if (!items || items.length == 0) {
continue
}
// 处理应用
for (let app of apps) {
// 清理应用数据
const cleanedApp = databaseCache.cleanAppData(app);
if (!cleanedApp) continue;
for (let app of items) {
// 检查应用是否已存在
const isExist = await databaseCache.checkAppExists(cleanedApp.app_id, cleanedApp.name);
console.log(`app_id: ${cleanedApp.app_id}, app_name: ${cleanedApp.name}`, isExist);
const isExist = await databaseCache.checkAppExists(app.id, app.name);
console.log(`app_id: ${app.id}, app_name: ${app.name}`, isExist);
if (!isExist) {
const appDetailUrl = `${baseUrl}${category}/${app.name.toLowerCase()}.json?id=${app.id}`;
const appDetailresponse = await fetch(appDetailUrl);
const appDetailData = await appDetailresponse.json();
console.log("appDetailData:", appDetailData);
const {
iconPath,
screenshotPaths,
links,
languagesCodes,
caption
} = appDetailData.pageProps.app;
const cleanedAppDetail = databaseCache.cleanAppData(app);
// 准备插入的应用数据
const insertApp = {
...cleanedApp,
...cleanedAppDetail,
caption: caption,
app_id: app.id,
category_id: categoryInfo.id
};
insertApp.icon = {
url: iconPath
}
insertApp.images = screenshotPaths.map(path => {
return { url: path };
});
insertApp.languages = (languagesCodes || []).join(" ");
insertApp.link = app.externalLink
insertApp.appPlatforms = links
if (!insertApp.appPlatforms && !insertApp.description && !insertApp.images && insertApp.images == [] && (!insertApp.link || insertApp.link == "https://")) {
continue
} else if (!insertApp.appPlatforms && !insertApp.description && (!insertApp.images || insertApp.images == [])) {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment