// Commit ec0e7219, authored by duanjinfei
//
// add crawl-data function
//
// parent d7bbf06b
// Follow this setup guide to integrate the Deno language server with your editor:
// https://deno.land/manual/getting_started/setup_your_environment
// This enables autocomplete, go to definition, etc.
// Setup type definitions for built-in Supabase Runtime APIs
import { createClient } from 'jsr:@supabase/supabase-js@2'
import "jsr:@supabase/functions-js/edge-runtime.d.ts"
// Paginated query helper.
/**
 * Reads every row of `table` by requesting consecutive pages until the server
 * returns an empty page.
 *
 * Only the `app_id` and `name` columns are selected, ordered by `created_at`
 * ascending.
 *
 * @param supabase Client exposing the `from().select().order().range()` query chain.
 * @param table Name of the table to read.
 * @param pageSize Maximum number of rows requested per round trip; must be a positive integer.
 * @returns All rows, in the order the server returned them.
 * @throws RangeError when `pageSize` is not a positive integer; rethrows any
 *         error reported by the query after logging it.
 */
async function fetchAllData(supabase, table: string, pageSize: number = 1000) {
  // A zero or negative page size would never advance the window.
  if (!Number.isInteger(pageSize) || pageSize <= 0) {
    throw new RangeError(`pageSize must be a positive integer, got ${pageSize}`);
  }

  const allData: any[] = [];
  let offset = 0;

  while (true) {
    const { data, error } = await supabase
      .from(table)
      .select('app_id,name') // adjust the column list as needed
      .order('created_at', { ascending: true }) // oldest rows first
      .range(offset, offset + pageSize - 1); // inclusive page window

    if (error) {
      console.error(`Error fetching data from ${table}:`, error);
      throw error;
    }

    // An empty page means the table has been exhausted.
    if (!data || data.length === 0) {
      break;
    }

    for (const row of data) {
      allData.push(row);
    }

    // Advance by what was actually received: if the server caps a response
    // below `pageSize`, stepping by `pageSize` would skip the rows in between.
    offset += data.length;
  }

  return allData;
}
// Caching layer: avoids repeated database queries.
/**
 * In-memory cache in front of the `category` table and the app table.
 *
 * Construction starts loading an `app_id -> name` snapshot of `table` through
 * `fetchAllData`. Constructors cannot be async, so the load runs in the
 * background and `checkAppExists` waits for it before answering.
 */
class DatabaseCache {
  /** Category rows keyed by their `pretty_url` slug. */
  private categoryCache = new Map<string, { id: number }>();
  /**
   * Known app names keyed by the stringified `app_id`, so that `7` and `"7"`
   * address the same entry whichever representation the database returns.
   */
  private appNameIdCache = new Map<string, string>();
  /** Settles once the initial app snapshot has been loaded, or rejects if loading failed. */
  private readonly ready: Promise<void>;

  /**
   * @param supabase Client passed through to `fetchAllData`.
   * @param table Table whose `app_id`/`name` pairs seed the cache.
   */
  constructor(supabase, table: string) {
    this.ready = this.loadApps(supabase, table);
    // Mark the rejection as observed so a failed load does not surface as an
    // unhandled rejection; the error is still rethrown by `checkAppExists`.
    this.ready.catch(() => {});
  }

  /** Fills `appNameIdCache` from every row of `table`. */
  private async loadApps(supabase, table: string): Promise<void> {
    const appAllData = await fetchAllData(supabase, table);
    for (const app of appAllData) {
      this.appNameIdCache.set(String(app.app_id), app.name);
    }
  }

  /**
   * Returns the `category` row (only its `id` column) whose `pretty_url`
   * matches, querying the database on the first request for each slug.
   *
   * @throws the query error, including when `.single()` does not find exactly one row.
   */
  async getCategoryByPrettyUrl(supabase, pretty_url: string) {
    const cached = this.categoryCache.get(pretty_url);
    if (cached !== undefined) {
      return cached;
    }

    const { data, error } = await supabase
      .from("category")
      .select("id")
      .eq("pretty_url", pretty_url)
      .single();
    if (error) throw error;

    this.categoryCache.set(pretty_url, data);
    return data;
  }

  /**
   * Reports whether the snapshot holds an app with this id and exactly this name.
   *
   * @param supabase Unused; kept so existing call sites stay valid.
   * @throws the snapshot-loading error if the initial load failed.
   */
  async checkAppExists(supabase, appId: number, appName: string) {
    await this.ready;
    const key = String(appId);
    return this.appNameIdCache.has(key) && this.appNameIdCache.get(key) === appName;
  }

  /**
   * Inserts `apps` into the `app` table with a single request; does nothing
   * for an empty list.
   *
   * @throws the insert error reported by the client.
   */
  async batchInsertApps(supabase, apps: any[]) {
    if (apps.length === 0) return;
    const { error } = await supabase.from("app").insert(apps);
    if (error) throw error;
  }
}
/**
 * Lists the own enumerable string-keyed property names of `obj`.
 * Only the top level is inspected; nested objects are not descended into.
 */
function getFirstLevelKeys(obj) {
  const topLevelKeys = Object.keys(obj);
  return topLevelKeys;
}
/**
 * Column names already resolved for each table, memoised per client object so
 * the probe query runs once per client and table instead of once per app.
 * The entries become collectable together with the client that owns them.
 */
const tableColumnsByClient = new WeakMap<object, Map<string, Promise<Set<string>>>>();

/**
 * Resolves the column names of `tableName`, minus `id`, by reading a single
 * row. A failed probe is evicted so that a later call can retry.
 */
function loadTableColumns(supabase, tableName): Promise<Set<string>> {
  let perTable = tableColumnsByClient.get(supabase);
  if (!perTable) {
    perTable = new Map();
    tableColumnsByClient.set(supabase, perTable);
  }

  let pending = perTable.get(tableName);
  if (!pending) {
    const columnsOfClient = perTable;
    pending = (async () => {
      // `.single()` reports an error unless exactly one row comes back, so an
      // empty table cannot be used to discover the columns.
      const { data: sampleRow, error } = await supabase
        .from(tableName)
        .select("*")
        .limit(1)
        .single();
      if (error) throw error;
      // `id` is left out so the source record can never supply it.
      return new Set(Object.keys(sampleRow).filter((column) => column !== 'id'));
    })();
    columnsOfClient.set(tableName, pending);
    pending.catch(() => {
      columnsOfClient.delete(tableName);
    });
  }
  return pending;
}

/**
 * Reduces a crawled app record to the fields that exist as columns in
 * `tableName`, and stores the record's `ID` under `app_id`.
 *
 * @param app Crawled record; its `ID` property becomes `app_id`.
 * @param supabase Client used to read one sample row of `tableName`.
 * @param tableName Table whose columns define which fields are kept.
 * @returns The cleaned record, or `null` when the columns could not be
 *          determined or `app` could not be processed (the error is logged).
 */
async function cleanAppData(app, supabase, tableName) {
  try {
    // Fetch the column list once; it is the same for every app.
    const validColumns = await loadTableColumns(supabase, tableName);

    // `any` values: the record comes from an untyped external payload.
    const cleaned: Record<string, any> = {};
    for (const key of Object.keys(app)) {
      if (validColumns.has(key)) {
        cleaned[key] = app[key];
      }
    }

    // Assign after copying so a same-named source field cannot overwrite it.
    const appId = app.ID ?? cleaned.app_id;
    if (appId !== undefined) {
      cleaned.app_id = appId;
    }
    return cleaned;
  } catch (error) {
    console.error('Error cleaning app data:', error);
    return null;
  }
}
/**
 * Crawls the ton.app category listings and inserts every app that is not yet
 * stored in the `app` table.
 *
 * For each category slug the JSON listing is fetched, each app is reduced to
 * the table's columns with `cleanAppData`, and apps unknown to
 * `DatabaseCache` are queued with the category's id. All queued apps are
 * inserted in one batch at the end.
 *
 * A failure while processing one category is logged and does not stop the
 * remaining categories.
 *
 * @param supabase Client used for all database access.
 * @returns Number of apps queued for insertion.
 * @throws the error from the final batch insert.
 */
async function getTonAppInfo(supabase) {
  const categories = [
    "exchanges", "wallets", "staking", "explorers", "bridges",
    "utilities", "channels", "nft", "vpn", "nftservices",
    "chats", "social", "gambling", "dex",
    "games", "devtools", "shopping", "launchpads"
  ];
  // NOTE(review): the segment after `_next/data/` looks like a Next.js build
  // id, which presumably changes whenever ton.app is redeployed — confirm and
  // consider resolving it dynamically.
  const baseUrl = "https://ton.app/_next/data/xTw0Vxd8Pu6ky2epaEtZJ/en/";
  const databaseCache = new DatabaseCache(supabase, 'app');
  const appsToInsert = [];
  // Ids already queued in this run; the cache only knows rows that were
  // stored before the run started, so it cannot catch repeats across categories.
  const queuedAppIds = new Set<string>();

  for (const category of categories) {
    try {
      const url = `${baseUrl}${category}.json?category_slug=${category}`;
      const response = await fetch(url);
      if (!response.ok) {
        // Fail with the status instead of an opaque JSON parse error.
        throw new Error(`Request to ${url} failed with HTTP ${response.status}`);
      }
      const data = await response.json();

      // Look up the category row.
      const categoryInfo = await databaseCache.getCategoryByPrettyUrl(supabase, category);

      const { apps } = data.pageProps.category;
      if (!apps || apps.length === 0) {
        continue;
      }

      // Process the apps of this category.
      for (const app of apps) {
        // Keep only the fields the table has columns for.
        const cleanedApp = await cleanAppData(app, supabase, 'app');
        if (!cleanedApp) continue;

        const hasId = cleanedApp.app_id != null;
        if (hasId && queuedAppIds.has(String(cleanedApp.app_id))) continue;

        // Skip apps that are already stored.
        const isExist = await databaseCache.checkAppExists(supabase, cleanedApp.app_id, cleanedApp.name);
        if (!isExist) {
          if (hasId) queuedAppIds.add(String(cleanedApp.app_id));
          // Row to insert: cleaned fields plus the owning category.
          appsToInsert.push({
            ...cleanedApp,
            category_id: categoryInfo.id
          });
        }
      }
    } catch (error) {
      console.error(`Error processing category ${category}:`, error);
    }
  }

  // Insert everything in one batch.
  await databaseCache.batchInsertApps(supabase, appsToInsert);
  return appsToInsert.length;
}
// Edge Function entry point.
// Builds a Supabase client that forwards the caller's Authorization header
// (when present), runs the crawl, and reports the number of queued apps as
// JSON. Any failure, including client construction, yields a JSON 500.
Deno.serve(async (req) => {
  try {
    // Forward the header only when the request carries one.
    const headers: Record<string, string> = {};
    const authorization = req.headers.get('Authorization');
    if (authorization) {
      headers.Authorization = authorization;
    }

    const supabase = createClient(
      Deno.env.get('SUPABASE_URL') ?? '',
      Deno.env.get('SUPABASE_ANON_KEY') ?? '',
      { global: { headers } }
    );

    const insertedAppsCount = await getTonAppInfo(supabase);
    console.log("insertLen:", insertedAppsCount);

    return new Response(
      JSON.stringify({
        message: 'Data uploaded successfully',
        insertedAppsCount
      }),
      {
        headers: { 'Content-Type': 'application/json' },
        status: 200
      }
    );
  } catch (err) {
    console.error('Unexpected error:', err);
    return new Response(JSON.stringify({ error: 'Internal server error' }), {
      headers: { 'Content-Type': 'application/json' },
      status: 500,
    });
  }
});
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment