Commit 3af78dfd authored by Ubuntu

retweet task ok

parent 1219bc0b
[
{
"domain": ".twitter.com",
"expirationDate": 1756348784.685643,
"hostOnly": false,
"httpOnly": true,
"name": "_twitter_sess",
"httpOnly": false,
"name": "_ga",
"path": "/",
"sameSite": "unspecified",
"secure": true,
"session": true,
"secure": false,
"session": false,
"storeId": "0",
"value": "BAh7BiIKZmxhc2hJQzonQWN0aW9uQ29udHJvbGxlcjo6Rmxhc2g6OkZsYXNo%250ASGFzaHsABjoKQHVzZWR7AA%253D%253D--1164b91ac812d853b877e93ddb612b7471bebc74",
"value": "GA1.2.1387953141.1721788785",
"id": 1
},
{
"domain": ".twitter.com",
"expirationDate": 1722772659.123745,
"hostOnly": false,
"httpOnly": true,
"name": "att",
"name": "_twitter_sess",
"path": "/",
"sameSite": "no_restriction",
"sameSite": "unspecified",
"secure": true,
"session": false,
"session": true,
"storeId": "0",
"value": "1-t27ASoVioPwK483s1zzCcXADK63c5SMNU0SbLt3i",
"value": "BAh7CSIKZmxhc2hJQzonQWN0aW9uQ29udHJvbGxlcjo6Rmxhc2g6OkZsYXNo%250ASGFzaHsABjoKQHVzZWR7ADoPY3JlYXRlZF9hdGwrCFW%252FuN6QAToMY3NyZl9p%250AZCIlMmY4NmMwNTcxYzhjY2RkYmE2MTQwNWI0NDBjMDJjZWY6B2lkIiUyMzZh%250AODljY2Q3MTY5MDE2NDQ0ZmUxNmFiNzgxOTljNg%253D%253D--14854efc1913bc8618de3f72817a90f681021de9",
"id": 2
},
{
"domain": ".twitter.com",
"expirationDate": 1756900658.284772,
"expirationDate": 1756283662.960466,
"hostOnly": false,
"httpOnly": true,
"name": "auth_token",
......@@ -37,12 +37,12 @@
"secure": true,
"session": false,
"storeId": "0",
"value": "b3cb77558ba11670d592387937b2cd86355f8925",
"value": "6694c415423126c4099fc819b7d4142b578ebf23",
"id": 3
},
{
"domain": ".twitter.com",
"expirationDate": 1756900658.690693,
"expirationDate": 1756283663.360468,
"hostOnly": false,
"httpOnly": false,
"name": "ct0",
......@@ -51,54 +51,40 @@
"secure": true,
"session": false,
"storeId": "0",
"value": "e7d3edebd701cd68c1d9ca4ac755e71b896d1a3e03c4b7f77610f687fd6365967f61aaf5b6ab9bfb21fe9d7a38e4b92bf63c9193c2d7a0be42ae2e9f2252b9e789b2bc0917f70648f8c8be44645ccbea",
"value": "fef7fe7d5d33870bef38f705111f2d16b3ab6236e89312d7339eb792b2f3c7faf25345307fc1509264eb277368f5bdaa9392b85de1941f2423d0180debd9efc8c312b6f8a4b7ddcd348c677fb3cc3d56",
"id": 4
},
{
"domain": ".twitter.com",
"expirationDate": 1756895711.574034,
"expirationDate": 1756348723.939432,
"hostOnly": false,
"httpOnly": false,
"name": "dnt",
"name": "des_opt_in",
"path": "/",
"sameSite": "no_restriction",
"secure": true,
"sameSite": "unspecified",
"secure": false,
"session": false,
"storeId": "0",
"value": "1",
"value": "N",
"id": 5
},
{
"domain": ".twitter.com",
"expirationDate": 1723289601.759261,
"expirationDate": 1756451096.332794,
"hostOnly": false,
"httpOnly": false,
"name": "external_referer",
"name": "dnt",
"path": "/",
"sameSite": "unspecified",
"sameSite": "no_restriction",
"secure": true,
"session": false,
"storeId": "0",
"value": "padhuUp37zjgzgv1mFWxJ5Xq0CLV%2BbpWuS41v6lN3QU%3D|0|8e8t2xd8A2w%3D",
"value": "1",
"id": 6
},
{
"domain": ".twitter.com",
"expirationDate": 1722688497.654729,
"hostOnly": false,
"httpOnly": false,
"name": "gt",
"path": "/",
"sameSite": "unspecified",
"secure": true,
"session": false,
"storeId": "0",
"value": "1819675816772730888",
"id": 7
},
{
"domain": ".twitter.com",
"expirationDate": 1756895712.329647,
"expirationDate": 1751425532.227206,
"hostOnly": false,
"httpOnly": false,
"name": "guest_id",
......@@ -107,12 +93,12 @@
"secure": true,
"session": false,
"storeId": "0",
"value": "v1%3A172268131226928865",
"id": 8
"value": "v1%3A170860225263973367",
"id": 7
},
{
"domain": ".twitter.com",
"expirationDate": 1757239567.820636,
"expirationDate": 1757316314.85636,
"hostOnly": false,
"httpOnly": false,
"name": "guest_id_ads",
......@@ -121,12 +107,12 @@
"secure": true,
"session": false,
"storeId": "0",
"value": "v1%3A172267949548993887",
"id": 9
"value": "v1%3A170860225263973367",
"id": 8
},
{
"domain": ".twitter.com",
"expirationDate": 1757239567.820538,
"expirationDate": 1757316314.856412,
"hostOnly": false,
"httpOnly": false,
"name": "guest_id_marketing",
......@@ -135,12 +121,12 @@
"secure": true,
"session": false,
"storeId": "0",
"value": "v1%3A172267949548993887",
"id": 10
"value": "v1%3A170860225263973367",
"id": 9
},
{
"domain": ".twitter.com",
"expirationDate": 1756900658.284168,
"expirationDate": 1756283662.960332,
"hostOnly": false,
"httpOnly": true,
"name": "kdt",
......@@ -149,12 +135,12 @@
"secure": true,
"session": false,
"storeId": "0",
"value": "n4OBqkWAyTvlv8c0AVr7eJ07cldRIlGw9rVRpcHI",
"id": 11
"value": "LvWUyXTiPsV6xBShzLMHFKwP1lA2QtXqGG3BbgFj",
"id": 10
},
{
"domain": ".twitter.com",
"expirationDate": 1754222260.197705,
"expirationDate": 1754292309.732566,
"hostOnly": false,
"httpOnly": false,
"name": "night_mode",
......@@ -164,11 +150,11 @@
"session": false,
"storeId": "0",
"value": "2",
"id": 12
"id": 11
},
{
"domain": ".twitter.com",
"expirationDate": 1757239567.820814,
"expirationDate": 1757316314.856452,
"hostOnly": false,
"httpOnly": false,
"name": "personalization_id",
......@@ -177,12 +163,12 @@
"secure": true,
"session": false,
"storeId": "0",
"value": "\"v1_ZCUHat1WJYkTwmbDxH++aA==\"",
"id": 13
"value": "\"v1_wADam4N8E7iunHX/QZVB6g==\"",
"id": 12
},
{
"domain": ".twitter.com",
"expirationDate": 1754222269.877218,
"expirationDate": 1754292323.952039,
"hostOnly": false,
"httpOnly": false,
"name": "twid",
......@@ -191,7 +177,21 @@
"secure": true,
"session": false,
"storeId": "0",
"value": "u%3D1535642152566259712",
"value": "u%3D1815666691260702720",
"id": 13
},
{
"domain": "twitter.com",
"expirationDate": 1737275644,
"hostOnly": true,
"httpOnly": false,
"name": "g_state",
"path": "/",
"sameSite": "unspecified",
"secure": false,
"session": false,
"storeId": "0",
"value": "{\"i_l\":0}",
"id": 14
},
{
......
......@@ -66,7 +66,7 @@ func TaskIdx(done <-chan interface{}, inStream <-chan taskInterface) (<-chan tas
return
case task := <-inStream:
slog.Info("TaskIdx", "task.ID()", task.ID(), "task.UserName()", task.UserName(), "task.InitIdx()", task.InitIdx())
slog.Info("TaskIdx", "task.TaskType()", task.TaskType(), "task.ID()", task.ID(), "task.UserName()", task.UserName(), "task.InitIdx()", task.InitIdx())
if !task.InitIdx() {
......@@ -77,8 +77,9 @@ func TaskIdx(done <-chan interface{}, inStream <-chan taskInterface) (<-chan tas
case <-done:
return
case outListStream <- TaskIdAndList{
TaskId: task.ID(),
List: list,
TaskId: task.ID(),
List: list,
TaskType: task.TaskType(),
}:
}
......@@ -99,11 +100,13 @@ func TaskIdx(done <-chan interface{}, inStream <-chan taskInterface) (<-chan tas
}
// TaskIdAndList pairs a task's identity with the list of results that
// TaskIdx emits for it on its output stream.
//
// Each field is declared exactly once; duplicate field names in a struct
// are a compile error.
type TaskIdAndList struct {
	// TaskType is copied from task.TaskType() and is forwarded unchanged
	// to TaskIdAndProfiles by BackListToQueue.
	TaskType string
	// TaskId is copied from task.ID().
	TaskId string
	// List holds the items handed over by the task; BackListToQueue reads
	// its length to size result batches.
	List *list.List
}
// TaskIdAndProfiles is one batch of profiles belonging to a single task.
// BackListToQueue sends these values on its output stream and
// InsertOrUpdateUsers consumes them.
type TaskIdAndProfiles struct {
// TaskType is compared against FollowType / RetweetType by
// InsertOrUpdateUsers to pick the destination table.
TaskType string
// TaskId is written as the row's UserId (follow) or TweetId (retweet).
TaskId string
// Profiles are the users to persist for this task.
Profiles []Profile
}
......@@ -138,7 +141,7 @@ func BackListToQueue(done <-chan interface{}, inStream <-chan TaskIdAndList) <-c
c = c + 1
if c%100 == 0 {
fmt.Println("BackListToQueue", "len(inStream)", len(inStream), "len(outStream)", len(outStream))
fmt.Println("BackListToQueue", "len(inStream)", len(inStream), "len(outStream)", len(outStream), "len(res)", len(res))
select {
case <-done:
......@@ -146,6 +149,7 @@ func BackListToQueue(done <-chan interface{}, inStream <-chan TaskIdAndList) <-c
case outStream <- TaskIdAndProfiles{
Profiles: res,
TaskId: users.TaskId,
TaskType: users.TaskType,
}:
res = make([]Profile, 0, users.List.Len())
......@@ -155,7 +159,7 @@ func BackListToQueue(done <-chan interface{}, inStream <-chan TaskIdAndList) <-c
}
}
fmt.Println("BackListToQueue", "len(inStream)", len(inStream), "len(outStream)", len(outStream))
fmt.Println("BackListToQueue", "len(inStream)", len(inStream), "len(outStream)", len(outStream), "len(res)", len(res))
select {
case <-done:
......@@ -163,6 +167,7 @@ func BackListToQueue(done <-chan interface{}, inStream <-chan TaskIdAndList) <-c
case outStream <- TaskIdAndProfiles{
Profiles: res,
TaskId: users.TaskId,
TaskType: users.TaskType,
}:
}
......@@ -197,43 +202,70 @@ func InsertOrUpdateUsers(done <-chan interface{}, inStream <-chan TaskIdAndProfi
//rows := make([]map[string]string, 0, len(users.Profiles))
rows := make([]Follower, 0, len(users.Profiles))
//res, c, err := client.From("followers").Insert(rows, true, "", "representation", "").Execute()
var res []byte
var err error
for _, user := range users.Profiles {
if users.TaskType == FollowType {
rows := make([]Follower, 0, len(users.Profiles))
for _, user := range users.Profiles {
sDec, _ := b64.StdEncoding.DecodeString(user.UserID)
userId, _ := strings.CutPrefix(string(sDec), "User:")
sDec, _ := b64.StdEncoding.DecodeString(user.UserID)
userId, _ := strings.CutPrefix(string(sDec), "User:")
row := Follower{
Follower: userId,
UserName: user.Username,
UserId: users.TaskId,
row := Follower{
Follower: userId,
UserName: user.Username,
UserId: users.TaskId,
}
rows = append(rows, row)
}
rows = append(rows, row)
res, _, err = client.From("followers").Insert(rows, true, "", "representation", "").Execute()
}
//res, c, err := client.From("followers").Insert(rows, true, "", "representation", "").Execute()
if users.TaskType == RetweetType {
rows := make([]Retweeter, 0, len(users.Profiles))
res, _, err := client.From("followers").Insert(rows, true, "", "representation", "").Execute()
for _, user := range users.Profiles {
sDec, _ := b64.StdEncoding.DecodeString(user.UserID)
userId, _ := strings.CutPrefix(string(sDec), "User:")
row := Retweeter{
TweetId: users.TaskId, //string `json:"tweet_id"`
RetweeterId: userId, //string `json:"retweeter_id"`
RetweeterUserName: user.Username, //string `json:"retweeter_username"`
// Follower: userId,
// UserName: user.Username,
// UserId: users.TaskId,
}
rows = append(rows, row)
}
res, _, err = client.From("retweeters").Insert(rows, true, "", "representation", "").Execute()
}
if err != nil {
slog.Error("insert into followers", err)
slog.Error("insert into followers or retweeters ", err)
for _, user := range users.Profiles {
usersAsJson, err := json.Marshal(user)
if err != nil {
slog.Error("insert into followers json.Marshal", err)
slog.Error("insert into followers or retweeters json.Marshal", err)
continue
}
sDec, _ := b64.StdEncoding.DecodeString(user.UserID)
userId, _ := strings.CutPrefix(string(sDec), "User:")
slog.Error("insert into followers error", string(usersAsJson), userId)
slog.Error("insert into followers or retweeters error", string(usersAsJson), userId)
}
} else {
slog.Info("insert into followers", string(res), err)
slog.Info("insert into followers or retweeters", string(res), err)
}
fmt.Println("InsertOrUpdateUsers", "len(inStream)", len(inStream))
......@@ -334,7 +366,7 @@ func TaskImplement(done <-chan interface{}, inTaskStream <-chan taskInterface, i
return
case res := <-inResourceStream:
slog.Info("TaskImplement", "task.ID()", task.ID(), "task.UserName()", task.UserName(), "len(inResourceStream)", len(inResourceStream))
slog.Info("TaskImplement", "task.TaskType()", task.TaskType(), "task.ID()", task.ID(), "task.UserName()", task.UserName(), "len(inResourceStream)", len(inResourceStream))
if err := task.Fetch(res.Scraper); err != nil {
slog.Error("task.Fetch", "err", err.Error())
......
......@@ -44,6 +44,18 @@ type Follower struct {
UserName string `json:"follower_username"`
}
// RetweeterId is a Retweeter row together with its "id" and "created_at"
// columns. GetTasksIdx unmarshals rows selected from the "retweeters" table
// into a slice of this type.
type RetweeterId struct {
// Retweeter is embedded, so its JSON fields appear at the top level.
Retweeter
// Id maps to the "id" JSON key; GetTasksIdx orders its query by "id".
Id int `json:"id"`
// CreatedAt maps to the "created_at" JSON key and is kept as a string.
CreatedAt string `json:"created_at"`
}
// Retweeter is the row shape inserted into the "retweeters" table by
// InsertOrUpdateUsers: one (tweet, retweeting user) pair.
type Retweeter struct {
// TweetId is filled from the task ID of the retweet task.
TweetId string `json:"tweet_id"`
// RetweeterId is the user ID left after base64-decoding the profile's
// UserID and stripping the "User:" prefix.
RetweeterId string `json:"retweeter_id"`
// RetweeterUserName is the profile's Username.
RetweeterUserName string `json:"retweeter_username"`
}
func GetTasksIdx() ([]taskInterface, error) {
tasks, err := QueryAllTask()
......@@ -82,19 +94,55 @@ func GetTasksIdx() ([]taskInterface, error) {
return nil, err
}
followTask := NewFollowTask(task.TaskId, task.User)
followTask := NewFollowTask(task.TaskId, task.User, task.TaskType)
followTask.Idx = userRes
if len(userRes) == 0 {
followTask.Init = true
}
// if len(userRes) == 0 {
// followTask.Init = true
// }
res = append(res, followTask)
}
if task.TaskType == RetweetType {
data, count, err := client.From("retweeters").Select("", "tweet_id", false).
Eq("tweet_id", task.TaskId).
Order("id", &postgrest.OrderOpts{
Ascending: false,
// NullsFirst bool
// ForeignTable string
}).Range(0, 10, "").Execute()
if err != nil {
slog.Error("select * from retweeters error", err)
return nil, err
}
_ = count
slog.Info("idx data", "tweet id", task.TaskId, "user name", task.User, "idx", data)
fmt.Println("idx data", string(data))
userRes := make([]RetweeterId, 0, 10)
if err := json.Unmarshal(data, &userRes); err != nil {
return nil, err
}
retweetTask := NewRetweetTask(task.TaskId, task.User, task.TaskType)
//followTask := NewFollowTask(task.TaskId, task.User, task.TaskType)
retweetTask.Idx = userRes
// if len(userRes) == 0 {
// followTask.Init = true
// }
res = append(res, retweetTask)
}
}
......
package main
import (
"net/http"
"os"
"strings"
"time"
b64 "encoding/base64"
"encoding/json"
twitterscraper "github.com/imperatrona/twitter-scraper"
)
......@@ -122,19 +119,19 @@ func InitScraper(user, password string) (*twitterscraper.Scraper, error) {
//err := scraper3.Login("Wade_Leeeee", "923881393time")
// if err := scraper3.Login(user, password); err != nil {
// return nil, err
// }
if err := scraper3.Login(user, password); err != nil {
return nil, err
}
// Deserialize from JSON
var cookies []*http.Cookie
f, _ := os.Open("cookies.json")
json.NewDecoder(f).Decode(&cookies)
// var cookies []*http.Cookie
// f, _ := os.Open("cookies.json")
// json.NewDecoder(f).Decode(&cookies)
scraper3.SetCookies(cookies)
if !scraper3.IsLoggedIn() {
panic("Invalid cookies")
}
// scraper3.SetCookies(cookies)
// if !scraper3.IsLoggedIn() {
// panic("Invalid cookies")
// }
return scraper3, nil
......
......@@ -38,7 +38,8 @@ type NewTask[T FollowTask | RetweetTask] struct {
type FollowTask struct {
// URL string
Init bool
//Init bool
TaskTypeStr string
UserIdStr string
UserNameStr string
Next string
......@@ -51,21 +52,24 @@ type FollowTask struct {
//Scraper *twitterscraper.Scraper
}
func NewFollowTask(userId, userName string) *FollowTask {
func NewFollowTask(userId, userName, taskType string) *FollowTask {
return &FollowTask{
backPushPop: list.New(),
Idx: make([]FollowerId, 0),
UserIdStr: userId,
UserNameStr: userName,
TaskTypeStr: taskType,
}
}
// taskInterface is the contract the pipeline stages (TaskIdx, TaskImplement)
// use to drive a task without knowing its concrete type. FollowTask and
// RetweetTask both provide these methods.
type taskInterface interface {
// TaskType returns the task's type string; it is forwarded into
// TaskIdAndList.TaskType.
TaskType() string
// ID returns the task's identifier; it is forwarded into
// TaskIdAndList.TaskId.
ID() string
// UserName returns the user name associated with the task.
UserName() string
// Fetch retrieves data for the task using the supplied scraper.
Fetch(scraper *twitterscraper.Scraper) error
// InitIdx is checked by TaskIdx before it processes a task.
InitIdx() bool
// UpdateIdx returns a list of results and a flag saying whether that
// list is valid.
UpdateIdx() (*list.List, bool)
}
......@@ -82,6 +86,10 @@ func (f *FollowTask) InitIdx() bool {
return true
}
// TaskType returns the task type string stored in TaskTypeStr.
func (f *FollowTask) TaskType() string {
return f.TaskTypeStr
}
// ID returns the user ID this follow task was created for (UserIdStr).
func (f *FollowTask) ID() string {
return f.UserIdStr
}
......@@ -155,6 +163,25 @@ func MatchIdx(data Users, idxs []FollowerId) ([]Profile, bool) {
return data.Profiles, false
}
// MatchIdxRetweet looks for the first profile in data whose numeric user ID
// equals the RetweeterId of any entry in idxs.
//
// It returns:
//   - all profiles and true when idxs is empty;
//   - the profiles that precede the first match and true when a match exists;
//   - all profiles and false when nothing matches.
func MatchIdxRetweet(data Users, idxs []RetweeterId) ([]Profile, bool) {
	if len(idxs) == 0 {
		return data.Profiles, true
	}

	// Collect the known retweeter IDs once so each profile needs one lookup.
	known := make(map[string]struct{}, len(idxs))
	for i := range idxs {
		known[idxs[i].Retweeter.RetweeterId] = struct{}{}
	}

	for pos := range data.Profiles {
		if _, seen := known[data.Profiles[pos].UserIdAsNumber]; seen {
			return data.Profiles[:pos], true
		}
	}

	return data.Profiles, false
}
func (f *FollowTask) Fetch(scraper *twitterscraper.Scraper) error {
// f.UserId = "OnlyDD_D"
......@@ -176,6 +203,8 @@ func (f *FollowTask) Fetch(scraper *twitterscraper.Scraper) error {
UserIdAsNumber: userId,
Profile: v,
}
//slog.Info("fetch-----------------", item.UserID, item.Profile.Username, item.Profile.Joined)
usersWithUserNumber = append(usersWithUserNumber, item)
}
......@@ -195,19 +224,39 @@ func (f *FollowTask) Fetch(scraper *twitterscraper.Scraper) error {
type RetweetTask struct {
//URL string
TaskTypeStr string
TweetId string
UserNameStr string
Next string
Idx []FollowerId
Idx []RetweeterId
//[]RetweeterId
// backPushPop = list.New()
backPushPop list.List
backPushPop *list.List
NewIdx []RetweeterId
Res Users
}
// func (r *RetweetTask) URl() string {
// return r.URL
// }
// NewRetweetTask creates a RetweetTask for tweet Id. The task starts with an
// empty (non-nil) index and a fresh, empty result list; userName and
// taskType are stored as given.
func NewRetweetTask(Id, userName, taskType string) *RetweetTask {
	task := new(RetweetTask)

	task.TaskTypeStr = taskType
	task.TweetId = Id
	task.UserNameStr = userName

	task.backPushPop = list.New()
	task.Idx = []RetweeterId{}

	return task
}
// TaskType returns the task type string stored in TaskTypeStr.
func (f *RetweetTask) TaskType() string {
return f.TaskTypeStr
}
// UserName returns the user name stored in UserNameStr.
func (f *RetweetTask) UserName() string {
return f.UserNameStr
}
......@@ -222,7 +271,43 @@ func (f *RetweetTask) ID() string {
// Fetch asks the scraper for the retweeters of f.TweetId starting at cursor
// f.Next and stores the result in f.Res.
//
// Every returned user is wrapped in a Profile whose UserIdAsNumber is the
// base64-decoded UserID with the "User:" prefix removed. f.Res.Current is
// the cursor that was used and f.Res.Next is the cursor the scraper
// returned. f.Next itself is left untouched; UpdateIdx decides whether to
// advance it.
//
// The scraper's error is returned unchanged, and f.Res is not modified in
// that case.
func (f *RetweetTask) Fetch(scraper *twitterscraper.Scraper) error {
	slog.Info("retweet fetch", "f.TweetId", f.TweetId, "f.UserNameStr", f.UserNameStr, "f.Next", f.Next)

	// 20 is presumably the page size expected by RetweetsUsers — TODO confirm
	// against the scraper's documentation.
	users, newNext, err := scraper.RetweetsUsers(f.TweetId, 20, f.Next)
	if err != nil {
		return err
	}

	usersWithUserNumber := make([]Profile, 0, len(users))
	for _, v := range users {
		sDec, decErr := b64.StdEncoding.DecodeString(v.UserID)
		if decErr != nil {
			// Best effort: keep the profile (as before) but make the
			// malformed ID visible instead of dropping the error silently.
			slog.Warn("retweet fetch: decode user id", "f.TweetId", f.TweetId, "userID", v.UserID, "err", decErr.Error())
		}
		// A missing prefix is not an error: the decoded string is used as-is.
		userId, _ := strings.CutPrefix(string(sDec), "User:")

		usersWithUserNumber = append(usersWithUserNumber, Profile{
			UserIdAsNumber: userId,
			Profile:        v,
		})
	}

	res := Users{
		Profiles: usersWithUserNumber,
		Current:  f.Next,
		Next:     newNext,
	}

	slog.Info("retweet fetch", "f.TweetId", f.TweetId, "f.UserNameStr", f.UserNameStr, "res.Current", res.Current, "res.Next", res.Next, "len(res.Profiles)", len(res.Profiles))

	f.Res = res
	return nil
}
......@@ -232,11 +317,56 @@ func (f *RetweetTask) Fetch(scraper *twitterscraper.Scraper) error {
// InitIdx reports whether the task currently holds no fetched profiles: it
// returns true when f.Res.Profiles is empty and false otherwise.
//
// The earlier check on f.Idx is dropped; it sat before this logic and made
// it unreachable.
func (f *RetweetTask) InitIdx() bool {
	return len(f.Res.Profiles) == 0
}
// UpdateIdx folds the page stored in f.Res into the task's pending result
// list and decides whether that list is ready to be handed out.
//
// It returns (list, true) when the page was matched against f.Idx (or f.Idx
// is empty) and at least one profile has been accumulated; the task then
// promotes NewIdx to Idx and starts a fresh list. Otherwise it returns
// (nil, false):
//   - no match on this page: f.Next advances to f.Res.Next so the next Fetch
//     continues paging;
//   - match but nothing accumulated: f.Next is reset to "" so the next Fetch
//     starts again from the first page.
func (f *RetweetTask) UpdateIdx() (*list.List, bool) {
	// Build the candidate index only once per round, from the first page
	// seen. The loop stops after index 6, so at most seven profiles are kept.
	if len(f.NewIdx) == 0 {
		for k, v := range f.Res.Profiles {
			f.NewIdx = append(f.NewIdx, RetweeterId{
				Retweeter: Retweeter{
					RetweeterId:       v.UserIdAsNumber,
					RetweeterUserName: v.Username,
				},
			})
			if k > 5 {
				break
			}
		}
	}

	profiles, ok := MatchIdxRetweet(f.Res, f.Idx)

	// PushFront stores each page in reverse order relative to the fetch.
	for _, v := range profiles {
		f.backPushPop.PushFront(v)
	}

	slog.Info("MatchIdx", "f.TweetId", f.TweetId, "f.UserNameStr", f.UserNameStr, "match", ok, "len(profiles)", len(profiles))

	if !ok {
		// NOTE(review): if f.Res.Next is empty here the next Fetch restarts
		// from the first page while backPushPop keeps its contents — confirm
		// this cannot loop or duplicate entries.
		f.Next = f.Res.Next
		return nil, false
	}

	// TODO: this branch needs more thought.
	if f.backPushPop.Len() == 0 {
		f.Next = ""
		//time.Sleep(time.Second * 20)
		return nil, false
	}

	f.Idx = f.NewIdx
	f.NewIdx = make([]RetweeterId, 0, len(f.Idx))

	resList := f.backPushPop
	f.backPushPop = list.New()
	return resList, true
}
type TwitterAccount struct {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment