#include <malloc.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

#include "network.h"
#include "convert.h"
#include "auth.h"
#include "struct.h"
/*
 * One entry of a singly linked work list. The same record type is also used
 * for the per-worker scratch slots in `threads[]`.
 * Field order is significant (positional initialisers / layout) - keep it.
 */
struct problemlist {
    char *title;              /* page title; heap string owned by the entry */
    char *id;                 /* page id as a decimal string; heap string owned by the entry */
    struct problemlist *next; /* following entry, or 0 at the end of the list */
    int tid;                  /* slot number of the worker handling the entry */
};
/* Result log; opened and closed by main(), written by checklink(). */
FILE *centrallog;
/* Initialised and destroyed by main(); not otherwise referenced in the visible code. */
pthread_mutex_t cs;
/* Held by checklink() while writing to centrallog and by proceedchild() around hashaddc(). */
pthread_mutex_t fcs;
/* Held while reading or decrementing threadnumber. */
pthread_mutex_t tcs;
/* Held by the workers while they take an entry off `head`. */
pthread_mutex_t hcs;
/*
 * Start flag: workers sleep until it becomes non-zero.
 * NOTE(review): written by main() and read by workers without a lock.
 */
int action=0;
/* Shared work list consumed by the worker threads. */
struct problemlist *head=0;
/* Slot numbers handed to the workers as their start argument. */
int threadc[1024];
/* Per-worker scratch record, indexed by slot number. */
struct problemlist threads[1024];
/* Thread identifiers filled in by threadini(). */
pthread_t threadpool[1024];
/* Number of workers that were started and have not yet signed off. */
int threadnumber=0;
/* Hash table filled by proceedchild() (hashaddc) and read by checkself() (hashqueryc). */
struct hashlist *redirectlist;
struct problemlist *query(char *offset,char *id);
struct problemlist *allpagequery();
void *threadfunc1(void *i);
void *threadfunc2(void *i);
int threadini(int count,void *tfc(void *));
int proceedchild(struct problemlist *p);
/* Declared here because both are called before their definitions. */
int checklink(char *target);
int checkself(char *link,char *title);
/*
 * Wait for one worker phase to finish: first until the shared work list
 * `head` is empty, then until `threadnumber` has dropped to zero. Both are
 * polled once per second; the second wait gives up after `maxwait` polls.
 */
static void waitforworkers(int maxwait)
{
    int polls;
    int pending;
    int alive;

    for(;;)
    {
        /* workers modify `head` under hcs, so read it under the same lock */
        pthread_mutex_lock(&hcs);
        pending=(head!=0);
        pthread_mutex_unlock(&hcs);
        if(!pending) break;
        sleep(1);
    }
    for(polls=0;polls<maxwait;polls++)
    {
        pthread_mutex_lock(&tcs);
        alive=threadnumber;
        pthread_mutex_unlock(&tcs);
        if(alive<=0) break;
        printf("waiting for all threads to exit. Current thread number: %d\n",alive);
        fflush(stdout);
        sleep(1);
    }
}

/*
 * Program entry point.
 *
 * argv[1]            second argument passed to login() (required)
 * argv[2], argv[3]   optional continuation offset and id for query(); they
 *                    are only used when both are present
 *
 * Phase 1 runs threadfunc1 workers over the list returned by query(),
 * phase 2 runs threadfunc2 workers over the list returned by allpagequery().
 * The log file is finally copied into public_html.
 *
 * Returns 0 on success, -1 when arguments are missing, login() fails or the
 * log file cannot be opened.
 */
int main(int argc,char *argv[])
{
    pthread_t thd;
    char *offset=NULL;
    char *pageid=NULL;

    if(argc<2) return -1;
    /* argv[3] does not exist for argc<3; never read past the argument vector */
    if(argc>=4)
    {
        offset=argv[2];
        pageid=argv[3];
    }
    buckini(500);
    pthread_mutex_init(&cs,NULL);
    pthread_mutex_init(&tcs,NULL);
    pthread_mutex_init(&fcs,NULL);
    pthread_mutex_init(&hcs,NULL);
    redirectlist=hashini();
    if(login("Antigng-bot",argv[1]))
    {
        return -1;
    }
    pthread_create(&thd,0,tokenmanage,0);
    head=query(offset,pageid);
    fflush(stdout);
    centrallog=fopen("/data/project/antigng-bot/selflink.txt","w+");
    if(!centrallog)
    {
        /* without a log there is nowhere to put results; stop instead of crashing in fprintf */
        perror("/data/project/antigng-bot/selflink.txt");
        return -1;
    }
    fprintf(centrallog,"start logging\n");

    threadini(1000,threadfunc1);
    printf("Create %d threads to go through the problem list.\n",threadnumber);
    action=1;
    waitforworkers(50);

    head=allpagequery();
    action=0;
    threadnumber=0;
    threadini(1000,threadfunc2);
    printf("Create %d threads to go through the problem list.\n",threadnumber);
    action=1;
    /* each phase gets its own full wait budget */
    waitforworkers(50);

    fclose(centrallog);
    pthread_mutex_destroy(&cs);
    pthread_mutex_destroy(&tcs);
    pthread_mutex_destroy(&fcs);
    pthread_mutex_destroy(&hcs);
    hashdestroy(redirectlist);
    fflush(stdout);
    system("cp /data/project/antigng-bot/selflink.txt /data/project/antigng-bot/public_html/selflink.txt");
    return 0;
}
/*
 * Append a node holding private copies of `id` and `title` to the list
 * described by *hd / *tail.
 * Returns 0 on success, -1 when memory is exhausted (list left unchanged).
 */
static int queryappend(struct problemlist **hd,struct problemlist **tail,const char *id,const char *title)
{
    struct problemlist *node=malloc(sizeof *node);

    if(!node) return -1;
    node->id=malloc(strlen(id)+1);
    node->title=malloc(strlen(title)+1);
    if(!node->id||!node->title)
    {
        free(node->id);
        free(node->title);
        free(node);
        return -1;
    }
    strcpy(node->id,id);
    strcpy(node->title,title);
    node->next=0;
    node->tid=0;
    if(*hd) (*tail)->next=node;
    else *hd=node;
    *tail=node;
    return 0;
}

/*
 * Fetch list=allredirects (namespace 0) from the API, following `arcontinue`
 * until the server stops sending one, and return the entries as a linked
 * list of (id, title) pairs. Spaces in titles are replaced by '_'.
 *
 * offset, iid   optional starting point; when both are non-NULL the first
 *               request continues from "offset|iid"
 *
 * Every "<id> <title>" pair is first put into a temporary hash table and the
 * list is then built by walking that table.
 *
 * Returns the list head (caller owns nodes and strings), or NULL when a
 * request fails, the start offset does not fit, or nothing was found.
 */
struct problemlist *query(char *offset,char *iid)
{
    HTTP f;
    clock_t start,end;
    /* snd must hold url plus "&arcontinue=" plus the encoded token (up to 1990 bytes) */
    char line[9050]={0},url[3000]={0},snd[5100]={0},id[1000]={0},title[1000]={0},sroffset[1000]={0},idandtitle[5000]={0};
    char urlenc[2000]={0};
    int next=0;
    char *mtc[]={"fromid","title"};
    char *mtv[2];
    char *nxc[]={"arcontinue"};
    char *nxv[1];
    extern int HASHMAX;
    struct ext *point;
    struct problemlist *tail=0,*hd=0;
    struct hashlist *hsh;
    int i,n;
    int failed=0;

    mtv[0]=id;
    mtv[1]=title;
    nxv[0]=sroffset;
    hsh=hashini();
    if(!hsh) return NULL;
    strcpy(url,"/w/api.php?action=query&format=xml&list=allredirects&arnamespace=0&arlimit=5000&arprop=ids|title");
    if(offset&&iid)
    {
        next=1;
        n=snprintf(sroffset,sizeof sroffset,"%s|%s",offset,iid);
        if(n<0||(size_t)n>=sizeof sroffset)
        {
            /* a truncated continuation token would silently start at the wrong place */
            fprintf(stderr,"query: start offset too long\n");
            hashdestroy(hsh);
            return NULL;
        }
    }
    do
    {
        if(next)
        {
            URLEncode(sroffset,strlen(sroffset),urlenc,1990);
            n=snprintf(snd,sizeof snd,"%s&arcontinue=%s",url,urlenc);
        }
        else
        {
            n=snprintf(snd,sizeof snd,"%s",url);
        }
        if(n<0||(size_t)n>=sizeof snd)
        {
            hashdestroy(hsh);
            return NULL;
        }
        f=hopen();
        if(get(snd,1,f))
        {
            hclose(f);
            hashdestroy(hsh);
            return NULL;
        }
        skipresponseheader(f);
        next=0;
        do
        {
            xmlparsetag(f,line);
            if(!next&&!strcmp(line,"continue"))
            {
                /* remember where the next request has to resume */
                xmlparsearg(f,1,nxc,nxv);
                next=1;
            }
            if(!strcmp(line,"r"))
            {
                xmlparsearg(f,2,mtc,mtv);
                for(i=0;title[i];i++)
                {
                    if(title[i]==' ') title[i]='_';
                }
                /* id and title are at most 999 bytes each, so this always fits */
                snprintf(idandtitle,sizeof idandtitle,"%s %s",id,title);
                hashadd(hsh,idandtitle);
            }
        }while(!heof(f));
        hclose(f);
    }while(next);
    printf("query complete\n");
    start=clock();
    for(i=0;i<HASHMAX&&!failed;i++)
    {
        if(!hsh[i].flag) continue;
        for(point=hsh[i].node;point;point=point->next)
        {
            /* the width limits match the sizes of id[] and title[] */
            if(sscanf(point->title,"%999s %999s",id,title)!=2) continue;
            if(queryappend(&hd,&tail,id,title))
            {
                fprintf(stderr,"query: out of memory, list truncated\n");
                failed=1;
                break;
            }
        }
    }
    hashdestroy(hsh);
    end=clock();
    printf("sort done in %f\n",(double)(end-start)/CLOCKS_PER_SEC);
    return hd;
}
/*
 * Start up to `count` detached worker threads running `tfd`.
 *
 * count   requested number of workers; values outside 0..1024 are replaced
 *         by 1024
 * tfd     thread entry point; it receives a pointer to its slot number
 *         (an element of threadc[])
 *
 * Resets `threadnumber` and then counts every successfully created thread
 * in it. Always returns 0.
 */
int threadini(int count,void *tfd(void *))
{
    pthread_attr_t detached;
    int slot;

    pthread_attr_init(&detached);
    pthread_attr_setdetachstate(&detached,PTHREAD_CREATE_DETACHED);
    threadnumber=0;
    if(!(count>=0&&count<=1024))
    {
        count=1024;
    }
    slot=0;
    while(slot<count)
    {
        threadc[slot]=slot;
        if(pthread_create(threadpool+slot,&detached,tfd,(void *)&threadc[slot])==0)
        {
            threadnumber++;
        }
        slot++;
    }
    pthread_attr_destroy(&detached);
    return 0;
}
void *threadfunc1(void *c)
{
int i=*(int *)c;
int exit=0;
while(!action) sleep(1);
while(1)
{
pthread_mutex_lock(&hcs);
if(head)
{
threads[i].title=(char *)calloc(strlen(head->title)+5,1);
strcpy(threads[i].title,head->title);
threads[i].id=(char *)calloc(strlen(head->id)+5,1);
strcpy(threads[i].id,head->id);
free(head->id);free(head->title);
head=head->next;
}
else exit=1;
pthread_mutex_unlock(&hcs);
if(exit) break;
else
{
threads[i].tid=i;
proceedchild(&threads[i]);
}
}
pthread_mutex_lock(&tcs);
threadnumber--;
pthread_mutex_unlock(&tcs);
return NULL;
}
void *threadfunc2(void *c)
{
int i=*(int *)c;
int exit=0;
while(!action) sleep(1);
while(1)
{
pthread_mutex_lock(&hcs);
if(head)
{
threads[i].title=(char *)calloc(strlen(head->title)+5,1);
strcpy(threads[i].title,head->title);
free(head->id);free(head->title);
head=head->next;
}
else exit=1;
pthread_mutex_unlock(&hcs);
if(exit) break;
else
{
threads[i].tid=i;
checklink(threads[i].title);
free(threads[i].title);
}
}
pthread_mutex_lock(&tcs);
threadnumber--;
pthread_mutex_unlock(&tcs);
return NULL;
}
/* checkself() is defined further down in this file */
int checkself(char *link,char *title);

/*
 * Download the raw text of page `target` and scan it for "[[...]]" links.
 * For every link whose text (up to the first '|' or ']', spaces turned into
 * '_') makes checkself() return non-zero, one line is written to centrallog
 * under fcs. Links containing '#' or longer than 255 characters are skipped.
 *
 * Returns 0 after the page was scanned, -1 when the request failed.
 */
int checklink(char *target)
{
    char tt[1024]={0};
    char url[8192]={0};
    HTTP h;
    char title[256];
    int ttp=0;
    int status=0;   /* 0: outside a link, 1: collecting link text */
    char ch=0,cht=0;

    URLEncode(target,strlen(target),tt,1023);
    snprintf(url,sizeof url,"/w/index.php?action=raw&redirect=no&title=%s",tt);
    /* open exactly one handle; every exit path below closes it */
    h=hopen();
    if(get(url,0,h))
    {
        hclose(h);
        return -1;
    }
    skipresponseheader(h);
    while(!heof(h))
    {
        cht=ch;
        ch=hgetc(h);
        switch(status)
        {
        case 0:
            if(cht=='['&&ch=='[') status=1;
            break;
        case 1:
            if(ch=='#'||ttp>254)
            {
                /* section link or text too long for title[]: give up on this link */
                ttp=0;
                status=0;
            }
            else if(ch=='|'||ch==']')
            {
                title[ttp]=0;
                if(checkself(title,target))
                {
                    pthread_mutex_lock(&fcs);
                    fprintf(centrallog,"#[[%s]] <- [[%s]]\n",target,title);
                    pthread_mutex_unlock(&fcs);
                }
                ttp=0;
                status=0;
            }
            else
            {
                title[ttp]=(ch==' ')?'_':ch;
                ttp++;
            }
            break;
        default:
            break;
        }
    }
    hclose(h);
    return 0;
}
/*
 * Look `link` up in redirectlist via hashqueryc() and compare what it
 * stores in the local buffer with `title`.
 * Returns 1 when the lookup succeeds and the stored string equals `title`,
 * otherwise 0.
 * NOTE(review): hashqueryc() gets no buffer length; presumably stored
 * values fit into 256 bytes - confirm against its definition.
 */
int checkself(char *link,char *title)
{
    char resolved[256];

    if(!hashqueryc(redirectlist,link,resolved))
    {
        return 0;
    }
    return strcmp(title,resolved)==0;
}
/*
 * Ask the API for the page with id p->id, take the `title` attribute of the
 * first <page> element (spaces turned into '_') and record the pair
 * (that title, p->title) in redirectlist via hashaddc(), under fcs.
 *
 * p->title and p->id are freed and set to NULL on every path.
 *
 * Returns 0 on success, -1 when the request or its response header fails,
 * -2 when no usable page title was found.
 */
int proceedchild(struct problemlist *p)
{
    char url[2000];
    char line[5000]={0};
    char target[1000];
    HTTP f;
    int i;
    int found=0;
    int rc=0;
    char *mtc[]={"title"};
    char *mtv[1];

    mtv[0]=line;
    snprintf(url,sizeof url,"/w/api.php?action=query&format=xml&pageids=%s",p->id);
    f=hopen();
    if(get(url,1,f)||skipresponseheader(f))
    {
        /* both failure kinds must give the handle back */
        hclose(f);
        rc=-1;
        goto out;
    }
    while(!heof(f))
    {
        xmlparsetag(f,line);
        if(!strcmp(line,"page"))
        {
            xmlparsearg(f,1,mtc,mtv);
            /* line[] is larger than target[]: stop before the copy can overrun */
            for(i=0;line[i]&&i<(int)sizeof target-1;i++) target[i]=(line[i]==' ')?'_':line[i];
            target[i]=0;
            /* an empty or oversized title is treated as "not found" */
            if(i>0&&!line[i]) found=1;
            break;
        }
    }
    hclose(f);
    if(!found)
    {
        rc=-2;
        goto out;
    }
    pthread_mutex_lock(&fcs);
    hashaddc(redirectlist,target,p->title);
    pthread_mutex_unlock(&fcs);
out:
    free(p->title);
    free(p->id);
    p->title=NULL;
    p->id=NULL;
    return rc;
}
/*
 * Fetch list=allpages (namespace 0) from the API, following `apcontinue`
 * until the server stops sending one, and return one list entry per <p>
 * element with its page id and title (spaces turned into '_').
 *
 * Returns the list head; the caller owns nodes and strings. When a request
 * fails or memory runs out, whatever was collected so far is returned
 * (possibly NULL).
 */
struct problemlist *allpagequery()
{
    HTTP f;
    /* snd must hold url plus "&apcontinue=" plus the encoded token (up to 990 bytes) */
    char line[2000]={0},url[1000]={0},snd[2100]={0},id[400]={0},title[400]={0},sroffset[1000]={0},offseto[1000]={0};
    int next=0;
    int i=0,n;
    struct problemlist *tail=0,*temp,*list=0;
    char *ctm[]={"apcontinue"};
    char *ctv[1];
    char *idm[]={"pageid","title"};
    char *idv[2];

    ctv[0]=offseto;
    idv[0]=id;
    idv[1]=title;
    strcpy(url,"/w/api.php?action=query&format=xml&list=allpages&apnamespace=0&aplimit=500");
    do
    {
        if(next) n=snprintf(snd,sizeof snd,"%s&apcontinue=%s",url,sroffset);
        else n=snprintf(snd,sizeof snd,"%s",url);
        if(n<0||(size_t)n>=sizeof snd) return list;
        f=hopen();
        if(get(snd,1,f))
        {
            hclose(f);
            return list;
        }
        skipresponseheader(f);
        next=0;
        do
        {
            xmlparsetag(f,line);
            if(!next&&!strcmp(line,"continue"))
            {
                /* remember (already URL-encoded) where the next request resumes */
                xmlparsearg(f,1,ctm,ctv);
                URLEncode(offseto,strlen(offseto),sroffset,990);
                next=1;
            }
            if(!strcmp(line,"p"))
            {
                xmlparsearg(f,2,idm,idv);
                temp=malloc(sizeof *temp);
                if(!temp)
                {
                    hclose(f);
                    return list;
                }
                temp->title=malloc(strlen(title)+1);
                temp->id=malloc(strlen(id)+1);
                if(!temp->title||!temp->id)
                {
                    free(temp->title);
                    free(temp->id);
                    free(temp);
                    hclose(f);
                    return list;
                }
                for(i=0;title[i];i++) temp->title[i]=title[i]==' '?'_':title[i];
                /* the copy loop stops before the terminator, so add it explicitly */
                temp->title[i]=0;
                strcpy(temp->id,id);
                temp->next=0;
                temp->tid=0;
                if(list) tail->next=temp;
                else list=temp;
                tail=temp;
            }
        }while(!heof(f));
        hclose(f);
    }while(next);
    return list;
}