使用者:Antigng-bot/selflink

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <malloc.h>
#include <time.h>
#include <unistd.h>
#include <pthread.h>
#include "network.h"
#include "convert.h"
#include "auth.h"
#include "struct.h"
/*
 * One unit of work.  Used both as a node of the singly linked work list
 * (see `head`) and as the per-thread scratch slot in the `threads` array.
 */
struct problemlist {
    char *title;              /* heap-allocated page title string */
    char *id;                 /* heap-allocated page id string */
    struct problemlist *next; /* next node in the work list, 0 at the end */
    int tid;                  /* index of the worker thread using this slot */
};

/* Report file; checklink() appends one line per detected self-link. */
FILE *centrallog;
/* Initialised and destroyed in main(); not otherwise used in this file. */
pthread_mutex_t cs;
/* Held while writing to centrallog and while inserting into redirectlist. */
pthread_mutex_t fcs;
/* Guards threadnumber when workers decrement it and main() polls it. */
pthread_mutex_t tcs;
/* Guards the shared work list `head` while workers pop nodes from it. */
pthread_mutex_t hcs;
/* Start flag: workers sleep until main() sets this to a non-zero value. */
int action=0;
/* Head of the shared work list consumed by the worker threads. */
struct problemlist *head=0;
/* threadc[i] == i; the address of each entry is passed to worker i. */
int threadc[1024];
/* Per-worker scratch slot holding the item currently being processed. */
struct problemlist threads[1024];
/* Thread handles filled in by threadini(). */
pthread_t threadpool[1024];
/* Number of workers that were started and have not yet finished. */
int threadnumber=0;
/* Hash table filled by proceedchild() and queried by checkself(). */
struct hashlist *redirectlist;

struct problemlist *query(char *offset,char *id);
struct problemlist *allpagequery();
void *threadfunc1(void *i);
void *threadfunc2(void *i);
int threadini(int count,void *tfc(void *));
int proceedchild(struct problemlist *p);
/*
 * checklink() and checkself() are called before their definitions further
 * down; declare them here so the calls do not rely on implicit declarations.
 */
int checklink(char *target);
int checkself(char *link,char *title);
/*
 * Wait for the current batch of workers to finish.
 *
 * First blocks until the shared work list `head` has been drained, then polls
 * threadnumber once per second, at most `limit` times, until every worker has
 * reported its exit.  The poll counter is local so that each phase gets its
 * own full budget.
 */
static void waitworkers(int limit)
{
        int polls=0;
        int pending=1;
        int left;

        while(pending)
        {
                /* head is modified by the workers under hcs, so read it under hcs too. */
                pthread_mutex_lock(&hcs);
                pending=(head!=0);
                pthread_mutex_unlock(&hcs);
                if(pending) sleep(1);
        }
        while(polls<limit)
        {
                polls++;
                pthread_mutex_lock(&tcs);
                left=threadnumber;
                pthread_mutex_unlock(&tcs);
                if(left<=0) break;
                printf("waiting for all threads to exit. Current thread number: %d\n",left);
                fflush(stdout);
                sleep(1);
        }
}

/*
 * Entry point.
 *
 * argv[1]          password passed to login() for the account "Antigng-bot".
 * argv[2], argv[3] optional continuation point handed to query(); it is only
 *                  used when both are present.
 *
 * Phase 1 fetches the redirect list and lets threadfunc1 workers fill
 * redirectlist; phase 2 fetches the page list and lets threadfunc2 workers
 * scan every page, writing hits to centrallog.  Finally the log is copied to
 * the public_html directory.
 *
 * Returns 0 on success, -1 when the password is missing, login fails or the
 * log file cannot be opened.
 */
int main(int argc,char *argv[])
{
        pthread_t thd;
        char *offset=NULL;
        char *iid=NULL;

        if(argc<2) return -1;
        /* Never index past argv[argc]: the continuation needs both arguments. */
        if(argc>3)
        {
                offset=argv[2];
                iid=argv[3];
        }
        buckini(500);
        pthread_mutex_init(&cs,NULL);
        pthread_mutex_init(&tcs,NULL);
        pthread_mutex_init(&fcs,NULL);
        pthread_mutex_init(&hcs,NULL);
        redirectlist=hashini();
        if(login("Antigng-bot",argv[1]))
        {
                return -1;
        }
        pthread_create(&thd,0,tokenmanage,0);
        head=query(offset,iid);
        fflush(stdout);
        centrallog=fopen("/data/project/antigng-bot/selflink.txt","w+");
        if(!centrallog)
        {
                /* Without the log there is nowhere to report to; fprintf(NULL,...) would crash. */
                perror("/data/project/antigng-bot/selflink.txt");
                return -1;
        }
        fprintf(centrallog,"start logging\n");

        /* Phase 1: process the redirect list. */
        threadini(1000,threadfunc1);
        printf("Create %d threads to go through the problem list.\n",threadnumber);
        action=1;
        waitworkers(50);

        /* Phase 2: scan every page for links. */
        head=allpagequery();
        action=0;
        threadnumber=0;
        threadini(1000,threadfunc2);
        printf("Create %d threads to go through the problem list.\n",threadnumber);
        action=1;
        waitworkers(50);

        fclose(centrallog);
        pthread_mutex_destroy(&cs);
        pthread_mutex_destroy(&tcs);
        pthread_mutex_destroy(&fcs);
        pthread_mutex_destroy(&hcs);
        hashdestroy(redirectlist);
        fflush(stdout);
        system("cp /data/project/antigng-bot/selflink.txt /data/project/antigng-bot/public_html/selflink.txt");
        return 0;
}

/*
 * Download the main-namespace redirect list (list=allredirects) and return it
 * as a linked list of heap-allocated nodes.
 *
 * Every <r> element contributes its "fromid" and "title" attributes; spaces in
 * the title are replaced by '_'.  The "<fromid> <title>" pairs are first
 * collected in a temporary hash table and then turned into list nodes
 * (presumably the hash table drops duplicates -- confirm against hashadd()).
 *
 * offset, iid  optional continuation point; when both are non-NULL the first
 *              request carries arcontinue="<offset>|<iid>".
 *
 * Returns the list head.  NULL is returned when a request fails or when no
 * entry was collected.  The caller owns the nodes and their id/title strings.
 */
struct problemlist *query(char *offset,char *iid)
{
        HTTP f;
        clock_t start,end;
        const char *url="/w/api.php?action=query&format=xml&list=allredirects&arnamespace=0&arlimit=5000&arprop=ids|title";
        /* snd must hold url + "&arcontinue=" + up to 1990 encoded bytes. */
        char line[9050]={0},snd[4000]={0},id[1000]={0},title[1000]={0},sroffset[1000]={0},idandtitle[5000]={0};
        char urlenc[2000]={0};
        int next=0;
        int oom=0;
        char *mtc[]={"fromid","title"};
        char *mtv[2];
        char *nxc[]={"arcontinue"};
        char *nxv[1];
        extern int HASHMAX;
        struct ext *point;
        struct problemlist *pre=0,*temp,*hd=0;
        struct hashlist *hsh;
        int i;

        mtv[0]=id;
        mtv[1]=title;
        nxv[0]=sroffset;
        hsh=hashini();
        if(offset&&iid)
        {
                next=1;
                /* The arguments come from the command line; never overrun sroffset. */
                snprintf(sroffset,sizeof sroffset,"%s|%s",offset,iid);
        }
        do
        {
                if(next)
                {
                        URLEncode(sroffset,strlen(sroffset),urlenc,1990);
                        snprintf(snd,sizeof snd,"%s&arcontinue=%s",url,urlenc);
                }
                else
                {
                        snprintf(snd,sizeof snd,"%s",url);
                }
                f=hopen();
                if(get(snd,1,f))
                {
                        hclose(f);
                        /* Release the temporary table as well, not only the connection. */
                        hashdestroy(hsh);
                        return NULL;
                }
                skipresponseheader(f);
                next=0;
                do
                {
                        xmlparsetag(f,line);
                        if(!next&&!strcmp(line,"continue"))
                        {
                                /* Remember the continuation token for the next request. */
                                xmlparsearg(f,1,nxc,nxv);
                                next=1;
                        }
                        if(!strcmp(line,"r"))
                        {
                                xmlparsearg(f,2,mtc,mtv);
                                for(i=0;title[i];i++)
                                {
                                        if(title[i]==' ') title[i]='_';
                                }
                                snprintf(idandtitle,sizeof idandtitle,"%s %s",id,title);
                                hashadd(hsh,idandtitle);
                        }
                }while(!heof(f));
                hclose(f);
        }while(next);
        printf("query complete\n");
        start=clock();
        for(i=0;i<HASHMAX&&!oom;i++)
        {
                if(!hsh[i].flag) continue;
                for(point=hsh[i].node;point&&!oom;point=point->next)
                {
                        /* Split the stored "<id> <title>" pair; widths match id[] and title[]. */
                        sscanf(point->title,"%999s %999s",id,title);
                        temp=malloc(sizeof *temp);
                        if(!temp)
                        {
                                oom=1;
                                break;
                        }
                        temp->id=malloc(strlen(id)+1);
                        temp->title=malloc(strlen(title)+1);
                        if(!temp->id||!temp->title)
                        {
                                free(temp->id);
                                free(temp->title);
                                free(temp);
                                oom=1;
                                break;
                        }
                        strcpy(temp->id,id);
                        strcpy(temp->title,title);
                        temp->next=0;
                        temp->tid=0;
                        if(!hd) hd=temp;
                        else pre->next=temp;
                        pre=temp;
                }
        }
        if(oom) fprintf(stderr,"query: out of memory, list truncated\n");
        hashdestroy(hsh);
        end=clock();
        printf("sort done in %f\n",(double)(end-start)/CLOCKS_PER_SEC);
        return hd;
}
/*
 * Start up to `count` detached worker threads running `tfd`.
 *
 * A count outside 0..1024 is replaced by 1024.  Worker k receives a pointer
 * to threadc[k], which holds k.  threadnumber is reset and then counts the
 * threads that were created successfully.  Always returns 0.
 */
int threadini(int count,void *tfd(void *))
{
        pthread_attr_t detached;
        int slot;

        pthread_attr_init(&detached);
        pthread_attr_setdetachstate(&detached,PTHREAD_CREATE_DETACHED);
        threadnumber=0;
        if(!(count>=0&&count<=1024)) count=1024;
        slot=0;
        while(slot<count)
        {
                threadc[slot]=slot;
                if(pthread_create(&threadpool[slot],&detached,tfd,(void *)&threadc[slot])==0)
                {
                        threadnumber++;
                }
                slot++;
        }
        pthread_attr_destroy(&detached);
        return 0;
}
void *threadfunc1(void *c)
{
     int i=*(int *)c;
    int exit=0;
      while(!action) sleep(1);
      while(1)
      {
            pthread_mutex_lock(&hcs);
            if(head)
            {
                  threads[i].title=(char *)calloc(strlen(head->title)+5,1);
                 strcpy(threads[i].title,head->title);
                 threads[i].id=(char *)calloc(strlen(head->id)+5,1);
                  strcpy(threads[i].id,head->id);
                  free(head->id);free(head->title);
                  head=head->next;
            }
            else exit=1;
            pthread_mutex_unlock(&hcs);
            if(exit) break;
            else
            {
                   threads[i].tid=i;
                proceedchild(&threads[i]);
            }
      }
      pthread_mutex_lock(&tcs);
      threadnumber--;
      pthread_mutex_unlock(&tcs);
      return NULL;
}
void *threadfunc2(void *c)
{
     int i=*(int *)c;
    int exit=0;
      while(!action) sleep(1);
      while(1)
      {
            pthread_mutex_lock(&hcs);
            if(head)
            {
                  threads[i].title=(char *)calloc(strlen(head->title)+5,1);
                 strcpy(threads[i].title,head->title);
                  free(head->id);free(head->title);
                  head=head->next;
            }
            else exit=1;
            pthread_mutex_unlock(&hcs);
            if(exit) break;
            else
            {
                   threads[i].tid=i;
                checklink(threads[i].title);
               free(threads[i].title);
            }
      }
      pthread_mutex_lock(&tcs);
      threadnumber--;
      pthread_mutex_unlock(&tcs);
      return NULL;
}
/*
 * Scan the raw text of page `target` for "[[...]]" links and log every link
 * for which checkself() reports a match.
 *
 * The text is read byte by byte with a two-state scanner: state 0 looks for
 * "[[", state 1 collects the link title (spaces become '_') until '|' or ']'.
 * A '#' or a title longer than 255 bytes abandons the current link.
 * Matches are written to centrallog under fcs.
 *
 * Returns 0 after the page was scanned, -1 when the request fails.
 */
int checklink(char *target)
{
	char tt[1024]={0};
	char url[8192]={0};
	HTTP h;
	char title[256];
	int ttp=0;
	int status=0;
	char ch=0,cht=0;

	URLEncode(target,strlen(target),tt,1023);
	snprintf(url,sizeof url,"/w/index.php?action=raw&redirect=no&title=%s",tt);
	/* Open exactly one handle; a second hopen() would leak the first. */
	h=hopen();
	if(get(url,0,h))
	{
		/* Same failure handling as the other get() call sites in this file. */
		hclose(h);
		return -1;
	}
	skipresponseheader(h);
	while(!heof(h))
	{
		cht=ch;
		ch=hgetc(h);
		switch(status)
		{
		case 0:
			if(cht=='['&&ch=='[') status=1;
			break;
		case 1:
			if(ch=='#'||ttp>254)
			{
				/* Section link or over-long title: drop it. */
				ttp=0;
				status=0;
			}
			else if(ch=='|'||ch==']')
			{
				title[ttp]=0;
				if(checkself(title,target))
				{
					pthread_mutex_lock(&fcs);
					fprintf(centrallog,"#[[%s]] <- [[%s]]\n",target,title);
					pthread_mutex_unlock(&fcs);
				}
				ttp=0;
				status=0;
			}
			else
			{
				title[ttp]=(ch==' ')?'_':ch;
				ttp++;
			}
			break;
		default:
			break;
		}
	}
	hclose(h);
	return 0;
}
/*
 * Look `link` up in redirectlist and compare the stored value with `title`.
 *
 * Returns 1 when the lookup succeeds and the stored value equals `title`,
 * 0 otherwise.
 */
int checkself(char *link,char *title)
{
	char resolved[256];

	if(!hashqueryc(redirectlist,link,resolved)) return 0;
	return (strcmp(title,resolved)==0)?1:0;
}

/*
 * Resolve the page id in p->id to a page title and record it in redirectlist.
 *
 * The API is queried with pageids=<p->id>; the "title" attribute of the first
 * <page> element (spaces replaced by '_') becomes the key under which
 * p->title is stored via hashaddc(), with fcs held during the insert.
 *
 * p->title and p->id are freed on every path and reset to NULL.
 *
 * Returns 0 on success, -1 when the request or the response header fails and
 * -2 when no non-empty page title was found.
 */
int proceedchild(struct problemlist *p)
{
        char url[2000];
        char line[5000];
        char target[1000];
        HTTP f;
        size_t i;
        int rc;
        char *mtc[]={"title"};
        char *mtv[1];

        mtv[0]=line;
        snprintf(url,sizeof url,"/w/api.php?action=query&format=xml&pageids=%s",p->id);
        f=hopen();
        if(get(url,1,f)||skipresponseheader(f))
        {
                rc=-1;
        }
        else
        {
                rc=-2;
                while(!heof(f))
                {
                        xmlparsetag(f,line);
                        if(!strcmp(line,"page"))
                        {
                                xmlparsearg(f,1,mtc,mtv);
                                /* line[] is larger than target[]; stop before overrunning it. */
                                for(i=0;line[i]&&i<sizeof target-1;i++)
                                {
                                        target[i]=(line[i]==' ')?'_':line[i];
                                }
                                target[i]=0;
                                if(i>0) rc=0;
                                break;
                        }
                }
        }
        /* The handle is closed on every path, including a header failure. */
        hclose(f);
        if(rc==0)
        {
                pthread_mutex_lock(&fcs);
                hashaddc(redirectlist,target,p->title);
                pthread_mutex_unlock(&fcs);
        }
        free(p->title);
        free(p->id);
        p->title=NULL;
        p->id=NULL;
        return rc;
}
/*
 * Download the list of all main-namespace pages (list=allpages) and return it
 * as a linked list of heap-allocated nodes.
 *
 * Every <p> element contributes its "pageid" and "title" attributes; spaces in
 * the title are replaced by '_'.  Requests are repeated with the returned
 * "apcontinue" token until the API stops sending one.
 *
 * Returns the list head.  If a request or an allocation fails, the nodes
 * collected so far are returned (possibly NULL).  The caller owns the nodes
 * and their id/title strings.
 */
struct problemlist *allpagequery()
{
        HTTP f;
        const char *url="/w/api.php?action=query&format=xml&list=allpages&apnamespace=0&aplimit=500";
        /* snd must hold url + "&apcontinue=" + up to 990 encoded bytes. */
        char line[2000]={0},snd[2200]={0},id[400]={0},title[400]={0},sroffset[1000]={0},offseto[1000]={0};
        int next=0;
        size_t i;
        struct problemlist *pre=0,*temp,*first=0;
        char *ctm[]={"apcontinue"};
        char *ctv[1];
        char *idm[]={"pageid","title"};
        char *idv[2];

        ctv[0]=offseto;
        idv[0]=id;
        idv[1]=title;
        do
        {
                if(next) snprintf(snd,sizeof snd,"%s&apcontinue=%s",url,sroffset);
                else snprintf(snd,sizeof snd,"%s",url);
                f=hopen();
                if(get(snd,1,f))
                {
                        hclose(f);
                        return first;
                }
                skipresponseheader(f);
                next=0;
                do
                {
                        xmlparsetag(f,line);
                        if(!next&&!strcmp(line,"continue"))
                        {
                                /* Keep the encoded continuation token for the next request. */
                                xmlparsearg(f,1,ctm,ctv);
                                URLEncode(offseto,strlen(offseto),sroffset,990);
                                next=1;
                        }
                        if(!strcmp(line,"p"))
                        {
                                xmlparsearg(f,2,idm,idv);
                                temp=malloc(sizeof *temp);
                                if(!temp)
                                {
                                        hclose(f);
                                        return first;
                                }
                                temp->title=malloc(strlen(title)+1);
                                temp->id=malloc(strlen(id)+1);
                                if(!temp->title||!temp->id)
                                {
                                        free(temp->title);
                                        free(temp->id);
                                        free(temp);
                                        hclose(f);
                                        return first;
                                }
                                for(i=0;title[i];i++) temp->title[i]=(title[i]==' ')?'_':title[i];
                                /* malloc() does not zero memory: terminate the copy explicitly. */
                                temp->title[i]=0;
                                strcpy(temp->id,id);
                                temp->next=0;
                                temp->tid=0;
                                if(first) pre->next=temp;
                                else first=temp;
                                pre=temp;
                        }
                }while(!heof(f));
                hclose(f);
        }while(next);
        return first;
}