/*
 * User:Antigng-bot/search
 *
 * 维基百科,自由的百科全书 (Wikipedia, the free encyclopedia)
 */
#include <malloc.h>
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <unistd.h>
#include "network.h"
#include "convert.h"
#include "auth.h"
/*
 * One node of the singly linked list of pages still to be scanned.
 * title and id each point to their own malloc'ed string
 * (see allpagequery(), which builds the list).
 */
struct problemlist {
    char *title;               /* page title as returned by the API */
    char *id;                  /* page id, kept as text */
    struct problemlist *next;  /* following node, or 0 at the end of the list */
};
/* Output file for matching page titles: opened in main(), written by proceedchild(). */
FILE *debug;
/* clock() value sampled at the very start of main(). */
clock_t start;
/* Slot index of each worker; threadini() hands &threadc[i] to worker i. */
int threadc[1200];
/* Per-worker buffer holding the title that worker is currently processing.
 * NOTE(review): only 1050 rows although threadc/threadpool have 1200 entries;
 * main() asks for 1010 workers, which fits. */
char threads[1050][3000];
/* Thread handles filled in by pthread_create() in threadini(). */
pthread_t threadpool[1200];
/* Number of workers created and not yet finished; decremented under tcs by threadfunc(). */
int threadnumber=0;
/* NOTE(review): cs is initialised and destroyed in main() but never locked in this file. */
pthread_mutex_t cs;
/* Guards threadnumber. */
pthread_mutex_t tcs;
/* Serialises writes to the debug file in proceedchild(). */
pthread_mutex_t lock;
/* Guards head (the shared work list) in threadfunc(). */
pthread_mutex_t hcs;
/* Search pattern, filled from the -s command-line option. */
char matchstring[1000];
/* strlen(matchstring), computed by kmpini(). */
int matchlength;
/* KMP failure table for matchstring, allocated by kmpini(); nextm[0] is -1. */
int *nextm;
/* First page not yet taken by a worker; NULL once the list is exhausted. */
struct problemlist *head;
/* Start flag: workers sleep until main() sets it to 1. */
int action=0;
/* Parses -u/-p/-f/-s from argv into the given buffers; 0 when all four are present. */
int parsearg(int argc,char *argv[],char *user,char *passwd,char *filen,char *target);
/* Scans the body read from h for matchstring; 1 if found, 0 otherwise. */
int kmp(HTTP h);
/* Worker entry point; the argument points to the worker's slot index. */
void *threadfunc(void *i);
/* Starts up to count detached workers and records how many were created in threadnumber. */
int threadini(int count);
/* Fetches the raw text of one page and logs its title when it contains matchstring. */
int proceedchild(char *title);
/* Queries the list of all main-namespace pages; NULL on failure. */
struct problemlist *allpagequery();
/* Builds the KMP failure table for matchstring. */
int kmpini();
/*
 * Program entry point.
 *
 * Parses the command line, opens the output file, logs in, builds the KMP
 * table for the search string, fetches the list of pages, then starts the
 * worker threads and waits until the list is drained and the workers are
 * done (at most about 20 seconds after the list becomes empty).
 *
 * Returns 0 on success and -1 on any set-up failure.
 */
int main(int argc,char *argv[])
{
    char username[500];
    char passwd[300];
    char filen[100];
    clock_t end;
    int count=0;
    int pending=1;
    int remaining=0;
    int rc=-1;

    start=clock();
    if(parsearg(argc,argv,username,passwd,filen,matchstring))
    {
        printf("useage:%s -u username -p password -f filenam -s string\n",argv[0]);
        return -1;
    }
    debug=fopen(filen,"w+");
    if(!debug)
    {
        printf("can't create file.\n");
        return -1;
    }
    pthread_mutex_init(&cs,NULL);
    pthread_mutex_init(&lock,NULL);
    pthread_mutex_init(&tcs,NULL);
    pthread_mutex_init(&hcs,NULL);

    /* From here on every failure goes through cleanup so that the output
     * file and the mutexes are released. No worker exists yet on these paths. */
    count=login(username,passwd);
    if(count)
    {
        printf("login error.\n");
        goto cleanup;
    }
    if(kmpini())
    {
        printf("can't build the match table.\n");
        goto cleanup;
    }
    head=allpagequery();
    if(head==NULL)
    {
        printf("Query page error.\n");
        goto cleanup;
    }
    printf("query ok\n");
    fflush(stdout);
    threadini(1010);
    printf("Create %d threads to go through the problem list.\n",threadnumber);
    fflush(stdout);
    if(threadnumber<=0)
    {
        /* Without a single worker nobody would ever consume the list and
         * the wait loop below would spin forever. */
        printf("can't create any worker thread.\n");
        goto cleanup;
    }
    action=1;

    /* Wait for the workers to drain the list. head is shared with them, so
     * it is only inspected while holding hcs. */
    while(pending)
    {
        sleep(1);
        pthread_mutex_lock(&hcs);
        pending=(head!=NULL);
        pthread_mutex_unlock(&hcs);
    }

    /* Give the workers up to 20 seconds to finish the page each one is
     * still processing. */
    for(count=0;count<20;count++)
    {
        pthread_mutex_lock(&tcs);
        remaining=threadnumber;
        pthread_mutex_unlock(&tcs);
        if(remaining<=0) break;
        printf("waiting for all threads to exit. Current thread number: %d\n",remaining);
        fflush(stdout);
        sleep(1);
    }
    pthread_mutex_lock(&tcs);
    remaining=threadnumber;
    pthread_mutex_unlock(&tcs);

    end=clock();
    printf("----------------end time:%f-------------------\n",(double)(end-start)/CLOCKS_PER_SEC);
    fflush(stdout);

    if(remaining>0)
    {
        /* Some workers are still running and may be writing to debug or
         * holding one of the mutexes. Closing the file or destroying the
         * mutexes underneath them would be undefined behaviour, so leave
         * both to process exit (workers flush debug after every write). */
        printf("%d threads still running, exiting anyway.\n",remaining);
        fflush(stdout);
        return 0;
    }
    rc=0;

cleanup:
    fclose(debug);
    pthread_mutex_destroy(&cs);
    pthread_mutex_destroy(&tcs);
    pthread_mutex_destroy(&lock);
    pthread_mutex_destroy(&hcs);
    return rc;
}

/*
 * Extracts the -u, -p, -f and -s option values from the command line.
 *
 * An option is recognised only when it is followed by a value that does not
 * itself start with '-'. The value is copied into the matching output
 * buffer (user, passwd, filen, target). Unknown options consume their value
 * and are otherwise ignored.
 *
 * The signature carries no buffer sizes, so the limits below mirror the
 * buffers main() passes in (username[500], passwd[300], filen[100],
 * matchstring[1000]); a value that would not fit is rejected instead of
 * being copied past the end of the buffer.
 *
 * Returns 0 when all four options were supplied, 1 otherwise (including
 * when a value is too long).
 */
int parsearg(int argc,char *argv[],char *user,char *passwd,char *filen,char *target)
{
    enum { USER_CAP=500, PASSWD_CAP=300, FILEN_CAP=100, TARGET_CAP=1000 };
    int hasu=0,hasp=0,hasf=0,hast=0;
    int i;

    for(i=1;i<argc;i++)
    {
        char *dst=NULL;
        int *seen=NULL;
        size_t cap=0;
        const char *value;

        /* Need "-x value": a flag, then a following argument that is not a flag. */
        if(argv[i][0]!='-') continue;
        if(i+1>=argc||!argv[i+1]||argv[i+1][0]=='-') continue;
        value=argv[i+1];

        switch(argv[i][1])
        {
            case 'u':
                dst=user;   cap=USER_CAP;   seen=&hasu;
                break;
            case 'f':
                dst=filen;  cap=FILEN_CAP;  seen=&hasf;
                break;
            case 'p':
                dst=passwd; cap=PASSWD_CAP; seen=&hasp;
                break;
            case 's':
                dst=target; cap=TARGET_CAP; seen=&hast;
                break;
            default:
                break;
        }
        if(dst)
        {
            if(strlen(value)>=cap) return 1;   /* would overflow the caller's buffer */
            strcpy(dst,value);
            *seen=1;
        }
        i++;   /* the value has been consumed */
    }

    return (hasu&&hasf&&hasp&&hast)?0:1;
}
/*
 * Starts up to count detached worker threads running threadfunc().
 *
 * Worker i receives a pointer to threadc[i] (its slot index) and owns
 * threads[i] as its title buffer. threadnumber is reset and then counts the
 * workers that were actually created.
 *
 * count is clamped to the smallest of the per-worker arrays so that a large
 * request cannot index past threadc, threads or threadpool.
 *
 * Returns 0, or -1 when the thread attributes could not be set up (in which
 * case no worker is started and threadnumber is 0).
 */
int threadini(int count)
{
    size_t capacity=sizeof threads/sizeof threads[0];
    pthread_attr_t a;
    int i;

    if(sizeof threadc/sizeof threadc[0]<capacity)
        capacity=sizeof threadc/sizeof threadc[0];
    if(sizeof threadpool/sizeof threadpool[0]<capacity)
        capacity=sizeof threadpool/sizeof threadpool[0];
    if(count>(int)capacity)
        count=(int)capacity;

    threadnumber=0;
    if(pthread_attr_init(&a))
        return -1;
    if(pthread_attr_setdetachstate(&a,PTHREAD_CREATE_DETACHED))
    {
        pthread_attr_destroy(&a);
        return -1;
    }
    for(i=0;i<count;i++)
    {
        threadc[i]=i;
        threads[i][0]=0;
        /* pthread_create() returns 0 on success; only those workers are counted. */
        if(!pthread_create(&threadpool[i],&a,threadfunc,(void *)(threadc+i)))
            threadnumber++;
    }
    pthread_attr_destroy(&a);

    return 0;
}


void *threadfunc(void *c)
{
     int i=*(int *)c;
    int exit=0;
      while(!action) sleep(1);

      while(1)
      {
            pthread_mutex_lock(&hcs);
            if(head)
            {
                  strcpy(threads[i],head->title);
                  head=head->next;
            }
            else exit=1;
            pthread_mutex_unlock(&hcs);
            if(exit) break;
            else
            {
                proceedchild(threads[i]);
            }
    }
      pthread_mutex_lock(&tcs);
      threadnumber--;
      pthread_mutex_unlock(&tcs);
      return NULL;
}
/*
 * Fetches the raw text of the page called title and, when kmp() finds the
 * search string in it, appends "#[[title]]" to the debug file (under lock).
 *
 * Returns 0 when the page was fetched and scanned, -1 when title is NULL or
 * the response header could not be skipped, -5 when the request failed.
 * The connection is closed on every path after it has been opened.
 */
int proceedchild(char *title)
{
    HTTP f;
    char c[3000],tt[5000];
    int rc=0;

    if(!title)
    {
        return -1;
    }
    strcpy(c,"/w/index.php?action=raw&title=");
    /* NOTE(review): the last argument is presumably the output limit; 500 keeps
     * prefix + encoded title well inside c[3000] - confirm against URLEncode(). */
    URLEncode(title,strlen(title),tt,500);
    strcat(c,tt);
    f=hopen();
    if(get(c,0,f))
    {
        rc=-5;
    }
    else if(skipresponseheader(f))
    {
        rc=-1;
    }
    else if(kmp(f))
    {
        pthread_mutex_lock(&lock);
        fprintf(debug,"#[[%s]] \r\n",title);
        fflush(debug);
        pthread_mutex_unlock(&lock);
    }
    hclose(f);
    return rc;
}
/* Releases every node of a page list together with its title and id strings. */
static void freepagelist(struct problemlist *list)
{
    while(list)
    {
        struct problemlist *following=list->next;
        free(list->title);
        free(list->id);
        free(list);
        list=following;
    }
}

/*
 * Retrieves the list of all pages in namespace 0 through the API
 * (list=allpages, 500 per request), following "apcontinue" until the server
 * stops sending a <continue> tag.
 *
 * Returns the head of a newly allocated list in the order received; each
 * node and its title/id strings are malloc'ed. Returns NULL when a request
 * fails, the response header cannot be skipped, memory runs out, or no page
 * was returned; on failure everything collected so far is released.
 */
struct problemlist *allpagequery()
{
    HTTP f;
    char line[2000]={0},url[1000]={0},id[400]={0},title[400]={0},sroffset[1000]={0},offseto[1000]={0};
    char snd[2048]={0};   /* large enough for url + "&apcontinue=" + sroffset */
    int next=0;
    struct problemlist *tail=NULL,*temp,*list=NULL;
    char *ctm[]={"apcontinue"};
    char *ctv[1];
    char *idm[]={"pageid","title"};
    char *idv[2];

    ctv[0]=offseto;
    idv[0]=id;
    idv[1]=title;
    strcpy(url,"/w/api.php?action=query&format=xml&list=allpages&apnamespace=0&aplimit=500");
    do
    {
        if(next)
            snprintf(snd,sizeof snd,"%s&apcontinue=%s",url,sroffset);
        else
            snprintf(snd,sizeof snd,"%s",url);

        f=hopen();
        if(get(snd,1,f))
            goto fail;
        if(skipresponseheader(f))
            goto fail;

        next=0;
        do
        {
            xmlparsetag(f,line);
            if(!next&&!strcmp(line,"continue"))
            {
                /* Remember where the next request has to resume. */
                xmlparsearg(f,1,ctm,ctv);
                URLEncode(offseto,strlen(offseto),sroffset,990);
                next=1;
            }
            if(!strcmp(line,"p"))
            {
                /* One <p pageid=... title=...> element per page. */
                xmlparsearg(f,2,idm,idv);
                temp=malloc(sizeof *temp);
                if(!temp)
                    goto fail;
                temp->title=malloc(strlen(title)+1);
                temp->id=malloc(strlen(id)+1);
                temp->next=NULL;
                if(!temp->title||!temp->id)
                {
                    free(temp->title);
                    free(temp->id);
                    free(temp);
                    goto fail;
                }
                strcpy(temp->title,title);
                strcpy(temp->id,id);
                if(tail)
                    tail->next=temp;
                else
                    list=temp;
                tail=temp;
            }
        }while(!heof(f));
        hclose(f);
    }while(next);
    return list;

fail:
    hclose(f);
    freepagelist(list);
    return NULL;
}

/*
 * Builds the KMP failure table for matchstring.
 *
 * Sets matchlength to strlen(matchstring) and nextm to a freshly allocated
 * table with nextm[0] == -1, where nextm[i] is the pattern position to fall
 * back to after a mismatch at position i. A table from an earlier call is
 * released.
 *
 * At least one element is always allocated so that the nextm[0] store is
 * valid even for an empty pattern.
 *
 * Returns 0 on success, -1 when the table cannot be allocated (nextm is
 * then left unchanged).
 */
int kmpini()
{
    int i=0,j=-1;
    int *table;
    size_t slots;

    matchlength=(int)strlen(matchstring);
    slots=matchlength>0?(size_t)matchlength:1;
    table=malloc(slots*sizeof *table);
    if(!table)
        return -1;
    free(nextm);   /* free(NULL) is a no-op on the first call */
    nextm=table;

    nextm[0]=-1;
    while(i<matchlength-1)
    {
        if(j==-1)
        {
            /* No shorter border left: restart from the pattern's beginning. */
            j=0;
            i++;
            nextm[i]=0;
        }
        else if(matchstring[i]==matchstring[j])
        {
            i++;j++;
            nextm[i]=j;
        }
        else j=nextm[j];
    }

    return 0;
}

/*
 * Searches the data read from h for matchstring, using the failure table in
 * nextm. Reading stops as soon as the whole pattern has been matched or
 * heof(h) reports the end of the input.
 *
 * Returns 1 when the pattern was found, 0 otherwise.
 */
int kmp(HTTP h)
{
    int matched=0;          /* number of pattern characters matched so far */
    char cur=hgetc(h);      /* character currently being compared */

    for(;;)
    {
        if(heof(h)||matched>=matchlength)
            break;

        if(matched==-1)
        {
            /* Fell off the front of the pattern: move on to the next character. */
            cur=hgetc(h);
            matched=0;
        }
        else if(cur==matchstring[matched])
        {
            cur=hgetc(h);
            matched++;
        }
        else
        {
            matched=nextm[matched];
        }
    }

    return (matched==matchlength)?1:0;
}