Skip to content Skip to sidebar Skip to footer

Naver Crawler: Combining Dataframe Per Each Loop Python

I am working on my Naver crawler (it's the Korean Google :P). I have been working on this code for a week now, and I have one last task to solve! So my code below shows data crawling through…

Solution 1:

Consider using a list of dataframes that is concatenated outside of the for loop. Whereas each individual loop iteration runs a horizontal merge, the final master combine runs a vertical append.

Also, for a DRY-er solution, consider defining a function that converts the response to a dataframe, taking as its parameter the body variable, which is the only difference between the if blocks.

...
def response_to_df(body):
    """Send one request with *body* and return the parsed result as a DataFrame.

    The request goes to the module-level ``url`` with the module-level
    ``client_id`` / ``client_secret`` sent as Naver auth headers. The JSON
    response is expected to carry a ``results`` list whose entries each have
    a ``title`` and a ``data`` list of records with ``period`` and ``ratio``
    fields (that is the only shape the code below reads).

    Args:
        body: Request payload as a ``str``; it is UTF-8 encoded before sending.

    Returns:
        The per-result frames concatenated column-wise and joined onto the
        module-level ``Data`` frame.
        NOTE(review): ``Data`` is defined outside this snippet -- presumably
        a frame indexed by period; confirm against the caller.

    Raises:
        RuntimeError: If the server answers with a status other than 200.
    """
    request = urllib.request.Request(url)
    request.add_header("X-Naver-Client-Id", client_id)
    request.add_header("X-Naver-Client-Secret", client_secret)
    request.add_header("Content-Type", "application/json")

    # The context manager guarantees the connection is released even when
    # reading or the status check fails.
    with urllib.request.urlopen(request, data=body.encode("utf-8")) as response:
        rescode = response.getcode()
        if rescode != 200:
            # Fail loudly: there is no payload to parse, and continuing would
            # only surface later as an unrelated NameError.
            raise RuntimeError("Error Code:" + str(rescode))
        js = response.read().decode("utf-8")

    d = json.loads(js)

    # One single-column frame per result: indexed by 'period', with the
    # 'ratio' column renamed to that result's title.
    lst = [
        pd.DataFrame.from_dict(r["data"])
        .set_index("period")
        .rename(columns={"ratio": r["title"]})
        for r in d["results"]
    ]

    # HORIZONTAL MERGE
    df = pd.concat(lst, axis=1)
    df = Data.join(df)
    return df


df_list = []
# Walk DNA in batches of up to five keywords; the final batch holds the
# remainder (1-4 keywords) when len(DNA) is not a multiple of five.
# range() needs an explicit start of 0 here: range(len(DNA), 5) would treat
# the length as the start and 5 as the stop.
for i in range(0, len(DNA), 5):
    chunk = DNA[i:i + 5]

    # Each keyword contributes "<kw><body_keywords><kw>"; consecutive keyword
    # groups are separated by body_groupName. This yields the same string the
    # hand-written 5/4/3/2/1 concatenations produced.
    groups = body_groupName.join(kw + body_keywords + kw for kw in chunk)
    body = body_intro + endDate + body_endDate + groups + body_last
    print(len(chunk))  # progress trace: number of keywords in this request

    df_list.append(response_to_df(body))


# Combining all Data (VERTICAL APPEND)
Naver = pd.concat(df_list, axis=0)
print("ddd")
# Bare expression: shows the frame when this is the last line of a notebook
# cell; it has no effect when run as a plain script.
Naver

Post a Comment for "Naver Crawler: Combining Dataframe Per Each Loop Python"