Skip to content
Snippets Groups Projects
Commit f3483653 authored by George Marchment's avatar George Marchment
Browse files

New Graph

parent b6b1196b
No related branches found
No related tags found
No related merge requests found
This diff is collapsed.
import json
import os
#with open("./wf_crawl_nextflow.json") as json_file:
# crawler = json.load(json_file)
#
#crawler.pop("last_date")
def download(path, crawler):
    """Clone every crawled repository that carries an accepted license.

    Args:
        path: Directory under which each repository is cloned, at
            ``{path}/{project}``.
        crawler: Mapping of GitHub ``owner/name`` identifiers to a metadata
            dict; only its ``"license"`` entry is read here.

    Progress is printed for every project, and the number of projects that
    were selected for download is printed at the end.
    """
    import subprocess  # local import so the file's import header stays untouched

    #These are the licenses that we are keeping
    LICENSE = ("Apache License 2.0", "GNU General Public License v3.0", "MIT License")
    total = len(crawler)
    downloaded = 0
    #Code for downloading the files
    for index, project in enumerate(crawler, start=1):
        print(f'* {index}/{total}')
        # .get(): an entry may hold None or lack the key when no license was found
        if crawler[project].get("license") in LICENSE:
            print(f"Downloading...'{project}'")
            downloaded += 1
            destination = f'{path}/{project}'
            # The previous "mkdir -p " had no operand and therefore created nothing;
            # create the owner directory that git will clone into.
            parent = os.path.dirname(destination)
            if parent:
                os.makedirs(parent, exist_ok=True)
            try:
                # Argument list instead of a shell string: the project name comes
                # from crawled data and must not be interpreted by a shell.
                subprocess.run(
                    ["git", "clone", f"https://github.com/{project}.git", destination],
                    check=False,  # best effort: a failed clone does not stop the run
                )
            except OSError as error:
                print(f"Could not run git for '{project}': {error}")
    print('\n\n')
    print(downloaded)
\ No newline at end of file
......@@ -5,7 +5,7 @@ from datetime import datetime
from dateutil.relativedelta import relativedelta
# SECURITY: a personal access token must never be committed to the repository;
# any token that has appeared in the history should be revoked. The token is
# read from the GITHUB_TOKEN environment variable instead. When the variable is
# unset, None is passed (presumably an unauthenticated client - confirm against
# the installed PyGithub version).
import os
g = Github(os.environ.get("GITHUB_TOKEN"))
def increment_month(x):
......@@ -44,6 +44,11 @@ def check_rate(val):
time.sleep(600)
return 0
def try_and_get_thing(thing):
    """Return ``thing`` unchanged.

    The argument is evaluated by the caller before this function runs, so an
    exception raised while computing it (for example reading an attribute of
    ``None``) can never be caught here; guard the expression at the call site
    instead. The former bare ``try``/``except`` around the return was
    unreachable and has been removed without changing behavior.
    """
    return thing
#If you give this function a JSON file that already exists, it will continue from where it last stopped
#The parameter start must follow the format '%Y-%m-%d'
......@@ -117,8 +122,8 @@ def search(name='wf_crawl.json', start = '2017-01-01'):
val=check_rate(val)
if(nextflow_file_in_root):
#Checking if there is a License
if(repo.license!=None):
if(True):
#Adding the different information to the json file
wf_crawl[repo.full_name] = {}
......@@ -140,27 +145,29 @@ def search(name='wf_crawl.json', start = '2017-01-01'):
#Nb stars
wf_crawl[repo.full_name]['stars'] = repo.stargazers_count
#License
wf_crawl[repo.full_name]['license'] = repo.license.name
try:
wf_crawl[repo.full_name]['license'] = repo.license.name
except:
wf_crawl[repo.full_name]['license'] = None
#Commits
val=check_rate(val)
commits = list(repo.get_commits())
val=check_rate(val)
wf_crawl[repo.full_name]['last_commit_date'] = date_to_string(commits[0].commit.author.date)
val=check_rate(val)
wf_crawl[repo.full_name]['last_commit_id'] = str(commits[0].commit._identity)
val=check_rate(val)
wf_crawl[repo.full_name]['first_commit_date'] = date_to_string(commits[-1].commit.author.date)
val=check_rate(val)
wf_crawl[repo.full_name]['first_commit_id'] = str(commits[-1].commit._identity)
val=check_rate(val)
wf_crawl[repo.full_name]['nb_commmits'] = len(commits)
val=check_rate(val)
#val=check_rate(val)
#commits = list(repo.get_commits())
#val=check_rate(val)
#wf_crawl[repo.full_name]['last_commit_date'] = date_to_string(commits[0].commit.author.date)
#val=check_rate(val)
#wf_crawl[repo.full_name]['last_commit_id'] = str(commits[0].commit._identity)
#val=check_rate(val)
#wf_crawl[repo.full_name]['first_commit_date'] = date_to_string(commits[-1].commit.author.date)
#val=check_rate(val)
#wf_crawl[repo.full_name]['first_commit_id'] = str(commits[-1].commit._identity)
#val=check_rate(val)
#wf_crawl[repo.full_name]['nb_commmits'] = len(commits)
#val=check_rate(val)
else:
nb=-1
except GithubException as error:
print(error)
break
#Showing the progress
print(f'Between {first_date} and {second_date}, found {nb_result} repo(s), total repo(s): {nb}')
......
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment