From adb3040b8fda209ccd7994a2e994d65c7c92f3bf Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Fri, 3 Oct 2025 17:57:42 +0300 Subject: [PATCH 01/14] update org repo info getting --- clone_repos.sh | 5 +- export_org_repos.py | 149 ++++++++++++++++++++++++++++---------------- main.sh | 2 +- requirements.txt | 2 +- 4 files changed, 101 insertions(+), 57 deletions(-) diff --git a/clone_repos.sh b/clone_repos.sh index a859c30..94b0dcd 100755 --- a/clone_repos.sh +++ b/clone_repos.sh @@ -19,11 +19,14 @@ dir=`pwd` exit 99 } i=1 -while read name priv issues perms; do +while read name archived has_issues has_wiki is_private; do if [ "$i" == '1' ]; then i=0 continue # skip column's name fi + if [ "$archived" == 'True' ]; then + continue # skip archived repo + fi LINK="git@github.com:$ORG/$name.git" if [ ! -d "$ORG/$name" ]; then git clone $LINK $ORG/$name diff --git a/export_org_repos.py b/export_org_repos.py index 0968847..491ac3c 100644 --- a/export_org_repos.py +++ b/export_org_repos.py @@ -1,26 +1,35 @@ #!/bin/python3 -# usage: python3 export_org_repos.py --token --github_nickname --orgs +# usage: python3 export_org_repos.py --token --username --orgs import argparse -from github import Github -import json +from github import Github, Auth +from github.Repository import Repository import csv -import requests +from json import dump as json_dump from time import sleep + def parse_args(): parser = argparse.ArgumentParser() - parser.add_argument('--token', type=str, required=True, - dest='token', - help='file w/github token') - parser.add_argument('--github_nickname', type=str, required=True, - dest='github_nickname', - help='organization_names_file') - parser.add_argument('--orgs', type=str, required=True, - dest='orgs', - help='organization_names_file') + parser.add_argument( + "--token", type=str, required=True, dest="token", help="file w/github token" + ) + parser.add_argument( + "--username", + type=str, + required=True, + dest="username", + help="github username", + ) 
+ parser.add_argument( + "--orgs", type=str, required=True, dest="orgs", help="organization_names_file" + ) + parser.add_argument( + "--verbose", action="store_true", dest="verbose", help="verbose result" + ) results = parser.parse_args() return results + def get_token(filename): with open(filename) as file: token = file.readline().strip() @@ -33,49 +42,81 @@ def get_orgs(filename): return orgs -def get_writer_rows(): - return [ - "repo_name", - "is_private", - "issues_count", - "permissions" - ] - - -def get_repo_info(repo, org_name="", username=""): - users = "" - try: - for u in repo.get_collaborators(): - # params = { - # "accept": "application/vnd.github.v3+json" - # } - # sleep(1) - # res = requests.get(f"https://api.github.com/repos/{org_name}/{repo.name}/collaborators/{u.login}/permission", params=params, auth=(username, args.token)) - - users += f"{u.login}:{str(u.permissions)}," - # print(users) - except Exception as exc: - print(f"Error getting collaborators: {exc}") - return [ - str(repo.name), - str(repo.private), - str(len(list(repo.get_issues(state='all')))), - str(users) - ] - - -if __name__ == '__main__': +def get_writer_rows(verbose=False): + headers = "repo_name,archived,has_issues,has_wiki,is_private,last_pushed_at,size,pr_count,issues_count,users_count,permissions".split( + "," + ) + return headers if verbose else headers[:5] + + +def get_repo_info(repo: Repository, org_name: str, username: str, verbose=False): + info = { + "repo_name": repo.name, + "is_private": int(repo.private), + "archived": int(repo.archived), + "has_wiki": int(repo.has_wiki), + "has_issues": int(repo.has_issues) + } + if verbose: + users = "" + try: + users_info = repo.get_collaborators() + users_count = users_info.totalCount + for u in users_info: + users += f"{u.login}:{str(u.permissions)}," + except Exception as exc: + print(f"Error getting collaborators: {exc}") + + pr_count, issues_count = 0, 0 + if repo.has_issues: + all_issues = list( + repo.get_issues(state="all") 
+ ) # TODO: use totalCount after release + issues_count = sum(not issue.pull_request for issue in all_issues) + pr_count = len(all_issues) - issues_count + + info.extend( + { + "last_pushed_at": repo.pushed_at.strftime(r"%d.%m.%y %H:%M:%S"), + "size": repo.size, + "pr_count": pr_count, + "issues_count": issues_count, + "users_count": users_count, + "permissions": users, + } + ) + return info + + +if __name__ == "__main__": args = parse_args() - g = Github(get_token(args.token)) + g = Github(auth=Auth.Token(get_token(args.token))) + + orgs_data = {} + for org_name in get_orgs(args.orgs): - print('get org [{}]'.format(org_name)) + print(f"get org [{org_name}]") + orgs_data[org_name] = [] org = g.get_organization(org_name) - org_repos = org.get_repos() - with open(f'{org_name}.csv', 'w') as file: - writer = csv.writer(file, delimiter=';', quotechar='|', quoting=csv.QUOTE_MINIMAL) - writer.writerow(get_writer_rows()) - for repo in org_repos: + + with open(f"{org_name}.csv", "w", newline="") as file: + writer = csv.DictWriter( + file, + fieldnames=get_writer_rows(args.verbose), + delimiter=";", + quotechar="|", + quoting=csv.QUOTE_MINIMAL, + ) + writer.writeheader() + + repos = org.get_repos() + for repo in repos: print(f"Handling repo [{repo.name}]") -# sleep(10) - info = get_repo_info(repo, org_name, args.github_nickname) + info = get_repo_info(repo, org_name, args.username, args.verbose) + orgs_data[org_name].append(info) writer.writerow(info) + sleep(0.1) + + if args.verbose: + with open("orgs_info.json", "w", encoding="utf-8") as file: + json_dump(orgs_data, file, ensure_ascii=False, indent=4) diff --git a/main.sh b/main.sh index 01fdcbe..2380266 100644 --- a/main.sh +++ b/main.sh @@ -6,7 +6,7 @@ # ./orgs - orgs list (separated with newline) echo "Running export_org_repos.py" -python3 ./export_org_repos.py --token ./token --github_nickname `cat ./username` --orgs ./orgs +python3 ./export_org_repos.py --token ./token --username `cat ./username` --orgs ./orgs echo 
"Cloning repos" diff --git a/requirements.txt b/requirements.txt index c833144..b889660 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1 @@ -PyGithub==1.55 +PyGithub==2.8.1 From 5b65e972b9dfc5de8689202e607b37ffaf53f903 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Sat, 4 Oct 2025 10:12:34 +0300 Subject: [PATCH 02/14] rm useless args (username) for export_org_repos.py --- export_org_repos.py | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/export_org_repos.py b/export_org_repos.py index 491ac3c..1bd73a0 100644 --- a/export_org_repos.py +++ b/export_org_repos.py @@ -1,5 +1,5 @@ #!/bin/python3 -# usage: python3 export_org_repos.py --token --username --orgs +# usage: python3 export_org_repos.py --token --orgs import argparse from github import Github, Auth from github.Repository import Repository @@ -13,13 +13,6 @@ def parse_args(): parser.add_argument( "--token", type=str, required=True, dest="token", help="file w/github token" ) - parser.add_argument( - "--username", - type=str, - required=True, - dest="username", - help="github username", - ) parser.add_argument( "--orgs", type=str, required=True, dest="orgs", help="organization_names_file" ) @@ -49,7 +42,7 @@ def get_writer_rows(verbose=False): return headers if verbose else headers[:5] -def get_repo_info(repo: Repository, org_name: str, username: str, verbose=False): +def get_repo_info(repo: Repository, verbose=False): info = { "repo_name": repo.name, "is_private": int(repo.private), @@ -112,7 +105,7 @@ def get_repo_info(repo: Repository, org_name: str, username: str, verbose=False) repos = org.get_repos() for repo in repos: print(f"Handling repo [{repo.name}]") - info = get_repo_info(repo, org_name, args.username, args.verbose) + info = get_repo_info(repo, args.verbose) orgs_data[org_name].append(info) writer.writerow(info) sleep(0.1) From 016424930328b92f27a25403390080ee05f7eb72 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Sat, 4 Oct 2025 10:13:17 +0300 
Subject: [PATCH 03/14] rework issues_backup.py (more info, skip archived repo) --- issues_backup.py | 111 +++++++++++++++++++++++++++-------------------- 1 file changed, 65 insertions(+), 46 deletions(-) diff --git a/issues_backup.py b/issues_backup.py index 0a4659e..4246fbf 100644 --- a/issues_backup.py +++ b/issues_backup.py @@ -2,7 +2,7 @@ # usage: python3 issues_backup.py --token --repos import os -from github import Github +from github import Auth, Github, Issue import argparse import csv from json import dump, load @@ -12,21 +12,26 @@ from datetime import datetime import pytz -utc=pytz.UTC +utc = pytz.UTC + +DELAY = 1 # Delay to avoiding reach of Github API limit -DELAY=1 # Delay to avoiding reach of Github API limit def parse_args(): parser = argparse.ArgumentParser() - parser.add_argument('--token', type=str, required=True, - dest='token', - help='file with github token') - parser.add_argument('--repos', type=str, required=True, - dest='repos', - help='csv file w/repos list') - parser.add_argument('--force', action='store_true', required=False, - dest='force', - help='force rewrite issues') + parser.add_argument( + "--token", type=str, required=True, dest="token", help="file with github token" + ) + parser.add_argument( + "--repos", type=str, required=True, dest="repos", help="csv file w/repos list" + ) + parser.add_argument( + "--force", + action="store_true", + required=False, + dest="force", + help="force rewrite issues", + ) results = parser.parse_args() return results @@ -47,79 +52,92 @@ def get_token(filename): def get_repos(filename): repos = [] with open(filename) as file: - reader = csv.reader(file, delimiter=';', quotechar='|') - next(reader, None) + reader = csv.DictReader(file, delimiter=";", quotechar="|") print("REPOS:") for row in reader: - repos.append((row[:1][0], int(row[2]))) + repos.append( + (row["repo_name"], bool(int(row["archived"])), bool(int(row["has_issues"]))) + ) return repos -def get_issue_info(issue): +def 
get_issue_info(issue: Issue.Issue): return { - 'id': issue.id, - 'title': issue.title, - 'assignees': [assignee.login for assignee in issue.assignees], - 'created_at': str(issue.created_at), - 'labels': [label.name for label in issue.get_labels()], - 'state': issue.state, - 'user': issue.user.login + "id": issue.id, + "title": issue.title, + "assignees": [assignee.login for assignee in issue.assignees], + "created_at": issue.created_at.strftime(r"%d.%m.%y %H:%M:%S"), + "labels": [label.name for label in issue.get_labels()], + "state": issue.state, + "user": issue.user.login, + "body": issue.body, + "is_pr": bool(issue.pull_request) } def get_issues_info(repo): - return repo.get_issues(state='all') + return repo.get_issues(state="all") -if __name__ == '__main__': +if __name__ == "__main__": args = parse_args() - g = Github(get_token(args.token)) - org_name = args.repos.split('.')[0] + g = Github(auth=Auth.Token(get_token(args.token))) + org_name = args.repos.split(".")[0] checked_repos = [] if not path.exists(org_name): os.mkdir(org_name) else: checked_repos = get_checked_repos(org_name) repos = get_repos(args.repos) - i=0 - for repo_item in repos: - reponame = repo_item[0] - issues_count = repo_item[1] - print(f"Processing {reponame} with {issues_count} issues {i}/{len(repos)}") - i=i+1 - if issues_count == 0: + + for i, repo_item in enumerate(repos, start=1): + reponame, is_archived, has_issues = repo_item + print( + f"Processing {i}/{len(repos)}: {reponame}, archived = {is_archived}, has_issues = {has_issues}" + ) + if not has_issues: print(f"Skipping {reponame} (zero issues)...") continue - - if (not args.force) and (reponame in checked_repos): + elif is_archived: + print(f"Skipping {reponame} (archived repo)...") + continue + elif (not args.force) and (reponame in checked_repos): print(f"Skipping {reponame} (backup exists)...") continue - sleep(DELAY) while True: try: full_reponame = f"{org_name}/{reponame}" - print('Recieving data for 
{}'.format(full_reponame)) + print("Recieving data for {}".format(full_reponame)) repo = g.get_repo(full_reponame) - file_name = '{}/{}.issues.json'.format(org_name, reponame.replace('/', '--')) + file_name = "{}/{}.issues.json".format( + org_name, reponame.replace("/", "--") + ) if path.exists(file_name): repo_updated_at = repo.updated_at.replace(tzinfo=utc) - file_updated = datetime.fromtimestamp(path.getmtime(file_name)).replace(tzinfo=utc) - print(f"Repo {full_reponame} updated at {repo_updated_at}, file updated at {file_updated}") + file_updated = datetime.fromtimestamp( + path.getmtime(file_name) + ).replace(tzinfo=utc) + print( + f"Repo {full_reponame} updated at {repo_updated_at}, file updated at {file_updated}" + ) if repo_updated_at < file_updated: - print(f"Repo {full_reponame} does not have new changes, skipping") + print( + f"Repo {full_reponame} does not have new changes, skipping" + ) break else: print(f"Repo {full_reponame} has new changes, need to backup") else: print(f"File for repo {full_reponame} does not exist") - sleep(DELAY) issues = get_issues_info(repo) issues_info = [] for issue in issues: + if not issue.pull_request: + # skip PR, that also in issues + issues_info.append(get_issue_info(issue)) sleep(DELAY) - issues_info.append(get_issue_info(issue)) - with open(file_name, 'w') as file: + with open(file_name, "w") as file: dump(issues_info, file, ensure_ascii=False, indent=3) break @@ -127,5 +145,6 @@ def get_issues_info(repo): ## Sleep 1h print("Got exception, will wait for 1h and continue") print(e) - sleep(60*61) + sleep(60 * 61) continue + sleep(DELAY) From 3c0fa021f32c0e741ae41e3d67100cac1c390c4e Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Sat, 4 Oct 2025 10:13:53 +0300 Subject: [PATCH 04/14] update main.sh (results of prev commits) --- main.sh | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/main.sh b/main.sh index 2380266..03c9b54 100644 --- a/main.sh +++ b/main.sh @@ -2,11 +2,10 @@ # Requirements # 
./token - contains github token -# ./username - contains github username (token owner) # ./orgs - orgs list (separated with newline) echo "Running export_org_repos.py" -python3 ./export_org_repos.py --token ./token --username `cat ./username` --orgs ./orgs +python3 ./export_org_repos.py --token ./token --orgs ./orgs echo "Cloning repos" @@ -22,7 +21,7 @@ while IFS= read -r org; do ../wiki_saver/save_wiki.sh ${org}_list.txt echo "Backing up issues of $org" - python3.8 ./issues_backup.py --token token --repos $org.csv --force + python3 ./issues_backup.py --token token --repos $org.csv --force done <./orgs read -n 1 -s -r -p "Press any key to continue" From 246a58228821b06cdc30e5eb3dc33d935db19f77 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Sat, 4 Oct 2025 10:23:08 +0300 Subject: [PATCH 05/14] add BACKUP_DIR for cloning scripts --- clone_repos.sh | 16 ++++++++++------ main.sh | 4 +++- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/clone_repos.sh b/clone_repos.sh index 94b0dcd..9e40b1e 100755 --- a/clone_repos.sh +++ b/clone_repos.sh @@ -1,15 +1,18 @@ #!/bin/bash # Usage: bash ./clone_repos.sh file.csv CubitCodeReview -# +# [backup_dir=..] INPUT=${1:-"file.csv"} ORG=${2:-"org"} +BACKUP_DIR=${3:-".."} +BACKUP_ORG_DIR=$BACKUP_DIR/$ORG OLDIFS=$IFS IFS=';' + if [ ! -d "$ORG" ]; then - mkdir -p $ORG + mkdir -p $BACKUP_ORG_DIR fi dir=`pwd` @@ -28,11 +31,12 @@ while read name archived has_issues has_wiki is_private; do continue # skip archived repo fi LINK="git@github.com:$ORG/$name.git" - if [ ! -d "$ORG/$name" ]; then - git clone $LINK $ORG/$name + $BACKUP_REPO_DIR=$BACKUP_ORG_DIR/$name + if [ ! -d "$BACKUP_REPO_DIR" ]; then + git clone $LINK $BACKUP_REPO_DIR else - echo "REPO $name (`pwd`/$ORG/$name) EXISTS. FETCHING." - cd $ORG/$name + echo "REPO $name ($BACKUP_REPO_DIR) EXISTS. FETCHING." 
+ cd $BACKUP_REPO_DIR git fetch -a cd $dir fi diff --git a/main.sh b/main.sh index 03c9b54..bc2a7fb 100644 --- a/main.sh +++ b/main.sh @@ -3,17 +3,19 @@ # Requirements # ./token - contains github token # ./orgs - orgs list (separated with newline) +# BACKUP_DIR: optional. default = ".." echo "Running export_org_repos.py" python3 ./export_org_repos.py --token ./token --orgs ./orgs +BACKUP_DIR=${1:-".."} echo "Cloning repos" while IFS= read -r org; do echo "Processing $org" echo "Cloning repos of $org" - ./clone_repos.sh $org.csv $org + ./clone_repos.sh $org.csv $org $BACKUP_DIR echo "Cloning wikis of $org" From c5cea34d9a9b6ef96abca6b07247a0a647430386 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Sat, 4 Oct 2025 11:21:40 +0300 Subject: [PATCH 06/14] add wiki saver --- main.sh | 4 ++-- wiki_saver/csv_to_wiki_list.sh | 11 +++++++++++ wiki_saver/save_wiki.sh | 13 +++++++++++++ 3 files changed, 26 insertions(+), 2 deletions(-) mode change 100644 => 100755 main.sh create mode 100755 wiki_saver/csv_to_wiki_list.sh create mode 100755 wiki_saver/save_wiki.sh diff --git a/main.sh b/main.sh old mode 100644 new mode 100755 index bc2a7fb..b001f94 --- a/main.sh +++ b/main.sh @@ -19,8 +19,8 @@ while IFS= read -r org; do echo "Cloning wikis of $org" - ../wiki_saver/csv_to_plain_list.sh $org.csv ${org}_list.txt ${org} - ../wiki_saver/save_wiki.sh ${org}_list.txt + ./wiki_saver/csv_to_wiki_list.sh $org.csv ${org}_list.txt ${org} + ./wiki_saver/save_wiki.sh ${org}_list.txt echo "Backing up issues of $org" python3 ./issues_backup.py --token token --repos $org.csv --force diff --git a/wiki_saver/csv_to_wiki_list.sh b/wiki_saver/csv_to_wiki_list.sh new file mode 100755 index 0000000..0147448 --- /dev/null +++ b/wiki_saver/csv_to_wiki_list.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +# Transforms CSV list of repos (w/has_wiki flag) to plain list in format (only repos w/has_wiki=1): +# org/repo + + +src=${1} # Source file (csv) +dst=${2} # Destination file (plain text) +org=${3} # 
Organization + +tail -n +2 "${src}" | awk -F';' '$4 == "1" {print "'"${org}"'/"$1}' > ${dst} diff --git a/wiki_saver/save_wiki.sh b/wiki_saver/save_wiki.sh new file mode 100755 index 0000000..d3393f3 --- /dev/null +++ b/wiki_saver/save_wiki.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +repo_list=${1} + +while read line; +do + echo $line; + org=$(dirname $line) + mkdir -p ../wikis_${org} + git clone git@github.com:${line}.wiki.git ../wikis_${line} + echo "__________________________________________" +done < ${repo_list} + From 40e0cd8551eb8b57a3635e550c203a9e709e5970 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Sat, 4 Oct 2025 11:22:23 +0300 Subject: [PATCH 07/14] add archived clone logic --- clone_repos.sh | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/clone_repos.sh b/clone_repos.sh index 9e40b1e..c0ecb5f 100755 --- a/clone_repos.sh +++ b/clone_repos.sh @@ -1,11 +1,13 @@ #!/bin/bash # Usage: bash ./clone_repos.sh file.csv CubitCodeReview -# [backup_dir=..] +# +# clone_archived: "1" (clone only archived), "1" (clone only not archived) or "2" (clone all) INPUT=${1:-"file.csv"} ORG=${2:-"org"} BACKUP_DIR=${3:-".."} +ARCHIVE_CLONE=${4:-"0"} BACKUP_ORG_DIR=$BACKUP_DIR/$ORG OLDIFS=$IFS IFS=';' @@ -27,11 +29,13 @@ while read name archived has_issues has_wiki is_private; do i=0 continue # skip column's name fi - if [ "$archived" == 'True' ]; then + if [ "$archived" == "$ARCHIVE_CLONE" ]; then + echo "REPO $name (skip by ARCHIVE_CLONE setting). SKIPPING." + echo "__________________________________________" continue # skip archived repo fi LINK="git@github.com:$ORG/$name.git" - $BACKUP_REPO_DIR=$BACKUP_ORG_DIR/$name + BACKUP_REPO_DIR=$BACKUP_ORG_DIR/$name if [ ! 
-d "$BACKUP_REPO_DIR" ]; then git clone $LINK $BACKUP_REPO_DIR else From 460a8a00cb2959eabac7a372f67562b8e1d4cbee Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Sat, 4 Oct 2025 13:20:20 +0300 Subject: [PATCH 08/14] unify get_github_client for modules --- export_org_repos.py | 12 +++--------- issues_backup.py | 13 ++++--------- utils.py | 11 +++++++++++ 3 files changed, 18 insertions(+), 18 deletions(-) create mode 100644 utils.py diff --git a/export_org_repos.py b/export_org_repos.py index 1bd73a0..bd90ac6 100644 --- a/export_org_repos.py +++ b/export_org_repos.py @@ -1,11 +1,11 @@ #!/bin/python3 # usage: python3 export_org_repos.py --token --orgs import argparse -from github import Github, Auth from github.Repository import Repository import csv from json import dump as json_dump from time import sleep +from utils import get_github_client def parse_args(): @@ -23,12 +23,6 @@ def parse_args(): return results -def get_token(filename): - with open(filename) as file: - token = file.readline().strip() - return token - - def get_orgs(filename): with open(filename) as file: orgs = (org.strip() for org in file.readlines() if org.strip()) @@ -48,7 +42,7 @@ def get_repo_info(repo: Repository, verbose=False): "is_private": int(repo.private), "archived": int(repo.archived), "has_wiki": int(repo.has_wiki), - "has_issues": int(repo.has_issues) + "has_issues": int(repo.has_issues), } if verbose: users = "" @@ -83,7 +77,7 @@ def get_repo_info(repo: Repository, verbose=False): if __name__ == "__main__": args = parse_args() - g = Github(auth=Auth.Token(get_token(args.token))) + g = get_github_client(args.token) orgs_data = {} diff --git a/issues_backup.py b/issues_backup.py index 4246fbf..0637f4c 100644 --- a/issues_backup.py +++ b/issues_backup.py @@ -2,15 +2,16 @@ # usage: python3 issues_backup.py --token --repos import os -from github import Auth, Github, Issue +from github import Issue import argparse import csv -from json import dump, load +from json import dump import 
os.path as path import glob from time import sleep from datetime import datetime import pytz +from utils import get_github_client utc = pytz.UTC @@ -43,12 +44,6 @@ def get_checked_repos(path): return res -def get_token(filename): - with open(filename) as file: - token = file.readline().strip() - return token - - def get_repos(filename): repos = [] with open(filename) as file: @@ -81,7 +76,7 @@ def get_issues_info(repo): if __name__ == "__main__": args = parse_args() - g = Github(auth=Auth.Token(get_token(args.token))) + g = get_github_client(args.token) org_name = args.repos.split(".")[0] checked_repos = [] if not path.exists(org_name): diff --git a/utils.py b/utils.py new file mode 100644 index 0000000..e645c9f --- /dev/null +++ b/utils.py @@ -0,0 +1,11 @@ +from github import Github, Auth + + +def get_token(filename): + with open(filename) as file: + token = file.readline().strip() + return token + + +def get_github_client(token_filepath): + return Github(auth=Auth.Token(get_token(token_filepath))) \ No newline at end of file From 3eee588b84ba638704ea2ef909b08565a4aabf16 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Sat, 4 Oct 2025 18:27:31 +0300 Subject: [PATCH 09/14] fix dict updating (export_org_repos) --- export_org_repos.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/export_org_repos.py b/export_org_repos.py index bd90ac6..cec0e8b 100644 --- a/export_org_repos.py +++ b/export_org_repos.py @@ -62,7 +62,7 @@ def get_repo_info(repo: Repository, verbose=False): issues_count = sum(not issue.pull_request for issue in all_issues) pr_count = len(all_issues) - issues_count - info.extend( + info.update( { "last_pushed_at": repo.pushed_at.strftime(r"%d.%m.%y %H:%M:%S"), "size": repo.size, From b777669394423165bfaa457a5a487324de7aeba2 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Sat, 4 Oct 2025 20:06:59 +0300 Subject: [PATCH 10/14] add utils and todo --- export_org_repos.py | 10 ++-------- issues_backup.py | 2 +- utils.py | 7 ++++++- 3 
files changed, 9 insertions(+), 10 deletions(-) diff --git a/export_org_repos.py b/export_org_repos.py index cec0e8b..65e90c5 100644 --- a/export_org_repos.py +++ b/export_org_repos.py @@ -5,7 +5,7 @@ import csv from json import dump as json_dump from time import sleep -from utils import get_github_client +from utils import get_github_client, get_lines_from_file def parse_args(): @@ -23,12 +23,6 @@ def parse_args(): return results -def get_orgs(filename): - with open(filename) as file: - orgs = (org.strip() for org in file.readlines() if org.strip()) - return orgs - - def get_writer_rows(verbose=False): headers = "repo_name,archived,has_issues,has_wiki,is_private,last_pushed_at,size,pr_count,issues_count,users_count,permissions".split( "," @@ -81,7 +75,7 @@ def get_repo_info(repo: Repository, verbose=False): orgs_data = {} - for org_name in get_orgs(args.orgs): + for org_name in get_lines_from_file(args.orgs): print(f"get org [{org_name}]") orgs_data[org_name] = [] org = g.get_organization(org_name) diff --git a/issues_backup.py b/issues_backup.py index 0637f4c..d185753 100644 --- a/issues_backup.py +++ b/issues_backup.py @@ -131,7 +131,7 @@ def get_issues_info(repo): # skip PR, that also in issues issues_info.append(get_issue_info(issue)) sleep(DELAY) - + # TODO: check, that issues_info isn't empty with open(file_name, "w") as file: dump(issues_info, file, ensure_ascii=False, indent=3) diff --git a/utils.py b/utils.py index e645c9f..bed2b22 100644 --- a/utils.py +++ b/utils.py @@ -8,4 +8,9 @@ def get_token(filename): def get_github_client(token_filepath): - return Github(auth=Auth.Token(get_token(token_filepath))) \ No newline at end of file + return Github(auth=Auth.Token(get_token(token_filepath))) + + +def get_lines_from_file(filename): + with open(filename) as file: + return (line.strip() for line in file.readlines() if line.strip()) From ba90e03c1a71b881294e186f33b3517ec56e0954 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Sun, 5 Oct 2025 14:04:57 +0300 
Subject: [PATCH 11/14] update default value for ARCHIVE_CLONE (clone_repos.sh) --- clone_repos.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clone_repos.sh b/clone_repos.sh index c0ecb5f..fbe1853 100755 --- a/clone_repos.sh +++ b/clone_repos.sh @@ -2,12 +2,12 @@ # Usage: bash ./clone_repos.sh file.csv CubitCodeReview # -# clone_archived: "1" (clone only archived), "1" (clone only not archived) or "2" (clone all) +# clone_archived: "0" (clone only archived), "1" (clone only not archived) or "2" (clone all) INPUT=${1:-"file.csv"} ORG=${2:-"org"} BACKUP_DIR=${3:-".."} -ARCHIVE_CLONE=${4:-"0"} +ARCHIVE_CLONE=${4:-"1"} BACKUP_ORG_DIR=$BACKUP_DIR/$ORG OLDIFS=$IFS IFS=';' From ecc3b08ff8ad2a961600f26214d613a25940401d Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Sun, 5 Oct 2025 19:41:59 +0300 Subject: [PATCH 12/14] export_org_repos: issue_count - count only real issues (not PR) --- export_org_repos.py | 22 ++++++++++------------ issues_backup.py | 8 ++++---- 2 files changed, 14 insertions(+), 16 deletions(-) diff --git a/export_org_repos.py b/export_org_repos.py index 65e90c5..1757602 100644 --- a/export_org_repos.py +++ b/export_org_repos.py @@ -24,19 +24,27 @@ def parse_args(): def get_writer_rows(verbose=False): - headers = "repo_name,archived,has_issues,has_wiki,is_private,last_pushed_at,size,pr_count,issues_count,users_count,permissions".split( + headers = "repo_name,archived,issues_count,has_wiki,is_private,last_pushed_at,size,pr_count,users_count,permissions".split( "," ) return headers if verbose else headers[:5] def get_repo_info(repo: Repository, verbose=False): + pr_count, issues_count = 0, 0 + + if repo.has_issues: + all_issues = tuple( + repo.get_issues(state="all") + ) # TODO: use totalCount after release + issues_count = sum(not issue.pull_request for issue in all_issues) + pr_count = len(all_issues) - issues_count info = { "repo_name": repo.name, "is_private": int(repo.private), "archived": int(repo.archived), "has_wiki": 
int(repo.has_wiki), - "has_issues": int(repo.has_issues), + "issues_count": issues_count, } if verbose: users = "" @@ -47,21 +55,11 @@ def get_repo_info(repo: Repository, verbose=False): users += f"{u.login}:{str(u.permissions)}," except Exception as exc: print(f"Error getting collaborators: {exc}") - - pr_count, issues_count = 0, 0 - if repo.has_issues: - all_issues = list( - repo.get_issues(state="all") - ) # TODO: use totalCount after release - issues_count = sum(not issue.pull_request for issue in all_issues) - pr_count = len(all_issues) - issues_count - info.update( { "last_pushed_at": repo.pushed_at.strftime(r"%d.%m.%y %H:%M:%S"), "size": repo.size, "pr_count": pr_count, - "issues_count": issues_count, "users_count": users_count, "permissions": users, } diff --git a/issues_backup.py b/issues_backup.py index d185753..a40b9a2 100644 --- a/issues_backup.py +++ b/issues_backup.py @@ -51,7 +51,7 @@ def get_repos(filename): print("REPOS:") for row in reader: repos.append( - (row["repo_name"], bool(int(row["archived"])), bool(int(row["has_issues"]))) + (row["repo_name"], bool(int(row["archived"])), bool(int(row["issues_count"]))) ) return repos @@ -86,11 +86,11 @@ def get_issues_info(repo): repos = get_repos(args.repos) for i, repo_item in enumerate(repos, start=1): - reponame, is_archived, has_issues = repo_item + reponame, is_archived, issues_count = repo_item print( - f"Processing {i}/{len(repos)}: {reponame}, archived = {is_archived}, has_issues = {has_issues}" + f"Processing {i}/{len(repos)}: {reponame}, archived = {is_archived}, issues_count = {issues_count}" ) - if not has_issues: + if not issues_count: print(f"Skipping {reponame} (zero issues)...") continue elif is_archived: From 104b100946f649199b4db0baf6adc3efd33a8971 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Tue, 7 Oct 2025 11:01:02 +0300 Subject: [PATCH 13/14] convert issues_count to int (issues_backup.py) --- issues_backup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/issues_backup.py b/issues_backup.py index a40b9a2..7785313 100644 --- a/issues_backup.py +++ b/issues_backup.py @@ -51,7 +51,7 @@ def get_repos(filename): print("REPOS:") for row in reader: repos.append( - (row["repo_name"], bool(int(row["archived"])), bool(int(row["issues_count"]))) + (row["repo_name"], bool(int(row["archived"])), int(row["issues_count"])) ) return repos From aecee0bedac3b22d6cbb93286ce8e2df4a637932 Mon Sep 17 00:00:00 2001 From: Dmitry Ivanov Date: Tue, 7 Oct 2025 20:27:33 +0300 Subject: [PATCH 14/14] update wiki saver (check local repo existence and fetch) --- wiki_saver/save_wiki.sh | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/wiki_saver/save_wiki.sh b/wiki_saver/save_wiki.sh index d3393f3..d18ba30 100755 --- a/wiki_saver/save_wiki.sh +++ b/wiki_saver/save_wiki.sh @@ -7,7 +7,17 @@ do echo $line; org=$(dirname $line) mkdir -p ../wikis_${org} - git clone git@github.com:${line}.wiki.git ../wikis_${line} + + LINK="git@github.com:${line}.wiki.git" + BACKUP_REPO_DIR=../wikis_${line} + if [ ! -d "$BACKUP_REPO_DIR" ]; then + git clone $LINK $BACKUP_REPO_DIR + else + echo "REPO WIKI $name ($BACKUP_REPO_DIR) EXISTS. FETCHING." + cd $BACKUP_REPO_DIR + git fetch -a + cd - + fi echo "__________________________________________" done < ${repo_list}