From 941f1b5088dfa07cc55c02a24dd3697911dc0e75 Mon Sep 17 00:00:00 2001 From: HR Date: Sun, 2 Jun 2019 14:57:29 +0100 Subject: [PATCH 01/18] update deps --- Pipfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Pipfile b/Pipfile index 6257ab1..9615dd1 100644 --- a/Pipfile +++ b/Pipfile @@ -6,4 +6,4 @@ name = "pypi" [dev-packages] [packages] -requests = "*" \ No newline at end of file +requests = ">=2.20.0" \ No newline at end of file From 35aa51565607821aa2a42c1155c99b5ce78847d1 Mon Sep 17 00:00:00 2001 From: HR Date: Sun, 2 Jun 2019 17:15:15 +0100 Subject: [PATCH 02/18] Implement cloning for default ref (master) --- Pipfile.lock | 44 +++++++++-------------- clone.py | 98 ++++++++++++++++++++++++---------------------------- 2 files changed, 62 insertions(+), 80 deletions(-) diff --git a/Pipfile.lock b/Pipfile.lock index 44dc379..022795c 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,20 +1,7 @@ { "_meta": { "hash": { - "sha256": "a0e63f8a0d1e3df046dc19b3ffbaaedfa151afc12af5a5b960ae7393952f8679" - }, - "host-environment-markers": { - "implementation_name": "cpython", - "implementation_version": "0", - "os_name": "posix", - "platform_machine": "x86_64", - "platform_python_implementation": "CPython", - "platform_release": "17.0.0", - "platform_system": "Darwin", - "platform_version": "Darwin Kernel Version 17.0.0: Thu Aug 24 21:48:19 PDT 2017; root:xnu-4570.1.46~2/RELEASE_X86_64", - "python_full_version": "2.7.14", - "python_version": "2.7", - "sys_platform": "darwin" + "sha256": "1ea6b5746645f026e205a0019135790a26bd72a2dd8abc3b4ad4fe0133977711" }, "pipfile-spec": 6, "requires": {}, @@ -29,38 +16,39 @@ "default": { "certifi": { "hashes": [ - "sha256:54a07c09c586b0e4c619f02a5e94e36619da8e2b053e20f594348c0611803704", - "sha256:40523d2efb60523e113b44602298f0960e900388cf3bb6043f645cf57ea9e3f5" + "sha256:59b7658e26ca9c7339e00f8f4636cdfe59d34fa37b9b04f6f9e9926b3cece1a5", + "sha256:b26104d6835d1f5e49452a26eb2ff87fe7090b89dfcaee5ea2212697e1e1d7ae" ], - "version": "==2017.7.27.1" + "version": "==2019.3.9" }, "chardet": { "hashes": [ - "sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691", - "sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae" + "sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae", + "sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691" ], "version": "==3.0.4" }, "idna": { "hashes": [ - "sha256:8c7309c718f94b3a625cb648ace320157ad16ff131ae0af362c9f21b80ef6ec4", - "sha256:2c6a5de3089009e3da7c5dde64a141dbc8551d5b7f6cf4ed7c2568d0cc520a8f" + "sha256:c357b3f628cf53ae2c4c05627ecc484553142ca23264e593d327bcde5e9c3407", + "sha256:ea8b7f6188e6fa117537c3df7da9fc686d485087abf6ac197f9c46432f7e4a3c" ], - "version": "==2.6" + "version": "==2.8" }, "requests": { "hashes": [ - "sha256:6a1b267aa90cac58ac3a765d067950e7dbbf75b1da07e895d1f594193a40a38b", - "sha256:9c443e7324ba5b85070c4a818ade28bfabedf16ea10206da1132edaa6dda237e" + "sha256:11e007a8a2aa0323f5a921e9e6a2d7e4e67d9877e85773fba9ba6419025cbeb4", + "sha256:9cf5292fcd0f598c671cfc1e0d7d1a7f13bb8085e9a590f48c010551dc6c4b31" ], - "version": "==2.18.4" + "index": "pypi", + "version": "==2.22.0" }, "urllib3": { "hashes": [ - "sha256:06330f386d6e4b195fbfc736b297f58c5a892e4440e54d294d7004e3a9bbea1b", - "sha256:cc44da8e1145637334317feebd728bd869a35285b93cbb4cca2577da7e62db4f" + "sha256:b246607a25ac80bedac05c6f282e3cdaf3afb65420fd024ac94435cabe6e18d1", + "sha256:dbe59173209418ae49d485b87d1681aefa36252ee85884c31346debd19463232" ], - "version": "==1.22" + "version": "==1.25.3" } }, "develop": {} diff --git a/clone.py b/clone.py index b86332f..48f32ec 100644 --- a/clone.py +++ b/clone.py @@ -2,93 +2,87 @@ import re import sys import os +import errno -recursive = True -base_url = 'https://api.github.com' -# /repos/:owner/:repo/git/trees/:sha?recursive=:bool -tree_endpoint = base_url + '/repos/{}/{}/git/trees/{}?recursive={}' -contents_endpoint = base_url + '/repos/{}/{}/contents' -commits_endpoint = base_url + '/repos/{}/{}/commits' -base_normalize_regex = re.compile(r'.*github\.com\/') +GH_API_BASE_URL = 'https://api.github.com' +GH_REPO_CONTENTS_ENDPOINT = GH_API_BASE_URL + '/repos/{}/{}/contents' +BASE_NORMALIZE_REGEX = re.compile(r'.*github\.com\/') def exit_with_m(m='An error occured'): - print m - sys.exit() - - -def joinp(*args): - '/'.join(args) - + print(m) + sys.exit(1) def mkdir_p(path): try: os.makedirs(path) - except OSError as exc: # Python >2.5 - if exc.errno == errno.EEXIST and os.path.isdir(path): + except OSError as err: # Python >2.5 + if err.errno == errno.EEXIST and os.path.isdir(path): pass else: raise - -def fetch_file(req_url, file_path): - r = requests.get(req_url, stream=True) +def clone_file(download_url, file_path): + """ + Clones the file at the download_url to the file_path + """ + print('Cloning file', file_path) + r = requests.get(download_url, stream=True) try: r.raise_for_status() except Exception as e: - exit_with_m('Failed fetching ' + req_url, e) + exit_with_m('Failed cloneing ' + download_url, e) with open(file_path, 'wb') as fd: - for chunk in req.iter_content(chunk_size=128): + for chunk in r.iter_content(chunk_size=128): fd.write(chunk) - -def fetch(base_url, path=None): +def clone(base_url, path=None): """ - Recursively fetch the repo metadata + Recursively clones the path """ - req_url = base_url if not path else joinp(base_url, path) - # Request + print('Cloning directory', path) + req_url = base_url if not path else os.path.join(base_url, path) + # Get path metadata r = requests.get(req_url) - try: r.raise_for_status() except Exception as e: - exit_with_m('Failed fetching repo metdata: ', e) - + exit_with_m('Failed fetching metadata of dir: ', e) repo_data = r.json() + # Create path locally + mkdir_p(path) + if isinstance(repo_data, list): - # Recursively fetch content + # Recursively clone content for item in repo_data: if item['type'] == 'dir': - # create dir and then fetch recursively - print 'Walking dir: %s' % item['path'] - path = joinp(path, item['path']) - fetch(joinp(base_url, path)) + # Fetch dir recursively + clone(base_url, item['path']) else: - # download it - # Ensure dir directory exists locally - mkdir_p(path) - print 'Fetching file: %s' % item['path'] + # Fetch the file + clone_file(item['download_url'], item['path']) -if len(sys.argv) > 1: +### +# Main +### +arg_len = len(sys.argv) +if arg_len >= 2: + # Github URL gh_url = sys.argv[1] + # Normalize & parse input + normal_gh_url = re.sub(BASE_NORMALIZE_REGEX, '', gh_url).replace('/tree', '') + gh_url_comps = normal_gh_url.split('/') + user, repo = gh_url_comps[:2] + branch = gh_url_comps[2] + path = os.path.join(*gh_url_comps[3:]) else: exit_with_m('Nothing to clone :(') -# Normalize & parse input -norm_gh_url = re.sub(base_normalize_regex, '', gh_url) -gh_url_comps = norm_gh_url.split('/') -user, repo = gh_url_comps[:2] -branch = gh_url_comps[3] -path = joinp(gh_url_comps[4:]) - - -api_req_url = contents_endpoint.format(user, repo) - -print "Fetching sub repo %s..." % (api_req_url) - -fetch(api_req_url, path) +api_req_url = GH_REPO_CONTENTS_ENDPOINT.format(user, repo) +print("Cloning into '%s'..." % path) +clone(api_req_url, path) +print("done.") \ No newline at end of file From e53b7ab6b8751a4646f6df50112d556cc7084559 Mon Sep 17 00:00:00 2001 From: HR Date: Sun, 2 Jun 2019 17:38:19 +0100 Subject: [PATCH 03/18] Add ref support --- clone.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/clone.py b/clone.py index 48f32ec..a25dad9 100644 --- a/clone.py +++ b/clone.py @@ -27,7 +27,6 @@ def clone_file(download_url, file_path): """ Clones the file at the download_url to the file_path """ - print('Cloning file', file_path) r = requests.get(download_url, stream=True) try: r.raise_for_status() @@ -38,14 +37,13 @@ def clone_file(download_url, file_path): for chunk in r.iter_content(chunk_size=128): fd.write(chunk) -def clone(base_url, path=None): +def clone(base_url, path=None, ref=None): """ Recursively clones the path """ - print('Cloning directory', path) req_url = base_url if not path else os.path.join(base_url, path) # Get path metadata - r = requests.get(req_url) + r = requests.get(req_url) if not ref else requests.get(req_url, params={'ref': ref}) try: r.raise_for_status() except Exception as e: @@ -60,10 +58,11 @@ def clone(base_url, path=None): for item in repo_data: if item['type'] == 'dir': # Fetch dir recursively - clone(base_url, item['path']) + clone(base_url, item['path'], ref) else: # Fetch the file clone_file(item['download_url'], item['path']) + print('Cloned', item['path']) ### @@ -77,12 +76,12 @@ def clone(base_url, path=None): normal_gh_url = re.sub(BASE_NORMALIZE_REGEX, '', gh_url).replace('/tree', '') gh_url_comps = normal_gh_url.split('/') user, repo = gh_url_comps[:2] - branch = gh_url_comps[2] + ref = gh_url_comps[2] path = os.path.join(*gh_url_comps[3:]) else: exit_with_m('Nothing to clone :(') api_req_url = GH_REPO_CONTENTS_ENDPOINT.format(user, repo) print("Cloning into '%s'..." % path) -clone(api_req_url, path) +clone(api_req_url, path, ref) print("done.") \ No newline at end of file From 82a4d669ec2be6c30832d028b588dbfe09122898 Mon Sep 17 00:00:00 2001 From: HR Date: Sun, 2 Jun 2019 19:59:49 +0100 Subject: [PATCH 04/18] Add OAuth token support, docs and better arg parsing via docopt --- Pipfile | 3 +- Pipfile.lock | 9 +++++- README.md | 37 +++++++++++++++++++++++-- clone.py => ghclone.py | 62 +++++++++++++++++++++++++++++++----------- 4 files changed, 91 insertions(+), 20 deletions(-) rename clone.py => ghclone.py (52%) diff --git a/Pipfile b/Pipfile index 9615dd1..d184e3b 100644 --- a/Pipfile +++ b/Pipfile @@ -6,4 +6,5 @@ name = "pypi" [dev-packages] [packages] -requests = ">=2.20.0" \ No newline at end of file +requests = ">=2.20.0" +docopt = "*" diff --git a/Pipfile.lock b/Pipfile.lock index 022795c..58e777a 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "1ea6b5746645f026e205a0019135790a26bd72a2dd8abc3b4ad4fe0133977711" + "sha256": "366c8eebaa31892208c95484ad7e21a06dd8c35edb39f4e20a5eff55a91db95c" }, "pipfile-spec": 6, "requires": {}, @@ -28,6 +28,13 @@ ], "version": "==3.0.4" }, + "docopt": { + "hashes": [ + "sha256:49b3a825280bd66b3aa83585ef59c4a8c82f2c8a522dbe754a8bc8d08c85c491" + ], + "index": "pypi", + "version": "==0.6.2" + }, "idna": { "hashes": [ "sha256:c357b3f628cf53ae2c4c05627ecc484553142ca23264e593d327bcde5e9c3407", diff --git a/README.md b/README.md index 633e751..25f9f73 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,35 @@ -# github-clone -Recursively clone a GitHub repo sub-dir +# GitHub clone +Git clone sub-directories of a GitHub repository (at any reference) without having to clone the entire repository. +Uses the GitHub API to recursively clone the sub-directories tree and files. + +# Rate limit +The GitHub API imposes a [rate limiting](https://developer.github.com/v3/#rate-limiting) of up to 60 requests per hour applies but can be increased to up to 5000 requests per hour using an _OAuth token_ (to get one see https://help.github.com/en/articles/creating-a-personal-access-token-for-the-command-line). + +# Private repositories +To clone private repositories you need to supply an _OAuth token_ for an account with access to the private repository (to get one see https://help.github.com/en/articles/creating-a-personal-access-token-for-the-command-line). + +# Installation + + +# Usage +``` +GitHub clone (git.io/ghclone) + +Usage: + ghclone.py [-t | --token=] + ghclone.py (-h | --help) + ghclone.py (-v | --version) + +Examples: + ghclone.py https://github.com/HR/Crypter/tree/master/app + ghclone.py https://github.com/HR/Crypter/tree/dev/app + ghclone.py https://github.com/HR/Crypter/tree/v3.1.0/build + ghclone.py https://github.com/HR/Crypter/tree/cbee54dd720bb8aaa3a2111fcec667ca5f700510/build + ghclone.py https://github.com/HR/Picturesque/tree/master/app/src -t li50d67757gm20556d53f08126215725a698560b + +Options: + -h --help Show this screen. + -v --version Show version. + -t --token= Set a GitHub OAuth token (see https://developer.github.com/v3/#rate-limiting). +``` + diff --git a/clone.py b/ghclone.py similarity index 52% rename from clone.py rename to ghclone.py index a25dad9..38bd25e 100644 --- a/clone.py +++ b/ghclone.py @@ -1,14 +1,40 @@ +""" +GitHub clone (git.io/ghclone) + +Usage: + ghclone.py [-t | --token=] + ghclone.py (-h | --help) + ghclone.py (-v | --version) + +Examples: + ghclone.py https://github.com/HR/Crypter/tree/master/app + ghclone.py https://github.com/HR/Crypter/tree/dev/app + ghclone.py https://github.com/HR/Crypter/tree/v3.1.0/build + ghclone.py https://github.com/HR/Crypter/tree/cbee54dd720bb8aaa3a2111fcec667ca5f700510/build + ghclone.py https://github.com/HR/Picturesque/tree/master/app/src -t li50d67757gm20556d53f08126215725a698560b + +Options: + -h --help Show this screen. + -v --version Show version. + -t --token= Set a GitHub OAuth token (see https://developer.github.com/v3/#rate-limiting). + +(C) 2019 Habib Rehman (git.io/HR) +""" import requests import re import sys import os import errno +from docopt import docopt - +VERSION = '1.0.0' GH_API_BASE_URL = 'https://api.github.com' GH_REPO_CONTENTS_ENDPOINT = GH_API_BASE_URL + '/repos/{}/{}/contents' BASE_NORMALIZE_REGEX = re.compile(r'.*github\.com\/') +verbose = False +req = requests.Session() +req.headers.update({'User-Agent': 'git.io/ghclone '+VERSION}) def exit_with_m(m='An error occured'): print(m) @@ -27,11 +53,11 @@ def clone_file(download_url, file_path): """ Clones the file at the download_url to the file_path """ - r = requests.get(download_url, stream=True) + r = req.get(download_url, stream=True) try: r.raise_for_status() except Exception as e: - exit_with_m('Failed cloneing ' + download_url, e) + exit_with_m('Failed cloning ' + download_url, e) with open(file_path, 'wb') as fd: for chunk in r.iter_content(chunk_size=128): @@ -43,11 +69,11 @@ def clone(base_url, path=None, ref=None): """ req_url = base_url if not path else os.path.join(base_url, path) # Get path metadata - r = requests.get(req_url) if not ref else requests.get(req_url, params={'ref': ref}) + r = req.get(req_url) if not ref else req.get(req_url, params={'ref': ref}) try: r.raise_for_status() except Exception as e: - exit_with_m('Failed fetching metadata of dir: ', e) + exit_with_m('Failed fetching metadata for ' + path, e) repo_data = r.json() # Create path locally @@ -68,20 +94,24 @@ def clone(base_url, path=None, ref=None): ### # Main ### -arg_len = len(sys.argv) -if arg_len >= 2: - # Github URL - gh_url = sys.argv[1] +if __name__ == '__main__': + arguments = docopt(__doc__) + if arguments['--version']: + print(VERSION) + sys.exit(0) + + # Get params + gh_url = arguments[''] + token = arguments['--token'] + if token: + req.headers.update({'Authorization': 'token '+token[0]}) # Normalize & parse input normal_gh_url = re.sub(BASE_NORMALIZE_REGEX, '', gh_url).replace('/tree', '') gh_url_comps = normal_gh_url.split('/') user, repo = gh_url_comps[:2] ref = gh_url_comps[2] path = os.path.join(*gh_url_comps[3:]) -else: - exit_with_m('Nothing to clone :(') - -api_req_url = GH_REPO_CONTENTS_ENDPOINT.format(user, repo) -print("Cloning into '%s'..." % path) -clone(api_req_url, path, ref) -print("done.") \ No newline at end of file + api_req_url = GH_REPO_CONTENTS_ENDPOINT.format(user, repo) + print("Cloning into '%s'..." % path) + clone(api_req_url, path, ref) + print("done.") \ No newline at end of file From 36e857abd1a6f15f9873ce73b93198d8b8fd5865 Mon Sep 17 00:00:00 2001 From: HR Date: Sun, 2 Jun 2019 20:27:43 +0100 Subject: [PATCH 05/18] Make pip compatible --- Pipfile | 10 --------- Pipfile.lock | 62 ---------------------------------------------------- README.md | 2 +- ghclone.py | 22 +++++++++++-------- setup.py | 57 +++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 71 insertions(+), 82 deletions(-) delete mode 100644 Pipfile delete mode 100644 Pipfile.lock create mode 100644 setup.py diff --git a/Pipfile b/Pipfile deleted file mode 100644 index d184e3b..0000000 --- a/Pipfile +++ /dev/null @@ -1,10 +0,0 @@ -[[source]] -url = "https://pypi.python.org/simple" -verify_ssl = true -name = "pypi" - -[dev-packages] - -[packages] -requests = ">=2.20.0" -docopt = "*" diff --git a/Pipfile.lock b/Pipfile.lock deleted file mode 100644 index 58e777a..0000000 --- a/Pipfile.lock +++ /dev/null @@ -1,62 +0,0 @@ -{ - "_meta": { - "hash": { - "sha256": "366c8eebaa31892208c95484ad7e21a06dd8c35edb39f4e20a5eff55a91db95c" - }, - "pipfile-spec": 6, - "requires": {}, - "sources": [ - { - "name": "pypi", - "url": "https://pypi.python.org/simple", - "verify_ssl": true - } - ] - }, - "default": { - "certifi": { - "hashes": [ - "sha256:59b7658e26ca9c7339e00f8f4636cdfe59d34fa37b9b04f6f9e9926b3cece1a5", - "sha256:b26104d6835d1f5e49452a26eb2ff87fe7090b89dfcaee5ea2212697e1e1d7ae" - ], - "version": "==2019.3.9" - }, - "chardet": { - "hashes": [ - "sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae", - "sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691" - ], - "version": "==3.0.4" - }, - "docopt": { - "hashes": [ - "sha256:49b3a825280bd66b3aa83585ef59c4a8c82f2c8a522dbe754a8bc8d08c85c491" - ], - "index": "pypi", - "version": "==0.6.2" - }, - "idna": { - "hashes": [ - "sha256:c357b3f628cf53ae2c4c05627ecc484553142ca23264e593d327bcde5e9c3407", - "sha256:ea8b7f6188e6fa117537c3df7da9fc686d485087abf6ac197f9c46432f7e4a3c" - ], - "version": "==2.8" - }, - "requests": { - "hashes": [ - "sha256:11e007a8a2aa0323f5a921e9e6a2d7e4e67d9877e85773fba9ba6419025cbeb4", - "sha256:9cf5292fcd0f598c671cfc1e0d7d1a7f13bb8085e9a590f48c010551dc6c4b31" - ], - "index": "pypi", - "version": "==2.22.0" - }, - "urllib3": { - "hashes": [ - "sha256:b246607a25ac80bedac05c6f282e3cdaf3afb65420fd024ac94435cabe6e18d1", - "sha256:dbe59173209418ae49d485b87d1681aefa36252ee85884c31346debd19463232" - ], - "version": "==1.25.3" - } - }, - "develop": {} -} diff --git a/README.md b/README.md index 25f9f73..e5c7468 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ # GitHub clone -Git clone sub-directories of a GitHub repository (at any reference) without having to clone the entire repository. +Git clone any sub-directories of any GitHub repository (at any reference) without having to clone the entire repository. Uses the GitHub API to recursively clone the sub-directories tree and files. # Rate limit diff --git a/ghclone.py b/ghclone.py index 38bd25e..25ab9ce 100644 --- a/ghclone.py +++ b/ghclone.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python +# -*- encoding: utf-8 """ GitHub clone (git.io/ghclone) @@ -22,23 +24,22 @@ """ import requests import re -import sys import os import errno +from sys import exit from docopt import docopt -VERSION = '1.0.0' +__version__ = '1.0.0' GH_API_BASE_URL = 'https://api.github.com' GH_REPO_CONTENTS_ENDPOINT = GH_API_BASE_URL + '/repos/{}/{}/contents' BASE_NORMALIZE_REGEX = re.compile(r'.*github\.com\/') -verbose = False req = requests.Session() -req.headers.update({'User-Agent': 'git.io/ghclone '+VERSION}) +req.headers.update({'User-Agent': 'git.io/ghclone '+__version__}) def exit_with_m(m='An error occured'): print(m) - sys.exit(1) + exit(1) def mkdir_p(path): try: @@ -94,11 +95,11 @@ def clone(base_url, path=None, ref=None): ### # Main ### -if __name__ == '__main__': +def main(): arguments = docopt(__doc__) if arguments['--version']: - print(VERSION) - sys.exit(0) + print(__version__) + exit(0) # Get params gh_url = arguments[''] @@ -114,4 +115,7 @@ def clone(base_url, path=None, ref=None): api_req_url = GH_REPO_CONTENTS_ENDPOINT.format(user, repo) print("Cloning into '%s'..." % path) clone(api_req_url, path, ref) - print("done.") \ No newline at end of file + print("done.") + +if __name__ == '__main__': + main() diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..bc7a8d8 --- /dev/null +++ b/setup.py @@ -0,0 +1,57 @@ +#!/usr/bin/env python +# -*- encoding: utf-8 -*- + +import codecs +import os +import re +from setuptools import find_packages, setup + + +version_regex = r'__version__ = ["\']([^"\']*)["\']' +with open('ghclone.py',) as f: + text = f.read() + match = re.search(version_regex, text) + + if match: + version = match.group(1) + else: + raise RuntimeError("No version number found!") + + +def local_file(name): + return os.path.relpath(os.path.join(os.path.dirname(__file__), name)) + + +README = local_file('README.md') +long_description = codecs.open(README, encoding='utf-8').read() + + +setup( + name='ghclone', + version=version, + description='A script for cloning any sub-directories of any GitHub repository', + long_description=long_description, + url='https://github.com/HR/github-clone', + author='Habib Rehman', + author_email='h@rehman.email', + license='Apache 2.0', + classifiers=[ + 'Development Status :: 5 - Production/Stable', + 'Environment :: Console', + 'Intended Audience :: Developers', + 'License :: OSI Approved :: Apache Software License', + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.3', + 'Programming Language :: Python :: 3.4', + 'Programming Language :: Python :: 3.5', + ], + install_requires=[ + 'requests>=2.20.0', + 'docopt>=0.6.2', + ], + entry_points={ + 'console_scripts': [ + 'ghclone=ghclone:main', + ], + }, +) \ No newline at end of file From 82824f5c6d92fcaebfec765e369bc8fe8acbc316 Mon Sep 17 00:00:00 2001 From: HR Date: Sun, 2 Jun 2019 20:36:50 +0100 Subject: [PATCH 06/18] Add installation docs --- README.md | 26 +++++++++++++++++--------- ghclone.py | 16 ++++++++-------- 2 files changed, 25 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index e5c7468..bc7ae5b 100644 --- a/README.md +++ b/README.md @@ -9,23 +9,31 @@ The GitHub API imposes a [rate limiting](https://developer.github.com/v3/#rate-l To clone private repositories you need to supply an _OAuth token_ for an account with access to the private repository (to get one see https://help.github.com/en/articles/creating-a-personal-access-token-for-the-command-line). # Installation - +Install the script via the `pip`: +``` +pip install -e git+git://github.com/HR/github-clone#egg=ghclone +``` +or via `pipsi`: +``` +pipsi install -e git+git://github.com/HR/github-clone#egg=ghclone +``` +Uses Python 3.3+ # Usage ``` GitHub clone (git.io/ghclone) Usage: - ghclone.py [-t | --token=] - ghclone.py (-h | --help) - ghclone.py (-v | --version) + ghclone [-t | --token=] + ghclone (-h | --help) + ghclone (-v | --version) Examples: - ghclone.py https://github.com/HR/Crypter/tree/master/app - ghclone.py https://github.com/HR/Crypter/tree/dev/app - ghclone.py https://github.com/HR/Crypter/tree/v3.1.0/build - ghclone.py https://github.com/HR/Crypter/tree/cbee54dd720bb8aaa3a2111fcec667ca5f700510/build - ghclone.py https://github.com/HR/Picturesque/tree/master/app/src -t li50d67757gm20556d53f08126215725a698560b + ghclone https://github.com/HR/Crypter/tree/master/app + ghclone https://github.com/HR/Crypter/tree/dev/app + ghclone https://github.com/HR/Crypter/tree/v3.1.0/build + ghclone https://github.com/HR/Crypter/tree/cbee54dd720bb8aaa3a2111fcec667ca5f700510/build + ghclone https://github.com/HR/Picturesque/tree/master/app/src -t li50d67757gm20556d53f08126215725a698560b Options: -h --help Show this screen. diff --git a/ghclone.py b/ghclone.py index 25ab9ce..7e924c0 100644 --- a/ghclone.py +++ b/ghclone.py @@ -4,16 +4,16 @@ GitHub clone (git.io/ghclone) Usage: - ghclone.py [-t | --token=] - ghclone.py (-h | --help) - ghclone.py (-v | --version) + ghclone [-t | --token=] + ghclone (-h | --help) + ghclone (-v | --version) Examples: - ghclone.py https://github.com/HR/Crypter/tree/master/app - ghclone.py https://github.com/HR/Crypter/tree/dev/app - ghclone.py https://github.com/HR/Crypter/tree/v3.1.0/build - ghclone.py https://github.com/HR/Crypter/tree/cbee54dd720bb8aaa3a2111fcec667ca5f700510/build - ghclone.py https://github.com/HR/Picturesque/tree/master/app/src -t li50d67757gm20556d53f08126215725a698560b + ghclone https://github.com/HR/Crypter/tree/master/app + ghclone https://github.com/HR/Crypter/tree/dev/app + ghclone https://github.com/HR/Crypter/tree/v3.1.0/build + ghclone https://github.com/HR/Crypter/tree/cbee54dd720bb8aaa3a2111fcec667ca5f700510/build + ghclone https://github.com/HR/Picturesque/tree/master/app/src -t li50d67757gm20556d53f08126215725a698560b Options: -h --help Show this screen. From 0e0b9baacfca05b7c3dbd65f92d48afa868fe38c Mon Sep 17 00:00:00 2001 From: HR Date: Sun, 2 Jun 2019 20:37:52 +0100 Subject: [PATCH 07/18] cleanup styles --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index bc7ae5b..e2b8f5e 100644 --- a/README.md +++ b/README.md @@ -2,10 +2,10 @@ Git clone any sub-directories of any GitHub repository (at any reference) without having to clone the entire repository. Uses the GitHub API to recursively clone the sub-directories tree and files. -# Rate limit +## Rate limit The GitHub API imposes a [rate limiting](https://developer.github.com/v3/#rate-limiting) of up to 60 requests per hour applies but can be increased to up to 5000 requests per hour using an _OAuth token_ (to get one see https://help.github.com/en/articles/creating-a-personal-access-token-for-the-command-line). -# Private repositories +## Private repositories To clone private repositories you need to supply an _OAuth token_ for an account with access to the private repository (to get one see https://help.github.com/en/articles/creating-a-personal-access-token-for-the-command-line). # Installation From 567197c481d0efeece8c6d84bb853db0766f45c4 Mon Sep 17 00:00:00 2001 From: HR Date: Tue, 3 Sep 2019 13:03:04 +0100 Subject: [PATCH 08/18] implement entire repo cloning --- ghclone.py | 71 +++++++++++++++++++++++++++++++++++------------------- 1 file changed, 46 insertions(+), 25 deletions(-) diff --git a/ghclone.py b/ghclone.py index 7e924c0..c11414a 100644 --- a/ghclone.py +++ b/ghclone.py @@ -29,18 +29,21 @@ from sys import exit from docopt import docopt -__version__ = '1.0.0' +__version__ = '1.1.0' GH_API_BASE_URL = 'https://api.github.com' +GH_REPO_ENDPOINT = GH_API_BASE_URL + '/repos/{}/{}' GH_REPO_CONTENTS_ENDPOINT = GH_API_BASE_URL + '/repos/{}/{}/contents' BASE_NORMALIZE_REGEX = re.compile(r'.*github\.com\/') req = requests.Session() -req.headers.update({'User-Agent': 'git.io/ghclone '+__version__}) +req.headers.update({'User-Agent': 'git.io/ghclone ' + __version__}) + def exit_with_m(m='An error occured'): print(m) exit(1) + def mkdir_p(path): try: os.makedirs(path) @@ -50,6 +53,7 @@ def mkdir_p(path): else: raise + def clone_file(download_url, file_path): """ Clones the file at the download_url to the file_path @@ -58,38 +62,41 @@ def clone_file(download_url, file_path): try: r.raise_for_status() except Exception as e: - exit_with_m('Failed cloning ' + download_url, e) + exit_with_m('Failed to clone ' + download_url, e) with open(file_path, 'wb') as fd: for chunk in r.iter_content(chunk_size=128): fd.write(chunk) + def clone(base_url, path=None, ref=None): """ Recursively clones the path """ - req_url = base_url if not path else os.path.join(base_url, path) + if path: + req_url = os.path.join(base_url, path) + # Create path locally + mkdir_p(path) + else: + req_url = base_url + # Get path metadata r = req.get(req_url) if not ref else req.get(req_url, params={'ref': ref}) try: r.raise_for_status() except Exception as e: - exit_with_m('Failed fetching metadata for ' + path, e) + exit_with_m('Failed to fetch metadata for ' + path, e) repo_data = r.json() - # Create path locally - mkdir_p(path) - - if isinstance(repo_data, list): - # Recursively clone content - for item in repo_data: - if item['type'] == 'dir': - # Fetch dir recursively - clone(base_url, item['path'], ref) - else: - # Fetch the file - clone_file(item['download_url'], item['path']) - print('Cloned', item['path']) + # Recursively clone content + for item in repo_data: + if item['type'] == 'dir': + # Fetch dir recursively + clone(base_url, item['path'], ref) + else: + # Fetch the file + clone_file(item['download_url'], item['path']) + # print('Cloned', item['path']) ### @@ -105,17 +112,31 @@ def main(): gh_url = arguments[''] token = arguments['--token'] if token: - req.headers.update({'Authorization': 'token '+token[0]}) + req.headers.update({'Authorization': 'token ' + token[0]}) # Normalize & parse input - normal_gh_url = re.sub(BASE_NORMALIZE_REGEX, '', gh_url).replace('/tree', '') - gh_url_comps = normal_gh_url.split('/') - user, repo = gh_url_comps[:2] - ref = gh_url_comps[2] - path = os.path.join(*gh_url_comps[3:]) + normal_gh_url = re.sub(BASE_NORMALIZE_REGEX, '', + gh_url).replace('/tree', '') + gh_args = normal_gh_url.split('/') + user, repo = gh_args[:2] + + if len(gh_args) > 2: + # Clone subdirectory + ref = gh_args[2] + path = os.path.join(*gh_args[3:]) + print("Cloning into '%s'..." % path) + else: + # Clone entire repo + mkdir_p(repo) + os.chdir(repo) + ref = None + path = None + print("Cloning into '%s'..." % repo) + api_req_url = GH_REPO_CONTENTS_ENDPOINT.format(user, repo) - print("Cloning into '%s'..." % path) + clone(api_req_url, path, ref) print("done.") + if __name__ == '__main__': main() From cb15d94263051327eb89dcc0b06b2c8b695173e2 Mon Sep 17 00:00:00 2001 From: HR Date: Tue, 3 Sep 2019 13:52:57 +0100 Subject: [PATCH 09/18] Fix cloning of the entire directory tree --- ghclone.py | 40 +++++++++++++++++++++++----------------- 1 file changed, 23 insertions(+), 17 deletions(-) diff --git a/ghclone.py b/ghclone.py index c11414a..48f9054 100644 --- a/ghclone.py +++ b/ghclone.py @@ -31,7 +31,6 @@ __version__ = '1.1.0' GH_API_BASE_URL = 'https://api.github.com' -GH_REPO_ENDPOINT = GH_API_BASE_URL + '/repos/{}/{}' GH_REPO_CONTENTS_ENDPOINT = GH_API_BASE_URL + '/repos/{}/{}/contents' BASE_NORMALIZE_REGEX = re.compile(r'.*github\.com\/') @@ -69,16 +68,11 @@ def clone_file(download_url, file_path): fd.write(chunk) -def clone(base_url, path=None, ref=None): +def clone(base_url, rel_url=None, path=None, ref=None): """ Recursively clones the path """ - if path: - req_url = os.path.join(base_url, path) - # Create path locally - mkdir_p(path) - else: - req_url = base_url + req_url = os.path.join(base_url, rel_url) if rel_url else base_url # Get path metadata r = req.get(req_url) if not ref else req.get(req_url, params={'ref': ref}) @@ -92,13 +86,26 @@ def clone(base_url, path=None, ref=None): for item in repo_data: if item['type'] == 'dir': # Fetch dir recursively - clone(base_url, item['path'], ref) + clone(base_url, item['path'], path, ref) else: # Fetch the file - clone_file(item['download_url'], item['path']) + new_file_path = resolve_path(item['path'], path) + new_path = os.path.dirname(new_file_path) + # Create path locally + mkdir_p(new_path) + # Download the file + clone_file(item['download_url'], new_file_path) # print('Cloned', item['path']) +def resolve_path(path, dir): + index = path.find(dir) + if index is -1: + return os.path.join(dir, path) + else: + return path[index:] + + ### # Main ### @@ -122,19 +129,18 @@ def main(): if len(gh_args) > 2: # Clone subdirectory ref = gh_args[2] - path = os.path.join(*gh_args[3:]) - print("Cloning into '%s'..." % path) + rel_url = os.path.join(*gh_args[3:]) + path = gh_args[-1] else: # Clone entire repo - mkdir_p(repo) - os.chdir(repo) ref = None - path = None - print("Cloning into '%s'..." % repo) + rel_url = None + path = repo api_req_url = GH_REPO_CONTENTS_ENDPOINT.format(user, repo) - clone(api_req_url, path, ref) + print("Cloning into '%s'..." % path) + clone(api_req_url, rel_url, path, ref) print("done.") From 9d751dbb77ed715db2fcfb669c6fed598dd35f31 Mon Sep 17 00:00:00 2001 From: HR Date: Tue, 3 Sep 2019 15:55:15 +0100 Subject: [PATCH 10/18] cleanup --- ghclone/__init__.py | 0 ghclone/__main__.py | 4 ++++ ghclone.py => ghclone/ghclone.py | 0 setup.py | 5 +++-- 4 files changed, 7 insertions(+), 2 deletions(-) create mode 100644 ghclone/__init__.py create mode 100644 ghclone/__main__.py rename ghclone.py => ghclone/ghclone.py (100%) diff --git a/ghclone/__init__.py b/ghclone/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/ghclone/__main__.py b/ghclone/__main__.py new file mode 100644 index 0000000..d0b02f2 --- /dev/null +++ b/ghclone/__main__.py @@ -0,0 +1,4 @@ +# -*- coding: utf-8 -*- + +from .ghclone import main +main() diff --git a/ghclone.py b/ghclone/ghclone.py similarity index 100% rename from ghclone.py rename to ghclone/ghclone.py diff --git a/setup.py b/setup.py index bc7a8d8..1803e9a 100644 --- a/setup.py +++ b/setup.py @@ -8,7 +8,7 @@ version_regex = r'__version__ = ["\']([^"\']*)["\']' -with open('ghclone.py',) as f: +with open('ghclone/ghclone.py',) as f: text = f.read() match = re.search(version_regex, text) @@ -28,6 +28,7 @@ def local_file(name): setup( name='ghclone', + packages=['ghclone'], version=version, description='A script for cloning any sub-directories of any GitHub repository', long_description=long_description, @@ -51,7 +52,7 @@ def local_file(name): ], entry_points={ 'console_scripts': [ - 'ghclone=ghclone:main', + 'ghclone=ghclone.ghclone:main', ], }, ) \ No newline at end of file From 071bb995575bf94c0a707383ef2a3c0adb1ab888 Mon Sep 17 00:00:00 2001 From: HR Date: Tue, 3 Sep 2019 15:59:10 +0100 Subject: [PATCH 11/18] make ready for release --- README.md | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index e2b8f5e..36611ec 100644 --- a/README.md +++ b/README.md @@ -11,11 +11,11 @@ To clone private repositories you need to supply an _OAuth token_ for an account # Installation Install the script via the `pip`: ``` -pip install -e git+git://github.com/HR/github-clone#egg=ghclone +pip install git+git://github.com/HR/github-clone#egg=ghclone ``` or via `pipsi`: ``` -pipsi install -e git+git://github.com/HR/github-clone#egg=ghclone +pipsi install git+git://github.com/HR/github-clone#egg=ghclone ``` Uses Python 3.3+ @@ -40,4 +40,18 @@ Options: -v --version Show version. -t --token= Set a GitHub OAuth token (see https://developer.github.com/v3/#rate-limiting). ``` +# License +Copyright (C) 2019 Habib Rehman (https://git.io/HR) + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. From d02f14abe1a15d040ed3a3daf38afe2e41ce1486 Mon Sep 17 00:00:00 2001 From: HR Date: Tue, 10 Sep 2019 21:06:33 +0100 Subject: [PATCH 12/18] Make compatible with windows; fix #3 --- ghclone/ghclone.py | 39 +++++++++++++++++++++++---------------- 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/ghclone/ghclone.py b/ghclone/ghclone.py index 48f9054..49bcadc 100644 --- a/ghclone/ghclone.py +++ b/ghclone/ghclone.py @@ -29,7 +29,7 @@ from sys import exit from docopt import docopt -__version__ = '1.1.0' +__version__ = '1.2.0' GH_API_BASE_URL = 'https://api.github.com' GH_REPO_CONTENTS_ENDPOINT = GH_API_BASE_URL + '/repos/{}/{}/contents' BASE_NORMALIZE_REGEX = re.compile(r'.*github\.com\/') @@ -61,7 +61,7 @@ def clone_file(download_url, file_path): try: r.raise_for_status() except Exception as e: - exit_with_m('Failed to clone ' + download_url, e) + exit_with_m('Failed to clone ' + download_url) with open(file_path, 'wb') as fd: for chunk in r.iter_content(chunk_size=128): @@ -72,14 +72,14 @@ def clone(base_url, rel_url=None, path=None, ref=None): """ Recursively clones the path """ - req_url = os.path.join(base_url, rel_url) if rel_url else base_url + req_url = base_url + '/' + rel_url if rel_url else base_url # Get path metadata r = req.get(req_url) if not ref else req.get(req_url, params={'ref': ref}) try: r.raise_for_status() except Exception as e: - exit_with_m('Failed to fetch metadata for ' + path, e) + exit_with_m('Failed to fetch metadata for ' + path) repo_data = r.json() # Recursively clone content @@ -101,9 +101,9 @@ def clone(base_url, rel_url=None, path=None, ref=None): def resolve_path(path, dir): index = path.find(dir) if index is -1: - return os.path.join(dir, path) + return os.path.abspath(os.path.join(dir, path)) else: - return path[index:] + return os.path.abspath(path[index:]) ### @@ -121,21 +121,28 @@ def main(): if token: req.headers.update({'Authorization': 'token ' + token[0]}) # Normalize & parse input - normal_gh_url = re.sub(BASE_NORMALIZE_REGEX, '', - gh_url).replace('/tree', '') - gh_args = normal_gh_url.split('/') + normal_gh_url = re.sub(BASE_NORMALIZE_REGEX, '', gh_url) + gh_args = normal_gh_url.replace('/tree', '').split('/') + + if len(gh_args) < 2 or normal_gh_url == gh_url: + exit_with_m('Invalid GitHub URI') + user, repo = gh_args[:2] + ref = None + rel_url = None - if len(gh_args) > 2: - # Clone subdirectory + if len(gh_args) >= 2: + # Clone entire repo + path = repo + + if len(gh_args) >= 3: + # Clone entire repo at the branch ref = gh_args[2] + + if len(gh_args) >= 4: + # Clone subdirectory rel_url = os.path.join(*gh_args[3:]) path = gh_args[-1] - else: - # Clone entire repo - ref = None - rel_url = None - path = repo api_req_url = GH_REPO_CONTENTS_ENDPOINT.format(user, repo) From 042fb856f85f1a834f103897698c47e063d445c0 Mon Sep 17 00:00:00 2001 From: Habib Rehman Date: Mon, 2 Dec 2019 14:35:04 +0000 Subject: [PATCH 13/18] Clarify rate limiting --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 36611ec..5410549 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,9 @@ Git clone any sub-directories of any GitHub repository (at any reference) withou Uses the GitHub API to recursively clone the sub-directories tree and files. ## Rate limit -The GitHub API imposes a [rate limiting](https://developer.github.com/v3/#rate-limiting) of up to 60 requests per hour applies but can be increased to up to 5000 requests per hour using an _OAuth token_ (to get one see https://help.github.com/en/articles/creating-a-personal-access-token-for-the-command-line). +The GitHub API imposes a [rate limiting](https://developer.github.com/v3/#rate-limiting) of up to 60 requests per hour applies but can be increased to up to 5000 requests per hour using an _OAuth token_ (to get one see https://help.github.com/en/articles/creating-a-personal-access-token-for-the-command-line). + +GitHub clone makes an initial request to fetch repo metadata and then, a request for every subfolder in the repo. The request to download the files within the folders are not counted against the rate limit so in most cases (i.e. the folder/repo you're trying to clone has less than 60 subfolders) the rate limit should not be a problem. ## Private repositories To clone private repositories you need to supply an _OAuth token_ for an account with access to the private repository (to get one see https://help.github.com/en/articles/creating-a-personal-access-token-for-the-command-line). From f79ed9e70c0bc3647c31cbb569b89b4848f802a2 Mon Sep 17 00:00:00 2001 From: Habib Rehman Date: Mon, 2 Dec 2019 14:35:53 +0000 Subject: [PATCH 14/18] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 5410549..75f5bb1 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ Uses the GitHub API to recursively clone the sub-directories tree and files. ## Rate limit The GitHub API imposes a [rate limiting](https://developer.github.com/v3/#rate-limiting) of up to 60 requests per hour applies but can be increased to up to 5000 requests per hour using an _OAuth token_ (to get one see https://help.github.com/en/articles/creating-a-personal-access-token-for-the-command-line). -GitHub clone makes an initial request to fetch repo metadata and then, a request for every subfolder in the repo. The request to download the files within the folders are not counted against the rate limit so in most cases (i.e. the folder/repo you're trying to clone has less than 60 subfolders) the rate limit should not be a problem. +GitHub clone makes an initial request to fetch repo metadata and then, a request for every subfolder in the repo. The requests to download the files within the folders are not counted against the rate limit so in most cases (i.e. the folder/repo you're trying to clone has less than 60 subfolders) the rate limit should not be a problem. ## Private repositories To clone private repositories you need to supply an _OAuth token_ for an account with access to the private repository (to get one see https://help.github.com/en/articles/creating-a-personal-access-token-for-the-command-line). From 0eb162bba9eca734d4d8158f18b40feef0d7ad17 Mon Sep 17 00:00:00 2001 From: Habib Rehman Date: Sat, 27 Feb 2021 16:43:27 +0000 Subject: [PATCH 15/18] Update README.md --- README.md | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 75f5bb1..bf05959 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,11 @@ # GitHub clone -Git clone any sub-directories of any GitHub repository (at any reference) without having to clone the entire repository. +Git clone (download) any sub-directories of any GitHub repository (at any reference) without having to clone the entire repository, with only its GitHub URL. Uses the GitHub API to recursively clone the sub-directories tree and files. +## Motivation + +I often find myself wanting to only download a certain directory, path or package of an especially big repo that I'm currently viewing (without even cloning the entire repo at depth 1) and to do so by simply copy & pasting the GitHub URL so that's why. Probably more instances where this might come in handy ;) + ## Rate limit The GitHub API imposes a [rate limiting](https://developer.github.com/v3/#rate-limiting) of up to 60 requests per hour applies but can be increased to up to 5000 requests per hour using an _OAuth token_ (to get one see https://help.github.com/en/articles/creating-a-personal-access-token-for-the-command-line). @@ -43,7 +47,7 @@ Options: -t --token= Set a GitHub OAuth token (see https://developer.github.com/v3/#rate-limiting). ``` # License -Copyright (C) 2019 Habib Rehman (https://git.io/HR) +Copyright (C) 2019-2021 Habib Rehman (https://git.io/HR) Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. From 2fa36c0ff562124f107e4c630c47a7d244e88967 Mon Sep 17 00:00:00 2001 From: HR Date: Sun, 4 Jul 2021 19:19:23 +0100 Subject: [PATCH 16/18] Release v1.2.0 on PyPi --- LICENSE | 2 +- README.md | 4 +- ghclone/__init__.py | 151 ++++++++++++++++++++++++++++++++++++++++++ ghclone/__main__.py | 6 +- ghclone/ghclone.py | 155 -------------------------------------------- pyproject.toml | 6 ++ setup.py | 33 +++++----- 7 files changed, 179 insertions(+), 178 deletions(-) delete mode 100644 ghclone/ghclone.py create mode 100644 pyproject.toml diff --git a/LICENSE b/LICENSE index 8dada3e..dbf8f6c 100644 --- a/LICENSE +++ b/LICENSE @@ -186,7 +186,7 @@ same "printed page" as the copyright notice for easier identification within third-party archives. - Copyright {yyyy} {name of copyright owner} + Copyright 2019-2021 Habib Rehman (https://git.io/HR) Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/README.md b/README.md index bf05959..9a223cb 100644 --- a/README.md +++ b/README.md @@ -17,11 +17,11 @@ To clone private repositories you need to supply an _OAuth token_ for an account # Installation Install the script via the `pip`: ``` -pip install git+git://github.com/HR/github-clone#egg=ghclone +pip install github-clone ``` or via `pipsi`: ``` -pipsi install git+git://github.com/HR/github-clone#egg=ghclone +pipsi install github-clone ``` Uses Python 3.3+ diff --git a/ghclone/__init__.py b/ghclone/__init__.py index e69de29..fb4b168 100644 --- a/ghclone/__init__.py +++ b/ghclone/__init__.py @@ -0,0 +1,151 @@ +#!/usr/bin/env python +# -*- encoding: utf-8 +""" +GitHub clone (git.io/ghclone) + +Usage: + ghclone [-t | --token=] + ghclone (-h | --help) + ghclone (-v | --version) + +Examples: + ghclone https://github.com/HR/Crypter/tree/master/app + ghclone https://github.com/HR/Crypter/tree/dev/app + ghclone https://github.com/HR/Crypter/tree/v3.1.0/build + ghclone https://github.com/HR/Crypter/tree/cbee54dd720bb8aaa3a2111fcec667ca5f700510/build + ghclone https://github.com/HR/Picturesque/tree/master/app/src -t li50d67757gm20556d53f08126215725a698560b + +Options: + -h --help Show this screen. + -v --version Show version. + -t --token= Set a GitHub OAuth token (see https://developer.github.com/v3/#rate-limiting). + +(C) 2019-2021 Habib Rehman (git.io/HR) +""" +import requests +import re +import os +import errno +from sys import exit +from docopt import docopt + +__version__ = '1.2.0' +GH_API_BASE_URL = 'https://api.github.com' +GH_REPO_CONTENTS_ENDPOINT = GH_API_BASE_URL + '/repos/{}/{}/contents' +BASE_NORMALIZE_REGEX = re.compile(r'.*github\.com\/') + +req = requests.Session() +req.headers.update({'User-Agent': 'git.io/ghclone ' + __version__}) + + +def exit_with_m(m='An error occured'): + print(m) + exit(1) + + +def mkdir_p(path): + try: + os.makedirs(path) + except OSError as err: # Python >2.5 + if err.errno == errno.EEXIST and os.path.isdir(path): + pass + else: + raise + + +def clone_file(download_url, file_path): + """ + Clones the file at the download_url to the file_path + """ + r = req.get(download_url, stream=True) + try: + r.raise_for_status() + except Exception as e: + exit_with_m('Failed to clone ' + download_url) + + with open(file_path, 'wb') as fd: + for chunk in r.iter_content(chunk_size=128): + fd.write(chunk) + + +def clone(base_url, rel_url=None, path=None, ref=None): + """ + Recursively clones the path + """ + req_url = base_url + '/' + rel_url if rel_url else base_url + + # Get path metadata + r = req.get(req_url) if not ref else req.get(req_url, params={'ref': ref}) + try: + r.raise_for_status() + except Exception as e: + exit_with_m('Failed to fetch metadata for ' + path) + repo_data = r.json() + + # Recursively clone content + for item in repo_data: + if item['type'] == 'dir': + # Fetch dir recursively + clone(base_url, item['path'], path, ref) + else: + # Fetch the file + new_file_path = resolve_path(item['path'], path) + new_path = os.path.dirname(new_file_path) + # Create path locally + mkdir_p(new_path) + # Download the file + clone_file(item['download_url'], new_file_path) + # print('Cloned', item['path']) + + +def resolve_path(path, dir): + index = path.find(dir) + if index is -1: + return os.path.abspath(os.path.join(dir, path)) + else: + return os.path.abspath(path[index:]) + + +### +# Main +### +def main(): + arguments = docopt(__doc__) + if arguments['--version']: + print(__version__) + exit(0) + + # Get params + gh_url = arguments[''] + token = arguments['--token'] + if token: + req.headers.update({'Authorization': 'token ' + token[0]}) + # Normalize & parse input + normal_gh_url = re.sub(BASE_NORMALIZE_REGEX, '', gh_url) + gh_args = normal_gh_url.replace('/tree', '').split('/') + + if len(gh_args) < 2 or normal_gh_url == gh_url: + exit_with_m('Invalid GitHub URI') + + user, repo = gh_args[:2] + ref = None + rel_url = None + + if len(gh_args) >= 2: + # Clone entire repo + path = repo + + if len(gh_args) >= 3: + # Clone entire repo at the branch + ref = gh_args[2] + + if len(gh_args) >= 4: + # Clone subdirectory + rel_url = os.path.join(*gh_args[3:]) + path = gh_args[-1] + + api_req_url = GH_REPO_CONTENTS_ENDPOINT.format(user, repo) + + print("Cloning into '%s'..." % path) + clone(api_req_url, rel_url, path, ref) + print("done.") diff --git a/ghclone/__main__.py b/ghclone/__main__.py index d0b02f2..1ddec88 100644 --- a/ghclone/__main__.py +++ b/ghclone/__main__.py @@ -1,4 +1,6 @@ # -*- coding: utf-8 -*- -from .ghclone import main -main() +from . import main + +if __name__ == '__main__': + main() diff --git a/ghclone/ghclone.py b/ghclone/ghclone.py deleted file mode 100644 index 49bcadc..0000000 --- a/ghclone/ghclone.py +++ /dev/null @@ -1,155 +0,0 @@ -#!/usr/bin/env python -# -*- encoding: utf-8 -""" -GitHub clone (git.io/ghclone) - -Usage: - ghclone [-t | --token=] - ghclone (-h | --help) - ghclone (-v | --version) - -Examples: - ghclone https://github.com/HR/Crypter/tree/master/app - ghclone https://github.com/HR/Crypter/tree/dev/app - ghclone https://github.com/HR/Crypter/tree/v3.1.0/build - ghclone https://github.com/HR/Crypter/tree/cbee54dd720bb8aaa3a2111fcec667ca5f700510/build - ghclone https://github.com/HR/Picturesque/tree/master/app/src -t li50d67757gm20556d53f08126215725a698560b - -Options: - -h --help Show this screen. - -v --version Show version. - -t --token= Set a GitHub OAuth token (see https://developer.github.com/v3/#rate-limiting). - -(C) 2019 Habib Rehman (git.io/HR) -""" -import requests -import re -import os -import errno -from sys import exit -from docopt import docopt - -__version__ = '1.2.0' -GH_API_BASE_URL = 'https://api.github.com' -GH_REPO_CONTENTS_ENDPOINT = GH_API_BASE_URL + '/repos/{}/{}/contents' -BASE_NORMALIZE_REGEX = re.compile(r'.*github\.com\/') - -req = requests.Session() -req.headers.update({'User-Agent': 'git.io/ghclone ' + __version__}) - - -def exit_with_m(m='An error occured'): - print(m) - exit(1) - - -def mkdir_p(path): - try: - os.makedirs(path) - except OSError as err: # Python >2.5 - if err.errno == errno.EEXIST and os.path.isdir(path): - pass - else: - raise - - -def clone_file(download_url, file_path): - """ - Clones the file at the download_url to the file_path - """ - r = req.get(download_url, stream=True) - try: - r.raise_for_status() - except Exception as e: - exit_with_m('Failed to clone ' + download_url) - - with open(file_path, 'wb') as fd: - for chunk in r.iter_content(chunk_size=128): - fd.write(chunk) - - -def clone(base_url, rel_url=None, path=None, ref=None): - """ - Recursively clones the path - """ - req_url = base_url + '/' + rel_url if rel_url else base_url - - # Get path metadata - r = req.get(req_url) if not ref else req.get(req_url, params={'ref': ref}) - try: - r.raise_for_status() - except Exception as e: - exit_with_m('Failed to fetch metadata for ' + path) - repo_data = r.json() - - # Recursively clone content - for item in repo_data: - if item['type'] == 'dir': - # Fetch dir recursively - clone(base_url, item['path'], path, ref) - else: - # Fetch the file - new_file_path = resolve_path(item['path'], path) - new_path = os.path.dirname(new_file_path) - # Create path locally - mkdir_p(new_path) - # Download the file - clone_file(item['download_url'], new_file_path) - # print('Cloned', item['path']) - - -def resolve_path(path, dir): - index = path.find(dir) - if index is -1: - return os.path.abspath(os.path.join(dir, path)) - else: - return os.path.abspath(path[index:]) - - -### -# Main -### -def main(): - arguments = docopt(__doc__) - if arguments['--version']: - print(__version__) - exit(0) - - # Get params - gh_url = arguments[''] - token = arguments['--token'] - if token: - req.headers.update({'Authorization': 'token ' + token[0]}) - # Normalize & parse input - normal_gh_url = re.sub(BASE_NORMALIZE_REGEX, '', gh_url) - gh_args = normal_gh_url.replace('/tree', '').split('/') - - if len(gh_args) < 2 or normal_gh_url == gh_url: - exit_with_m('Invalid GitHub URI') - - user, repo = gh_args[:2] - ref = None - rel_url = None - - if len(gh_args) >= 2: - # Clone entire repo - path = repo - - if len(gh_args) >= 3: - # Clone entire repo at the branch - ref = gh_args[2] - - if len(gh_args) >= 4: - # Clone subdirectory - rel_url = os.path.join(*gh_args[3:]) - path = gh_args[-1] - - api_req_url = GH_REPO_CONTENTS_ENDPOINT.format(user, repo) - - print("Cloning into '%s'..." % path) - clone(api_req_url, rel_url, path, ref) - print("done.") - - -if __name__ == '__main__': - main() diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..b5a3c46 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,6 @@ +[build-system] +requires = [ + "setuptools>=42", + "wheel" +] +build-backend = "setuptools.build_meta" \ No newline at end of file diff --git a/setup.py b/setup.py index 1803e9a..2a5131e 100644 --- a/setup.py +++ b/setup.py @@ -1,58 +1,55 @@ #!/usr/bin/env python # -*- encoding: utf-8 -*- -import codecs -import os import re from setuptools import find_packages, setup version_regex = r'__version__ = ["\']([^"\']*)["\']' -with open('ghclone/ghclone.py',) as f: +with open('ghclone/__init__.py',) as f: text = f.read() match = re.search(version_regex, text) if match: version = match.group(1) else: - raise RuntimeError("No version number found!") + raise RuntimeError('No version number found!') -def local_file(name): - return os.path.relpath(os.path.join(os.path.dirname(__file__), name)) - - -README = local_file('README.md') -long_description = codecs.open(README, encoding='utf-8').read() +with open('README.md', 'r', encoding='utf-8') as fh: + long_description = fh.read() setup( - name='ghclone', - packages=['ghclone'], + name='github-clone', + packages=find_packages(), version=version, - description='A script for cloning any sub-directories of any GitHub repository', + description='Clone any subdirectory of a GitHub repo with just the GitHub URL', long_description=long_description, + long_description_content_type='text/markdown', url='https://github.com/HR/github-clone', author='Habib Rehman', - author_email='h@rehman.email', + author_email='Hi@HabibRehman.com', license='Apache 2.0', + project_urls={ + "Bug Tracker": "https://github.com/HR/github-clone/issues", + }, classifiers=[ 'Development Status :: 5 - Production/Stable', 'Environment :: Console', 'Intended Audience :: Developers', 'License :: OSI Approved :: Apache Software License', 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.3', - 'Programming Language :: Python :: 3.4', - 'Programming Language :: Python :: 3.5', + 'Operating System :: OS Independent' ], + python_requires='>=3.6', install_requires=[ 'requests>=2.20.0', 'docopt>=0.6.2', ], entry_points={ 'console_scripts': [ - 'ghclone=ghclone.ghclone:main', + 'ghclone=ghclone:main', ], }, ) \ No newline at end of file From 821cdc9f4c1b2183757d912439a458aa4047840b Mon Sep 17 00:00:00 2001 From: Habib Rehman Date: Sun, 4 Jul 2021 19:57:17 +0100 Subject: [PATCH 17/18] Update README.md --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index 9a223cb..a023903 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,7 @@ # GitHub clone + +[![PyPi Downloads](https://img.shields.io/pypi/dm/github-clone.svg?style=for-the-badge&logo=pypi)](https://pypi.org/project/github-clone/) + Git clone (download) any sub-directories of any GitHub repository (at any reference) without having to clone the entire repository, with only its GitHub URL. Uses the GitHub API to recursively clone the sub-directories tree and files. From 398cd718b2d1b9ab41ca8c0adbab12b88188e2e4 Mon Sep 17 00:00:00 2001 From: Habib Rehman Date: Sun, 4 Jul 2021 19:58:56 +0100 Subject: [PATCH 18/18] Update README.md --- README.md | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index a023903..8206461 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,10 @@ GitHub clone makes an initial request to fetch repo metadata and then, a request To clone private repositories you need to supply an _OAuth token_ for an account with access to the private repository (to get one see https://help.github.com/en/articles/creating-a-personal-access-token-for-the-command-line). # Installation -Install the script via the `pip`: + +Available on PyPi https://pypi.org/project/github-clone/. + +Install the script via `pip`: ``` pip install github-clone ```