# Purpose:
# This script downloads the files needed for (lang_en) translation with thcrap,
# so that they are accessible to thcrap without thcrap itself having to download them
# (which it cannot do on my computer, for unknown reasons).
# One benefit of this script compared to thcrap is that it is cross-platform and relatively simple.
# Modify and distribute this script as you please, no credit needed.
#
# Use at your own risk: this script downloads files from a remote location, and uses data from
# the same server to decide where the files are saved locally on your computer. It could
# potentially be exploited to write arbitrary files anywhere on your file system. The intended
# behaviour is to only populate the directory ./repos/ with files, but this behaviour is not
# enforced at the moment!!
#
# Usage:
# > python3 download_files.py
# It will take a long time to download.
# It may look like it freezes on some files (likely because the server is reluctant),
# but in my experience it resumes as long as you leave it be.
#
# Requirements:
# urllib3 (third-party); shutil, json, pathlib, zipfile (standard library).
# six==1.15.0 or above is an undocumented requirement of urllib3; I had to install it manually
# on Ubuntu 20.04.2 LTS:
# > sudo pip3 install six==1.15.0
# > sudo pip3 install urllib3
# (On Ubuntu, pip3 insists on a keyring being specified before it will install packages.
# Disable this with:
# > python -m keyring --disable
# see: https://pypi.org/project/keyring/)
#
# TODO: figure out what the hashes(?) in files.js are, so I can check whether the local files are up to date.
#
# Credit:
# Thanks to Window Dump for telling me essentially everything that has gone into writing this script.
#
# Potentially useful links:
# https://github.com/thpatch/thcrap-tsa
# https://urllib3.readthedocs.io/en/latest/
# https://docs.python.org/3/library/pathlib.html
# https://docs.python.org/3/library/zipfile.html
# https://docs.python.org/3/library/shutil.html
# https://www.thpatch.net
# https://gist.github.com/WindowDump/76ee68f05b539c970e079d3ff52718d0

import urllib3              # get files from the web
import shutil               # write a file downloaded with urllib3 to an actual file; move a dir
import json                 # parse files.js for the list of files to download
from pathlib import Path    # easily create a hierarchy of dirs
import zipfile              # unzip the thcrap-tsa repo downloaded from github

language_patch = "lang_en"
local_dir = "repos/thpatch/" + language_patch + "/"

# create local_dir if it doesn't exist
if not Path(local_dir).exists():
    Path(local_dir).mkdir(parents=True)

# fake a user agent, in case the server dislikes robots
user_agent = {'user-agent': 'Mozilla/5.0 (Windows NT 6.3; rv:36.0) ..'}
http = urllib3.PoolManager(headers=user_agent)

###############
# Step 1 (downloading requirements)
# Download https://github.com/thpatch/thcrap-tsa/archive/refs/heads/master.zip;
# many patches (including lang_en) depend on these files.
# Extract the files from the .zip to repos/nmlgc.
###############

# download and write to a local file
r = http.request('GET', "https://github.com/thpatch/thcrap-tsa/archive/refs/heads/master.zip", preload_content=False)
with open("repos/nmlgc.zip", 'wb') as obj:
    shutil.copyfileobj(r, obj)

# extract and move the files to the right directory
with zipfile.ZipFile("repos/nmlgc.zip", 'r') as zip_ref:
    zip_ref.extractall("repos")
shutil.move("repos/thcrap-tsa-master", "repos/nmlgc")
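
# The "use at your own risk" note above is about paths coming from the server: in Steps 2 and 3,
# keys from files.js are joined onto local_dir and written as-is, so a key containing "../" could
# escape ./repos/. Below is a minimal sketch of a guard for that; the helper name and the idea of
# skipping offending files are my own assumptions, and nothing in this script calls it yet.
def is_inside_repos(candidate_path, base_dir="repos"):
    """Return True if candidate_path resolves to a location inside base_dir."""
    base = Path(base_dir).resolve()
    target = Path(candidate_path).resolve()
    # Path.is_relative_to() only exists on Python 3.9+, so compare resolved ancestors instead.
    return base == target or base in target.parents
# Possible use in Step 3, before opening each file:
#     if not is_inside_repos(local_filename):
#         continue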
###############
# Step 2 (bootstrapping)
# Initial file(s) to download.
# 'files.js' is used to generate the list of files to download during the next step.
# '../repo.js' is convenient to download now, since it is not part of lang_en and thus not listed in 'files.js'.
###############

# remote files
files_to_get = ["files.js", "../repo.js"]
file_dict_url_base = "https://srv.thpatch.net/" + language_patch + "/"
file_dict_urls = [file_dict_url_base + f for f in files_to_get]
# corresponding local filenames
local_files = [local_dir + f for f in files_to_get]

for file_dict_url, local_file in zip(file_dict_urls, local_files):
    print("Downloading '" + file_dict_url + "' to '" + local_file + "'...")

    # download and write the json file
    r = http.request('GET', file_dict_url, retries=10, timeout=urllib3.Timeout(connect=1.0, read=2.0))
    content = json.loads(r.data.decode('utf-8'))
    with open(local_file, 'w') as f:
        json.dump(content, f)

    # get the files to download from files.js
    # TODO: get the hashes as well?
    if file_dict_url.rsplit("/", 1)[-1] == "files.js":
        keys = content.keys()
        # files to download
        things_to_download = [file_dict_url_base + k for k in keys]
        # corresponding local filenames
        local_filenames = [local_dir + k for k in keys]

###############
# Step 3 (downloading the bulk of the files)
# Download the files specified by files.js.
# See the TODO at the top for improvements.
###############

print("Downloading files...")
for k, thing_to_download, local_filename in zip(keys, things_to_download, local_filenames):
    print(k)

    # create the directory if needed
    nlevels = len(k.split("/"))
    if nlevels > 1:
        _dir = local_filename.rsplit("/", 1)[0]
        if not Path(_dir).exists():
            Path(_dir).mkdir(parents=True)

    # download and write to a local file
    r = http.request('GET', thing_to_download, preload_content=False)
    with open(local_filename, 'wb') as obj:
        shutil.copyfileobj(r, obj)
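
# The TODO at the top asks what the values in files.js are. If they turn out to be hex-encoded
# SHA-256 digests of the patch files (an unverified assumption on my part; check the actual
# files.js contents before relying on this), an up-to-date check could look like the sketch
# below. Nothing in this script calls it yet.
def local_file_matches(local_path, expected_hex_digest):
    """Return True if the file at local_path hashes to expected_hex_digest (assumed SHA-256)."""
    import hashlib  # stdlib; imported here so the sketch stays self-contained
    if not Path(local_path).exists():
        return False
    with open(local_path, 'rb') as f:
        digest = hashlib.sha256(f.read()).hexdigest()
    return digest == expected_hex_digest
# Possible use in Step 3, to skip files that are already current:
#     if local_file_matches(local_filename, content[k]):
#         continue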