Update - Deploy a Hugo website from GitHub to S3 using GitHub Webhooks, API Gateway and Lambda

| 4 minutes
AWS Hugo GitHub Lambda S3 Blog Python API Gateway How-To

Avid readers, I bring to you a nice little update to one of my more popular posts on how to deploy a Hugo website from GitHub to S3 using GitHub Webhooks, API Gateway and Lambda.

Since that post, AWS has stated that they will deprecate support for Python 2.7 starting 31st December 2020. Not only that, the requests module has been removed from the AWS SDK for Python so the file downloads that happen in my Lambda function stopped working.

To solve this, I switched my Lambda function to the Python 3.7 runtime, and I’ve had to write another Python function that handles file downloads using the urllib3 module. It’s not as elegant as it once was but it does the job and it does a damn fine job at that:

 1def downloadFromUrl(sourceUrl, destFile):
 2    http = urllib3.PoolManager()
 3    r = http.request('GET', sourceUrl, preload_content=False)
 4    with open(destFile, 'wb') as out:
 5        while True:
 6            data = r.read(64)
 7            if not data:
 8                break
 9            out.write(data)
10    r.release_conn()

Place this function at the top of the script (after importing modules of course) to start using it. You’ll also want to remove references to the requests module throughout the script, specifically in the downloadHugo function. The highlighted lines here:

 1def downloadHugo(repo):
 2    logger.info("Downloading latest Hugo")
 3    pattern = re.compile("hugo\\_\\d.+\\_Linux-64bit.tar.gz") # Setting the RegEx to grab what we need from the Assets array
 4    response = requests.get("https://api.github.com/repos/gohugoio/hugo/releases/latest") # GitHub API for the releases
 5    release = response.json()
 6    assets = release["assets"]
 7    for asset in assets:
 8        if pattern.match(asset["name"]):
 9            downloadUrl = asset["browser_download_url"] # Grab the download URL for the Asset
10            logger.info("Value of downloadUrl: " + downloadUrl)
11    urllib.urlretrieve(downloadUrl, '/tmp/hugo.tar.gz') # Download the file
12    logger.info("Hugo download complete")
13    logger.info("Extracting Hugo")
14    tar = tarfile.open("/tmp/hugo.tar.gz")
15    tar.extractall("/tmp/" + repo + "-master")
16    tar.close()

Need to be updated to this:

 1def downloadHugo(repo):
 2    logger.info("Downloading latest Hugo")
 3    pattern = re.compile("hugo\\_\\d.+\\_Linux-64bit.tar.gz")
 4    
 5    http = urllib3.PoolManager()
 6    r = http.request('GET', "https://api.github.com/repos/gohugoio/hugo/releases/latest", preload_content=False, headers={'User-Agent': os.environ['GITHUB_ACCOUNT']})
 7    assets = json.loads(r.data.decode('utf-8'))["assets"]
 8    for asset in assets:
 9        if pattern.match(asset["name"]):
10            downloadUrl = asset["browser_download_url"]
11            logger.info("Value of downloadUrl: " + downloadUrl)
12    downloadFromUrl(downloadUrl, '/tmp/hugo.tar.gz')
13    logger.info("Hugo download complete")
14    logger.info("Extracting Hugo")
15    tar = tarfile.open("/tmp/hugo.tar.gz")
16    tar.extractall("/tmp/" + repo + "-master")
17    tar.close()
18    r.release_conn()

You’ll see I didn’t re-use the download function here — I didn’t build the download function to deal with headers, so I just skipped over it and wrote the code raw anyway. GitHub requires a User-Agent so I just threw it in.

Anyway, here’s the full script!

 1import logging
 2import os
 3from zipfile import ZipFile
 4import json
 5import tarfile
 6import re
 7import boto3
 8import mimetypes
 9import urllib3
10
11logger = logging.getLogger()
12logger.setLevel(logging.INFO)
13
14def downloadFromUrl(sourceUrl, destFile):
15    http = urllib3.PoolManager()
16    r = http.request('GET', sourceUrl, preload_content=False)
17    with open(destFile, 'wb') as out:
18        while True:
19            data = r.read(64)
20            if not data:
21                break
22            out.write(data)
23    r.release_conn()
24
def downloadSite(account, repo):
    """Fetch the master-branch zip of a GitHub repo and unpack it under /tmp."""
    logger.info("Downloading master zip of " + repo + " from GitHub")
    archiveUrl = 'https://github.com/' + account + '/' + repo + '/archive/master.zip'
    logger.info(archiveUrl)
    localArchive = "/tmp/master.zip"
    downloadFromUrl(archiveUrl, localArchive)

    # Unpack next to where downloadHugo will later drop the hugo binary.
    with ZipFile(localArchive, 'r') as archive:
        logger.info("Extracting site files now")
        archive.extractall("/tmp")
        logger.info("Extraction complete!")
36
def downloadHugo(repo):
    """Download the latest Linux 64-bit Hugo release from GitHub and extract
    it into the site's working directory (/tmp/<repo>-master).

    Raises:
        RuntimeError: if no release asset matches the expected file name.
    """
    logger.info("Downloading latest Hugo")
    # Raw string, with the dots escaped so they only match literal dots.
    pattern = re.compile(r"hugo_\d.+_Linux-64bit\.tar\.gz")
    http = urllib3.PoolManager()
    # GitHub's API rejects requests that carry no User-Agent header, so we
    # send the configured account name as a simple identifier.
    r = http.request('GET', "https://api.github.com/repos/gohugoio/hugo/releases/latest",
                     preload_content=False,
                     headers={'User-Agent': os.environ['GITHUB_ACCOUNT']})
    try:
        assets = json.loads(r.data.decode('utf-8'))["assets"]
    finally:
        # Release the connection as soon as the body is consumed; the original
        # held it open through the whole download and tar extraction below.
        r.release_conn()

    downloadUrl = None
    for asset in assets:
        if pattern.match(asset["name"]):
            downloadUrl = asset["browser_download_url"]
            logger.info("Value of downloadUrl: " + downloadUrl)
            break  # first match is the one we want; no need to keep scanning
    if downloadUrl is None:
        # Previously this fell through to a confusing NameError.
        raise RuntimeError("No Linux-64bit Hugo release asset found")

    downloadFromUrl(downloadUrl, '/tmp/hugo.tar.gz')
    logger.info("Hugo download complete")
    logger.info("Extracting Hugo")
    with tarfile.open("/tmp/hugo.tar.gz") as tar:
        tar.extractall("/tmp/" + repo + "-master")
54    
def buildSite(repo):
    """Run the hugo binary inside the extracted site and return the output dir.

    Returns:
        Absolute path of the generated "public" directory.

    Raises:
        RuntimeError: if the hugo build exits with a non-zero status.
    """
    logger.info("Building site")
    os.chdir("/tmp/" + repo + "-master")
    status = os.system('./hugo')
    # os.system returns the process wait status; anything non-zero means the
    # build failed. The original ignored it and happily uploaded a broken site.
    if status != 0:
        raise RuntimeError("Hugo build failed with exit status " + str(status))
    logger.info("Site built with Hugo")
    buildDir = os.getcwd() + "/public"
    return buildDir
62    
def syncS3(path, s3Bucket):
    """Mirror the generated site at *path* into the S3 bucket s3Bucket.

    The bucket is emptied first so deleted pages don't linger, then every
    file under *path* is uploaded with a best-guess Content-Type, keyed by
    its path relative to the build directory.
    """
    # Copied from https://www.developerfiles.com/upload-files-to-s3-with-python-keeping-the-original-folder-structure/
    logger.info("Copying to S3")
    session = boto3.Session()
    s3 = session.resource('s3')
    bucket = s3.Bucket(s3Bucket)
    logger.info("Emptying bucket first")
    bucket.objects.all().delete()
    mimetypes.init()
    mimetypes.types_map['.css'] = 'text/css'

    for subdir, dirs, files in os.walk(path):
        for file in files:
            full_path = os.path.join(subdir, file)
            # guess_type never raises; the original types_map[ext] lookup
            # crashed the entire deploy on any extension missing from the
            # table (.woff2, .webmanifest, ...). Fall back to a generic type.
            mimetype = mimetypes.guess_type(full_path)[0] or 'application/octet-stream'
            with open(full_path, 'rb') as data:
                # Key = path relative to the build dir, preserving structure.
                bucket.put_object(Key=full_path[len(path)+1:], Body=data, ContentType=mimetype)
    logger.info("Generated site uploaded to S3 successfully.")
82
def lambda_handler(event, context):
    """Lambda entry point: download, build, and publish the Hugo site.

    Configuration comes entirely from environment variables (GITHUB_REPO,
    GITHUB_ACCOUNT, TARGET_BUCKET); the webhook event payload itself is
    not inspected.
    """
    repo = os.environ['GITHUB_REPO']
    account = os.environ['GITHUB_ACCOUNT']
    bucket = os.environ['TARGET_BUCKET']

    downloadSite(account, repo)
    downloadHugo(repo)
    syncS3(buildSite(repo), bucket)

    return {
        'statusCode': 200,
        'body': "Site deployed successfully"
    }

If you have any feedback or questions please feel free to comment.

Share this on:
About Stellios Williams
Technical Account Manager VMware
This is my personal tech related blog for anything private and public cloud - including homelabs! My postings are my own and don’t necessarily represent VMware’s positions, strategies or opinions. Any technical guidance or advice is given without warranty or consideration for your unique issues or circumstances.
Comments
comments powered by Disqus