Update - Deploy a Hugo website from GitHub to S3 using GitHub Webhooks, API Gateway and Lambda

| 4 minutes
AWS Hugo GitHub Lambda S3 Blog Python API Gateway How-To

Avid readers, I bring to you a nice little update to one of my more popular posts on how to deploy a Hugo website from GitHub to S3 using GitHub Webhooks, API Gateway and Lambda.

Since that post, AWS has stated that they will deprecate support for Python 2.7 starting 31st December 2020. Not only that, the requests module has been removed from the AWS SDK for Python so the file downloads that happen in my Lambda function stopped working.

To solve this, I switched my Lambda function to the Python 3.7 runtime, and I’ve had to write another Python function that handles file downloads using the urllib3 module. It’s not as elegant as it once was but it does the job and it does a damn fine job at that:

 1def downloadFromUrl(sourceUrl, destFile):
 2    http = urllib3.PoolManager()
 3    r = http.request('GET', sourceUrl, preload_content=False)
 4    with open(destFile, 'wb') as out:
 5        while True:
 6            data = r.read(64)
 7            if not data:
 8                break
 9            out.write(data)
10    r.release_conn()

Place this function at the top of the script (after importing modules of course) to start using it. You’ll also want to remove references to the requests module throughout the script, specifically in the downloadHugo function. The highlighted lines here:

 1def downloadHugo(repo):
 2    logger.info("Downloading latest Hugo")
 3    pattern = re.compile("hugo\\_\\d.+\\_Linux-64bit.tar.gz") # Setting the RegEx to grab what we need from the Assets array
 4    response = requests.get("https://api.github.com/repos/gohugoio/hugo/releases/latest") # GitHub API for the releases
 5    release = response.json()
 6    assets = release["assets"]
 7    for asset in assets:
 8        if pattern.match(asset["name"]):
 9            downloadUrl = asset["browser_download_url"] # Grab the download URL for the Asset
10            logger.info("Value of downloadUrl: " + downloadUrl)
11    urllib.urlretrieve(downloadUrl, '/tmp/hugo.tar.gz') # Download the file
12    logger.info("Hugo download complete")
13    logger.info("Extracting Hugo")
14    tar = tarfile.open("/tmp/hugo.tar.gz")
15    tar.extractall("/tmp/" + repo + "-master")
16    tar.close()

Need to be updated to this:

 1def downloadHugo(repo):
 2    logger.info("Downloading latest Hugo")
 3    pattern = re.compile("hugo\\_\\d.+\\_Linux-64bit.tar.gz")
 4    
 5    http = urllib3.PoolManager()
 6    r = http.request('GET', "https://api.github.com/repos/gohugoio/hugo/releases/latest", preload_content=False, headers={'User-Agent': os.environ['GITHUB_ACCOUNT']})
 7    assets = json.loads(r.data.decode('utf-8'))["assets"]
 8    for asset in assets:
 9        if pattern.match(asset["name"]):
10            downloadUrl = asset["browser_download_url"]
11            logger.info("Value of downloadUrl: " + downloadUrl)
12    downloadFromUrl(downloadUrl, '/tmp/hugo.tar.gz')
13    logger.info("Hugo download complete")
14    logger.info("Extracting Hugo")
15    tar = tarfile.open("/tmp/hugo.tar.gz")
16    tar.extractall("/tmp/" + repo + "-master")
17    tar.close()
18    r.release_conn()

You’ll see I didn’t re-use the download function here — I didn’t build the download function to deal with headers, so I just skipped over it and wrote the code raw anyway. GitHub requires a User-Agent so I just threw it in.

Anyway, here’s the full script!

 1import logging
 2import os
 3from zipfile import ZipFile
 4import json
 5import tarfile
 6import re
 7import boto3
 8import mimetypes
 9import urllib3
10
11logger = logging.getLogger()
12logger.setLevel(logging.INFO)
13
14def downloadFromUrl(sourceUrl, destFile):
15    http = urllib3.PoolManager()
16    r = http.request('GET', sourceUrl, preload_content=False)
17    with open(destFile, 'wb') as out:
18        while True:
19            data = r.read(64)
20            if not data:
21                break
22            out.write(data)
23    r.release_conn()
24
def downloadSite(account, repo):
    """Fetch the master-branch zip of a GitHub repo and unpack it under /tmp."""
    logger.info("Downloading master zip of " + repo + " from GitHub")
    archiveUrl = 'https://github.com/' + account + '/' + repo + '/archive/master.zip'
    logger.info(archiveUrl)
    localArchive = "/tmp/master.zip"
    downloadFromUrl(archiveUrl, localArchive)

    # Unpack next to where downloadHugo will later drop the hugo binary.
    with ZipFile(localArchive, 'r') as archive:
        logger.info("Extracting site files now")
        archive.extractall("/tmp")
        logger.info("Extraction complete!")
36
def downloadHugo(repo):
    """Download the latest Linux 64-bit Hugo release from GitHub and extract
    it into the site's working directory (/tmp/<repo>-master).

    Raises:
        RuntimeError: if no release asset matches the expected file name.
    """
    logger.info("Downloading latest Hugo")
    # Raw string, with the dots escaped so they only match literal dots.
    pattern = re.compile(r"hugo_\d.+_Linux-64bit\.tar\.gz")
    http = urllib3.PoolManager()
    # GitHub's API rejects requests that carry no User-Agent header, so we
    # send the configured account name as a simple identifier.
    r = http.request('GET', "https://api.github.com/repos/gohugoio/hugo/releases/latest",
                     preload_content=False,
                     headers={'User-Agent': os.environ['GITHUB_ACCOUNT']})
    try:
        assets = json.loads(r.data.decode('utf-8'))["assets"]
    finally:
        # Release the connection as soon as the body is consumed; the original
        # held it open through the whole download and tar extraction below.
        r.release_conn()

    downloadUrl = None
    for asset in assets:
        if pattern.match(asset["name"]):
            downloadUrl = asset["browser_download_url"]
            logger.info("Value of downloadUrl: " + downloadUrl)
            break  # first match is the one we want; no need to keep scanning
    if downloadUrl is None:
        # Previously this fell through to a confusing NameError.
        raise RuntimeError("No Linux-64bit Hugo release asset found")

    downloadFromUrl(downloadUrl, '/tmp/hugo.tar.gz')
    logger.info("Hugo download complete")
    logger.info("Extracting Hugo")
    with tarfile.open("/tmp/hugo.tar.gz") as tar:
        tar.extractall("/tmp/" + repo + "-master")
54    
def buildSite(repo):
    """Run the hugo binary inside the extracted site and return the output dir.

    Returns:
        Absolute path of the generated "public" directory.

    Raises:
        RuntimeError: if the hugo build exits with a non-zero status.
    """
    logger.info("Building site")
    os.chdir("/tmp/" + repo + "-master")
    status = os.system('./hugo')
    # os.system returns the process wait status; anything non-zero means the
    # build failed. The original ignored it and happily uploaded a broken site.
    if status != 0:
        raise RuntimeError("Hugo build failed with exit status " + str(status))
    logger.info("Site built with Hugo")
    buildDir = os.getcwd() + "/public"
    return buildDir
62    
def syncS3(path, s3Bucket):
    """Mirror the generated site at *path* into the S3 bucket s3Bucket.

    The bucket is emptied first so deleted pages don't linger, then every
    file under *path* is uploaded with a best-guess Content-Type, keyed by
    its path relative to the build directory.
    """
    # Copied from https://www.developerfiles.com/upload-files-to-s3-with-python-keeping-the-original-folder-structure/
    logger.info("Copying to S3")
    session = boto3.Session()
    s3 = session.resource('s3')
    bucket = s3.Bucket(s3Bucket)
    logger.info("Emptying bucket first")
    bucket.objects.all().delete()
    mimetypes.init()
    mimetypes.types_map['.css'] = 'text/css'

    for subdir, dirs, files in os.walk(path):
        for file in files:
            full_path = os.path.join(subdir, file)
            # guess_type never raises; the original types_map[ext] lookup
            # crashed the entire deploy on any extension missing from the
            # table (.woff2, .webmanifest, ...). Fall back to a generic type.
            mimetype = mimetypes.guess_type(full_path)[0] or 'application/octet-stream'
            with open(full_path, 'rb') as data:
                # Key = path relative to the build dir, preserving structure.
                bucket.put_object(Key=full_path[len(path)+1:], Body=data, ContentType=mimetype)
    logger.info("Generated site uploaded to S3 successfully.")
82
def lambda_handler(event, context):
    """Lambda entry point: download, build, and publish the Hugo site.

    Configuration comes entirely from environment variables (GITHUB_REPO,
    GITHUB_ACCOUNT, TARGET_BUCKET); the webhook event payload itself is
    not inspected.
    """
    repo = os.environ['GITHUB_REPO']
    account = os.environ['GITHUB_ACCOUNT']
    bucket = os.environ['TARGET_BUCKET']

    downloadSite(account, repo)
    downloadHugo(repo)
    syncS3(buildSite(repo), bucket)

    return {
        'statusCode': 200,
        'body': "Site deployed successfully"
    }

If you have any feedback or questions please feel free to comment.

Share this on:
About Stellios Williams
Technical Account Manager VMware
This is my personal tech related blog for anything private and public cloud - including homelabs! My postings are my own and don’t necessarily represent VMware’s positions, strategies or opinions. Any technical guidance or advice is given without warranty or consideration for your unique issues or circumstances.
Comments
comments powered by Disqus