Download Canvas Classes in Bulk Using Python

I have over 250 Canvas courses in my account after 10 years at Delta State, and recently I decided I wanted to back them up, which is a great job for Python and ChatGPT. Here’s what I tweaked into existence. It provides a list of classes, you select the ones you want, and it does the rest. It tries to get term data, but in my case that wasn’t accurate, so it also gets the course start date. It also appends the export date and time to the end of each file name, so it doesn’t accidentally overwrite previous exports of the same course when there is no start date to tell them apart.

To get set up:

  1. Get your API Token from your Canvas settings page. Hint: Click the “+ New Access Token” button.
  2. Have Python up and running, and the requests library installed. I did this through a virtual environment, so my code on Windows more or less looked like this:
python -m venv venv
cd venv/Scripts
activate
pip install requests

Then download the code here, name it something like “downloadcanvas.py”, and save it in the venv/Scripts directory.

Code:

import os
import requests
from pathlib import Path
from datetime import datetime

# Canvas API Configuration
CANVAS_BASE_URL = "https://<your_canvas_instance>.instructure.com/api/v1"  # Replace <your_canvas_instance> with your Canvas domain
# The token is a secret, so prefer supplying it through the CANVAS_API_TOKEN
# environment variable; the placeholder below is only used when that variable
# is not set, which keeps the edit-this-file workflow working.
API_TOKEN = os.environ.get("CANVAS_API_TOKEN", "<your_api_token>")  # Or replace with your Canvas API token
OUTPUT_DIR = "canvas_backups"  # Directory to save backups

# Headers for API requests
HEADERS = {
    "Authorization": f"Bearer {API_TOKEN}"
}

def fetch_all_pages(url):
    """Fetch all paginated results from a Canvas API endpoint.

    Args:
        url: URL of the first page. Every page is expected to return a JSON
            array, because each page's items are appended to one list.

    Returns:
        A list containing the items of every page, in the order received.

    Raises:
        requests.HTTPError: If any page responds with an error status.
    """
    results = []
    while url:
        response = requests.get(url, headers=HEADERS)
        response.raise_for_status()
        results.extend(response.json())
        # Let requests parse the "Link" header: splitting it by hand breaks
        # when an entry has whitespace after the comma, which leaves a
        # leading " <" on the URL.
        url = response.links.get("next", {}).get("url")
    return results

def get_courses():
    """Fetch a list of all Canvas courses for the authenticated user.

    Returns:
        A list of course dictionaries as returned by the Canvas API.
    """
    # Ask for large pages so an account with hundreds of courses needs only a
    # few requests instead of one request per handful of courses.
    url = f"{CANVAS_BASE_URL}/courses?per_page=100"
    return fetch_all_pages(url)

def get_terms():
    """Fetch a mapping of term IDs to term names.

    The terms endpoint wraps each page in an object with an
    'enrollment_terms' list, so the pages are walked here rather than with
    fetch_all_pages (which expects a bare JSON array per page).

    Returns:
        A dict mapping term ID to term name. A term without a name is
        labelled "Term_<id>". Empty if no page contains 'enrollment_terms'.

    Raises:
        requests.HTTPError: If any page responds with an error status.
    """
    url = f"{CANVAS_BASE_URL}/accounts/self/terms"
    term_map = {}
    while url:
        response = requests.get(url, headers=HEADERS)
        response.raise_for_status()
        data = response.json()
        if 'enrollment_terms' in data:
            for term in data['enrollment_terms']:
                term_map[term['id']] = term.get('name', f"Term_{term['id']}")
        # Reading only the first page would leave every later term unmapped.
        url = response.links.get("next", {}).get("url")
    return term_map

def display_courses(courses, term_map):
    """Print a numbered (1-based) listing of the courses with their term names.

    A course without a 'name' is shown as "Untitled Course <id>", and a term ID
    missing from term_map is shown as "Unknown Term".
    """
    print("\nAvailable Courses:")
    for number, course in enumerate(courses, start=1):
        fallback_title = f"Untitled Course {course.get('id', 'Unknown ID')}"
        title = course.get('name', fallback_title)
        term_label = term_map.get(course.get('enrollment_term_id', None), 'Unknown Term')
        print(f"{number}. {title} (Term: {term_label})")
    print()

def parse_selection(selection, total_courses):
    """Parse the user's selection of courses into a list of indices.

    Args:
        selection: Comma-separated 1-based numbers and inclusive ranges,
            e.g. "1-3,5,7-9".
        total_courses: Number of courses on offer; anything outside
            1..total_courses is ignored.

    Returns:
        The selected 0-based indices in the order first mentioned, without
        duplicates.

    Raises:
        ValueError: If a part is not a number or a "start-end" pair.
    """
    seen = set()
    indices = []
    for part in selection.split(','):
        if '-' in part:
            start, end = map(int, part.split('-'))
            # Clamp to the valid window so a huge range never has to be
            # walked or stored in full.
            candidates = range(max(start - 1, 0), min(end, total_courses))
        else:
            candidates = (int(part.strip()) - 1,)  # Convert to 0-based indexing
        for index in candidates:
            # Skip repeats so overlapping choices such as "1-3,2" do not
            # export the same course twice.
            if 0 <= index < total_courses and index not in seen:
                seen.add(index)
                indices.append(index)
    return indices

def select_courses(courses):
    """Ask the user which courses to download and return the chosen ones.

    The prompt is repeated, instructions included, until the input parses.
    """
    while True:
        print("Enter the numbers or ranges of the courses you want to download (e.g., 1-3,5,7-9):")
        raw_choice = input("Your choice: ")
        try:
            chosen_positions = parse_selection(raw_choice, len(courses))
            return [courses[position] for position in chosen_positions]
        except (ValueError, IndexError):
            print("Invalid input. Please try again.")

def format_start_date(start_date):
    """Format the start date or return 'nostartdate' if empty or unparseable.

    Args:
        start_date: ISO 8601 timestamp string such as "2020-01-13T06:00:00Z"
            or "2012-06-01T00:00:00-06:00", or a falsy value.

    Returns:
        The date part as "YYYY-MM-DD" (no time-zone conversion is applied),
        or "nostartdate".
    """
    if not start_date:
        return "nostartdate"
    try:
        return datetime.strptime(start_date, "%Y-%m-%dT%H:%M:%SZ").strftime("%Y-%m-%d")
    except ValueError:
        pass
    # Fall back to general ISO 8601 parsing so timestamps with a numeric
    # offset or fractional seconds are not discarded. A trailing "Z" is
    # rewritten because older fromisoformat() versions reject it.
    text = start_date.strip()
    if text.endswith(("Z", "z")):
        text = text[:-1] + "+00:00"
    try:
        return datetime.fromisoformat(text).strftime("%Y-%m-%d")
    except ValueError:
        return "nostartdate"

def get_current_datetime():
    """Return the current local date and time as 'YYYY-MM-DD_HH-MM-SS'."""
    now = datetime.now()
    return now.strftime("%Y-%m-%d_%H-%M-%S")

def _sanitize_filename_part(text):
    """Replace every character that is not alphanumeric, a space, '_' or '-' with '_'."""
    return "".join(c if c.isalnum() or c in " _-" else "_" for c in text)


def download_course_export(course_id, course_name, term_name, start_date, poll_interval=5):
    """Initiate and download the course export for a given course.

    Args:
        course_id: Canvas ID of the course to export.
        course_name: Course name, used in progress messages and the file name.
        term_name: Term name, used in progress messages and the file name.
        start_date: Course start timestamp string (or None) for the file name.
        poll_interval: Seconds to wait between export status checks.

    Raises:
        requests.HTTPError: If any Canvas request returns an error status.
        RuntimeError: If Canvas reports that the export failed.
    """
    import time  # Local import so this function does not depend on the file header.

    # Initiate export
    export_url = f"{CANVAS_BASE_URL}/courses/{course_id}/content_exports"
    export_data = {"export_type": "common_cartridge"}
    response = requests.post(export_url, headers=HEADERS, json=export_data)
    response.raise_for_status()
    export = response.json()

    # Poll until export is complete
    export_id = export['id']
    export_status_url = f"{export_url}/{export_id}"
    while True:
        status_response = requests.get(export_status_url, headers=HEADERS)
        status_response.raise_for_status()
        export_status = status_response.json()
        state = export_status['workflow_state']
        if state == 'exported':
            break
        if state == 'failed':
            # Without this check a failed export would be polled forever.
            raise RuntimeError(f"Canvas reported that the export for course {course_name} failed")
        print(f"Export for course {course_name} is {state}...")
        # Pause between checks instead of hammering the API in a tight loop.
        time.sleep(poll_interval)

    # Format start date and current datetime
    formatted_start_date = format_start_date(start_date)
    export_datetime = get_current_datetime()

    # Build the destination path
    sanitized_course_name = _sanitize_filename_part(course_name)
    sanitized_term_name = _sanitize_filename_part(term_name)
    output_path = Path(OUTPUT_DIR) / f"{sanitized_course_name}_{sanitized_term_name}_{formatted_start_date}_{export_datetime}.imscc"

    # Download the file; the context manager releases the connection even if
    # writing to disk fails part-way through.
    download_url = export_status['attachment']['url']
    with requests.get(download_url, stream=True) as response:
        response.raise_for_status()
        with open(output_path, "wb") as file:
            for chunk in response.iter_content(chunk_size=64 * 1024):
                file.write(chunk)
    print(f"Course {course_name} ({term_name}) has been backed up to {output_path}")

def backup_selected_courses():
    """Run the interactive backup: list the courses, ask for a selection, export each.

    A failure on one course is reported and the remaining courses are still
    attempted.
    """
    backup_dir = Path(OUTPUT_DIR)
    backup_dir.mkdir(parents=True, exist_ok=True)

    # Term names first, then the course list they are displayed against
    terms_by_id = get_terms()
    all_courses = get_courses()
    display_courses(all_courses, terms_by_id)

    for entry in select_courses(all_courses):
        try:
            entry_id = entry['id']
            fallback_title = f"Untitled Course {entry.get('id', 'Unknown ID')}"
            title = entry.get('name', fallback_title)
            term_label = terms_by_id.get(entry.get('enrollment_term_id', None), 'Unknown Term')
            starts_at = entry.get('start_at')
            print(f"Backing up course: {title} (Term: {term_label})")
            download_course_export(entry_id, title, term_label, starts_at)
        except Exception as e:
            print(f"Failed to backup course {entry.get('name', 'Untitled Course')}: {e}")

# Run the interactive backup only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    backup_selected_courses()

Once it’s saved, just run “python downloadcanvas.py” and you should be good to go!