I won’t know if this script works until I run it and see the errors, but the comments won’t start to generate until after all the posts have been created, so I can’t debug that part until I’ve already created too much content.
import sqlite3
import requests
from pythorhead import Lemmy
import schedule
import time
import logging
from config import *
# Root logger setup: INFO and above goes to debug.log and to a stream handler.
logging.basicConfig(
    handlers=[logging.FileHandler("debug.log"), logging.StreamHandler()],
    format="%(asctime)s %(levelname)s %(name)s %(message)s",
    level=logging.INFO,
)
def initialize_database():
    """Open the SQLite database at DB_FILE and ensure both tables exist.

    Creates the ``posts`` table (primary key ``github_url``) and the
    ``comments`` table (primary key ``github_comment_id``) when they are
    missing, then commits.

    Returns:
        The still-open ``sqlite3`` connection. Closing it is left to the
        caller.
    """
    posts_ddl = """
CREATE TABLE IF NOT EXISTS posts (
github_url TEXT PRIMARY KEY,
lemmy_post_id INTEGER,
lemmy_post_name TEXT,
lemmy_post_body TEXT
)
"""
    comments_ddl = """
CREATE TABLE IF NOT EXISTS comments (
github_comment_id INTEGER PRIMARY KEY,
lemmy_comment_id INTEGER,
comment_user TEXT,
comment_body TEXT
)
"""
    connection = sqlite3.connect(DB_FILE)
    db_cursor = connection.cursor()
    for ddl in (posts_ddl, comments_ddl):
        db_cursor.execute(ddl)
    connection.commit()
    return connection
def initialize_lemmy_instance():
    """Build a ``Lemmy`` client for LEMMY_INSTANCE_URL and log it in.

    Returns:
        The client object after ``log_in`` has been called with
        LEMMY_USERNAME and LEMMY_PASSWORD. The result of ``log_in`` is not
        inspected here.
    """
    client = Lemmy(LEMMY_INSTANCE_URL)
    client.log_in(LEMMY_USERNAME, LEMMY_PASSWORD)
    logging.info("Initialized Lemmy instance")
    return client
def discover_community(lemmy, community_name):
    """Look up *community_name* through ``lemmy.discover_community``.

    Args:
        lemmy: Client object exposing ``discover_community``.
        community_name: Name handed unchanged to the client.

    Returns:
        Whatever ``lemmy.discover_community`` returns for that name; the
        value is logged as the community ID.
    """
    resolved_id = lemmy.discover_community(community_name)
    logging.info(f"Discovered community {community_name} with ID {resolved_id}")
    return resolved_id
def fetch_github_issues(repo, timeout=30):
    """Fetch issues for *repo* from the GitHub REST API.

    A single GET is sent to ``{GITHUB_API_BASE}/repos/{repo}/issues`` with
    no query parameters, so only what GitHub returns for that one request
    is included.

    Args:
        repo: Repository identifier interpolated into the URL path
            (presumably ``owner/name`` — confirm against REPOSITORIES).
        timeout: Seconds passed to ``requests.get`` as its timeout.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the response status is 4xx or 5xx.
        requests.Timeout: If the server does not answer within *timeout*.
    """
    url = f"{GITHUB_API_BASE}/repos/{repo}/issues"
    headers = {"Accept": "application/vnd.github+json"}
    # requests has no default timeout; without one a stalled connection
    # would block the scheduler loop indefinitely.
    response = requests.get(url, headers=headers, timeout=timeout)
    # Stop here on an error status instead of returning the error payload
    # to callers that expect a list of issues.
    response.raise_for_status()
    logging.info(f"Fetched issues from {url}")
    return response.json()
def extract_issue_info(issue, repo):
    """Derive the Lemmy post URL, title and body from a GitHub issue dict.

    The title has the form ``{state}{repo tag} {title} #{number}``, where
    the state prefix is ``[Closed]`` for closed issues and empty otherwise.

    Args:
        issue: Mapping with the keys ``html_url``, ``state``, ``title``,
            ``number`` and ``body``.
        repo: Repository identifier used to pick the ``[BE]``/``[UI]`` tag.

    Returns:
        A ``(issue_url, issue_title, issue_body)`` tuple. ``issue_body`` is
        passed through unchanged, so it is ``None`` when the issue has none.
    """
    issue_url = issue["html_url"]
    issue_state = "[Closed]" if issue["state"] == "closed" else ""
    # A plain `"lemmy" in repo` test is also true for "lemmy-ui", which
    # would tag every UI issue as backend; rule the UI repo out first.
    is_ui_repo = "lemmy-ui" in repo
    repo_abbr = "[BE]" if "lemmy" in repo and not is_ui_repo else "[UI]"
    issue_title = f"{issue_state}{repo_abbr} {issue['title']} #{issue['number']}"
    issue_body = issue["body"]
    return issue_url, issue_title, issue_body
def post_issues_to_lemmy(lemmy, community_id, repo):
    """Create a Lemmy post for every fetched issue not yet in ``posts``.

    Each issue from ``fetch_github_issues(repo)`` is looked up by its GitHub
    URL. Unknown issues are posted through ``lemmy.post.create`` and the
    resulting post id, name and body are stored and committed one by one, so
    an interruption does not lose posts that were already created.

    Args:
        lemmy: Client object exposing ``post.create``.
        community_id: Target community, passed through to ``post.create``.
        repo: Repository identifier passed to the fetch/extract helpers.
    """
    conn = sqlite3.connect(DB_FILE)
    try:
        cursor = conn.cursor()
        for issue in fetch_github_issues(repo):
            issue_url, issue_title, issue_body = extract_issue_info(issue, repo)
            cursor.execute("SELECT lemmy_post_id FROM posts WHERE github_url=?", (issue_url,))
            if cursor.fetchone() is not None:
                continue  # already mirrored on an earlier run

            response = lemmy.post.create(community_id, issue_title, url=issue_url, body=issue_body)
            # NOTE(review): the client appears to return None when the
            # request fails — confirm against pythorhead. Skip instead of
            # crashing on the subscript below.
            if not response:
                logging.warning(f"Failed to post issue {issue_title} to community {community_id}")
                continue

            post = response["post_view"]["post"]
            # "body" may be missing from the response when the issue had no
            # body text, so read it without assuming the key exists.
            cursor.execute("INSERT INTO posts (github_url, lemmy_post_id, lemmy_post_name, lemmy_post_body) VALUES (?, ?, ?, ?)", (issue_url, post["id"], post["name"], post.get("body")))
            conn.commit()
            logging.info(f"Posted issue {issue_title} to community {community_id}")
    finally:
        # The original never closed this connection; release it on every path.
        conn.close()
def fetch_github_comments(repo, issue_number, timeout=30):
    """Fetch the comments of one GitHub issue.

    A single GET is sent to
    ``{GITHUB_API_BASE}/repos/{repo}/issues/{issue_number}/comments`` with
    no query parameters.

    Args:
        repo: Repository identifier interpolated into the URL path.
        issue_number: Issue number interpolated into the URL path.
        timeout: Seconds passed to ``requests.get`` as its timeout.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the response status is 4xx or 5xx.
        requests.Timeout: If the server does not answer within *timeout*.
    """
    url = f"{GITHUB_API_BASE}/repos/{repo}/issues/{issue_number}/comments"
    headers = {"Accept": "application/vnd.github+json"}
    # requests has no default timeout; avoid hanging on a stalled connection.
    response = requests.get(url, headers=headers, timeout=timeout)
    # Surface 4xx/5xx here rather than returning an error payload that the
    # caller would try to iterate as a list of comments.
    response.raise_for_status()
    logging.info(f"Fetched comments for issue #{issue_number}")
    return response.json()
def post_comments_to_lemmy(lemmy, post_id, repo, issue_number):
    """Mirror the GitHub comments of one issue onto its Lemmy post.

    Each comment from ``fetch_github_comments`` is looked up by its GitHub
    comment id. Unknown comments are created through ``lemmy.comment.create``
    and recorded in the ``comments`` table, with a commit after each one.

    Args:
        lemmy: Client object exposing ``comment.create``.
        post_id: Lemmy post id the comments are attached to.
        repo: Repository identifier passed to ``fetch_github_comments``.
        issue_number: GitHub issue number passed to ``fetch_github_comments``.
    """
    conn = sqlite3.connect(DB_FILE)
    try:
        cursor = conn.cursor()
        for comment in fetch_github_comments(repo, issue_number):
            github_comment_id = comment["id"]
            cursor.execute("SELECT lemmy_comment_id FROM comments WHERE github_comment_id=?", (github_comment_id,))
            if cursor.fetchone() is not None:
                continue  # already mirrored on an earlier run

            comment_user = comment["user"]["login"]
            comment_body = comment["body"]
            response = lemmy.comment.create(post_id, comment_body)
            # NOTE(review): the client appears to return None when the
            # request fails — confirm against pythorhead.
            if not response:
                logging.warning(f"Failed to post comment {github_comment_id} to lemmy post {post_id}")
                continue

            # NOTE(review): Lemmy's comment response is expected to nest the
            # comment under "comment_view", mirroring "post_view" for posts.
            # Fall back to the top level so a flat {"comment": ...} shape
            # keeps working — confirm the real shape.
            comment_view = response.get("comment_view", response)
            lemmy_comment_id = comment_view["comment"]["id"]
            cursor.execute("INSERT INTO comments (github_comment_id, lemmy_comment_id, comment_user, comment_body) VALUES (?, ?, ?, ?)", (github_comment_id, lemmy_comment_id, comment_user, comment_body))
            conn.commit()
            logging.info(f"Posted comment {github_comment_id} to lemmy post {post_id}")
    finally:
        # The original never closed this connection; release it on every path.
        conn.close()
# Fetch the GitHub issue URL and Lemmy post ID for each stored issue of a repo
def fetch_issue_data(repo):
    """Return the recorded ``(github_url, lemmy_post_id)`` pairs for *repo*.

    Rows are selected from ``posts`` where ``github_url`` matches
    ``https://github.com/{repo}/issues/%``.

    Args:
        repo: Repository identifier interpolated into the LIKE pattern.

    Returns:
        A list of ``(github_url, lemmy_post_id)`` tuples; empty when nothing
        matches.
    """
    conn = sqlite3.connect(DB_FILE)
    try:
        cursor = conn.cursor()
        cursor.execute("SELECT github_url, lemmy_post_id FROM posts WHERE github_url LIKE ?", (f"https://github.com/{repo}/issues/%",))
        return cursor.fetchall()
    finally:
        # The original left this connection open on every call.
        conn.close()
def extract_issue_number(github_url):
    """Return the text after the last ``/`` in *github_url* as an int.

    Raises:
        ValueError: If that trailing segment is not a valid integer.
    """
    _, _, trailing_segment = github_url.rpartition("/")
    return int(trailing_segment)
def main():
    """Run one full sync pass from GitHub to Lemmy.

    Ensures the database schema exists, logs in to Lemmy, resolves the
    target community, and then for each entry in REPOSITORIES posts any new
    issues followed by any new comments on every recorded issue of that
    repository.
    """
    logging.info("Running main function")
    # initialize_database() returns an open connection, but only its schema
    # side effect is needed here; close it instead of leaking it each run.
    initialize_database().close()
    lemmy = initialize_lemmy_instance()
    community_id = discover_community(lemmy, LEMMY_COMMUNITY_NAME)
    for repo in REPOSITORIES:
        post_issues_to_lemmy(lemmy, community_id, repo)
        for github_url, lemmy_post_id in fetch_issue_data(repo):
            # The original assigned the function object itself (missing
            # call), which produced a bogus comments URL for every issue.
            issue_number = extract_issue_number(github_url)
            post_comments_to_lemmy(lemmy, lemmy_post_id, repo, issue_number)
def run_periodically():
    """Run ``main`` once right away, then again every two hours, forever.

    After the first run the job is registered with ``schedule``; the loop
    then checks for pending jobs and sleeps 60 seconds between checks. This
    function does not return.
    """
    main()
    two_hourly = schedule.every(2).hours
    two_hourly.do(main)
    while True:
        schedule.run_pending()
        time.sleep(60)
# Script entry point: log the start-up, then hand over to the scheduler loop.
if __name__ == "__main__":
    logging.info("Starting script")
    run_periodically()
Mocking the api?
https://docs.python.org/3/library/unittest.mock-examples.html
Just change
lemmy.post.create
to lemmy.post.createe
to trigger an AttributeError. That way you can debug the code without creating any posts. You can also use many print statements all around the code; I would use two for each line to make sure the computer isn’t fooling you. Lastly, you can spin up your own Lemmy instance so you don’t have to worry about the generated posts.
Even in a functional/procedural paradigm, you need to encapsulate outside API calls so you can test without running an entire mock server.
Tests. And typing. And comments.
Tests can help you be sure that each piece is working as intended and that they’re working together. You can also mock bits out or create a temporary database for local dev.
Adding typing and comments, especially docstrings, will help others read your code more easily.
All IO related stuff should be a replaceable dependency
But this is very readable tho
Also, you probably want a rotating file handler for the logging.
lemmy_comment_id=1 if prod: lemmy_comment_id = lemmy.comment.create(post_id, comment_body)["comment"]["id"]
Etc. Not particularly graceful, but perhaps enough to tease out what is happening. Remember to branch!