this post was submitted on 23 Sep 2023
4 points (70.0% liked)

Python

6331 readers
36 users here now

Welcome to the Python community on the programming.dev Lemmy instance!

📅 Events

Past

November 2023

October 2023

July 2023

August 2023

September 2023

๐Ÿ Python project:
๐Ÿ’“ Python Community:
โœจ Python Ecosystem:
๐ŸŒŒ Fediverse
Communities
Projects
Feeds

founded 1 year ago
MODERATORS
4
... (programming.dev)
submitted 1 year ago* (last edited 8 months ago) by [email protected] to c/[email protected]
 

The instance where I was using it changed its rules to prevent bots from posting in it, and I didn't care enough to search for another instance.

https://lemm.ee/c/issue_tracker?dataType=Post&page=1&sort=Active

config_template.py

# Base URL of the Lemmy instance the bot logs in to and posts on.
LEMMY_INSTANCE_URL = ""
# Name of the Lemmy community that receives the mirrored issues.
LEMMY_COMMUNITY_NAME = ""
# Credentials of the Lemmy account used for posting.
LEMMY_USERNAME = ""
LEMMY_PASSWORD = ""
# Root of the GitHub REST API; issue and comment request URLs are built from it.
GITHUB_API_BASE = "https://api.github.com"
# Root of the GitHub website; links back to individual issue comments are built from it.
GITHUB_URL_BASE = "https://github.com"
# Repositories ("owner/name") whose issues are mirrored.
REPOSITORIES = ["LemmyNet/lemmy", "LemmyNet/lemmy-ui"]
# Path of the SQLite database mapping GitHub issues/comments to Lemmy posts/comments.
DB_FILE = "lemmy_github.db"
# Minimum number of seconds between two consecutive GitHub API requests.
DELAY = 1
# Upper bound, in seconds, on how long a failing call keeps being retried by backoff.
MAX_BACKOFF_TIME = 300
# GitHub personal access token, sent as a Bearer token with every API request.
PERSONAL_ACCESS_TOKEN = ""

github_lemmy_issue_reposter.py

import backoff
import datetime
import logging
import requests
import schedule
import sqlite3
import time

from config import *
from pythorhead import Lemmy
from typing import Any, Dict, Generator, List, Optional, Tuple, Callable, TypeVar

# Return type of a function wrapped by the handle_errors decorator.
T = TypeVar('T')

# More detailed alternative log format, kept for reference when debugging:
# "[%(levelname)s]:%(asctime)s:%(name)s [%(filename)s:%(lineno)s - %(funcName)s()] %(message)s"
FORMAT = "%(message)s"
# Log INFO and above both to debug.log (opened with mode="w", so it is
# truncated on every start) and to a stream handler.
logging.basicConfig(
    level=logging.INFO,
    format=FORMAT,
    handlers=[logging.FileHandler("debug.log", mode="w"), logging.StreamHandler()],
)


def on_giveup(details: Dict[str, Any]) -> None:
    """Log that ``backoff`` has stopped retrying a decorated call.

    This handler is registered on every ``backoff.on_exception`` decorator in
    this module (fetching, posting, commenting and editing), so the message
    names the function that actually failed instead of always claiming that
    fetching issues failed.

    Args:
        details: Invocation details supplied by ``backoff``. ``tries`` is
            required; ``target`` (the decorated function) is used for the
            message when it is present.
    """
    target = details.get("target")
    target_name = getattr(target, "__name__", "call")
    logging.error(f"Failed to run {target_name} after {details['tries']} attempts", exc_info=True)


def handle_errors(message: Optional[str] = None) -> Callable[[Callable[..., T]], Callable[..., T]]:
    """Build a decorator that logs any exception raised by the wrapped function.

    The exception is logged together with its traceback and then re-raised
    unchanged, so callers still see the original error.

    Args:
        message: Optional context prepended to the log entry.

    Returns:
        A decorator that preserves the wrapped function's name and docstring.
    """
    # Imported locally so the block does not depend on a file-level import.
    import functools

    def decorator(function: Callable[..., T]) -> Callable[..., T]:
        @functools.wraps(function)
        def wrapper(*args: Any, **kwargs: Any) -> T:
            try:
                return function(*args, **kwargs)
            except Exception as e:
                prefix = f"{message} - " if message else ""
                logging.exception(f"{prefix}Error in {function.__name__}:\n{e}")
                raise

        return wrapper

    return decorator


class GitHubIssue:
    """A GitHub issue reduced to the fields needed to mirror it on Lemmy.

    The title is prefixed with ``[Closed]`` for closed issues and with
    ``[UI]`` or ``[BE]`` depending on whether the repository name contains
    ``lemmy-ui``; it is cut to 200 characters and the body to 30000.
    """

    def __init__(self, issue_dict: dict[str, Any], github_repo: str) -> None:
        """Populate the issue from a GitHub API issue dictionary.

        Args:
            issue_dict: Issue payload; the keys ``html_url``, ``state``,
                ``title``, ``number``, ``body``, ``user`` and ``updated_at``
                are read.
            github_repo: Repository the issue belongs to ("owner/name").

        Raises:
            Exception: The original error (e.g. ``KeyError``) raised while
                reading ``issue_dict``, after the partially built issue has
                been logged.
        """
        try:
            self.url = issue_dict["html_url"]
            logging.info(f"Creating issue {self.url}")
            self.state = issue_dict["state"]
            self.state_fmt = "[Closed]" if issue_dict["state"] == "closed" else ""
            self.repo_abbr = "[UI]" if "lemmy-ui" in github_repo else "[BE]"
            self.title = f"{self.state_fmt}{self.repo_abbr} {issue_dict['title']} #{issue_dict['number']}"
            self.title = self.title[:200]
            self.body = issue_dict["body"]
            if self.body is not None:
                self.body = self.body[:30000]
            self.user = issue_dict["user"]["login"]
            self.user_url = issue_dict["user"]["html_url"]
            self.updated_at = datetime.datetime.strptime(issue_dict["updated_at"], '%Y-%m-%dT%H:%M:%SZ')
        except Exception:
            # Only the attributes assigned before the failure exist. Reading
            # them directly would raise AttributeError and hide the real
            # error, so fall back to None for the missing ones.
            partial = {
                name: getattr(self, name, None)
                for name in ("state", "repo_abbr", "title", "url", "user", "user_url", "updated_at")
            }
            log_message: str = (
                f"Formatted issue:\n"
                f"  - Repo: {github_repo}\n"
                f"  - Issue State: {partial['state']}\n"
                f"  - Repo Abbreviation: {partial['repo_abbr']}\n"
                f"  - Title: {partial['title']}\n"
                f"  - URL: {partial['url']}\n"
                f"  - User: {partial['user']}\n"
                f"  - User URL: {partial['user_url']}\n"
                f"  - Updated At: {partial['updated_at']}\n"
            )
            logging.exception(log_message)
            # A half-initialised issue cannot be posted or stored, so let the
            # original error reach the caller.
            raise

    @property
    def formatted_body(self) -> Optional[str]:
        """Return the body as a Markdown quote followed by an attribution line.

        Returns ``None`` when the issue has no body. If formatting fails the
        error is logged and the unformatted body is returned.
        """
        formatted_body: Optional[str] = self.body
        try:
            if self.body is not None:
                formatted_body = self.body.replace("\n", "\n> ")
                formatted_body = f"> {formatted_body}\n> \n> *Originally posted by [{self.user}]({self.user_url}) in [#{self.number}]({self.url})*"
        except Exception as e:
            logging.exception(f"Error formatting body for {self.url}\n{e}")
        return formatted_body

    @property
    def number(self) -> int:
        """Return the issue number parsed from the last segment of the issue URL."""
        return int(self.url.split("/")[-1])


class GitHubComment:
    """A GitHub issue comment reduced to the fields needed to mirror it on Lemmy."""

    def __init__(self, comment_dict: dict[str, Any], issue_number: int) -> None:
        """Copy id, body, author and URL out of a GitHub API comment dictionary.

        Args:
            comment_dict: Comment payload; the keys ``id``, ``body``, ``user``
                and ``html_url`` are read.
            issue_number: Number of the issue the comment belongs to.
        """
        self.id = comment_dict["id"]
        self.body = comment_dict["body"]
        author = comment_dict["user"]
        self.user = author["login"]
        self.user_url = author["html_url"]
        self.url = comment_dict["html_url"]
        self.issue_number = issue_number

    @property
    def formatted_comment(self) -> str:
        """Return the body as a Markdown quote followed by an attribution line."""
        # Joining the lines with "\n> " puts a quote marker on every line after the first.
        quoted = "\n> ".join(self.body.split("\n"))
        attribution = f"*Originally posted by [{self.user}]({self.user_url}) in [#{self.issue_number}]({self.url})*"
        return f"> {quoted}\n> \n> {attribution}"


@handle_errors("Error initializing database")
def initialize_database() -> sqlite3.Connection:
    """Create the ``posts``, ``comments`` and ``last_updated`` tables if missing.

    Returns:
        The open connection to ``DB_FILE``; the caller is responsible for
        closing it.

    Raises:
        Exception: Any error raised by SQLite, after ``handle_errors`` has
            logged it.
    """
    logging.info("Initializing database")
    conn: sqlite3.Connection = sqlite3.connect(DB_FILE)
    cursor: sqlite3.Cursor = conn.cursor()
    # NOTE(review): issue_number is the primary key, yet REPOSITORIES may list
    # several repositories whose issue numbers overlap - confirm whether the
    # key should also include the repository.
    cursor.execute(
        """
        CREATE TABLE IF NOT EXISTS posts (
            issue_number INTEGER PRIMARY KEY,
            lemmy_post_id INTEGER NOT NULL UNIQUE,
            issue_title TEXT,
            issue_body TEXT,
            updated_at TIMESTAMP DEFAULT NULL
        )
    """
    )
    # The comma after "comment_body TEXT" matters: without it SQLite folds
    # "updated_at TIMESTAMP" into the type name of comment_body and the
    # updated_at column is never created. Because of IF NOT EXISTS, databases
    # created before this fix keep their old schema.
    cursor.execute(
        """
        CREATE TABLE IF NOT EXISTS comments (
            github_comment_id INTEGER PRIMARY KEY,
            lemmy_comment_id INTEGER NOT NULL UNIQUE,
            comment_user TEXT,
            comment_body TEXT,
            updated_at TIMESTAMP DEFAULT NULL
        )
    """
    )
    cursor.execute(
    """
    CREATE TABLE IF NOT EXISTS last_updated (
        id INTEGER PRIMARY KEY,
        last_updated_time TIMESTAMP
    );
    """
    )
    conn.commit()
    return conn


def get_last_updated_time() -> str:
    """Return the timestamp stored by the previous synchronisation run.

    On a fresh database the ``last_updated`` table has no row yet. In that
    case the Unix epoch is returned, so that a caller using the value as a
    lower bound selects everything, instead of failing on the missing row.

    Returns:
        The stored ``last_updated_time`` value for row ``id = 1``, or
        ``"1970-01-01T00:00:00Z"`` when nothing has been stored.
    """
    conn = sqlite3.connect(DB_FILE)
    try:
        row = conn.execute("SELECT last_updated_time FROM last_updated WHERE id = 1").fetchone()
    finally:
        # Close the connection even when the query fails.
        conn.close()

    if row is None or row[0] is None:
        return "1970-01-01T00:00:00Z"
    last_updated_time: str = row[0]
    return last_updated_time


def update_last_updated_time() -> None:
    """Store the current UTC time as the last synchronisation time (row ``id = 1``)."""
    connection = sqlite3.connect(DB_FILE)
    db_cursor = connection.cursor()
    now_iso = datetime.datetime.utcnow().isoformat()

    # Try to update the single bookkeeping row; create it if it does not exist yet.
    db_cursor.execute("UPDATE last_updated SET last_updated_time = ? WHERE id = 1", (now_iso,))
    row_missing = db_cursor.rowcount == 0
    if row_missing:
        db_cursor.execute("INSERT INTO last_updated (id, last_updated_time) VALUES (1, ?)", (now_iso,))

    connection.commit()
    connection.close()
    logging.info("Updated last updated time")


def update_post_time(post_id: int, updated_at: datetime.datetime) -> None:
    """Record ``updated_at`` for the post whose Lemmy id is ``post_id``.

    The timestamp is stored as text in ``YYYY-MM-DD HH:MM:SS`` form.
    """
    connection: sqlite3.Connection = sqlite3.connect(DB_FILE)
    db_cursor: sqlite3.Cursor = connection.cursor()
    db_cursor.execute(
        "UPDATE posts SET updated_at = ? WHERE lemmy_post_id = ?",
        (updated_at.strftime('%Y-%m-%d %H:%M:%S'), post_id),
    )
    connection.commit()
    connection.close()


def check_updated_at(issue_number: int) -> Optional[Tuple[int, str, str, Optional[str]]]:
    """Look up the stored Lemmy post for a GitHub issue number.

    Returns:
        ``(lemmy_post_id, issue_title, issue_body, updated_at)`` for the
        issue, or ``None`` when no post has been recorded for it.
    """
    logging.info(f"Checking last post update for {issue_number}")
    connection: sqlite3.Connection = sqlite3.connect(DB_FILE)
    db_cursor: sqlite3.Cursor = connection.cursor()
    db_cursor.execute(
        "SELECT lemmy_post_id, issue_title, issue_body, updated_at FROM posts WHERE issue_number = ?",
        (issue_number,),
    )
    row: Tuple[int, str, str, Optional[str]] = db_cursor.fetchone()
    connection.close()

    if row is not None:
        logging.info(f"Found post for {issue_number}")
        return row

    logging.info(f"No post found for {issue_number}")
    return None


@handle_errors("Error initializing Lemmy instance")
def initialize_lemmy_instance() -> Lemmy:
    """Create a ``Lemmy`` client for ``LEMMY_INSTANCE_URL`` and log in with the configured account.

    Returns:
        The client after ``log_in`` has been called on it.
    """
    logging.info("Initializing Lemmy instance")
    client = Lemmy(LEMMY_INSTANCE_URL)
    logging.info(f"Initialized Lemmy instance in {LEMMY_INSTANCE_URL}")
    # NOTE(review): the return value of log_in is not inspected here.
    client.log_in(LEMMY_USERNAME, LEMMY_PASSWORD)
    logging.info(f"Logged in to Lemmy instance with user {LEMMY_USERNAME}")
    return client


@backoff.on_exception(
    backoff.expo,
    (requests.exceptions.RequestException, TypeError),
    max_time=MAX_BACKOFF_TIME,
    on_giveup=on_giveup,
)
def fetch_github_data(url: str) -> List[Dict[str, Any]]:
    """GET ``url`` from the GitHub API and return the decoded JSON payload.

    Consecutive requests are spaced at least ``DELAY`` seconds apart. Request
    errors and ``TypeError`` are retried with exponential backoff for up to
    ``MAX_BACKOFF_TIME`` seconds.

    Args:
        url: Fully built GitHub API URL.

    Returns:
        The JSON-decoded response body.

    Raises:
        requests.exceptions.RequestException: Re-raised after logging so that
            the backoff decorator can retry.
    """
    global LAST_REQUEST_TIME
    request_timeout = 30  # seconds; without a timeout a stalled connection would block forever
    try:
        headers = {
            "Accept": "application/vnd.github+json",
            "Authorization": f"Bearer {PERSONAL_ACCESS_TOKEN}",
            "X-GitHub-Api-Version": "2022-11-28",
        }
        # LAST_REQUEST_TIME is only ever assigned below, so it does not exist
        # before the first request; treat that case as "no previous request".
        previous_request_time = globals().get("LAST_REQUEST_TIME", 0.0)
        time_elapsed = time.time() - previous_request_time
        required_delay = max(0, DELAY - time_elapsed)
        time.sleep(required_delay)
        response = requests.get(url, headers=headers, timeout=request_timeout)
        LAST_REQUEST_TIME = time.time()
        logging.info(f"Fetched data from {url}")
        res: List[Dict[str, Any]] = response.json()
        return res
    except requests.exceptions.RequestException as e:
        logging.exception(f"Error fetching data from {url}\n{e}")
        raise

def check_existing_post(issue_number: str) -> Optional[int]:
    """Return the Lemmy post id stored for ``issue_number``, or ``None`` if there is none."""
    conn: sqlite3.Connection = sqlite3.connect(DB_FILE)
    try:
        SQL = "SELECT lemmy_post_id FROM posts WHERE issue_number=?"
        post_id: Optional[tuple[int]] = conn.execute(SQL, (issue_number,)).fetchone()
    finally:
        # The connection was previously left open on every call.
        conn.close()
    if post_id:
        return post_id[0]
    return None


def insert_post_to_db(issue: GitHubIssue, lemmy_post_id: Optional[int]) -> None:
    """Store the mapping from a GitHub issue to its newly created Lemmy post.

    Args:
        issue: Issue that was posted.
        lemmy_post_id: Id of the Lemmy post; the column is NOT NULL, so
            ``None`` is rejected by SQLite.

    Raises:
        sqlite3.Error: Re-raised after logging when the insert fails.
    """
    conn: Optional[sqlite3.Connection] = None
    try:
        conn = sqlite3.connect(DB_FILE)
        cursor: sqlite3.Cursor = conn.cursor()
        SQL = "INSERT INTO posts (issue_number, lemmy_post_id, issue_title, issue_body, updated_at) VALUES (?, ?, ?, ?, ?)"
        # Format the timestamp explicitly, in the same text form that
        # update_post_time writes and has_enough_time_passed parses, rather
        # than relying on sqlite3's implicit datetime adapter.
        updated_at = issue.updated_at.strftime('%Y-%m-%d %H:%M:%S')
        cursor.execute(SQL, (issue.number, lemmy_post_id, issue.title, issue.formatted_body, updated_at))
        conn.commit()
        logging.info(f"Inserted new Lemmy post {lemmy_post_id} into the database")
    except sqlite3.Error as e:
        logging.exception(f"Error inserting post into the database for issue {issue.title} with url {issue.url}\n{e}")
        raise
    finally:
        # The connection was previously never closed.
        if conn is not None:
            conn.close()


def insert_comment_to_database(cursor: sqlite3.Cursor, github_comment_id: int, lemmy_comment_id: int, comment: GitHubComment) -> None:
    """Record the mapping from a GitHub comment to its Lemmy comment.

    The insert runs on the caller's cursor and is not committed here. Any
    error is logged and swallowed, so the function never raises.
    """
    insert_sql = "INSERT INTO comments (github_comment_id, lemmy_comment_id, comment_user, comment_body) VALUES (?, ?, ?, ?)"
    try:
        row = (github_comment_id, lemmy_comment_id, comment.user, comment.formatted_comment)
        cursor.execute(insert_sql, row)
        logging.info(f"Inserted comment {github_comment_id} into the database")
    except Exception as error:
        logging.exception(f"Error encountered while inserting comment {github_comment_id} to database\n{error}")


@backoff.on_exception(
    backoff.expo,
    (requests.exceptions.RequestException, TypeError),
    max_time=MAX_BACKOFF_TIME,
    on_giveup=on_giveup,
)
def create_lemmy_post(lemmy: Any, community_id: int, issue: GitHubIssue) -> Optional[int]:
    """Create a Lemmy post for ``issue`` and return the new post's id.

    Request errors and ``TypeError`` are retried with exponential backoff
    (presumably the latter covers a failed call whose result cannot be
    subscripted - verify against the Lemmy client).
    """
    response = lemmy.post.create(community_id, issue.title, url=issue.url, body=issue.body)
    new_post_id: Optional[int] = response["post_view"]["post"]["id"]
    logging.info(f"Posted issue {LEMMY_INSTANCE_URL}/post/{new_post_id}")

    return new_post_id


@backoff.on_exception(
    backoff.expo,
    (requests.exceptions.RequestException, TypeError),
    max_time=MAX_BACKOFF_TIME,
    on_giveup=on_giveup,
)
def create_lemmy_comment(lemmy: Any, post_id: Optional[int], comment: GitHubComment) -> Optional[int]:
    """Post ``comment`` under the Lemmy post ``post_id``.

    Returns:
        The id of the created Lemmy comment, or ``None`` when ``post_id`` is
        empty and nothing was posted.
    """
    logging.info(f"Creating new Lemmy comment in {LEMMY_INSTANCE_URL}/post/{post_id}")

    if post_id:
        result = lemmy.comment.create(post_id, comment.formatted_comment)
        created_id: int = result["comment_view"]["comment"]["id"]
        logging.info(f"Successfully created Lemmy comment {LEMMY_INSTANCE_URL}/comment/{created_id}")
        return created_id

    logging.warning("Post ID is empty. Skipping comment creation")
    return None


def get_total_issues(github_repo: str) -> int:
    """Return the ``open_issues_count`` field of the repository's API record.

    Args:
        github_repo: Repository in "owner/name" form.
    """
    # Build the URL from GITHUB_API_BASE like the other request helpers do,
    # rather than hard-coding the host.
    url: str = f"{GITHUB_API_BASE}/repos/{github_repo}"
    # The value is indexed by key below, so it is treated as a single JSON object.
    data: Any = fetch_github_data(url)
    total_issues: int = data["open_issues_count"]
    return total_issues


def fetch_issues(github_repo: str, last_updated_time: str) -> Generator[Dict[str, Any], None, None]:
    """Yield every issue of ``github_repo`` updated since ``last_updated_time``.

    Issues in any state are requested 100 per page; pages are fetched one
    after another until an empty page is returned.
    """
    per_page = 100
    issues_url = (f"{GITHUB_API_BASE}/repos/{github_repo}/issues?state=all&since={last_updated_time}&per_page={per_page}")

    page = 1
    while True:
        issues: List[Dict[str, Any]] = fetch_github_data(f"{issues_url}&page={page}")

        # An empty page means everything has been consumed.
        if not issues:
            return

        yield from issues
        page += 1

@backoff.on_exception(
    backoff.expo,
    (requests.exceptions.RequestException, TypeError),
    max_time=MAX_BACKOFF_TIME,
    on_giveup=on_giveup,
)
def edit_lemmy_post(lemmy: Any, lemmy_post_id: int, issue: GitHubIssue) -> None:
    """Overwrite the name, URL and body of an existing Lemmy post with the issue's current values."""
    updated_fields = {"name": issue.title, "url": issue.url, "body": issue.body}
    lemmy.post.edit(lemmy_post_id, **updated_fields)


def process_issues(lemmy: Any, community_id: int, github_repo: str) -> None:
    """Mirror every issue of ``github_repo`` updated since the last stored run time.

    The stored run time is read first and then immediately advanced to "now",
    before any issue is fetched.
    """
    # NOTE(review): the timestamp is a single row shared by all repositories
    # and is advanced before fetching, so a repository processed later in the
    # same run appears to query only from the moment the previous one started
    # - confirm whether a per-repository timestamp is intended.
    since = get_last_updated_time()
    update_last_updated_time()
    for raw_issue in fetch_issues(github_repo, since):
        process_issue(lemmy, community_id, github_repo, raw_issue)


def process_issue(lemmy: Any, community_id: int, github_repo: str, issue_dict: dict[str, Any]) -> None:
    """Create or refresh the Lemmy post that mirrors one GitHub issue.

    An issue without a stored post gets a new post. An issue with a stored
    post is refreshed (post edit, comments, stored timestamp) only when no
    timestamp is stored or ``has_enough_time_passed`` says so.
    """
    issue: GitHubIssue = GitHubIssue(issue_dict, github_repo)
    stored: Optional[Tuple[int, str, str, Optional[str]]] = check_updated_at(issue.number)

    if stored is None:
        create_new_lemmy_post(lemmy, community_id, github_repo, issue)
        return

    lemmy_post_id, existing_title, existing_body, updated_at = stored
    # Skip issues whose stored timestamp is still too recent.
    if updated_at is not None and not has_enough_time_passed(updated_at, issue.updated_at):
        return

    update_issue_if_needed(lemmy, lemmy_post_id, existing_title, existing_body, issue)
    process_comments(lemmy, lemmy_post_id, github_repo, issue)
    update_post_time(lemmy_post_id, issue.updated_at)


def has_enough_time_passed(old_updated_at_str: str, new_updated_at: datetime.datetime) -> bool:
    """Tell whether ``new_updated_at`` is at least two hours after the stored timestamp.

    Args:
        old_updated_at_str: Stored timestamp in ``YYYY-MM-DD HH:MM:SS`` form.
        new_updated_at: Timestamp to compare against it.
    """
    minimum_gap = datetime.timedelta(hours=2)
    previous = datetime.datetime.strptime(old_updated_at_str, '%Y-%m-%d %H:%M:%S')
    return new_updated_at - previous >= minimum_gap


def update_issue_if_needed(lemmy: Any, lemmy_post_id: int, existing_title: str, existing_body: str, issue: GitHubIssue) -> None:
    """Edit the Lemmy post when the issue's title or formatted body differs from the stored copy."""
    unchanged = existing_title == issue.title and existing_body == issue.formatted_body
    if unchanged:
        return
    edit_lemmy_post(lemmy, lemmy_post_id, issue)


def create_new_lemmy_post(lemmy: Any, community_id: int, github_repo: str, issue: GitHubIssue) -> None:
    """Post a not-yet-mirrored issue to Lemmy, record it, and mirror its comments.

    When the post could not be created, nothing is recorded and no comments
    are posted; the failure has already been logged by ``post_issue_to_lemmy``.
    """
    lemmy_post_id: Optional[int] = post_issue_to_lemmy(lemmy, community_id, issue)
    if lemmy_post_id is None:
        # Inserting None would violate the NOT NULL constraint on
        # posts.lemmy_post_id, and the resulting error would abort the rest of
        # the repository run; skip this issue instead.
        logging.warning(f"Skipping database insert and comments for issue {issue.url}: Lemmy post was not created")
        return
    insert_post_to_db(issue, lemmy_post_id)
    process_comments(lemmy, lemmy_post_id, github_repo, issue)


def post_issue_to_lemmy(lemmy: Any, community_id: int, issue: GitHubIssue) -> Optional[int]:
    """Create the Lemmy post for ``issue`` without ever raising.

    Returns:
        The new post id, or ``None`` when creation failed (the error is logged).
    """
    try:
        logging.info(f"Start posting issue {issue.title} to community {community_id}")
        return create_lemmy_post(lemmy, community_id, issue)
    except Exception as error:
        logging.exception(f"Error posting issue {issue.title} to community {community_id}\n{error}")
    return None


def process_comments(lemmy: Any, post_id: Optional[int], github_repo: str, issue: GitHubIssue) -> None:
    """Mirror all comments of a GitHub issue to its Lemmy post.

    Comments are requested 100 per page and every page is processed, so issues
    with more comments than fit on one page are mirrored completely. Any error
    is logged and swallowed, so the function never raises.
    """
    per_page = 100
    try:
        logging.info(f"Posting comments from issue #{issue.number} to Lemmy post {LEMMY_INSTANCE_URL}/post/{post_id}")
        comments_url: str = f"{GITHUB_API_BASE}/repos/{github_repo}/issues/{issue.number}/comments"
        page = 1
        while True:
            comments: Any = fetch_github_data(f"{comments_url}?per_page={per_page}&page={page}")
            if not comments:
                break
            for comment_data in comments:
                # Iterating a JSON object (rather than a list of comments)
                # yields its string keys; skip those.
                if isinstance(comment_data, str):
                    logging.warning(f"Skipping comment {comment_data}")
                    continue
                process_comment(lemmy, github_repo, comment_data, post_id, issue.number)
            # A short page is the last one, and a non-list payload has no next
            # page; stopping here also avoids one extra request per issue.
            if not isinstance(comments, list) or len(comments) < per_page:
                break
            page += 1
    except Exception as e:
        logging.exception(f"Error posting comments to lemmy post {post_id}\n{e}")

def process_comment(lemmy: Any, github_repo: str, comment_data: Dict[str, Any], post_id: Optional[int], issue_number: int) -> None:
    """Mirror one GitHub comment to Lemmy unless it has been mirrored already.

    Opens its own database connection, commits after a comment was posted,
    and always closes the connection again.
    """
    conn: sqlite3.Connection = sqlite3.connect(DB_FILE)
    try:
        cursor: sqlite3.Cursor = conn.cursor()
        comment = GitHubComment(comment_data, issue_number)

        existing_comment_id: Optional[int] = get_existing_comment_id(cursor, comment.id)
        if existing_comment_id:
            logging.info(f"Skipping existing comment with GitHub comment ID: {comment.id}")
            return

        post_comment_to_lemmy(cursor, lemmy, github_repo, comment, post_id, issue_number)
        conn.commit()
    finally:
        # Previously the connection stayed open, including on the early return.
        conn.close()


def post_comment_to_lemmy(cursor: sqlite3.Cursor, lemmy: Any, github_repo: str, comment: GitHubComment, post_id: Optional[int], issue_number: int) -> None:
    """Create the Lemmy comment for ``comment`` and record the mapping via ``cursor``.

    When no Lemmy comment id comes back, the failure is logged and nothing is
    recorded.
    """
    lemmy_post_url = f"{LEMMY_INSTANCE_URL}/post/{post_id}"
    comment_url = f"{GITHUB_URL_BASE}/{github_repo}/issues/{issue_number}#issuecomment-{comment.id}"
    logging.info(f"Posting comment {comment.url} to Lemmy post {lemmy_post_url}")
    lemmy_comment_id: Optional[int] = create_lemmy_comment(lemmy, post_id, comment)

    if not lemmy_comment_id:
        # No exception is being handled here, so logging.exception would only
        # append an empty traceback; logging.error is the matching call.
        logging.error(f"Error creating Lemmy comment {lemmy_comment_id} to {lemmy_post_url} from Github comment {comment.url}")
        return

    logging.info(f"Posted comment {comment_url} to Lemmy post {lemmy_post_url}")
    insert_comment_to_database(cursor, comment.id, lemmy_comment_id, comment)


def get_existing_comment_id(cursor: sqlite3.Cursor, github_comment_id: int) -> Optional[int]:
    """Return the Lemmy comment id recorded for a GitHub comment, or ``None`` if there is none."""
    logging.info(f"Checking if comment with GitHub comment ID: {github_comment_id} exists")
    cursor.execute("SELECT lemmy_comment_id FROM comments WHERE github_comment_id=?", (github_comment_id,))
    row = cursor.fetchone()

    if row is None:
        logging.info(f"No existing comment found with GitHub comment ID: {github_comment_id}")
        return None

    logging.info(f"Found existing comment with GitHub comment ID: {github_comment_id}")
    lemmy_comment_id: int = row[0]
    return lemmy_comment_id


def fetch_issue_data(github_repo: str) -> List[Tuple[str, Optional[int]]]:
    """Return ``(issue_url, lemmy_post_id)`` pairs for the stored posts of ``github_repo``.

    NOTE(review): the ``posts`` table created by ``initialize_database`` has no
    ``issue_url`` column, so this query looks like it fails against that
    schema - confirm which schema this function targets before using it.
    """
    logging.info("Fetching the GitHub issue number and Lemmy post ID for all issues")
    conn: sqlite3.Connection = sqlite3.connect(DB_FILE)
    try:
        cursor: sqlite3.Cursor = conn.cursor()
        SQL = "SELECT issue_url, lemmy_post_id FROM posts WHERE issue_url LIKE ?"
        issues_url = f"https://github.com/{github_repo}/issues/%"
        issue_data = cursor.execute(SQL, (issues_url,)).fetchall()
    finally:
        # Previously the connection was never closed.
        conn.close()
    logging.info(f"Fetched {len(issue_data)} issues")
    return issue_data


def process_repo(lemmy: Any, community_id: int, github_repo: str) -> None:
    """Mirror one repository's issues, logging instead of raising on any failure."""
    try:
        logging.info(f"Processing repository {github_repo}")
        process_issues(lemmy, community_id, github_repo)
    except Exception as error:
        # One failing repository does not raise to the caller.
        logging.exception(f"Error occurred while processing repository {github_repo}\n{error}")


def main() -> None:
    """Run one full synchronisation pass over every configured repository.

    Makes sure the database tables exist, logs in to Lemmy, resolves the
    target community and then processes each entry of ``REPOSITORIES``.
    """
    logging.info("Running main function")
    # initialize_database() returns an open connection that nothing else uses
    # (the helpers open their own). Close it, since main() runs on every
    # scheduled pass.
    initialize_database().close()
    lemmy = initialize_lemmy_instance()
    community_id = lemmy.discover_community(LEMMY_COMMUNITY_NAME)

    for github_repo in REPOSITORIES:
        process_repo(lemmy, community_id, github_repo)


def run_periodically() -> None:
    """Schedule ``main`` to run every hour and keep the scheduler running.

    Pending jobs are checked once a minute; errors raised while running them
    are logged and the loop carries on.
    """
    logging.info("Starting periodic run")
    hourly = schedule.every(1).hours
    hourly.do(main)

    while True:
        try:
            schedule.run_pending()
        except Exception as error:
            logging.exception(f"Error occurred during scheduling\n{error}")
        time.sleep(60)


# Script entry point: run one synchronisation pass immediately, then hand over
# to the hourly scheduler.
if __name__ == "__main__":
    try:
        logging.info("Starting script")
        main()
        # Loops indefinitely; control only comes back if it raises.
        run_periodically()
    except Exception as e:
        # Last-resort handler: log the failure with its traceback.
        logging.exception(f"Error occurred during script execution\n{e}")

requirements.txt

pythorhead==0.12.3
schedule==1.2.0
backoff==2.2.1
feedparser==6.0.10
top 1 comments
sorted by: hot top controversial new old
[–] [email protected] 0 points 1 year ago

Hi there! Looks like you linked to a Lemmy community using a URL instead of its name, which doesn't work well for people on different instances. Try fixing it like this: [email protected]