From 89a83ecd42cbc506180e57073e6f8afaafc7159d Mon Sep 17 00:00:00 2001 From: Elijah Ahianyo Date: Wed, 15 May 2024 06:19:46 +0000 Subject: [PATCH] [REF-2803] Add imports benchmarks (#3272) --- .github/workflows/integration_tests.yml | 24 +++- scripts/benchmarks/benchmark_imports.py | 160 ++++++++++++++++++++++++ 2 files changed, 182 insertions(+), 2 deletions(-) create mode 100644 scripts/benchmarks/benchmark_imports.py diff --git a/.github/workflows/integration_tests.yml b/.github/workflows/integration_tests.yml index b1b622cb2..432a7f946 100644 --- a/.github/workflows/integration_tests.yml +++ b/.github/workflows/integration_tests.yml @@ -35,6 +35,8 @@ env: jobs: example-counter: + env: + OUTPUT_FILE: import_benchmark.json timeout-minutes: 30 strategy: # Prioritize getting more information out of the workflow (even if something fails) @@ -98,13 +100,31 @@ jobs: npm -v poetry run bash scripts/integration.sh ./reflex-examples/counter dev - name: Measure and upload .web size - if: ${{ env.DATABASE_URL }} + if: ${{ env.DATABASE_URL && github.event.pull_request.merged == true }} run: poetry run python scripts/benchmarks/benchmark_reflex_size.py --os "${{ matrix.os }}" --python-version "${{ matrix.python-version }}" --commit-sha "${{ github.sha }}" --pr-id "${{ github.event.pull_request.id }}" --db-url "${{ env.DATABASE_URL }}" --branch-name "${{ github.head_ref || github.ref_name }}" --measurement-type "counter-app-dot-web" --path ./reflex-examples/counter/.web + - name: Install hyperfine + if: github.event.pull_request.merged == true + run: cargo install --locked hyperfine + - name: Benchmark imports + if: github.event.pull_request.merged == true + working-directory: ./reflex-examples/counter + run: hyperfine --warmup 3 "export POETRY_VIRTUALENVS_PATH=../../.venv; poetry run python counter/counter.py" --show-output --export-json "${{ env.OUTPUT_FILE }}" --shell bash + - name: Upload Benchmarks + if : ${{ env.DATABASE_URL && github.event.pull_request.merged == true }} 
"""Runs the benchmarks and inserts the results into the database."""

import argparse
import json
import os
from datetime import datetime


def extract_stats_from_json(json_file: str) -> dict:
    """Extracts the stats from the JSON data and returns them as dictionaries.

    Args:
        json_file: The JSON file to extract the stats data from.

    Returns:
        dict: The stats for each test.
    """
    with open(json_file, "r") as file:
        json_data = json.load(file)

    # Load the JSON data if it is a string, otherwise assume it's already a dictionary
    data = json.loads(json_data) if isinstance(json_data, str) else json_data

    # NOTE(review): assumes the hyperfine --export-json layout, i.e. a
    # top-level {"results": [...]}. The [{}] default means a file without
    # "results" yields an empty stats dict instead of raising.
    result = data.get("results", [{}])[0]
    return {
        k: v
        for k, v in result.items()
        if k in ("mean", "stddev", "median", "min", "max")
    }


def insert_benchmarking_data(
    db_connection_url: str,
    os_type_version: str,
    python_version: str,
    performance_data: dict,
    commit_sha: str,
    pr_title: str,
    branch_name: str,
    event_type: str,
    actor: str,
    pr_id: str,
):
    """Insert the benchmarking data into the database.

    Args:
        db_connection_url: The URL to connect to the database.
        os_type_version: The OS type and version to insert.
        python_version: The Python version to insert.
        performance_data: The imports performance data to insert.
        commit_sha: The commit SHA to insert.
        pr_title: The PR title to insert.
        branch_name: The name of the branch.
        event_type: Type of github event (push, pull request, etc.).
        actor: Username of the user that triggered the run.
        pr_id: ID of the PR.
    """
    # Imported lazily so that extract_stats_from_json() can be used in
    # environments where the Postgres driver is not installed.
    import psycopg2

    # Serialize the JSON data
    simple_app_performance_json = json.dumps(performance_data)
    # Get the current timestamp (local time, naive — matches the existing
    # rows written by the other benchmark scripts).
    current_timestamp = datetime.now()

    # Connect to the database and insert the data
    with psycopg2.connect(db_connection_url) as conn, conn.cursor() as cursor:
        insert_query = """
        INSERT INTO import_benchmarks (os, python_version, commit_sha, time, pr_title, branch_name, event_type, actor, performance, pr_id)
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
        """
        cursor.execute(
            insert_query,
            (
                os_type_version,
                python_version,
                commit_sha,
                current_timestamp,
                pr_title,
                branch_name,
                event_type,
                actor,
                simple_app_performance_json,
                pr_id,
            ),
        )
        # Commit the transaction (explicit; the psycopg2 connection context
        # manager would also commit on a clean exit).
        conn.commit()


def main():
    """Runs the benchmarks and inserts the results."""
    # Get the commit SHA and JSON directory from the command line arguments
    parser = argparse.ArgumentParser(description="Run benchmarks and process results.")
    parser.add_argument(
        "--os", help="The OS type and version to insert into the database."
    )
    parser.add_argument(
        "--python-version", help="The Python version to insert into the database."
    )
    parser.add_argument(
        "--commit-sha", help="The commit SHA to insert into the database."
    )
    parser.add_argument(
        "--benchmark-json",
        help="The JSON file containing the benchmark results.",
    )
    parser.add_argument(
        "--db-url",
        help="The URL to connect to the database.",
        required=True,
    )
    parser.add_argument(
        "--pr-title",
        help="The PR title to insert into the database.",
    )
    parser.add_argument(
        "--branch-name",
        help="The current branch",
        required=True,
    )
    parser.add_argument(
        "--event-type",
        help="The github event type",
        required=True,
    )
    parser.add_argument(
        "--actor",
        help="Username of the user that triggered the run.",
        required=True,
    )
    parser.add_argument(
        "--pr-id",
        help="ID of the PR.",
        required=True,
    )
    args = parser.parse_args()

    # Get the PR title from env or the args. For the PR merge or push event, there is no PR title, leaving it empty.
    pr_title = args.pr_title or os.getenv("PR_TITLE", "")

    cleaned_benchmark_results = extract_stats_from_json(args.benchmark_json)
    # Insert the data into the database
    insert_benchmarking_data(
        db_connection_url=args.db_url,
        os_type_version=args.os,
        python_version=args.python_version,
        performance_data=cleaned_benchmark_results,
        commit_sha=args.commit_sha,
        pr_title=pr_title,
        branch_name=args.branch_name,
        event_type=args.event_type,
        actor=args.actor,
        pr_id=args.pr_id,
    )


if __name__ == "__main__":
    main()