From 89a83ecd42cbc506180e57073e6f8afaafc7159d Mon Sep 17 00:00:00 2001 From: Elijah Ahianyo Date: Wed, 15 May 2024 06:19:46 +0000 Subject: [PATCH] [REF-2803] Add imports benchmarks (#3272) --- .github/workflows/integration_tests.yml | 24 +++- scripts/benchmarks/benchmark_imports.py | 160 ++++++++++++++++++++++++ 2 files changed, 182 insertions(+), 2 deletions(-) create mode 100644 scripts/benchmarks/benchmark_imports.py diff --git a/.github/workflows/integration_tests.yml b/.github/workflows/integration_tests.yml index b1b622cb2..432a7f946 100644 --- a/.github/workflows/integration_tests.yml +++ b/.github/workflows/integration_tests.yml @@ -35,6 +35,8 @@ env: jobs: example-counter: + env: + OUTPUT_FILE: import_benchmark.json timeout-minutes: 30 strategy: # Prioritize getting more information out of the workflow (even if something fails) @@ -98,13 +100,31 @@ jobs: npm -v poetry run bash scripts/integration.sh ./reflex-examples/counter dev - name: Measure and upload .web size - if: ${{ env.DATABASE_URL }} + if: ${{ env.DATABASE_URL && github.event.pull_request.merged == true }} run: poetry run python scripts/benchmarks/benchmark_reflex_size.py --os "${{ matrix.os }}" --python-version "${{ matrix.python-version }}" --commit-sha "${{ github.sha }}" --pr-id "${{ github.event.pull_request.id }}" --db-url "${{ env.DATABASE_URL }}" --branch-name "${{ github.head_ref || github.ref_name }}" --measurement-type "counter-app-dot-web" --path ./reflex-examples/counter/.web + - name: Install hyperfine + if: github.event.pull_request.merged == true + run: cargo install --locked hyperfine + - name: Benchmark imports + if: github.event.pull_request.merged == true + working-directory: ./reflex-examples/counter + run: hyperfine --warmup 3 "export POETRY_VIRTUALENVS_PATH=../../.venv; poetry run python counter/counter.py" --show-output --export-json "${{ env.OUTPUT_FILE }}" --shell bash + - name: Upload Benchmarks + if : ${{ env.DATABASE_URL && github.event.pull_request.merged == true }} 
"""Runs the benchmarks and inserts the results into the database."""

import argparse
import json
import os
from datetime import datetime


def extract_stats_from_json(json_file: str) -> dict:
    """Extracts the stats from the JSON data and returns them as dictionaries.

    Args:
        json_file: The JSON file to extract the stats data from.

    Returns:
        dict: The stats for each test.
    """
    with open(json_file, "r") as file:
        json_data = json.load(file)

    # Load the JSON data if it is a string, otherwise assume it's already a dictionary
    data = json.loads(json_data) if isinstance(json_data, str) else json_data

    # NOTE(review): assumes the hyperfine --export-json layout, i.e. a
    # top-level {"results": [...]}. The [{}] default means a file without
    # "results" yields an empty stats dict instead of raising.
    result = data.get("results", [{}])[0]
    return {
        k: v
        for k, v in result.items()
        if k in ("mean", "stddev", "median", "min", "max")
    }


def insert_benchmarking_data(
    db_connection_url: str,
    os_type_version: str,
    python_version: str,
    performance_data: dict,
    commit_sha: str,
    pr_title: str,
    branch_name: str,
    event_type: str,
    actor: str,
    pr_id: str,
):
    """Insert the benchmarking data into the database.

    Args:
        db_connection_url: The URL to connect to the database.
        os_type_version: The OS type and version to insert.
        python_version: The Python version to insert.
        performance_data: The imports performance data to insert.
        commit_sha: The commit SHA to insert.
        pr_title: The PR title to insert.
        branch_name: The name of the branch.
        event_type: Type of github event (push, pull request, etc.).
        actor: Username of the user that triggered the run.
        pr_id: ID of the PR.
    """
    # Imported lazily so that extract_stats_from_json() can be used in
    # environments where the Postgres driver is not installed.
    import psycopg2

    # Serialize the JSON data
    simple_app_performance_json = json.dumps(performance_data)
    # Get the current timestamp (local time, naive — matches the existing
    # rows written by the other benchmark scripts).
    current_timestamp = datetime.now()

    # Connect to the database and insert the data
    with psycopg2.connect(db_connection_url) as conn, conn.cursor() as cursor:
        insert_query = """
        INSERT INTO import_benchmarks (os, python_version, commit_sha, time, pr_title, branch_name, event_type, actor, performance, pr_id)
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
        """
        cursor.execute(
            insert_query,
            (
                os_type_version,
                python_version,
                commit_sha,
                current_timestamp,
                pr_title,
                branch_name,
                event_type,
                actor,
                simple_app_performance_json,
                pr_id,
            ),
        )
        # Commit the transaction (explicit; the psycopg2 connection context
        # manager would also commit on a clean exit).
        conn.commit()


def main():
    """Runs the benchmarks and inserts the results."""
    # Get the commit SHA and JSON directory from the command line arguments
    parser = argparse.ArgumentParser(description="Run benchmarks and process results.")
    parser.add_argument(
        "--os", help="The OS type and version to insert into the database."
    )
    parser.add_argument(
        "--python-version", help="The Python version to insert into the database."
    )
    parser.add_argument(
        "--commit-sha", help="The commit SHA to insert into the database."
    )
    parser.add_argument(
        "--benchmark-json",
        help="The JSON file containing the benchmark results.",
    )
    parser.add_argument(
        "--db-url",
        help="The URL to connect to the database.",
        required=True,
    )
    parser.add_argument(
        "--pr-title",
        help="The PR title to insert into the database.",
    )
    parser.add_argument(
        "--branch-name",
        help="The current branch",
        required=True,
    )
    parser.add_argument(
        "--event-type",
        help="The github event type",
        required=True,
    )
    parser.add_argument(
        "--actor",
        help="Username of the user that triggered the run.",
        required=True,
    )
    parser.add_argument(
        "--pr-id",
        help="ID of the PR.",
        required=True,
    )
    args = parser.parse_args()

    # Get the PR title from env or the args. For the PR merge or push event, there is no PR title, leaving it empty.
    pr_title = args.pr_title or os.getenv("PR_TITLE", "")

    cleaned_benchmark_results = extract_stats_from_json(args.benchmark_json)
    # Insert the data into the database
    insert_benchmarking_data(
        db_connection_url=args.db_url,
        os_type_version=args.os,
        python_version=args.python_version,
        performance_data=cleaned_benchmark_results,
        commit_sha=args.commit_sha,
        pr_title=pr_title,
        branch_name=args.branch_name,
        event_type=args.event_type,
        actor=args.actor,
        pr_id=args.pr_id,
    )


if __name__ == "__main__":
    main()