# powd/survey.py

import os
import csv
import json
from pprint import pprint
import numpy as np
import requests
import matplotlib.pyplot as plt


def write_data_file():
    """Download every page of the campaign results and cache them in ``data.json``.

    Pages are requested starting at 0 until the API returns an empty
    payload. Results are keyed by their ``id``; when an id shows up more
    than once, the first occurrence is kept.

    The file is written as a JSON *string* that itself contains the JSON
    document (i.e. it is encoded twice). That format is kept because the
    readers in this file decode it with ``json.loads(json.load(fh))``.

    Raises:
        KeyError: if ``SURVEY_ADMIN_AUTH`` is not set in the environment.
        requests.HTTPError: if the server answers with an error status.
        requests.Timeout: if the server does not answer within the timeout.
    """
    cookie = {"survey-admin-auth": os.environ["SURVEY_ADMIN_AUTH"]}
    url = (
        "https://survey.mcaptcha.org/admin/api/v1/campaign/"
        "b81b9214-aa60-4c1b-9ee4-29d842e3039f/results"
    )

    page = 0
    results = {}

    while True:
        # Without a timeout requests waits indefinitely on a stalled server.
        resp = requests.get(
            url,
            params={"page": page},
            cookies=cookie,
            timeout=30,
        )
        # Fail loudly on an error status instead of trying to parse an
        # error body as a page of results.
        resp.raise_for_status()
        print(page)
        data = resp.json()
        if not data:
            break
        for element in data:
            # setdefault keeps the first element seen for a given id.
            results.setdefault(element["id"], element)

        page += 1

    pprint(results)

    with open("data.json", "w", encoding="utf-8") as f:
        # Double encoding is intentional; see the docstring.
        json.dump(json.dumps(results), f, ensure_ascii=False, indent=4)


def plot():
    """Print duration percentiles per difficulty and show a scatter plot.

    Reads the double-encoded ``data.json`` file, collects the ``duration``
    of every bench whose ``difficulty`` is one of the four known values,
    prints the 50th/25th/75th/90th/99th percentiles for each difficulty,
    and finally opens a matplotlib window with one colour per difficulty.

    Durations are divided by 10**3 before use (presumably milliseconds to
    seconds, matching the "Time (s)" axis label; confirm against the API).
    Difficulties are shown divided by 10**7.

    A difficulty with no samples is reported as such instead of raising.
    """
    # (difficulty, scatter colour), in the order they are reported/drawn.
    groups = (
        (14_760_000, "green"),
        (1_069_993, "red"),
        (4_150_002, "blue"),
        (6_550_004, "orange"),
    )

    with open("data.json", encoding="utf-8") as data:
        # The file holds a JSON string that wraps the real JSON document.
        results = json.loads(json.load(data))

    durations = {difficulty: [] for difficulty, _ in groups}

    print(type(results))
    for element in results.values():
        for bench in element["benches"]:
            samples = durations.get(bench["difficulty"])
            if samples is not None:
                samples.append(bench["duration"] / 10**3)

    print(len(results))

    for difficulty, _ in groups:
        times = durations[difficulty]
        print(f"difficulty factor: {difficulty / 10**7}")
        if not times:
            # Percentiles of an empty sample are undefined; skip them.
            print("no samples recorded for this difficulty")
            print()
            continue
        print("50th percentile of time : ", np.percentile(times, 50))
        print("25th percentile of time : ", np.percentile(times, 25))
        print("75th percentile of time : ", np.percentile(times, 75))
        print("90th percentile of time : ", np.percentile(times, 90))
        print("99th percentile of time : ", np.percentile(times, 99))
        print()

    for difficulty, colour in groups:
        times = durations[difficulty]
        # Every sample of a group shares the same x coordinate.
        plt.scatter([difficulty / 10**7] * len(times), times, c=colour)

    plt.xlabel("Difficulty Factor (1e7)")
    plt.ylabel("Time (s)")
    plt.show()


def write_csv():
    """Export the cached survey results to ``survey-export.csv``.

    Reads the double-encoded ``data.json`` file and writes one CSV row per
    result: its identifying fields followed by one duration column for
    each known difficulty. A difficulty with no bench in a result is
    written as 0; if a difficulty appears more than once, the last bench
    wins.

    The ``submitted_at`` value of every row is printed while exporting.
    """
    # Column order of the per-difficulty duration columns.
    difficulties = (1_069_993, 4_150_002, 6_550_004, 14_760_000)

    with open("data.json", encoding="utf-8") as data:
        # The file holds a JSON string that wraps the real JSON document.
        results = json.loads(json.load(data))

    rows = []

    for element in results.values():
        durations = dict.fromkeys(difficulties, 0)
        for bench in element["benches"]:
            if bench["difficulty"] in durations:
                durations[bench["difficulty"]] = bench["duration"]

        # NOTE(review): the value is wrapped in literal double quotes and
        # csv.writer then applies its own quoting on top, so the quotes
        # become part of the field value. Kept as-is so the export format
        # does not change; confirm whether this is intended.
        device_software_recognised = f'"{element["device_software_recognised"]}"'

        row = [
            element["id"],
            element["user"]["id"],
            element["device_user_provided"],
            device_software_recognised,
            element["threads"],
            element["submitted_at"],
            element["submission_type"],
            *(durations[difficulty] for difficulty in difficulties),
        ]

        print(row[5])
        rows.append(row)

    fields = [
        "ID",
        "user",
        "device_user_provided",
        "device_software_recognised",
        "threads",
        "submitted_at",
        "submission_type",
        *(f"Difficulty {difficulty}" for difficulty in difficulties),
    ]

    filename = "survey-export.csv"
    # newline="" lets the csv module control line endings; without it
    # platforms that translate "\n" emit "\r\r\n" and show blank rows.
    with open(filename, "w", newline="", encoding="utf-8") as csvfile:
        csvwriter = csv.writer(csvfile)
        csvwriter.writerow(fields)
        csvwriter.writerows(rows)


# Script entry point: regenerate survey-export.csv from data.json.
# NOTE: there is no ``__main__`` guard, so this also runs on import.
write_csv()