"""Download, plot and export benchmark results from survey.mcaptcha.org.

The script has three independent steps:

* ``write_data_file`` downloads every results page of one campaign and
  stores them in ``data.json``.
* ``plot`` prints percentile statistics and shows a scatter plot of the
  benchmark durations stored in ``data.json``.
* ``write_csv`` flattens ``data.json`` into ``survey-export.csv``.

Running the module executes ``write_csv()``.
"""
import os
import csv
import json
from pprint import pprint

import numpy as np
import requests
import matplotlib.pyplot as plt

# Paginated admin endpoint that lists the results of the surveyed campaign.
_RESULTS_URL = (
    "https://survey.mcaptcha.org/admin/api/v1/campaign/"
    "b81b9214-aa60-4c1b-9ee4-29d842e3039f/results"
)
# Seconds to wait for the server; without a timeout requests can block forever.
_REQUEST_TIMEOUT = 30
# Local cache of the downloaded results, shared by all three steps.
_DATA_FILE = "data.json"
_CSV_FILE = "survey-export.csv"

# Difficulty values that are plotted, with the colour of their scatter series.
# The order is the order in which statistics are printed and series are drawn.
_PLOT_SERIES = (
    (14_760_000, "green"),
    (1_069_993, "red"),
    (4_150_002, "blue"),
    (6_550_004, "orange"),
)
# Difficulty values exported to CSV, in column order.
_CSV_DIFFICULTIES = (1_069_993, 4_150_002, 6_550_004, 14_760_000)
_PERCENTILES = (50, 25, 75, 90, 99)


def _load_results() -> dict:
    """Return the mapping of result id -> result record stored in data.json.

    Earlier versions of this script wrote the mapping double-encoded (a JSON
    document whose only value is a string that itself contains JSON). Both
    that legacy layout and a plain JSON object are accepted.
    """
    with open(_DATA_FILE, encoding="utf-8") as data:
        loaded = json.load(data)
    if isinstance(loaded, str):
        # Legacy double-encoded file: decode the inner document as well.
        loaded = json.loads(loaded)
    return loaded


def write_data_file() -> None:
    """Download all result pages of the campaign and save them to data.json.

    Pages are requested starting at 0 until the server returns an empty page.
    Records are de-duplicated on their ``"id"``; the first occurrence wins.
    The admin cookie is read from the ``SURVEY_ADMIN_AUTH`` environment
    variable.

    Raises:
        KeyError: if ``SURVEY_ADMIN_AUTH`` is not set.
        requests.RequestException: on timeouts, connection errors or a
            non-success HTTP status.
    """
    cookie = {"survey-admin-auth": os.environ["SURVEY_ADMIN_AUTH"]}
    page = 0
    results = {}
    while True:
        resp = requests.get(
            f"{_RESULTS_URL}?page={page}",
            cookies=cookie,
            timeout=_REQUEST_TIMEOUT,
        )
        print(page)
        # Fail loudly on e.g. an expired cookie instead of trying to parse an
        # error body as a page of results.
        resp.raise_for_status()
        data = resp.json()
        if not data:
            break
        for element in data:
            results.setdefault(element["id"], element)
        page += 1
    pprint(results)
    with open(_DATA_FILE, "w", encoding="utf-8") as f:
        # Dump the mapping itself (not a pre-serialised string) so that the
        # file is a readable JSON object and ``indent`` actually applies.
        json.dump(results, f, ensure_ascii=False, indent=4)


def plot() -> None:
    """Print duration percentiles per difficulty and show a scatter plot.

    Durations are divided by 1000 and plotted on an axis labelled "Time (s)",
    so they are presumably stored in milliseconds (not verifiable from this
    file). Difficulties are plotted in units of 1e7. Benches whose difficulty
    is not one of the known values are ignored.
    """
    f = _load_results()
    print(type(f))

    # difficulty -> list of durations, in the order the benches are stored.
    times = {difficulty: [] for difficulty, _ in _PLOT_SERIES}
    for element in f.values():
        for b in element["benches"]:
            samples = times.get(b["difficulty"])
            if samples is not None:
                samples.append(b["duration"] / 10**3)

    print(len(f))
    for difficulty, _ in _PLOT_SERIES:
        arr = times[difficulty]
        print(f"difficulty factor: {difficulty / 10**7}")
        if not arr:
            # Percentiles are undefined for an empty sample set.
            print("no samples recorded")
            print()
            continue
        for pct in _PERCENTILES:
            print(f"{pct}th percentile of time : ", np.percentile(arr, pct))
        print()

    for difficulty, colour in _PLOT_SERIES:
        arr = times[difficulty]
        plt.scatter([difficulty / 10**7] * len(arr), arr, c=colour)
    plt.xlabel("Difficulty Factor (1e7)")
    plt.ylabel("Time (s)")
    plt.show()


def write_csv() -> None:
    """Export the records in data.json to survey-export.csv.

    One row is written per record. Each known difficulty gets its own column
    holding the raw ``"duration"`` of the matching bench, or 0 when the record
    has no bench for that difficulty. If a record has several benches with the
    same difficulty, the last one wins.
    """
    f = _load_results()
    rows = []
    for element in f.values():
        durations = dict.fromkeys(_CSV_DIFFICULTIES, 0)
        for b in element["benches"]:
            if b["difficulty"] in durations:
                durations[b["difficulty"]] = b["duration"]
        # NOTE(review): the value is wrapped in literal double quotes and the
        # csv writer escapes those again, so the cell reads """value""" in the
        # file. Kept as-is to leave the exported data unchanged; confirm
        # whether downstream consumers rely on it.
        device_software_recognised = f'"{element["device_software_recognised"]}"'
        row = [
            element["id"],
            element["user"]["id"],
            element["device_user_provided"],
            device_software_recognised,
            element["threads"],
            element["submitted_at"],
            element["submission_type"],
            *durations.values(),
        ]
        print(row[5])
        rows.append(row)

    fields = [
        "ID",
        "user",
        "device_user_provided",
        "device_software_recognised",
        "threads",
        "submitted_at",
        "submission_type",
        *(f"Difficulty {difficulty}" for difficulty in _CSV_DIFFICULTIES),
    ]
    # newline="" is required by the csv module; without it every row ends in
    # "\r\r\n" on platforms that translate "\n" on write.
    with open(_CSV_FILE, "w", newline="", encoding="utf-8") as csvfile:
        csvwriter = csv.writer(csvfile)
        csvwriter.writerow(fields)
        csvwriter.writerows(rows)


write_csv()