"""Download, plot and export benchmark results of a survey campaign.

The script talks to the admin API on survey.mcaptcha.org, caches the raw
results in a JSON file and can then either plot the benchmark durations or
flatten them into a CSV file.  Running the module as a script performs the
CSV export.
"""

import csv
import json
import os
from pprint import pprint

# NOTE: requests, numpy and matplotlib are imported inside the functions that
# need them, so the CSV export (the default action) only requires the
# standard library.

# Paginated admin endpoint that returns the campaign's submissions.
RESULTS_URL = (
    "https://survey.mcaptcha.org/admin/api/v1/campaign/"
    "b81b9214-aa60-4c1b-9ee4-29d842e3039f/results"
)
DATA_FILE = "data.json"
CSV_FILE = "survey-export.csv"

# Seconds to wait for the API before giving up instead of hanging forever.
REQUEST_TIMEOUT = 30

# Difficulty factors exported to CSV, in column order.
CSV_DIFFICULTIES = (1_069_993, 4_150_002, 6_550_004, 14_760_000)

CSV_FIELDS = [
    "ID",
    "user",
    "device_user_provided",
    "device_software_recognised",
    "threads",
    "submitted_at",
    "submission_type",
    *(f"Difficulty {difficulty}" for difficulty in CSV_DIFFICULTIES),
]

# Difficulty factor -> scatter colour.  Insertion order is also the order in
# which the statistics are printed and the series are drawn.
PLOT_COLORS = {
    14_760_000: "green",
    1_069_993: "red",
    4_150_002: "blue",
    6_550_004: "orange",
}


def _load_results(path: str) -> dict:
    """Return the results mapping stored at *path*.

    Earlier versions of this script stored the mapping double-encoded (a JSON
    string that itself contains JSON).  Both that legacy layout and a plain
    JSON object are accepted.
    """
    with open(path, encoding="utf-8") as handle:
        loaded = json.load(handle)
    if isinstance(loaded, str):
        # Legacy double-encoded file: decode the inner document as well.
        loaded = json.loads(loaded)
    return loaded


def write_data_file(path: str = DATA_FILE) -> None:
    """Download every result page of the campaign and save it as JSON.

    Pages are requested starting at 0 until the API answers with an empty
    page.  Submissions are de-duplicated on their ``id``; the first
    occurrence wins.

    Args:
        path: Destination of the JSON file.

    Raises:
        KeyError: If the ``SURVEY_ADMIN_AUTH`` environment variable is unset.
        requests.HTTPError: If the API answers with an error status.
    """
    import requests

    cookies = {"survey-admin-auth": os.environ["SURVEY_ADMIN_AUTH"]}
    results = {}
    page = 0

    while True:
        resp = requests.get(
            RESULTS_URL,
            params={"page": page},
            cookies=cookies,
            timeout=REQUEST_TIMEOUT,
        )
        # Fail loudly on e.g. an expired cookie instead of trying to parse
        # an error page as results.
        resp.raise_for_status()
        print(page)
        data = resp.json()
        if not data:
            break
        for element in data:
            results.setdefault(element["id"], element)
        page += 1

    pprint(results)

    # Dump the mapping itself (not a pre-serialised string) so that
    # ``indent`` takes effect and the file stays human-readable.
    with open(path, "w", encoding="utf-8") as f:
        json.dump(results, f, ensure_ascii=False, indent=4)


def plot(data_path: str = DATA_FILE) -> None:
    """Print duration percentiles per difficulty factor and show a scatter plot.

    Durations are divided by 1000 before use and plotted on an axis labelled
    in seconds (presumably the API reports milliseconds -- confirm against
    the API).  Difficulty factors are plotted in units of 1e7.  Benches whose
    difficulty is not one of the known factors are ignored.

    Args:
        data_path: JSON file produced by ``write_data_file``.
    """
    import matplotlib.pyplot as plt
    import numpy as np

    results = _load_results(data_path)

    durations = {difficulty: [] for difficulty in PLOT_COLORS}
    for element in results.values():
        for bench in element["benches"]:
            bucket = durations.get(bench["difficulty"])
            if bucket is not None:
                bucket.append(bench["duration"] / 10**3)

    print(type(results))
    print(len(results))

    for difficulty, times in durations.items():
        print(f"difficulty factor: {difficulty / 10**7}")
        if not times:
            # np.percentile raises on an empty sample; report and move on.
            print("no samples recorded")
            print()
            continue
        for pct in (50, 25, 75, 90, 99):
            print(f"{pct}th percentile of time : ", np.percentile(times, pct))
        print()

    for difficulty, times in durations.items():
        plt.scatter(
            [difficulty / 10**7] * len(times),
            times,
            c=PLOT_COLORS[difficulty],
        )

    plt.xlabel("Difficulty Factor (1e7)")
    plt.ylabel("Time (s)")
    plt.show()


def write_csv(data_path: str = DATA_FILE, csv_path: str = CSV_FILE) -> None:
    """Flatten the stored results into one CSV row per submission.

    Each row carries the submission metadata followed by the raw bench
    duration for every factor in ``CSV_DIFFICULTIES``.  A missing bench is
    written as ``0``; if a factor occurs more than once the last bench wins.

    Args:
        data_path: JSON file produced by ``write_data_file``.
        csv_path: Destination of the CSV file.
    """
    results = _load_results(data_path)

    rows = []
    for element in results.values():
        durations = dict.fromkeys(CSV_DIFFICULTIES, 0)
        for bench in element["benches"]:
            if bench["difficulty"] in durations:
                durations[bench["difficulty"]] = bench["duration"]

        row = [
            element["id"],
            element["user"]["id"],
            element["device_user_provided"],
            # Written as-is: csv.writer quotes fields when needed, wrapping
            # the value in literal quotes would end up as """value""".
            element["device_software_recognised"],
            element["threads"],
            element["submitted_at"],
            element["submission_type"],
            *durations.values(),
        ]

        # Echo the submission timestamp as a progress indicator.
        print(row[5])
        rows.append(row)

    # newline="" is required by the csv module so that it controls the line
    # endings itself (otherwise blank rows appear on Windows).
    with open(csv_path, "w", newline="", encoding="utf-8") as csvfile:
        csvwriter = csv.writer(csvfile)
        csvwriter.writerow(CSV_FIELDS)
        csvwriter.writerows(rows)


# Only export when executed as a script, so importing the module is free of
# side effects.
if __name__ == "__main__":
    write_csv()