feat: download survey results, plot graph and export to csv

This commit is contained in:
Aravinth Manivannan 2023-08-12 17:01:09 +05:30
parent e61a4e0a17
commit bbd913dff8
Signed by: realaravinth
GPG Key ID: AD9F0F08E855ED88
1 changed file with 162 additions and 0 deletions

162
survey.py Normal file
View File

@ -0,0 +1,162 @@
import os
import csv
import json
from pprint import pprint
import numpy as np
import requests
import matplotlib.pyplot as plt
def write_data_file():
    """Download every result page of the survey campaign and cache it in data.json.

    Pages are requested starting at 0 until the API returns an empty page.
    Results are keyed by their "id"; when the same id shows up on more than
    one page, the first occurrence is kept.

    The admin session cookie is read from the SURVEY_ADMIN_AUTH environment
    variable.

    Raises:
        KeyError: if SURVEY_ADMIN_AUTH is not set.
        requests.HTTPError: if the server answers with a 4xx/5xx status.
        requests.Timeout: if the server does not answer within the timeout.
    """
    cookie = {"survey-admin-auth": os.environ["SURVEY_ADMIN_AUTH"]}
    page = 0
    results = {}
    while True:
        resp = requests.get(
            f"https://survey.mcaptcha.org/admin/api/v1/campaign/b81b9214-aa60-4c1b-9ee4-29d842e3039f/results?page={page}",
            cookies=cookie,
            # Without a timeout a stalled connection would hang the script forever.
            timeout=30,
        )
        # Fail loudly on auth/server errors instead of trying to parse an
        # error body as if it were a page of results.
        resp.raise_for_status()
        print(page)
        data = resp.json()
        if not data:
            break
        for element in data:
            # First occurrence wins; later duplicates are ignored.
            results.setdefault(element["id"], element)
        page += 1

    pprint(results)
    with open("data.json", "w", encoding="utf-8") as f:
        # NOTE: the payload is deliberately double-encoded (a JSON string that
        # itself contains JSON) because plot() and write_csv() decode it with
        # json.loads(json.load(...)). Change all three together or not at all.
        json.dump(json.dumps(results), f, ensure_ascii=False, indent=4)
def plot():
    """Print per-difficulty time percentiles and show a scatter plot.

    Reads the double-encoded results cache from data.json, groups every
    bench duration by its difficulty, prints the 50th/25th/75th/90th/99th
    percentile of the durations for each known difficulty, and finally
    opens a matplotlib window with one colour per difficulty.

    Benches whose difficulty is not one of the four known values are ignored.
    A difficulty with no samples is reported as such instead of crashing.
    """
    # (difficulty, scatter colour) in the order they are reported and drawn.
    groups = (
        (14_760_000, "green"),
        (1_069_993, "red"),
        (4_150_002, "blue"),
        (6_550_004, "orange"),
    )

    with open("data.json", encoding="utf-8") as data:
        # The cache is a JSON string that itself contains JSON, hence two decodes.
        results = json.loads(json.load(data))

    samples_by_difficulty = {difficulty: [] for difficulty, _ in groups}
    print(type(results))
    for element in results.values():
        for bench in element["benches"]:
            samples = samples_by_difficulty.get(bench["difficulty"])
            if samples is not None:
                # Scaled by 1e-3 for the "Time (s)" axis; presumably the raw
                # duration is in milliseconds -- confirm against the API.
                samples.append(bench["duration"] / 10**3)
    print(len(results))

    for difficulty, _ in groups:
        samples = samples_by_difficulty[difficulty]
        print(f"difficulty factor: {difficulty / 10**7}")
        if not samples:
            # np.percentile cannot summarise an empty sample set.
            print("no samples recorded")
            print()
            continue
        print("50th percentile of time : ", np.percentile(samples, 50))
        print("25th percentile of time : ", np.percentile(samples, 25))
        print("75th percentile of time : ", np.percentile(samples, 75))
        print("90th percentile of time : ", np.percentile(samples, 90))
        print("99th percentile of time : ", np.percentile(samples, 99))
        print()

    for difficulty, colour in groups:
        samples = samples_by_difficulty[difficulty]
        # Every point of a group shares the same x: the difficulty scaled by 1e-7.
        plt.scatter([difficulty / 10**7] * len(samples), samples, c=colour)
    plt.xlabel("Difficulty Factor (1e7)")
    plt.ylabel("Time (s)")
    plt.show()
def write_csv():
    """Export the cached survey results from data.json to survey-export.csv.

    One row is written per survey result. The last four columns hold the
    bench duration recorded for each known difficulty (0 when the result has
    no bench for that difficulty; if a difficulty occurs more than once the
    last bench wins). Benches with any other difficulty are ignored.

    The value of "submitted_at" is printed for every row as progress output.
    """
    # Column order of the per-difficulty duration fields.
    difficulties = (1_069_993, 4_150_002, 6_550_004, 14_760_000)

    with open("data.json", encoding="utf-8") as data:
        # The cache is a JSON string that itself contains JSON, hence two decodes.
        results = json.loads(json.load(data))

    rows = []
    for element in results.values():
        durations = dict.fromkeys(difficulties, 0)
        for bench in element["benches"]:
            if bench["difficulty"] in durations:
                durations[bench["difficulty"]] = bench["duration"]

        # NOTE: the value is wrapped in literal double quotes on top of the
        # quoting csv.writer applies; kept so the exported data is unchanged.
        recognised = element["device_software_recognised"]
        device_software_recognised = f'"{recognised}"'

        print(element["submitted_at"])
        rows.append(
            [
                element["id"],
                element["user"]["id"],
                element["device_user_provided"],
                device_software_recognised,
                element["threads"],
                element["submitted_at"],
                element["submission_type"],
                *(durations[difficulty] for difficulty in difficulties),
            ]
        )

    fields = [
        "ID",
        "user",
        "device_user_provided",
        "device_software_recognised",
        "threads",
        "submitted_at",
        "submission_type",
        "Difficulty 1069993",
        "Difficulty 4150002",
        "Difficulty 6550004",
        "Difficulty 14760000",
    ]
    filename = "survey-export.csv"
    # newline="" is required by the csv module; without it every row ends in
    # "\r\r\n" on platforms that translate "\n". utf-8 keeps free-text fields
    # independent of the locale's default encoding.
    with open(filename, "w", newline="", encoding="utf-8") as csvfile:
        csvwriter = csv.writer(csvfile)
        csvwriter.writerow(fields)
        csvwriter.writerows(rows)
# Script entry point: convert the cached data.json into survey-export.csv.
# write_data_file() must have produced data.json beforehand; plot() is the
# other available action and is not run by default.
write_csv()