powd/survey.py

import os
import csv
import json
from pprint import pprint
import numpy as np
import requests
import matplotlib.pyplot as plt


def write_data_file():
    """Download all survey results and store them in ``data.json``.

    Result pages are requested from the survey admin API starting at page 0
    and the loop stops at the first empty page.  Results are collected in a
    dict keyed by each result's ``"id"``; if an id shows up more than once,
    the first occurrence is kept.

    The value of the ``survey-admin-auth`` cookie is read from the
    ``SURVEY_ADMIN_AUTH`` environment variable.

    Raises:
        KeyError: ``SURVEY_ADMIN_AUTH`` is not set.
        requests.HTTPError: the server answered with a 4xx/5xx status.
        requests.Timeout: the server did not answer in time.
    """
    cookie = {"survey-admin-auth": os.environ["SURVEY_ADMIN_AUTH"]}

    page = 0
    results = {}

    while True:
        resp = requests.get(
            f"https://survey.mcaptcha.org/admin/api/v1/campaign/b81b9214-aa60-4c1b-9ee4-29d842e3039f/results?page={page}",
            cookies=cookie,
            # Without a timeout requests waits forever on a stalled connection.
            timeout=30,
        )
        # Fail loudly on auth/server errors instead of trying to interpret an
        # error payload as a page of results.
        resp.raise_for_status()
        print(page)
        data = resp.json()
        if not data:
            break
        for element in data:
            # setdefault keeps the first result seen for a given id.
            results.setdefault(element["id"], element)

        page += 1

    pprint(results)

    with open("data.json", "w", encoding="utf-8") as f:
        # NOTE: the payload is deliberately JSON-encoded twice (a JSON string
        # holding JSON) because plot() and write_csv() decode it twice.
        json.dump(json.dumps(results), f, ensure_ascii=False, indent=4)


def plot():
    """Print solve-time percentiles per difficulty and show a scatter plot.

    Reads the results stored in ``data.json``, groups every bench entry by
    its ``"difficulty"`` (only the four known difficulty values are kept),
    prints the 50th/25th/75th/90th/99th percentile of the scaled durations
    for each group, and finally displays a scatter plot of scaled difficulty
    against scaled duration with one colour per difficulty.

    Durations are divided by 1000 and difficulties by 1e7 before use
    (presumably milliseconds -> seconds, matching the "Time (s)" axis label;
    confirm against the survey API).

    Difficulties without any sample are skipped in the percentile report
    instead of raising ``IndexError``.
    """
    with open("data.json", encoding="utf-8") as data:
        # data.json holds a JSON string that itself contains JSON, hence the
        # two decoding steps.
        f = json.loads(json.load(data))

    # difficulty -> (scatter colour, scaled difficulties, scaled durations).
    # Insertion order is also the order used for printing and drawing.
    groups = {
        14_760_000: ("green", [], []),
        1_069_993: ("red", [], []),
        4_150_002: ("blue", [], []),
        6_550_004: ("orange", [], []),
    }

    print(type(f))
    for element in f.values():
        for b in element["benches"]:
            duration = b["duration"] / 10**3
            difficulty = b["difficulty"] / 10**7
            group = groups.get(b["difficulty"])
            if group is None:
                # Not one of the four difficulties being analysed.
                continue
            _, factors, times = group
            factors.append(difficulty)
            times.append(duration)

    print(len(f))

    for _, factors, times in groups.values():
        if not times:
            # Nothing recorded for this difficulty: there is no factor to
            # print and no percentile to compute.
            continue
        print(f"difficulty factor: {factors[0]}")
        for q in (50, 25, 75, 90, 99):
            print(f"{q}th percentile of time : ", np.percentile(times, q))
        print()

    for colour, factors, times in groups.values():
        plt.scatter(factors, times, c=colour)

    plt.xlabel("Difficulty Factor (1e7)")
    plt.ylabel("Time (s)")
    plt.show()


def write_csv():
    """Export the results stored in ``data.json`` to ``survey-export.csv``.

    One row is written per result, containing its id, user id, device
    descriptions, thread count, submission timestamp and type, followed by
    one duration column per known difficulty.  A difficulty without a bench
    entry is written as ``0``; if a difficulty appears several times in a
    result's benches, the last duration wins.

    The CSV file is opened with ``newline=""`` as required by the ``csv``
    module (otherwise blank rows appear on platforms that translate line
    endings) and is always written as UTF-8.
    """
    # Difficulty values exported as columns, in column order.
    levels = (1_069_993, 4_150_002, 6_550_004, 14_760_000)

    with open("data.json", encoding="utf-8") as data:
        # data.json holds a JSON string that itself contains JSON, hence the
        # two decoding steps.
        f = json.loads(json.load(data))

    rows = []

    for element in f.values():
        durations = dict.fromkeys(levels, 0)
        for b in element["benches"]:
            duration = b["duration"]
            if b["difficulty"] in durations:
                durations[b["difficulty"]] = duration

        recognised = element["device_software_recognised"]
        # NOTE(review): the value is wrapped in literal double quotes, which
        # csv.writer then escapes, so the exported field contains the quote
        # characters.  Kept as-is to leave the export format unchanged.
        device_software_recognised = f'"{recognised}"'
        row = [
            element["id"],
            element["user"]["id"],
            element["device_user_provided"],
            device_software_recognised,
            element["threads"],
            element["submitted_at"],
            element["submission_type"],
            *durations.values(),
        ]

        print(row[5])
        rows.append(row)

    fields = [
        "ID",
        "user",
        "device_user_provided",
        "device_software_recognised",
        "threads",
        "submitted_at",
        "submission_type",
        # Header names are derived from the same tuple as the data columns so
        # the two cannot drift apart.
        *(f"Difficulty {level}" for level in levels),
    ]

    filename = "survey-export.csv"
    with open(filename, "w", newline="", encoding="utf-8") as csvfile:
        csvwriter = csv.writer(csvfile)
        csvwriter.writerow(fields)
        csvwriter.writerows(rows)


# Script entry point: export the stored results to CSV.
# NOTE(review): this call is not guarded by `if __name__ == "__main__":`,
# so it also runs whenever the module is imported.
write_csv()
feat: download survey results, plot graph and export to csv 2023-08-12 17:01:09 +05:30			`import os`
			`import csv`
			`import json`
			`from pprint import pprint`
			`import numpy as np`
			`import requests`
			`import matplotlib.pyplot as plt`


			`def write_data_file():`
			`cookie = {"survey-admin-auth": os.environ["SURVEY_ADMIN_AUTH"]}`

			`page = 0`
			`results = {}`

			`while True:`
			`resp = requests.get(`
			`f"https://survey.mcaptcha.org/admin/api/v1/campaign/b81b9214-aa60-4c1b-9ee4-29d842e3039f/results?page={page}",`
			`cookies=cookie,`
			`)`
			`print(page)`
			`data = resp.json()`
			`if len(data) == 0:`
			`break`
			`for element in data:`
			`if element["id"] not in results:`
			`results[element["id"]] = element`

			`page += 1`

			`pprint(results)`

			`with open("data.json", "w", encoding="utf-8") as f:`
			`json.dump(json.dumps(results), f, ensure_ascii=False, indent=4)`


			`def plot():`
			`with open("data.json") as data:`
			`f = json.loads(json.load(data))`

			`diff1_time = []`
			`diff2_time = []`
			`diff3_time = []`
			`diff4_time = []`
			`diff1_diff = []`
			`diff2_diff = []`
			`diff3_diff = []`
			`diff4_diff = []`

			`print(type(f))`
			`for element in f:`
			`element = f[element]`
			`benches = element["benches"]`
			`for b in benches:`
			`duration = b["duration"]`
			`duration /= 10**3`
			`difficulty = b["difficulty"] / 10**7`
			`if b["difficulty"] == 14_760_000:`
			`diff1_time.append(duration)`
			`diff1_diff.append(difficulty)`
			`if b["difficulty"] == 1_069_993:`
			`diff2_time.append(duration)`
			`diff2_diff.append(difficulty)`
			`if b["difficulty"] == 4_150_002:`
			`diff3_time.append(duration)`
			`diff3_diff.append(difficulty)`
			`if b["difficulty"] == 6_550_004:`
			`diff4_time.append(duration)`
			`diff4_diff.append(difficulty)`

			`time = diff1_time + diff2_time + diff3_time + diff4_time`
			`diff = diff1_diff + diff2_diff + diff3_diff + diff4_diff`
			`print(len(f))`

			`for diff, arr in [`
			`(diff1_diff, diff1_time),`
			`(diff2_diff, diff2_time),`
			`(diff3_diff, diff3_time),`
			`(diff4_diff, diff4_time),`
			`]:`
			`print(f"difficulty factor: {diff[0]}")`
			`print("50th percentile of time : ", np.percentile(arr, 50))`
			`print("25th percentile of time : ", np.percentile(arr, 25))`
			`print("75th percentile of time : ", np.percentile(arr, 75))`
			`print("90th percentile of time : ", np.percentile(arr, 90))`
			`print("99th percentile of time : ", np.percentile(arr, 99))`
			`print()`

			`plt.scatter(diff1_diff, diff1_time, c="green")`
			`plt.scatter(diff2_diff, diff2_time, c="red")`
			`plt.scatter(diff3_diff, diff3_time, c="blue")`
			`plt.scatter(diff4_diff, diff4_time, c="orange")`

			`plt.xlabel("Difficulty Factor (1e7)")`
			`plt.ylabel("Time (s)")`
			`plt.show()`


			`def write_csv():`
			`with open("data.json") as data:`
			`f = json.loads(json.load(data))`

			`rows = []`

			`for element in f:`
			`element = f[element]`
			`diff1 = 0`
			`diff2 = 0`
			`diff3 = 0`
			`diff4 = 0`
			`benches = element["benches"]`
			`for b in benches:`
			`duration = b["duration"]`
			`if b["difficulty"] == 1_069_993:`
			`diff1 = duration`
			`if b["difficulty"] == 4_150_002:`
			`diff2 = duration`
			`if b["difficulty"] == 6_550_004:`
			`diff3 = duration`
			`if b["difficulty"] == 14_760_000:`
			`diff4 = duration`
			`device_software_recognised = element["device_software_recognised"]`
			`device_software_recognised = format(f'"{device_software_recognised}"')`
			`row = [`
			`element["id"],`
			`element["user"]["id"],`
			`element["device_user_provided"],`
			`device_software_recognised,`
			`element["threads"],`
			`element["submitted_at"],`
			`element["submission_type"],`
			`diff1,`
			`diff2,`
			`diff3,`
			`diff4,`
			`]`

			`print(row[5])`
			`rows.append(row)`

			`filename = "survey-export.csv"`
			`with open(filename, "w") as csvfile:`
			`fields = [`
			`"ID",`
			`"user",`
			`"device_user_provided",`
			`"device_software_recognised",`
			`"threads",`
			`"submitted_at",`
			`"submission_type",`
			`"Difficulty 1069993",`
			`"Difficulty 4150002",`
			`"Difficulty 6550004",`
			`"Difficulty 14760000",`
			`]`

			`csvwriter = csv.writer(csvfile)`
			`csvwriter.writerow(fields)`
			`csvwriter.writerows(rows)`


			`write_csv()`