import-with-gh-duckdb.md

#!/bin/bash

$ gh api -H "Accept: application/vnd.github+json" -H "X-GitHub-Api-Version: 2022-11-28" "/repos/pypi/support/issues?state=all" --paginate >pypi_issues_gh.json
$ duckdb pypi_issues.db
v1.0.0 1f98600c2c
Enter ".help" for usage hints.
D .read import-json.sql
D .tables
pypi_issues
D select count(id) from pypi_issues;
┌───────────┐
│ count(id) │
│   int64   │
├───────────┤
│      4143 │
└───────────┘
D SELECT id, count(id) AS nb FROM pypi_issues GROUP BY id HAVING nb > 1;
┌───────┬───────┐
│  id   │  nb   │
│ int64 │ int64 │
├───────┴───────┤
│    0 rows     │
└───────────────┘
D
#!/bin/bash
#
# Download the event history of every closed issue of pypi/support.
#
# Reads the issue numbers from the `pypi_issues` table of pypi_issues.db and
# writes each issue's events to events/pypi_issue_<number>.json.
# Requires: duckdb, gh.
# Exit status: 0 when every issue was fetched, 1 otherwise.

set -u

readonly DB_FILE="pypi_issues.db"
readonly EVENTS_DIR="events"

# The output redirection below fails if the target directory is missing.
mkdir -p -- "${EVENTS_DIR}" || exit 1

# Collect the list first so that a failing query stops the script instead of
# silently producing an empty loop.
issue_numbers=$(duckdb "${DB_FILE}" -noheader -csv -c "SELECT number FROM pypi_issues WHERE state='closed'") || {
    echo "Could not read issue numbers from ${DB_FILE}" >&2
    exit 1
}

failures=0

while IFS= read -r inb; do
    # Drop a trailing carriage return in case the CSV rows end in CRLF.
    inb=${inb%$'\r'}
    # The number ends up in a URL and a file name: accept digits only
    # (this also skips blank lines).
    [[ "${inb}" =~ ^[0-9]+$ ]] || continue

    echo "Get $inb issues events"
    out_file="${EVENTS_DIR}/pypi_issue_${inb}.json"

    # stdin is redirected so gh can never consume the issue list feeding the loop.
    if ! gh api \
        -H "Accept: application/vnd.github+json" \
        -H "X-GitHub-Api-Version: 2022-11-28" \
        "/repos/pypi/support/issues/${inb}/events" \
        --paginate >"${out_file}" </dev/null; then
        echo "Failed to fetch events for issue ${inb}" >&2
        # Do not leave an error body or a truncated file for later steps.
        rm -f -- "${out_file}"
        failures=$((failures + 1))
    fi

    # Short pause between requests (presumably to stay clear of API rate limits).
    sleep 0.2
done <<<"${issue_numbers}"

if (( failures > 0 )); then
    echo "${failures} issue(s) could not be fetched" >&2
    exit 1
fi
# Show only the "closed" events recorded in one issue's events file.
jq '.[] | select(.event == "closed")' <pypi_issue_3835.json
#!/bin/bash
#
# Extract the "closed" events from each downloaded issue events file.
#
# For every closed issue listed in the `pypi_issues` table of pypi_issues.db,
# reads events/pypi_issue_<number>.json and writes the matching events to
# events_closed/pypi_issue_<number>_closed.json (one JSON object after
# another, not wrapped in an array).
# Requires: duckdb, jq.

set -u

readonly DB_FILE="pypi_issues.db"
readonly EVENTS_DIR="events"
readonly CLOSED_DIR="events_closed"

# The output redirection below fails if the target directory is missing.
mkdir -p -- "${CLOSED_DIR}" || exit 1

# Collect the list first so that a failing query is reported.
issue_numbers=$(duckdb "${DB_FILE}" -noheader -csv -c "SELECT number FROM pypi_issues WHERE state='closed'") || {
    echo "Could not read issue numbers from ${DB_FILE}" >&2
    exit 1
}

while IFS= read -r inb; do
    # Drop a trailing carriage return in case the CSV rows end in CRLF.
    inb=${inb%$'\r'}
    # The number is used to build file names: accept digits only
    # (this also skips blank lines).
    [[ "${inb}" =~ ^[0-9]+$ ]] || continue

    src="${EVENTS_DIR}/pypi_issue_${inb}.json"
    dst="${CLOSED_DIR}/pypi_issue_${inb}_closed.json"

    if [[ ! -r "${src}" ]]; then
        echo "Skipping issue ${inb}: cannot read ${src}" >&2
        continue
    fi

    echo "Filtering closed event in ${src}"
    if ! jq '.[] | select(.event == "closed")' "${src}" >"${dst}"; then
        echo "jq failed on ${src}" >&2
        # Do not keep a partially written result.
        rm -f -- "${dst}"
    fi
done <<<"${issue_numbers}"
#!/bin/bash
#
# Load the filtered "closed" events into the `pypi_issues_events` table.
#
# For every closed issue listed in the `pypi_issues` table of pypi_issues.db,
# reads events_closed/pypi_issue_<number>_closed.json and inserts one row per
# event, prefixed with the issue number.
# Requires: duckdb.
# Exit status: 0 when every import succeeded, 1 otherwise.

set -u

readonly DB_FILE="pypi_issues.db"
readonly CLOSED_DIR="events_closed"

#######################################
# Print the INSERT statement that loads one issue's closed events.
# Arguments: $1 - issue number (digits only)
#            $2 - path of the JSON file holding the closed events
# Outputs:   the SQL statement on stdout
#######################################
build_insert_query() {
    local issue_number=$1
    local json_file=$2

    # The input files hold JSON objects one after another rather than a JSON
    # array, which is presumably why "format = 'array'" was left disabled in
    # the original query; the format is left to DuckDB's auto-detection.
    cat <<SQL
INSERT INTO pypi_issues_events (
  SELECT
    ${issue_number},
    id,
    node_id,
    actor.id,
    actor.login,
    actor.node_id,
    actor.type,
    actor.site_admin,
    event,
    created_at,
    state_reason
  FROM read_json(
    '${json_file}',
    columns = {
      'id': 'BIGINT',
      'node_id': 'VARCHAR',
      'actor': 'STRUCT(login VARCHAR, id BIGINT, node_id VARCHAR, type VARCHAR, site_admin VARCHAR)',
      'state': 'VARCHAR',
      'event': 'VARCHAR',
      'created_at': 'TIMESTAMP',
      'state_reason': 'VARCHAR'
    }
  )
);
SQL
}

# Collect the list first so that a failing query is reported.
issue_numbers=$(duckdb "${DB_FILE}" -noheader -csv -c "SELECT number FROM pypi_issues WHERE state='closed'") || {
    echo "Could not read issue numbers from ${DB_FILE}" >&2
    exit 1
}

failures=0

while IFS= read -r inb; do
    # Drop a trailing carriage return in case the CSV rows end in CRLF.
    inb=${inb%$'\r'}
    # The number is interpolated into SQL and a file name: accept digits only
    # (this also skips blank lines).
    [[ "${inb}" =~ ^[0-9]+$ ]] || continue

    closed_file="${CLOSED_DIR}/pypi_issue_${inb}_closed.json"

    # A missing or empty file has no event to insert.
    if [[ ! -s "${closed_file}" ]]; then
        echo "Skipping issue ${inb}: no closed event in ${closed_file}" >&2
        continue
    fi

    echo "Importing closed events from ${closed_file}"
    query=$(build_insert_query "${inb}" "${closed_file}")

    # echo "${query}"
    # stdin is redirected so duckdb can never consume the issue list feeding the loop.
    if ! duckdb "${DB_FILE}" -noheader -csv -c "${query}" </dev/null; then
        echo "Import failed for issue ${inb}" >&2
        failures=$((failures + 1))
    fi
done <<<"${issue_numbers}"

if (( failures > 0 )); then
    echo "${failures} issue(s) could not be imported" >&2
    exit 1
fi
$ duckdb pypi_issues.db
v1.0.0 1f98600c2c
Enter ".help" for usage hints.
D select count(event_id) from pypi_issues_events;
┌─────────────────┐
│ count(event_id) │
│      int64      │
├─────────────────┤
│            3365 │
└─────────────────┘
D select state, count(id) as nb from pypi_issues group by state;
┌─────────┬───────┐
│  state  │  nb   │
│ varchar │ int64 │
├─────────┼───────┤
│ closed  │  3320 │
│ open    │   823 │
└─────────┴───────┘