From 6b7297545b71fda63aff0a74e680660e8c22222b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Asbj=C3=B8rn=20Sloth=20T=C3=B8nnesen?=
Date: Fri, 6 Sep 2019 13:43:35 +0000
Subject: initial commit
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Asbjørn Sloth Tønnesen
---
 .gitignore       |   3 ++
 LICENSE          |  22 ++++
 README.md        |  60 ++++++++++++++++++++++
 config.py.sample |   8 +++
 schema.sql       | 115 ++++++++++++++++++++++++++++++++++++++++++++++++
 sync.py          | 150 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 358 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 LICENSE
 create mode 100644 README.md
 create mode 100644 config.py.sample
 create mode 100644 schema.sql
 create mode 100755 sync.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..b3f4a98
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+__pycache__
+.*.sw?
+config.py
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..7ef5413
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,22 @@
+Copyright 2019 Fiberby
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..814d561
--- /dev/null
+++ b/README.md
@@ -0,0 +1,60 @@
+peeringdb-simplesync - Minimalistic PeeringDB synchronization tool
+==================================================================
+
+A simple tool for keeping a local PostgreSQL database up-to-date with PeeringDB.
+
+Requirements
+------------
+
+* PostgreSQL 9.5+
+* Python 3.6+
+* psycopg2 2.7+
+* python-requests 2+
+
+
+Installation
+------------
+
+```sh
+sudo apt install git python3 python3-requests python3-psycopg2
+
+git clone https://git.2e8.dk/peeringdb-simplesync
+cd peeringdb-simplesync
+
+cp config.py.sample config.py
+editor config.py # change credentials
+
+createdb pdbtest
+psql pdbtest < schema.sql
+```
+
+Usage
+-----
+
+```sh
+./sync.py # initial sync takes about 10 minutes
+```
+
+Then try the following query with `psql pdbtest`:
+
+```sql
+SELECT
+    asn,
+    data->>'irr_as_set',
+    (data->>'info_prefixes4')::int pfx4,
+    (data->>'info_prefixes6')::int pfx6
+FROM peeringdb.net ORDER BY asn;
+```
+
+Note: if you add an `info_prefixes4` column to the schema,
+it will be populated automatically, just like the `asn` column.
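+
+For example, a migration along these lines should do (a sketch; the
+`UPDATE` backfills rows that were synced before the column existed,
+and later syncs maintain the column automatically):
+
+```sql
+-- hypothetical migration: promote a JSON field to a real column
+ALTER TABLE peeringdb.net ADD COLUMN info_prefixes4 int;
+UPDATE peeringdb.net SET info_prefixes4 = (data->>'info_prefixes4')::int;
+```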
+
+Known issues
+------------
+
+* Objects deleted from PeeringDB are not deleted from the local database.
+
+Disclaimer
+----------
+
+This tool is not affiliated with PeeringDB.
diff --git a/config.py.sample b/config.py.sample
new file mode 100644
index 0000000..d85e4ef
--- /dev/null
+++ b/config.py.sample
@@ -0,0 +1,8 @@
+from requests.auth import HTTPBasicAuth
+
+def get_config():
+    return {
+        'db_conn_str': 'dbname=pdbtest',
+        'db_schema': 'peeringdb',
+        'auth': HTTPBasicAuth('use_a_dedicated_peeringdb_account', 'and_a_long_random_password'),
+    }
diff --git a/schema.sql b/schema.sql
new file mode 100644
index 0000000..593f4c2
--- /dev/null
+++ b/schema.sql
@@ -0,0 +1,115 @@
+create schema peeringdb;
+
+create table peeringdb.org (
+    id int not null,
+    status text not null,
+    data jsonb not null,
+    created timestamptz not null,
+    updated timestamptz not null,
+    deleted timestamptz,
+    primary key (id)
+);
+
+create table peeringdb.net (
+    id int not null,
+    org_id int not null,
+    asn int not null,
+    status text not null,
+    data jsonb not null,
+    created timestamptz not null,
+    updated timestamptz not null,
+    deleted timestamptz,
+    primary key (id)
+);
+
+create table peeringdb.ix (
+    id int not null,
+    org_id int not null,
+    status text not null,
+    data jsonb not null,
+    created timestamptz not null,
+    updated timestamptz not null,
+    deleted timestamptz,
+    primary key (id)
+);
+
+create table peeringdb.fac (
+    id int not null,
+    org_id int not null,
+    status text not null,
+    data jsonb not null,
+    created timestamptz not null,
+    updated timestamptz not null,
+    deleted timestamptz,
+    primary key (id)
+);
+
+create table peeringdb.poc (
+    id int not null,
+    net_id int not null,
+    status text not null,
+    data jsonb not null,
+    created timestamptz not null,
+    updated timestamptz not null,
+    deleted timestamptz,
+    primary key (id)
+);
+
+create table peeringdb.ixlan (
+    id int not null,
+    ix_id int not null,
+    status text not null,
+    data jsonb not null,
+    created timestamptz not null,
+    updated timestamptz not null,
+    deleted timestamptz,
+    primary key (id)
+);
+
+create table peeringdb.ixpfx (
+    id int not null,
+    ixlan_id int not null,
+    status text not null,
+    data jsonb not null,
+    created timestamptz not null,
+    updated timestamptz not null,
+    deleted timestamptz,
+    primary key (id)
+);
+
+create table peeringdb.ixfac (
+    id int not null,
+    ix_id int not null,
+    fac_id int not null,
+    status text not null,
+    data jsonb not null,
+    created timestamptz not null,
+    updated timestamptz not null,
+    deleted timestamptz,
+    primary key (id)
+);
+
+create table peeringdb.netfac (
+    id int not null,
+    net_id int not null,
+    fac_id int not null,
+    status text not null,
+    data jsonb not null,
+    created timestamptz not null,
+    updated timestamptz not null,
+    deleted timestamptz,
+    primary key (id)
+);
+
+create table peeringdb.netixlan (
+    id int not null,
+    net_id int not null,
+    ix_id int not null,
+    ixlan_id int not null,
+    status text not null,
+    data jsonb not null,
+    created timestamptz not null,
+    updated timestamptz not null,
+    deleted timestamptz,
+    primary key (id)
+);
diff --git a/sync.py b/sync.py
new file mode 100755
index 0000000..340b6be
--- /dev/null
+++ b/sync.py
@@ -0,0 +1,150 @@
+#!/usr/bin/env python3
+
+import psycopg2
+from psycopg2 import sql
+psycopg2.extensions.register_type(psycopg2.extensions.UNICODE)
+
+import requests
+from requests.adapters import HTTPAdapter
+from requests.packages.urllib3.util.retry import Retry
+
+import json
+
+import time
+
+import logging
+from config import get_config
+schema_name = get_config()['db_schema']
+
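+# Rough flow: main() discovers object types from the PeeringDB
+# /apidocs/ listing, sync_table() verifies that a matching local table
+# (with id, updated and data columns) exists, and update_object()
+# upserts each object keyed on its id. Incremental runs only request
+# objects whose 'updated' timestamp is newer than the latest one
+# already stored (see last_updated()).
+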
+#logging.basicConfig(level=logging.DEBUG)
+
+def open_db():
+    global conn
+    db_conn_str = get_config()['db_conn_str']
+    conn = psycopg2.connect(db_conn_str)
+
+column_cache = {}
+def get_columns(table_name):
+    global column_cache
+    if table_name in column_cache:
+        return column_cache[table_name]
+    sql_str = """
+        SELECT column_name FROM information_schema.columns
+        WHERE table_schema = %s AND table_name = %s
+    """
+    cur = conn.cursor()
+    cur.execute(sql_str, (schema_name, table_name))
+    rows = cur.fetchall()
+    ret = [row[0] for row in rows]
+    column_cache[table_name] = ret
+    return ret
+
+def test_table(table_name):
+    cols = get_columns(table_name)
+    try:
+        assert('id' in cols)
+        assert(cols[0] == 'id')
+        assert('updated' in cols)
+        assert('data' in cols)
+    except AssertionError:
+        print(f'Invalid or missing table schema for {table_name:s}')
+        raise
+
+def gen_sql(table_name, cols):
+    updates = [sql.Identifier(col) + sql.SQL(' = EXCLUDED.') + sql.Identifier(col) for col in cols[1:]]
+    comp = sql.SQL("""
+        INSERT INTO {}.{}
+        ({})
+        VALUES ({})
+        ON CONFLICT ({})
+        DO UPDATE SET {};
+    """).format(sql.Identifier(schema_name),
+                sql.Identifier(table_name),
+                sql.SQL(', ').join(map(sql.Identifier, cols)),
+                sql.SQL(', ').join(map(sql.Placeholder, cols)),
+                sql.Identifier(cols[0]),
+                sql.SQL(', ').join(updates))
+    return comp
+
+def update_object(kind, obj):
+    cols = get_columns(kind)
+    comp = gen_sql(kind, cols)
+    t = {}
+    for key in cols:
+        if key in obj:
+            t[key] = obj[key]
+        else:
+            t[key] = None
+    t['data'] = json.dumps(obj)
+    cur = conn.cursor()
+    cur.execute(comp, t)
+
+def last_updated(kind):
+    comp = sql.SQL('SELECT EXTRACT(EPOCH FROM MAX(updated)) - 1 FROM {}.{};'
+                   ).format(sql.Identifier(schema_name), sql.Identifier(kind))
+    cur = conn.cursor()
+    cur.execute(comp)
+    last = cur.fetchone()[0]
+    if last is None: return None
+    return int(last)
+
+def fetch_objects(s, kind, params):
+    endpoint = f'https://peeringdb.com/api/{kind:s}'
+    r = s.get(endpoint, params=params)
+    objs = json.loads(r.text)['data']
+    for obj in objs:
+        update_object(kind, obj)
+    return len(objs)
+
+def initial_sync(s, kind):
+    initial_step = 100
+    step = initial_step
+    lows = 0
+    low = 0
+    while lows < 5:
+        high = low + step + 1
+        n = fetch_objects(s, kind, {'id__gt': low, 'id__lt': high, 'depth': 0})
+        low += step
+        if n > 0:
+            step = initial_step
+            lows = 0
+        else:
+            step *= 2
+            lows += 1
+    fetch_objects(s, kind, {'id__gt': low, 'depth': 0})
+
+def sync_table(s, kind):
+    test_table(kind)
+    endpoint = f'https://peeringdb.com/api/{kind:s}'
+    last = last_updated(kind)
+    if last is None:
+        last = int(time.time()) - 3600
+        initial_sync(s, kind)
+    fetch_objects(s, kind, {'since': last, 'depth': 0})
+
+def main():
+    open_db()
+
+    s = requests.Session()
+    retries = Retry(total=5, backoff_factor=1, status_forcelist=[502, 503, 504])
+    s.mount('https://', HTTPAdapter(max_retries=retries))
+    s.auth = get_config()['auth']
+
+    req_agent = s.headers.get('User-Agent')
+    s.headers.update({'User-Agent': f'peeringdb-simplesync/0.1 {req_agent:s}'})
+    r = s.get('https://peeringdb.com/apidocs/')
+    s.headers.update({'Accept': 'application/json'})
+
+    ignored = ['as_set']
+
+    apidoc = json.loads(r.text)
+    for key in apidoc:
+        if key[0] == '_' or key in ignored: continue
+        try:
+            sync_table(s, key)
+        except AssertionError:
+            print(f'skipping {key:s}...')
+
+    conn.commit()
+
+main()
-- 
cgit v1.2.1