-rw-r--r--  .gitignore          3
-rw-r--r--  LICENSE            22
-rw-r--r--  README.md          60
-rw-r--r--  config.py.sample    8
-rw-r--r--  schema.sql        115
-rwxr-xr-x  sync.py           150
6 files changed, 358 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..b3f4a98
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+__pycache__
+.*.sw?
+config.py
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..7ef5413
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,22 @@
+Copyright 2019 Fiberby
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..814d561
--- /dev/null
+++ b/README.md
@@ -0,0 +1,60 @@
+peeringdb-simplesync - Minimalistic PeeringDB synchronization tool
+==================================================================
+
+A simple tool for keeping a local PostgreSQL database up-to-date with PeeringDB.
+
+Requirements
+------------
+
+* PostgreSQL 9.5+
+* Python 3.6+
+* psycopg2 2.7+
+* python-requests 2+
+
+
+Installation
+------------
+
+```sh
+sudo apt install git python3 python3-requests python3-psycopg2
+
+git clone https://git.2e8.dk/peeringdb-simplesync
+cd peeringdb-simplesync
+
+cp config.py.sample config.py
+editor config.py # change credentials
+
+createdb pdbtest
+psql pdbtest < schema.sql
+```
+
+Usage
+-----
+
+```sh
+./sync.py # initial sync takes about 10 minutes
+```
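+
+Subsequent runs fetch only objects modified since the newest local row,
+so the script is cheap to re-run periodically (e.g. from cron).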
+
+Then try the following query with `psql pdbtest`:
+
+```sql
+SELECT
+ asn,
+ data->>'irr_as_set',
+ (data->>'info_prefixes4')::int pfx4,
+ (data->>'info_prefixes6')::int pfx6
+FROM peeringdb.net ORDER BY asn;
+```
+
+Note: if you add an `info_prefixes4` column to the schema, it will be
+populated automatically on the next sync, just like the `asn` column;
+see the sketch below.
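+
+For example, a minimal sketch (`info_prefixes4` is one of the keys PeeringDB
+returns on `net` objects, as used in the query above):
+
+```sql
+ALTER TABLE peeringdb.net ADD COLUMN info_prefixes4 int;
+```
+
+On its next run, sync.py rediscovers the table's columns and fills the new
+column for every object it fetches; rows that are never updated again keep
+a NULL until re-fetched.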
+
+Known issues
+------------
+
+* Objects deleted from PeeringDB are not deleted from the local database.
+
+Disclaimer
+----------
+
+This tool is not affiliated with PeeringDB.
diff --git a/config.py.sample b/config.py.sample
new file mode 100644
index 0000000..d85e4ef
--- /dev/null
+++ b/config.py.sample
@@ -0,0 +1,8 @@
+from requests.auth import HTTPBasicAuth
+
+def get_config():
+ return {
+ 'db_conn_str': 'dbname=pdbtest',
+ 'db_schema': 'peeringdb',
+ 'auth': HTTPBasicAuth('use_a_dedicated_peeringdb_account', 'and_a_long_random_password'),
+ }
diff --git a/schema.sql b/schema.sql
new file mode 100644
index 0000000..593f4c2
--- /dev/null
+++ b/schema.sql
@@ -0,0 +1,115 @@
+create schema peeringdb;
+
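+-- One table per PeeringDB object type. The full API object is stored in the
+-- jsonb "data" column; any typed column whose name matches a JSON key
+-- (e.g. asn, org_id) is filled from it by sync.py.
+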
+create table peeringdb.org (
+ id int not null,
+ status text not null,
+ data jsonb not null,
+ created timestamptz not null,
+ updated timestamptz not null,
+ deleted timestamptz,
+ primary key (id)
+);
+
+create table peeringdb.net (
+ id int not null,
+ org_id int not null,
+ asn int not null,
+ status text not null,
+ data jsonb not null,
+ created timestamptz not null,
+ updated timestamptz not null,
+ deleted timestamptz,
+ primary key (id)
+);
+
+create table peeringdb.ix (
+ id int not null,
+ org_id int not null,
+ status text not null,
+ data jsonb not null,
+ created timestamptz not null,
+ updated timestamptz not null,
+ deleted timestamptz,
+ primary key (id)
+);
+
+create table peeringdb.fac (
+ id int not null,
+ org_id int not null,
+ status text not null,
+ data jsonb not null,
+ created timestamptz not null,
+ updated timestamptz not null,
+ deleted timestamptz,
+ primary key (id)
+);
+
+create table peeringdb.poc (
+ id int not null,
+ net_id int not null,
+ status text not null,
+ data jsonb not null,
+ created timestamptz not null,
+ updated timestamptz not null,
+ deleted timestamptz,
+ primary key (id)
+);
+
+create table peeringdb.ixlan (
+ id int not null,
+ ix_id int not null,
+ status text not null,
+ data jsonb not null,
+ created timestamptz not null,
+ updated timestamptz not null,
+ deleted timestamptz,
+ primary key (id)
+);
+
+create table peeringdb.ixpfx (
+ id int not null,
+ ixlan_id int not null,
+ status text not null,
+ data jsonb not null,
+ created timestamptz not null,
+ updated timestamptz not null,
+ deleted timestamptz,
+ primary key (id)
+);
+
+create table peeringdb.ixfac (
+ id int not null,
+ ix_id int not null,
+ fac_id int not null,
+ status text not null,
+ data jsonb not null,
+ created timestamptz not null,
+ updated timestamptz not null,
+ deleted timestamptz,
+ primary key (id)
+);
+
+create table peeringdb.netfac (
+ id int not null,
+ net_id int not null,
+ fac_id int not null,
+ status text not null,
+ data jsonb not null,
+ created timestamptz not null,
+ updated timestamptz not null,
+ deleted timestamptz,
+ primary key (id)
+);
+
+create table peeringdb.netixlan (
+ id int not null,
+ net_id int not null,
+ ix_id int not null,
+ ixlan_id int not null,
+ status text not null,
+ data jsonb not null,
+ created timestamptz not null,
+ updated timestamptz not null,
+ deleted timestamptz,
+ primary key (id)
+);
diff --git a/sync.py b/sync.py
new file mode 100755
index 0000000..340b6be
--- /dev/null
+++ b/sync.py
@@ -0,0 +1,150 @@
+#!/usr/bin/env python3
+
+import psycopg2
+from psycopg2 import sql
+psycopg2.extensions.register_type(psycopg2.extensions.UNICODE)
+
+import requests
+from requests.adapters import HTTPAdapter
+from requests.packages.urllib3.util.retry import Retry
+
+import json
+import logging
+import time
+
+from config import get_config
+
+schema_name = get_config()['db_schema']
+
+#logging.basicConfig(level=logging.DEBUG)
+
+def open_db():
+    global conn
+    db_conn_str = get_config()['db_conn_str']
+    conn = psycopg2.connect(db_conn_str)
+
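+# Column discovery: the set of columns in each table decides which JSON keys
+# are pulled out into typed columns (everything else stays in "data" only).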
+column_cache = {}
+def get_columns(table_name):
+    global column_cache
+    if table_name in column_cache:
+        return column_cache[table_name]
+    sql_str = """
+        SELECT column_name FROM information_schema.columns
+        WHERE table_schema = %s AND table_name = %s
+        ORDER BY ordinal_position
+    """
+    cur = conn.cursor()
+    cur.execute(sql_str, (schema_name, table_name))
+    rows = cur.fetchall()
+    ret = [row[0] for row in rows]
+    column_cache[table_name] = ret
+    return ret
+
+def test_table(table_name):
+    cols = get_columns(table_name)
+    try:
+        assert 'id' in cols
+        assert cols[0] == 'id'
+        assert 'updated' in cols
+        assert 'data' in cols
+    except AssertionError:
+        print(f'Invalid or missing table schema for {table_name:s}')
+        raise
+
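+# Build an idempotent upsert: INSERT ... ON CONFLICT (id) DO UPDATE SET,
+# so every non-key column is overwritten from the freshly fetched object.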
+def gen_sql(table_name, cols):
+    updates = [sql.Identifier(col) + sql.SQL(' = EXCLUDED.') + sql.Identifier(col)
+               for col in cols[1:]]
+    comp = sql.SQL("""
+        INSERT INTO {}.{}
+        ({})
+        VALUES ({})
+        ON CONFLICT ({})
+        DO UPDATE SET {};
+    """).format(sql.Identifier(schema_name),
+                sql.Identifier(table_name),
+                sql.SQL(', ').join(map(sql.Identifier, cols)),
+                sql.SQL(', ').join(map(sql.Placeholder, cols)),
+                sql.Identifier(cols[0]),
+                sql.SQL(', ').join(updates))
+    return comp
+
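+# Upsert a single object: typed columns take the matching JSON keys (NULL if
+# absent) and the complete object is stored as jsonb in "data".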
+def update_object(kind, obj):
+    cols = get_columns(kind)
+    comp = gen_sql(kind, cols)
+    t = {}
+    for key in cols:
+        if key in obj:
+            t[key] = obj[key]
+        else:
+            t[key] = None
+    t['data'] = json.dumps(obj)
+    cur = conn.cursor()
+    cur.execute(comp, t)
+
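+# Timestamp of the newest local row, minus one second of overlap so that
+# objects updated within the same second as the last sync are not missed.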
+def last_updated(kind):
+    comp = sql.SQL('SELECT EXTRACT(EPOCH FROM MAX(updated)) - 1 FROM {}.{};'
+                   ).format(sql.Identifier(schema_name), sql.Identifier(kind))
+    cur = conn.cursor()
+    cur.execute(comp)
+    last = cur.fetchone()[0]
+    if last is None:
+        return None
+    return int(last)
+
+def fetch_objects(s, kind, params):
+    endpoint = f'https://peeringdb.com/api/{kind:s}'
+    r = s.get(endpoint, params=params)
+    objs = json.loads(r.text)['data']
+    for obj in objs:
+        update_object(kind, obj)
+    return len(objs)
+
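+# Full sync: walk the id space in windows of `initial_step` ids. Empty
+# windows double the step; after five consecutive empty windows we assume
+# the end of the id space and make one final unbounded fetch.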
+def initial_sync(s, kind):
+    initial_step = 100
+    step = initial_step
+    lows = 0
+    low = 0
+    while lows < 5:
+        high = low + step + 1
+        n = fetch_objects(s, kind, {'id__gt': low, 'id__lt': high, 'depth': 0})
+        low += step
+        if n > 0:
+            step = initial_step
+            lows = 0
+        else:
+            step *= 2
+            lows += 1
+    fetch_objects(s, kind, {'id__gt': low, 'depth': 0})
+
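+# Incremental sync: fetch only objects changed since the last local update.
+# An empty table triggers a full initial_sync first, with `last` set an hour
+# back so nothing changed during the long first run is missed.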
+def sync_table(s, kind):
+    test_table(kind)
+    last = last_updated(kind)
+    if last is None:
+        last = int(time.time()) - 3600
+        initial_sync(s, kind)
+    fetch_objects(s, kind, {'since': last, 'depth': 0})
+
+def main():
+    open_db()
+
+    s = requests.Session()
+    retries = Retry(total=5, backoff_factor=1, status_forcelist=[502, 503, 504])
+    s.mount('https://', HTTPAdapter(max_retries=retries))
+    s.auth = get_config()['auth']
+
+    req_agent = s.headers.get('User-Agent')
+    s.headers.update({'User-Agent': f'peeringdb-simplesync/0.1 {req_agent:s}'})
+    # the apidocs endpoint enumerates every available object type
+    r = s.get('https://peeringdb.com/apidocs/')
+    s.headers.update({'Accept': 'application/json'})
+
+    ignored = ['as_set']
+
+    apidoc = json.loads(r.text)
+    for key in apidoc:
+        if key[0] == '_' or key in ignored:
+            continue
+        try:
+            sync_table(s, key)
+        except AssertionError:
+            print(f'skipping {key:s}...')
+
+    conn.commit()
+
+if __name__ == '__main__':
+    main()