An attempt to teach Open Data members in the Government of Ontario Open Data initiative the use of Cassandra and time series databases, KairosDB specifically. This POC was completed in Python and is open sourced on my GitHub.
13. @
Keys in C*
TWEET
ABOUT US
@VictorFAnjos
@Viafoura
@PlanetCassandra
1. The first part of the composite key [inside the inner brackets] is called
the “Partition Key”; the rest [not inside the inner brackets] are “Cluster
Keys”.
2. Cassandra stores columns differently when composite keys
are used. Partition key becomes row key. Remaining keys are
concatenated with each column name (“:” as separator) to form
column names (cluster keys). Column values remain
unchanged.
3. Cluster keys (other than partition keys) are ordered, and you
are not allowed to search on arbitrary columns; you have to specify
the entire cluster key and can run a range query on the final
portion of it.
#TCUG
14. @
A bit of data modelling
TWEET
ABOUT US
@VictorFAnjos
@Viafoura
@PlanetCassandra
USER ACTIVITY DATA MODEL
-- User activity: username is the partition key and interaction_time the
-- cluster key, so each user's interactions are stored together and, with the
-- descending clustering order, read back newest first.
CREATE TABLE user_activity (
    username varchar,
    interaction_time timeuuid,
    activity_code varchar,
    detail varchar,
    PRIMARY KEY (username, interaction_time)
) WITH CLUSTERING ORDER BY (interaction_time DESC);
-- User activity history: the composite partition key (username,
-- interaction_date) splits one user's activity across several partitions;
-- interaction_time is the cluster key within each partition.
CREATE TABLE user_activity_history (
    username          varchar,
    interaction_date  varchar,
    interaction_time  timeuuid,
    activity_code     varchar,
    detail            varchar,
    PRIMARY KEY ((username, interaction_date), interaction_time)
);
#TCUG
15. @
Data modelling 4 QUERIES
TWEET
ABOUT US
@VictorFAnjos
@Viafoura
@PlanetCassandra
FIND A CAR IN A LOT
-- Lookup index for finding a car in a lot: (make, model, colour) is the
-- composite partition key and vehicle_id the cluster key, so every vehicle
-- matching one make/model/colour combination lives in a single partition.
CREATE TABLE car_location_index (
    make varchar,
    model varchar,
    colour varchar,
    vehicle_id int,
    lot_id int,  -- type was missing on the slide; int matches the inserted values
    PRIMARY KEY ((make, model, colour), vehicle_id)
);
#TCUG
16. @
Data modelling 4 QUERIES
TWEET
ABOUT US
@VictorFAnjos
@Viafoura
@PlanetCassandra
FIND A CAR IN A LOT
Truth(iness) Table
#TCUG
17. @
Data modelling 4 QUERIES
TWEET
ABOUT US
@VictorFAnjos
@Viafoura
@PlanetCassandra
FIND A CAR IN A LOT
-- Index the same vehicle under every combination of known / blank ('') make,
-- model and colour, so a query that only knows some of the attributes can
-- still supply the full partition key by passing '' for the unknown ones.
INSERT INTO car_location_index (make,model,colour,vehicle_id,lot_id)
VALUES ('Ford','Mustang','Blue',1234,8675309);
INSERT INTO car_location_index (make,model,colour,vehicle_id,lot_id)
VALUES ('Ford','Mustang','',1234,8675309);
INSERT INTO car_location_index (make,model,colour,vehicle_id,lot_id)
VALUES ('Ford','','Blue',1234,8675309);
INSERT INTO car_location_index (make,model,colour,vehicle_id,lot_id)
VALUES ('Ford','','',1234,8675309);
INSERT INTO car_location_index (make,model,colour,vehicle_id,lot_id)
VALUES ('','Mustang','Blue',1234,8675309);
INSERT INTO car_location_index (make,model,colour,vehicle_id,lot_id)
VALUES ('','Mustang','',1234,8675309);
INSERT INTO car_location_index (make,model,colour,vehicle_id,lot_id)
VALUES ('','','Blue',1234,8675309);
#TCUG
18. @
Data modelling 4 QUERIES
TWEET
ABOUT US
@VictorFAnjos
@Viafoura
@PlanetCassandra
FIND A CAR IN A LOT
-- Find blue Fords of any model: the unknown model is passed as ''.
SELECT vehicle_id, lot_id
FROM car_location_index
WHERE make = 'Ford'
AND model = ''
AND colour = 'Blue';
vehicle_id | lot_id
--------------+-----------
1234 | 8675309
-- Find blue cars of any make and model: both unknowns are passed as ''.
SELECT vehicle_id, lot_id
FROM car_location_index
WHERE make = ''
AND model = ''
AND colour = 'Blue';
vehicle_id | lot_id
--------------+-----------
1234 | 8675309
8765 | 5551212
#TCUG
19. @
A Bucketized Counter
TWEET
ABOUT US
@VictorFAnjos
@Viafoura
@PlanetCassandra
#TCUG
/**
 * Prepares the per-time-unit bucket statements for every unit in {@code mMetricUnits}.
 *
 * <p>For the {@code Transactional} delivery type an INSERT and a SELECT against
 * {@code metrics_by_<unit>_count} are prepared; for {@code NonTransactional} an UPDATE against
 * {@code metrics_by_<unit>_counter} is prepared. Writes carry a TTL that depends on the unit.
 *
 * @param session the session used to prepare the statements
 */
void prepareTimeBucketStatements(Session session) {
    // TTL in seconds (as text, since it is concatenated into the CQL) for each bucket size.
    Map<TimeUnit, String> ttl =
        ImmutableMap.of(TimeUnit.SECONDS, String.valueOf(TimeUnit.DAYS.toSeconds(2)),
            TimeUnit.MINUTES, String.valueOf(TimeUnit.DAYS.toSeconds(14)),
            TimeUnit.HOURS, String.valueOf(TimeUnit.DAYS.toSeconds(2 * 365)),
            TimeUnit.DAYS, String.valueOf(TimeUnit.DAYS.toSeconds(3 * 365)));
    for (TimeUnit unit : mMetricUnits) {
        // Table names use the singular, lower-case unit ("SECONDS" -> "second"). Lower-casing is
        // pinned to Locale.ROOT so the table name does not depend on the JVM default locale
        // (e.g. a Turkish locale would turn "MINUTES" into "mınutes").
        String pluralName = unit.name().toLowerCase(java.util.Locale.ROOT);
        String unitName = pluralName.substring(0, pluralName.length() - 1);
        switch (mDeliveryType) {
            case Transactional:
                mTimeInsertStatements.put(unit, session.prepare("INSERT INTO metrics_by_"
                    + unitName + "_count (row_section_uuid, row_route_verb, row_parameters, row_tschunk, "
                    + "cluster_response_code, cluster_section_uuid, txid, value)"
                    + " VALUES (?, ?, ?, ?, ?, ?, ?, ?) USING TTL "
                    + ttl.get(unit)));
                mTimeReadStatements.put(unit, session.prepare("SELECT txid, value FROM metrics_by_"
                    + unitName
                    + "_count WHERE row_route_verb = ? AND row_parameters = ? AND row_section_uuid = ? "
                    + "AND row_tschunk = ? AND cluster_response_code = ? AND cluster_section_uuid = ?"));
                break;
            case NonTransactional:
                // NOTE(review): Cassandra documents TTL as unsupported for counter columns;
                // confirm that this UPDATE ... USING TTL prepares on the target cluster version.
                mTimeUpdateStatements.put(unit, session.prepare("UPDATE metrics_by_"
                    + unitName + "_counter USING TTL " + ttl.get(unit)
                    + " SET value = value + ? WHERE row_route_verb = ? AND row_parameters = ? AND row_section_uuid = ? AND "
                    + "row_tschunk = ? AND cluster_response_code = ? AND cluster_section_uuid = ?"));
                break;
            default:
                // Any other delivery type prepares nothing, as before.
                break;
        }
    }
}
/**
 * Prepares the INSERT statement for the {@code metrics} table and stores it in {@code mStatement}.
 *
 * @param session the session used to prepare the statement
 */
void prepareMetricStatement(Session session) {
    final String columns = "row_route_verb, row_parameters, row_section_uuid, "
        + "row_tschunk, cluster_response_code, cluster_ts, route, verb, parameters, response_time";
    // One bind marker per column listed above.
    final String cql = "INSERT INTO metrics (" + columns + ") VALUES "
        + "(?, ?, ?, ?, ?, ?, ?, ?, ?, ?)";
    mStatement = session.prepare(cql);
}
21. @
JAVA to KairosDB
TWEET
ABOUT US
@VictorFAnjos
@Viafoura
@PlanetCassandra
#TCUG
/**
 * {@link Writer} that synchronously POSTs each batch of datapoints, serialized to JSON with Gson,
 * to {@code http://<Writer.kairosHost>:<Writer.kairosPort>/api/v1/datapoints}.
 *
 * <p>Delivery is best-effort: failures are printed via {@code printStackTrace()} and the batch is
 * dropped; no exception is propagated to the caller for the handled failure types.
 */
public class KairosSynchronousWriter implements Writer {
    private final Gson mGson;
    private final HttpClient mClient = new DefaultHttpClient();
    private final String mKairosHost;
    private final String mKairosPort;

    /**
     * @param config configuration providing {@code Writer.kairosHost} and {@code Writer.kairosPort}
     */
    public KairosSynchronousWriter(VfConfig config) {
        GsonBuilder gsonBuilder = new GsonBuilder();
        gsonBuilder.registerTypeAdapter(Datapoint.class, new Datapoint.DatapointJsonSerializer());
        mGson = gsonBuilder.create();
        mKairosHost = config.getString("Writer.kairosHost");
        mKairosPort = config.getString("Writer.kairosPort");
    }

    /**
     * Serializes {@code results} to JSON and POSTs it to KairosDB in a single request.
     *
     * @param results the datapoints to send
     */
    @Override
    public void enqueue(Collection<Datapoint> results) {
        final HttpPost post;
        try {
            post = new HttpPost("http://" + mKairosHost + ":" + mKairosPort + "/api/v1/datapoints");
        } catch (URISyntaxException e) {
            e.printStackTrace();
            // Without a request there is nothing to send; previously this fell through to a
            // NullPointerException on post.setEntity(...).
            return;
        }

        final StringEntity input;
        try {
            input = new StringEntity(mGson.toJson(results));
            input.setContentType("application/json");
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
            // Do not POST an empty body when the payload could not be built.
            return;
        }
        post.setEntity(input);

        try {
            HttpResponse response = mClient.execute(post);
            // Drain the response body so the underlying connection is released and the shared
            // client can be reused for the next batch.
            if (response != null && response.getEntity() != null) {
                response.getEntity().consumeContent();
            }
        } catch (HttpException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
22. @
JAVA/KairosDB Monitoring
TWEET
ABOUT US
@VictorFAnjos
@Viafoura
@PlanetCassandra
#TCUG
public class MonitoringClient {
private final String mHostName;
private final VfConfig mConfig;
private final AggregatorPool mAggregatorPool;
private final Writer mWriter;
private boolean isActive = true;
public enum AggregationType {
...
}
/**
 * Creates a client from a {@code VfConfig} built with the two {@code MonitoringClient.properties}
 * names below, delegating to {@link #MonitoringClient(VfConfig)}.
 */
public MonitoringClient() {
this(new VfConfig("MonitoringClient.properties", "VfMonitoringClient/MonitoringClient.properties"));
}
public MonitoringClient(VfConfig config) {
mConfig = config;
try {
mHostName = InetAddress.getLocalHost().getHostName();
} catch (UnknownHostException e) {
throw new RuntimeException("Unable to initialize Monitoring client", e);
}
mWriter = createWriter();
mAggregatorPool = new AggregatorPool(mConfig, mWriter);
}
/**
 * Enqueues one datapoint, timestamped with the current time in milliseconds, into the
 * aggregator pool. Does nothing when the client is not active.
 *
 * @param metricName name of the metric
 * @param value      the measured value
 * @param type       the aggregation type stored with the datapoint
 * @param tags       tags passed to {@code makeTagMap} to build the datapoint key
 */
public void record(String metricName, double value, AggregationType type, String[] tags) {
    if (!isActive) {
        return;
    }
    final DatapointKey key = new DatapointKey(metricName, makeTagMap(tags));
    final Datapoint datapoint = new Datapoint(key, value, System.currentTimeMillis(), type);
    mAggregatorPool.enqueueInput(datapoint);
}
/**
 * Creates the writer selected by the {@code Writer.type} setting: {@code log} gives a
 * {@code LogWriter}, {@code kairosSync} a {@code KairosSynchronousWriter}.
 *
 * @return the configured writer
 * @throws RuntimeException if {@code Writer.type} is neither {@code log} nor {@code kairosSync}
 */
private Writer createWriter() {
    String type = mConfig.getString("Writer.type");
    // Constant-first comparisons: should getString yield null for an unset key (not visible
    // here), the result is the descriptive configuration error below rather than a bare
    // NullPointerException.
    if ("log".equals(type)) {
        return new LogWriter();
    }
    if ("kairosSync".equals(type)) {
        return new KairosSynchronousWriter(mConfig);
    }
    throw new RuntimeException(
        "Invalid configuration: Writer.type given invalid value, valid values are: kairosSync, log");
}
23. @
PYTHON to KairosDB
TWEET
ABOUT US
@VictorFAnjos
@Viafoura
@PlanetCassandra
#TCUG
def pushToKairos(metrics, base_url='http://localhost:8080/api/v1/datapoints', timeout=10):
    """
    Let's push into KairosDB

    Data will come in as such:
    metrics: {
        'name' : 'filterList:<overall|entityName>:<entity|count>',
        'timestamp' : <timestamp>,
        'value' : <somevalue>,
        'tags' : {
            'filter|user1' : <filter|user1>,
            ...
            'filter|userN' : <filter|userN>,
            'entity1' : <entity1>,
            ...
            'entityN' : <entityN>,
            ...
            'textSentiment' : <positive|negative|neutral>
        }
    }

    :param metrics: the metric payload; it is serialized with json.dumps and sent as the POST body
    :param base_url: datapoints endpoint of YOUR KairosDB installation
                     (defaults to a local instance on port 8080)
    :param timeout: seconds to wait for KairosDB before requests raises a timeout error
    :return: the response object returned by requests.post
    """
    import json, requests

    return requests.post(
        url=base_url,
        data=json.dumps(metrics),
        # The body is JSON, so say so explicitly.
        headers={'Content-Type': 'application/json'},
        # Without a timeout a hung KairosDB would block the caller forever.
        timeout=timeout,
    )
24. @
KairosDB Twitter Sentiment
TWEET
ABOUT US
@VictorFAnjos
@Viafoura
@PlanetCassandra
#TCUG
metrics_base = {
'name' : '_'.join(self.filters) + '/overall/sentiment',
'timestamp' : time_queried,
'value' : sentiment_score
}
metrics_entity = {}
if entities:
for entity in entities:
for what in ['sentiment', 'count', 'relevance']:
what_name = what if what != 'sentiment' else 'entity_sentiment'
value = entity[what] if 'score' not in entity[what] else entity[what]['score']
print 'What_name: ', what_name, ' value: ', value, ' from: ', entity[what], 'n'
metrics_entity = {
'name' : '_'.join(self.filters) + '/' + entity['text'].lower().replace(' ', '_') + '/' + what_name,
'timestamp' : time_queried,
'value' : value if value and type(value) is not dict else 0
}
for eachtype in entity['type']:
tags = {'type': eachtype }
metrics.append(
dict(metrics_entity, **{'tags': tags})
)
if 'type' in entity[what]:
tags = {'textSentiment': entity[what]['type'] }
metrics.append(
dict(metrics_entity, **{'tags': tags})
)
for filter in self.filters:
tags = {'filter':filter}
tags['textSentiment'] = sentiment_type if sentiment_type else 'not_applicable'
metrics.append(
dict(metrics_base, **{'tags': tags})
)
for individual_metric in metrics:
status = pushToKairos(individual_metric)
if status.status_code != 204:
raise Exception('KairosDB Issue...', status.text)
25. @
All rolled into ONE!!!
TWEET
ABOUT US
@VictorFAnjos
@Viafoura
@PlanetCassandra
https://gist.github.com/vanjos/6169734
Install CCM
Install KairosDB
https://code.google.com/p/kairosdb/wiki/GettingStarted
#TCUG
26. @
EMPTY SLIDE
TWEET
ABOUT US
@VictorFAnjos
@Viafoura
@PlanetCassandra
#TCUG
- overview of why real-time
- show some data modeling
- show a use for logging (our own Storm code)
- show a use for a/b testing (our API counters)
- show a use for debugging (our API counters)
- show KairosDB
- describe some features
- show some visualizations (using Alchemy & twitter)
- conclude with Gists
- announce next meetup with Calliope