Global Terrorism since 1970

Global Terrorism since 1970

An animation of the development of global terrorism since 1970, built with D3.js (please be patient with the server)

Initialize

1
2
3
4
5
6
7
8
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from matplotlib.ticker import AutoMinorLocator, FuncFormatter
import datetime
import gzip
1
2
3
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Request a large font size for axis labels.
plt.rcParams["axes.labelsize"] = 30
# Apply seaborn's theme with all fonts scaled by 1.8.
# NOTE(review): sns.set() re-applies seaborn's own rc parameters, so it may
# override the axes.labelsize assigned on the line above — confirm the
# intended order of these two statements.
sns.set(font_scale=1.8)
1
2
from IPython.display import display_html

# Button whose click handler toggles the visibility of the notebook's input
# cells, prompts and stderr/error output.
toggle_button_html = """<button onclick="$('.input, .prompt, .output_stderr, .output_error').toggle();">Toggle Code</button>"""
display_html(toggle_button_html, raw=True)

data process

1
# Load the raw CSV export; the file is decoded as ISO-8859-1.
df = pd.read_csv("./static/data/globalterrorismdb_0617dist.csv", encoding='ISO-8859-1')
1
# df.columns.values
1
# Dimensions of the raw table as (rows, columns).
df.shape
(170350, 135)

The dataset has 170350 records and 135 variables.

1
2
3
4
# Columns kept for the analysis: identifier, date parts, location, outcome
# flags, attack descriptors, perpetrator details and casualty counts.
selected_columns = [
    "eventid", "iyear", "imonth", "iday",
    "country_txt", "city", "latitude", "longitude",
    "success", "suicide", "attacktype1_txt", "targtype1_txt", "weaptype1_txt",
    "gname", "motive", "summary", "nkill", "nwound",
]
dfs = df[selected_columns]
1
2
3
4
5
6
# Map the raw column names to the shorter names used in the rest of the notebook.
column_renames = {
    "eventid": "id",
    "iyear": "year",
    "imonth": "month",
    "iday": "day",
    "country_txt": "country",
    "attacktype1_txt": "attacktype",
    "targtype1_txt": "target",
    "weaptype1_txt": "weapon",
    "gname": "group",
    "nkill": "fatalities",
    "nwound": "injuries",
}
dfs = dfs.rename(columns=column_renames)

# Missing casualty counts are treated as zero and stored as integers.
for casualty_column in ("fatalities", "injuries"):
    dfs[casualty_column] = dfs[casualty_column].fillna(0).astype(int)
1
2
3
4
5
# Replace three country spellings with alternative names
# (presumably the names used by the map data — confirm against its consumer).
country_aliases = {
    'United States': 'United States of America',
    'Bosnia-Herzegovina': 'Bosnia and Herzegovina',
    'Slovak Republic': 'Slovakia',
}

# Assign the result back instead of calling replace(..., inplace=True) on the
# selected column: the in-place call operates on an intermediate object and is
# not guaranteed to modify dfs itself.
dfs['country'] = dfs['country'].replace(country_aliases)

statistics

1
# Number of distinct country names in the data.
dfs["country"].nunique(dropna=False)
205

205 countries have experienced terrorist attacks.

1
# Share of records with at least one fatality.
fatal_incidents = dfs[dfs["fatalities"].ne(0)]
len(fatal_incidents) / len(dfs)
0.4563721749339595

45.6% of terrorism activities caused fatalities

1
# Share of records with at least one injury.
injury_incidents = dfs[dfs["injuries"].ne(0)]
len(injury_incidents) / len(dfs)
0.33955385970061636

34.0% of terrorism activities caused injuries

1
# Share of records with at least one injury or fatality.
has_casualties = dfs["injuries"].ne(0) | dfs["fatalities"].ne(0)
len(dfs[has_casualties]) / len(dfs)
0.5911006750807162

59% of terrorism activities caused casualties

1
# Total fatalities over all records (missing counts were filled with 0 earlier).
dfs['fatalities'].sum()
383554

total fatalities in terrorism activities since 1970

1
# Total injuries over all records (missing counts were filled with 0 earlier).
dfs['injuries'].sum()
496117

total injuries in terrorism activities since 1970

Data used for the map animation

1
2
3
4
# Columns needed for the map data set.
map_columns = ['id', 'year', 'latitude', 'longitude',
               'attacktype', 'target', 'summary', 'fatalities', 'injuries']
dfs_map = dfs[map_columns]

# Shorten the coordinate column names.
# NOTE(review): the original mapping also contained 'attachtype': 'attack'.
# No column has that (misspelled) name, so the entry never had any effect and
# the exported records keep the 'attacktype' key. The dead entry is dropped
# here so the output stays exactly as before; rename 'attacktype' on purpose
# only if the consumer of the JSON file expects 'attack'.
dfs_map = dfs_map.rename(columns={'latitude': 'lat', 'longitude': 'lon'})
1
2
3
# intensity is the casualty total plus one, so a record without casualties
# still gets a value of 1; id is overwritten with the row index.
dfs_map = dfs_map.assign(
    intensity=dfs_map['fatalities'] + dfs_map['injuries'] + 1,
    id=dfs_map.index,
)
1
2
3
# Keep only records that have both coordinates and whose coordinates lie
# inside the valid longitude/latitude ranges (bounds inclusive).
has_coordinates = dfs_map["lat"].notnull() & dfs_map["lon"].notnull()
lon_in_range = dfs_map["lon"].between(-180, 180)
lat_in_range = dfs_map["lat"].between(-90, 90)
dfs_map = dfs_map[has_coordinates & lon_in_range & lat_in_range]
1
# Number of records left after the coordinate filters.
len(dfs_map)
165744
1
# Write the map records to disk as JSON, one object per row (orient='records').
dfs_map.to_json("./static/data/global_terrorism_map.json", orient='records')
1
# Largest intensity value in the map data.
dfs_map['intensity'].max()
8750

Visualize

1
2
3
4
def freq_table(df, column):
    """Count how often each value of ``column`` occurs in ``df``.

    Args:
        df: DataFrame that contains ``column``.
        column: Name of the column whose values are counted.

    Returns:
        A DataFrame indexed by the distinct values of ``column`` with a single
        ``"count"`` column, ordered from most to least frequent. Values with
        equal counts keep the index order produced by ``pd.crosstab``.
    """
    counts = pd.crosstab(index=df[column], columns="count")
    # A stable sort keeps tied values in a reproducible order, which matters
    # to callers that keep only the first N rows.
    return counts.sort_values("count", ascending=False, kind="mergesort")
1
2
3
4
5
6
7
8
9
10
11
12
# One small bar chart per binary flag, stacked vertically with a shared x axis.
fig, (ax1, ax2) = plt.subplots(2, figsize=(3, 5), sharex=True)

# Pair each flag with its axes directly rather than looking up "ax1"/"ax2"
# by name through locals().
for flag, ax in zip(["success", "suicide"], (ax1, ax2)):
    frequency_table = freq_table(dfs, flag)
    sns.barplot(y="count", x=frequency_table.index, data=frequency_table, color="grey", ax=ax)

country, city, group frequency

1
2
3
4
5
6
7
8
9
10
11
# For each column draw a horizontal bar chart of its 20 most frequent values,
# leaving out the "Unknown" entry when there is one.
for column in ("country", "city", "group"):
    fig, ax = plt.subplots(figsize=(16, 9))
    ax.set_title(column, fontsize=30)

    ranking = freq_table(dfs, column)
    if "Unknown" in ranking.index:
        ranking = ranking.drop("Unknown")
    top_20 = ranking.iloc[:20]

    sns.barplot(y=top_20.index, x="count", data=top_20, orient="h", color="grey")

attacktype, target, weapon frequency

1
2
3
4
5
6
7
8
# For each column draw a horizontal bar chart of its 5 most frequent values.
for column in ("attacktype", "target", "weapon"):
    fig, ax = plt.subplots(figsize=(16, 3))
    ax.set_title(column, fontsize=30)

    top_5 = freq_table(dfs, column).iloc[:5]

    sns.barplot(y=top_5.index, x="count", data=top_5, orient="h", color="grey")
1
2
3
4
5
# Keep only records whose month is within 1-12 and whose day is within 1-31,
# then combine year/month/day into a single datetime column.
valid_month = dfs["month"].between(1, 12)
valid_day = dfs["day"].between(1, 31)
dfs_time = dfs[valid_month & valid_day].copy()

dfs_time["date"] = pd.to_datetime(dfs_time[["year", "month", "day"]])
1
# Display the dated records.
dfs_time

attacktype, target, weapon frequency time series

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
# Quarterly incident counts for the five most frequent values of each column.
for i in ["attacktype", "target", "weapon"]:
    # The five most frequent categories, ranked over the full data set.
    top = freq_table(dfs, i)[:5].index.values
    dfs_ts = dfs_time[dfs_time[i].isin(top)]

    # Daily counts per category ...
    dfs_ts = dfs_ts.groupby(["date", i])[i].count()
    dfs_ts = dfs_ts.reset_index(level=1, name="count")
    # ... summed per quarter. pd.Grouper is used because pd.TimeGrouper is
    # deprecated and missing from newer pandas releases.
    dfs_ts = dfs_ts.groupby([pd.Grouper(freq='Q'), i])["count"].sum()
    dfs_ts = dfs_ts.reset_index()

    fig, ax = plt.subplots(figsize=(16, 9))
    # Label each x tick with the year of the tick value.
    ax.xaxis.set_major_formatter(FuncFormatter(lambda x, pos: pd.to_datetime(x).year))
    fig.autofmt_xdate()

    # NOTE(review): sns.tsplot was deprecated in seaborn 0.9 and is missing
    # from later releases; moving to sns.lineplot would also require changing
    # the tick formatter above, so it is left for a separate change.
    sns.tsplot(dfs_ts,
               time="date", value="count", unit=i, condition=i,
               color=sns.color_palette("Set1"), ax=ax)

The graphs show that bombings and explosions are the terrorists' preferred method of attack, while private citizens are the most affected target.

success and suicide attack percent time series

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
# Dates of the reference events drawn as dashed vertical lines. Timestamps are
# used instead of raw strings so the values are unambiguous dates on the
# date-based x axis, independent of which unit converter is registered.
sept_11 = pd.Timestamp("2001-09-11")
ahmi_collapse = pd.Timestamp("2007-08-06")
lehman_collapse = pd.Timestamp("2008-09-15")

fig, (ax1, ax2) = plt.subplots(2, figsize=(16, 9), sharex=True)

# Pair each flag with its axes directly rather than looking up "ax1"/"ax2"
# by name through locals().
for i, ax in zip(["success", "suicide"], (ax1, ax2)):
    # Daily counts per flag value, then summed per quarter.
    dfs_ts = dfs_time.groupby(["date", i])[i].count()
    dfs_ts = dfs_ts.reset_index(level=1, name="count")
    # pd.Grouper replaces pd.TimeGrouper, which is deprecated and missing
    # from newer pandas releases.
    dfs_ts = dfs_ts.groupby([pd.Grouper(freq='Q'), i])["count"].sum()

    # Convert the counts to a percentage of each quarter's total. transform
    # keeps the original (date, flag) index, so reset_index below works
    # regardless of how groupby.apply arranges group keys.
    dfs_ts = 100 * dfs_ts / dfs_ts.groupby(level=0).transform("sum")

    dfs_ts = dfs_ts.reset_index()
    # Only the share of records where the flag equals 1 is plotted.
    dfs_ts = dfs_ts[dfs_ts[i] == 1]

    # Major tick every 5 years, labelled with the year.
    ax.xaxis.set_major_locator(mdates.YearLocator(5))
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))
    ax.xaxis.set_minor_locator(AutoMinorLocator())

    # Span whole calendar years around the data.
    datemin = datetime.date(dfs_ts.date.min().year, 1, 1)
    datemax = datetime.date(dfs_ts.date.max().year + 1, 1, 1)
    ax.set_xlim(datemin, datemax)

    for event_date in (sept_11, ahmi_collapse, lehman_collapse):
        ax.axvline(event_date, color="black", linestyle="dashed", alpha=0.6)

    # The annotations are split between the two panels.
    if i == "success":
        ax.text(lehman_collapse, 88, "Lehman Brothers collapse",
                verticalalignment='center',
                rotation=90, size=20)
    else:
        ax.text(sept_11, 10, "911", verticalalignment='center', size=20)
        ax.text(ahmi_collapse, 5, "AHMI collapse",
                verticalalignment='center', horizontalalignment='right',
                rotation=90, size=20)

    ax.plot(dfs_ts["date"], dfs_ts["count"])

    ax.set_title(i + " attack percent", size=20)

fig.autofmt_xdate()

The figures show that the success rate of terrorist attacks has dropped from around 95% to around 80% since about 2010.
Suicide attacks were most common between 9/11 and about 2009 (perhaps because of the president, the financial crisis, or something else).

Casualties in Terrorism

1
2
3
4
5
# Monthly totals of fatalities and injuries, with the month end as a "date"
# column. The columns are selected with a list (tuple-style selection on a
# groupby is rejected by newer pandas) and pd.Grouper replaces the deprecated
# pd.TimeGrouper. Grouping the rows by month directly yields the same sums as
# first summing per day and then per month.
casualty_columns = ["fatalities", "injuries"]
dfs_period_casualties = (
    dfs_time
    .groupby(pd.Grouper(key="date", freq='M'))[casualty_columns]
    .sum()
    .reset_index()
)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
fig, ax = plt.subplots(figsize=(16, 9))

# Monthly fatalities and injuries. The labels are what ax.legend() below
# displays; without them the legend would be empty.
ax.plot(dfs_period_casualties["date"], dfs_period_casualties["fatalities"],
        color='xkcd:red', label="fatalities")
ax.plot(dfs_period_casualties["date"], dfs_period_casualties["injuries"],
        color='xkcd:orange', label="injuries")

# Major tick every 5 years, labelled with the year.
ax.xaxis.set_major_locator(mdates.YearLocator(5))
ax.xaxis.set_major_formatter(mdates.DateFormatter('%Y'))
ax.xaxis.set_minor_locator(AutoMinorLocator())

# Span whole calendar years around the data.
datemin = datetime.date(dfs_period_casualties.date.min().year, 1, 1)
datemax = datetime.date(dfs_period_casualties.date.max().year + 1, 1, 1)
ax.set_xlim(datemin, datemax)
fig.autofmt_xdate()

ax.legend(prop={'size': 20})

# Reference events. Timestamps are used instead of raw strings so the values
# are unambiguous dates on the date-based x axis.
sept_11 = pd.Timestamp("2001-09-11")
ax.axvline(x=sept_11, color="black", linestyle="dashed", alpha=0.6)
ax.text(sept_11, 14000, "911", verticalalignment='center', size=20)

# The remaining events share one annotation style: rotated text placed to the
# left of the dashed line.
rotated_events = [
    ("1995-03-20", "Tokyo subway sarin attack"),
    ("1998-08-07", "1998 United States embassy bombings"),
    ("2011-12-18", "Last U.S. troops withdrew from Iraq"),
]
for date_string, caption in rotated_events:
    event_date = pd.Timestamp(date_string)
    ax.axvline(x=event_date, color="black", linestyle="dashed", alpha=0.6)
    ax.text(event_date, 8000, caption,
            horizontalalignment='right', verticalalignment='center',
            rotation=90, size=20)

ax.set_xlabel('Year', size=20)
ax.set_ylabel('Number', size=20)
ax.set_title('Casualities in Terrorism', size=30)
<matplotlib.text.Text at 0x7fd363bbf710>

Obviously, 9/11 caused the heaviest casualties in the history of terrorism.
It seems that after US troops withdrew from Iraq, terrorist activities caused more and more casualties. The animated map shows that this is mainly due to the Middle East, where the events look less like isolated terrorist attacks and more like a war.

source code